Messing with audio
parent bf46a45f89
commit d69037ff41
@@ -362,46 +362,121 @@ class SyntheticAudioTrack(MediaStreamTrack):
         return pts, time_base
 
     async def recv(self):
+        """
+        Generate audio frame with position-based tone and bounce effects.
+
+        Audio Processing Pipeline:
+        1. Base tone generation (frequency based on ball Y position)
+        2. Bounce effect generation (separate, centered audio)
+        3. Stereo panning (applied to base tone only)
+        4. Volume compensation (based on ball Y position)
+        5. Audio mixing and clipping prevention
+        6. Final conversion to int16 stereo format
+        """
         pts, time_base = await self.next_timestamp()
 
-        # --- 1. Generate base tone based on ball Y position ---
+        # --- 1. TONE GENERATION: Create base frequency tone based on ball Y position ---
+        # Frequency mapping: Top of screen = high pitch (400Hz), bottom = low pitch (200Hz)
         if self.video_track:
-            base_freq = self._get_ball_frequency()
+            base_freq = self._get_ball_frequency()  # 200-400Hz range
         else:
-            base_freq = 440.0  # default if no video track
+            base_freq = 440.0  # default A4 if no video track
 
+        # Generate sine wave at calculated frequency
         t = (np.arange(self.samples_per_frame) + pts) / self.sample_rate
-        samples = np.sin(2 * np.pi * base_freq * t).astype(np.float32)
+        base_samples = np.sin(2 * np.pi * base_freq * t).astype(np.float32)
 
-        # --- 2. Add bounce sound effect if triggered ---
-        if getattr(self, "just_bounced", False):
-            logger.info("Audio: Generating bounce sound effect")
-            tb = np.arange(self.samples_per_frame) / self.sample_rate
-            bounce_freq = 600.0  # Hz
-            bounce_env = np.exp(-tb * 20.0)  # fast exponential decay
-            bounce_wave = 0.4 * np.sin(2 * np.pi * bounce_freq * tb) * bounce_env
-            samples = samples + bounce_wave.astype(np.float32)
-            self.just_bounced = False
+        # --- 2. BOUNCE EFFECTS: Generate separate bounce sound effects (centered audio) ---
+        # Bounce effects are generated independently to avoid being affected by panning
+        bounce_samples = np.zeros(self.samples_per_frame, dtype=np.float32)
+        current_time_s = self.clock.now()
+        current_sample = int(current_time_s * self.sample_rate)
+
-        # --- 3. Stereo panning based on X position ---
+        for bounce in self._active_bounces:
+            if bounce["start_sample"] <= current_sample < bounce["end_sample"]:
+                # Calculate relative time within this specific bounce event
+                sample_offset = current_sample - bounce["start_sample"]
+                bounce_t = sample_offset / self.sample_rate
+
+                # Generate bounce waveform: 600Hz tone with exponential decay envelope
+                tb = np.arange(self.samples_per_frame) / self.sample_rate + bounce_t
+                bounce_freq = 600.0  # Hz (higher than base tone for clarity)
+                bounce_env = np.exp(
+                    -tb * 20.0
+                )  # Fast exponential decay (20.0 = decay rate)
+                bounce_wave = (
+                    0.8 * np.sin(2 * np.pi * bounce_freq * tb) * bounce_env
+                )  # 0.8 = bounce amplitude (80% of full scale)
+
+                # Limit bounce duration to prevent runaway effects
+                valid_samples = tb < 0.2  # 200ms maximum bounce duration
+                bounce_wave[~valid_samples] = 0
+
+                # Accumulate bounce effects (multiple bounces can overlap)
+                bounce_samples = bounce_samples + bounce_wave.astype(np.float32)
+
+        # Clean up expired bounce events to prevent memory accumulation
+        self._active_bounces = [
+            bounce
+            for bounce in self._active_bounces
+            if bounce["end_sample"] > current_sample
+        ]
+
+        # --- 3. STEREO PANNING: Apply left/right positioning to base tone only ---
+        # Pan calculation: 0.0 = full left, 0.5 = center, 1.0 = full right
         if self.video_track:
-            pan = self.video_track.ball["x"] / self.video_track.width
+            pan = (
+                self.video_track.ball["x"] / self.video_track.width
+            )  # Normalize to 0-1
         else:
-            pan = 0.5  # center if no video
-        left_gain = math.cos(pan * math.pi / 2)
-        right_gain = math.sin(pan * math.pi / 2)
+            pan = 0.5  # Center positioning if no video track
 
-        # --- 4. Volume scaling based on Y position ---
+        # Equal-power panning: maintains perceived loudness across stereo field
+        left_gain = math.cos(pan * math.pi / 2)  # Left channel gain (1.0 to 0.0)
+        right_gain = math.sin(pan * math.pi / 2)  # Right channel gain (0.0 to 1.0)
+
+        # --- 4. VOLUME COMPENSATION: Apply Y-position based volume scaling ---
+        # Volume scaling compensates for perceptual frequency/amplitude relationship
         if self.video_track:
-            volume = (1.0 - (self.video_track.ball["y"] / self.video_track.height)) ** 2
+            # Quadratic scaling: top = loud (1.0), bottom = quiet (approaching 0.0)
+            # Formula: (1 - normalized_y)² provides smooth, natural volume curve
+            normalized_y = self.video_track.ball["y"] / self.video_track.height
+            volume = (1.0 - normalized_y) ** 2  # Squared for more dramatic effect
         else:
-            volume = 1.0
+            volume = 1.0  # Full volume if no video track
 
-        # --- 5. Apply gain and convert to int16 ---
-        left = (samples * left_gain * volume * 32767).astype(np.int16)
-        right = (samples * right_gain * volume * 32767).astype(np.int16)
+        # --- 5. AUDIO MIXING: Combine panned base tone with centered bounce effects ---
+        # Base tone: Apply stereo panning and volume compensation
+        left_base = base_samples * left_gain * volume
+        right_base = base_samples * right_gain * volume
+
-        # --- 6. Interleave channels for s16 format (samples arranged as [L, R, L, R, ...]) ---
+        # Final mix: Add bounce effects equally to both channels (no panning)
+        # This keeps bounce effects prominent and centered regardless of ball position
+        left_total = left_base + bounce_samples
+        right_total = right_base + bounce_samples
+
+        # --- 6. CLIPPING PREVENTION: Dynamic normalization with headroom management ---
+        # Check peak amplitude across both channels to detect potential clipping
+        max_left = np.max(np.abs(left_total))
+        max_right = np.max(np.abs(right_total))
+        max_amplitude = max(max_left, max_right)
+
+        # HEADROOM: Maintain 5% safety margin (0.95 threshold) to prevent digital artifacts
+        if max_amplitude > 0.95:  # Threshold chosen to leave headroom for codec/DAC
+            # NORMALIZATION: Scale down entire signal to prevent clipping while preserving dynamics
+            normalization_factor = 0.95 / max_amplitude  # Proportional scaling
+            left_total *= normalization_factor
+            right_total *= normalization_factor
+            logger.debug(
+                f"Audio normalization applied: peak={max_amplitude:.3f}, factor={normalization_factor:.3f}"
+            )
+
+        # FINAL CONVERSION: Convert to int16 with hard clipping as ultimate safety net
+        # np.clip ensures values never exceed int16 range (-32768 to 32767)
+        left = np.clip(left_total * 32767, -32767, 32767).astype(np.int16)
+        right = np.clip(right_total * 32767, -32767, 32767).astype(np.int16)
+
+        # --- 7. Interleave channels for s16 format (samples arranged as [L, R, L, R, ...]) ---
+        # Create interleaved array: [left[0], right[0], left[1], right[1], ...]
         interleaved = np.empty(self.samples_per_frame * 2, dtype=np.int16)
+        interleaved[0::2] = left  # Even indices get left channel
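A note on the frequency mapping: the hunk calls self._get_ball_frequency(), whose body is outside this diff. Going by the comments (top = 400Hz, bottom = 200Hz), a minimal sketch of such a mapping might look like the following; the linear interpolation and the function name ball_frequency are assumptions, not the repository's code:

def ball_frequency(ball_y, height, f_low=200.0, f_high=400.0):
    # 0.0 = top of screen, 1.0 = bottom (screen y grows downward)
    normalized_y = ball_y / height
    # Linear map: top -> f_high (400Hz), bottom -> f_low (200Hz)
    return f_high - (f_high - f_low) * normalized_y

# e.g. a ball at mid-height of a 480px frame: ball_frequency(240, 480) -> 300.0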
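The tone generator offsets its time axis by pts, so each frame continues the sine wave exactly where the previous frame ended and there is no click at frame boundaries, provided base_freq does not change between frames. A standalone illustration, with sample_rate=48000 and samples_per_frame=960 as assumed values:

import numpy as np

sample_rate = 48000
samples_per_frame = 960

def tone_frame(pts, freq=300.0):
    # pts is the running sample counter; adding it keeps the phase continuous
    t = (np.arange(samples_per_frame) + pts) / sample_rate
    return np.sin(2 * np.pi * freq * t).astype(np.float32)

first = tone_frame(0)
second = tone_frame(samples_per_frame)
# second[0] is the next point on the same sine curve after first[-1]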
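recv() only consumes self._active_bounces; the producer side that appends events is not in this hunk. A hypothetical producer (register_bounce is an invented name) that would fit the start_sample/end_sample schema used above:

def register_bounce(active_bounces, now_s, sample_rate, duration_s=0.2):
    # Address the event in absolute samples so recv() can window it per frame
    start_sample = int(now_s * sample_rate)
    active_bounces.append(
        {
            "start_sample": start_sample,
            "end_sample": start_sample + int(duration_s * sample_rate),
        }
    )

bounces = []
register_bounce(bounces, now_s=1.5, sample_rate=48000)
# -> [{'start_sample': 72000, 'end_sample': 81600}]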
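Why cos/sin gains rather than a linear crossfade: cos²(x) + sin²(x) = 1, so the summed acoustic power stays constant at every pan position and the tone never dips in loudness as the ball crosses the screen. A quick check:

import math

for pan in (0.0, 0.25, 0.5, 1.0):
    left_gain = math.cos(pan * math.pi / 2)
    right_gain = math.sin(pan * math.pi / 2)
    print(f"pan={pan:.2f}  L={left_gain:.3f}  R={right_gain:.3f}  "
          f"power={left_gain ** 2 + right_gain ** 2:.3f}")

# pan=0.00  L=1.000  R=0.000  power=1.000
# pan=0.25  L=0.924  R=0.383  power=1.000
# pan=0.50  L=0.707  R=0.707  power=1.000
# pan=1.00  L=0.000  R=1.000  power=1.000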
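The tail of the hunk (normalize, hard-clip, interleave) condenses to a small standalone function; the frame values below are illustrative only, not taken from the repository:

import numpy as np

def normalize_and_interleave(left, right, headroom=0.95):
    # Scale both channels by the same factor so the stereo image is preserved
    peak = max(np.max(np.abs(left)), np.max(np.abs(right)))
    if peak > headroom:
        left = left * (headroom / peak)
        right = right * (headroom / peak)
    left_i16 = np.clip(left * 32767, -32767, 32767).astype(np.int16)
    right_i16 = np.clip(right * 32767, -32767, 32767).astype(np.int16)
    out = np.empty(left.size * 2, dtype=np.int16)  # [L, R, L, R, ...]
    out[0::2] = left_i16   # even indices: left channel
    out[1::2] = right_i16  # odd indices: right channel
    return out

t = np.arange(960) / 48000.0
loud = 1.3 * np.sin(2 * np.pi * 300 * t)  # would clip without normalization
frame = normalize_and_interleave(loud, loud)
assert np.abs(frame).max() <= int(0.95 * 32767) + 1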