Messing with audio
This commit is contained in:
parent
bf46a45f89
commit
d69037ff41
@@ -362,46 +362,121 @@ class SyntheticAudioTrack(MediaStreamTrack):
         return pts, time_base
 
     async def recv(self):
+        """
+        Generate audio frame with position-based tone and bounce effects.
+
+        Audio Processing Pipeline:
+        1. Base tone generation (frequency based on ball Y position)
+        2. Bounce effect generation (separate, centered audio)
+        3. Stereo panning (applied to base tone only)
+        4. Volume compensation (based on ball Y position)
+        5. Audio mixing and clipping prevention
+        6. Final conversion to int16 stereo format
+        """
         pts, time_base = await self.next_timestamp()
 
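Note on the new docstring: recv() is the pull point of an aiortc-style MediaStreamTrack, so every await yields one frame's worth of stereo audio. A minimal consumer sketch (the function name drain and the frame count are illustrative, not from this commit):

    async def drain(track, n_frames=10):
        # Each recv() returns one av.AudioFrame covering samples_per_frame samples
        for _ in range(n_frames):
            frame = await track.recv()
            print(frame.pts, frame.sample_rate, frame.samples)
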
-        # --- 1. Generate base tone based on ball Y position ---
+        # --- 1. TONE GENERATION: Create base frequency tone based on ball Y position ---
+        # Frequency mapping: Top of screen = high pitch (400Hz), bottom = low pitch (200Hz)
         if self.video_track:
-            base_freq = self._get_ball_frequency()
+            base_freq = self._get_ball_frequency()  # 200-400Hz range
         else:
-            base_freq = 440.0  # default if no video track
+            base_freq = 440.0  # default A4 if no video track
 
+        # Generate sine wave at calculated frequency
         t = (np.arange(self.samples_per_frame) + pts) / self.sample_rate
-        samples = np.sin(2 * np.pi * base_freq * t).astype(np.float32)
+        base_samples = np.sin(2 * np.pi * base_freq * t).astype(np.float32)
 
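One detail worth flagging in this hunk: offsetting t by pts keeps the sine phase continuous from frame to frame while base_freq holds steady, so frame boundaries don't click. A standalone sketch with assumed values (48 kHz, 960-sample frames; neither number comes from this diff):

    import numpy as np

    sr, n, freq = 48000, 960, 440.0
    frame0 = np.sin(2 * np.pi * freq * (np.arange(n) + 0) / sr)  # pts = 0
    frame1 = np.sin(2 * np.pi * freq * (np.arange(n) + n) / sr)  # pts advanced by n
    # The concatenation lies on one continuous sine -- no boundary discontinuity
    seamless = np.concatenate([frame0, frame1])
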
-        # --- 2. Add bounce sound effect if triggered ---
-        if getattr(self, "just_bounced", False):
-            logger.info("Audio: Generating bounce sound effect")
-            tb = np.arange(self.samples_per_frame) / self.sample_rate
-            bounce_freq = 600.0  # Hz
-            bounce_env = np.exp(-tb * 20.0)  # fast exponential decay
-            bounce_wave = 0.4 * np.sin(2 * np.pi * bounce_freq * tb) * bounce_env
-            samples = samples + bounce_wave.astype(np.float32)
-            self.just_bounced = False
-
-        # --- 3. Stereo panning based on X position ---
+        # --- 2. BOUNCE EFFECTS: Generate separate bounce sound effects (centered audio) ---
+        # Bounce effects are generated independently to avoid being affected by panning
+        bounce_samples = np.zeros(self.samples_per_frame, dtype=np.float32)
+        current_time_s = self.clock.now()
+        current_sample = int(current_time_s * self.sample_rate)
+
+        for bounce in self._active_bounces:
+            if bounce["start_sample"] <= current_sample < bounce["end_sample"]:
+                # Calculate relative time within this specific bounce event
+                sample_offset = current_sample - bounce["start_sample"]
+                bounce_t = sample_offset / self.sample_rate
+
+                # Generate bounce waveform: 600Hz tone with exponential decay envelope
+                tb = np.arange(self.samples_per_frame) / self.sample_rate + bounce_t
+                bounce_freq = 600.0  # Hz (higher than base tone for clarity)
+                bounce_env = np.exp(
+                    -tb * 20.0
+                )  # Fast exponential decay (20.0 = decay rate)
+                bounce_wave = (
+                    0.8 * np.sin(2 * np.pi * bounce_freq * tb) * bounce_env
+                )  # 0.8 = bounce amplitude (80% of full scale)
+
+                # Limit bounce duration to prevent runaway effects
+                valid_samples = tb < 0.2  # 200ms maximum bounce duration
+                bounce_wave[~valid_samples] = 0
+
+                # Accumulate bounce effects (multiple bounces can overlap)
+                bounce_samples = bounce_samples + bounce_wave.astype(np.float32)
+
+        # Clean up expired bounce events to prevent memory accumulation
+        self._active_bounces = [
+            bounce
+            for bounce in self._active_bounces
+            if bounce["end_sample"] > current_sample
+        ]
 
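The loop above consumes bounce events as dicts with start_sample/end_sample keys; whatever enqueues them lives outside this hunk. A hypothetical producer, assuming self.clock.now() returns seconds (the method name on_bounce and the 200 ms lifetime are illustrative only):

    def on_bounce(self):
        # Convert wall-clock seconds to an absolute sample index
        start = int(self.clock.now() * self.sample_rate)
        self._active_bounces.append({
            "start_sample": start,
            "end_sample": start + int(0.2 * self.sample_rate),  # matches the 200ms envelope cutoff
        })
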
+        # --- 3. STEREO PANNING: Apply left/right positioning to base tone only ---
+        # Pan calculation: 0.0 = full left, 0.5 = center, 1.0 = full right
         if self.video_track:
-            pan = self.video_track.ball["x"] / self.video_track.width
+            pan = (
+                self.video_track.ball["x"] / self.video_track.width
+            )  # Normalize to 0-1
         else:
-            pan = 0.5  # center if no video
-        left_gain = math.cos(pan * math.pi / 2)
-        right_gain = math.sin(pan * math.pi / 2)
+            pan = 0.5  # Center positioning if no video track
 
-        # --- 4. Volume scaling based on Y position ---
+        # Equal-power panning: maintains perceived loudness across stereo field
+        left_gain = math.cos(pan * math.pi / 2)  # Left channel gain (1.0 to 0.0)
+        right_gain = math.sin(pan * math.pi / 2)  # Right channel gain (0.0 to 1.0)
 
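The cos/sin pair is the standard equal-power pan law: left_gain² + right_gain² = 1 at every pan position, so total power stays constant as the ball crosses the screen. A quick self-contained check:

    import math

    for pan in (0.0, 0.25, 0.5, 0.75, 1.0):
        left = math.cos(pan * math.pi / 2)
        right = math.sin(pan * math.pi / 2)
        assert abs(left**2 + right**2 - 1.0) < 1e-12  # constant total power
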
+        # --- 4. VOLUME COMPENSATION: Apply Y-position based volume scaling ---
+        # Volume scaling compensates for perceptual frequency/amplitude relationship
         if self.video_track:
-            volume = (1.0 - (self.video_track.ball["y"] / self.video_track.height)) ** 2
+            # Quadratic scaling: top = loud (1.0), bottom = quiet (approaching 0.0)
+            # Formula: (1 - normalized_y)² provides smooth, natural volume curve
+            normalized_y = self.video_track.ball["y"] / self.video_track.height
+            volume = (1.0 - normalized_y) ** 2  # Squared for more dramatic effect
         else:
-            volume = 1.0
+            volume = 1.0  # Full volume if no video track
 
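Worked values for the quadratic curve: relative to linear scaling, (1 - normalized_y)² keeps the top of the screen loud and rolls off steeply toward the bottom:

    for y in (0.0, 0.25, 0.5, 0.75, 1.0):  # normalized ball Y
        print(f"y={y:.2f} -> volume={(1.0 - y) ** 2:.4f}")
    # y=0.00 -> 1.0000, y=0.25 -> 0.5625, y=0.50 -> 0.2500,
    # y=0.75 -> 0.0625, y=1.00 -> 0.0000
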
-        # --- 5. Apply gain and convert to int16 ---
-        left = (samples * left_gain * volume * 32767).astype(np.int16)
-        right = (samples * right_gain * volume * 32767).astype(np.int16)
-
-        # --- 6. Interleave channels for s16 format (samples arranged as [L, R, L, R, ...]) ---
+        # --- 5. AUDIO MIXING: Combine panned base tone with centered bounce effects ---
+        # Base tone: Apply stereo panning and volume compensation
+        left_base = base_samples * left_gain * volume
+        right_base = base_samples * right_gain * volume
+
+        # Final mix: Add bounce effects equally to both channels (no panning)
+        # This keeps bounce effects prominent and centered regardless of ball position
+        left_total = left_base + bounce_samples
+        right_total = right_base + bounce_samples
 
+        # --- 6. CLIPPING PREVENTION: Dynamic normalization with headroom management ---
+        # Check peak amplitude across both channels to detect potential clipping
+        max_left = np.max(np.abs(left_total))
+        max_right = np.max(np.abs(right_total))
+        max_amplitude = max(max_left, max_right)
+
+        # HEADROOM: Maintain 5% safety margin (0.95 threshold) to prevent digital artifacts
+        if max_amplitude > 0.95:  # Threshold chosen to leave headroom for codec/DAC
+            # NORMALIZATION: Scale down entire signal to prevent clipping while preserving dynamics
+            normalization_factor = 0.95 / max_amplitude  # Proportional scaling
+            left_total *= normalization_factor
+            right_total *= normalization_factor
+            logger.debug(
+                f"Audio normalization applied: peak={max_amplitude:.3f}, factor={normalization_factor:.3f}"
+            )
+
+        # FINAL CONVERSION: Convert to int16 with hard clipping as ultimate safety net
+        # np.clip ensures values never exceed the int16 range (-32768 to 32767)
+        left = np.clip(left_total * 32767, -32767, 32767).astype(np.int16)
+        right = np.clip(right_total * 32767, -32767, 32767).astype(np.int16)
 
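A worked instance of the normalization branch: a near-full-scale base tone plus an overlapping 0.8-amplitude bounce can push the float mix past 1.0; scaling both channels by the same factor brings the peak to exactly 0.95 without disturbing the stereo balance (the sample values below are made up for illustration):

    import numpy as np

    left_total = np.array([0.9, -1.6, 0.4], dtype=np.float32)
    right_total = np.array([0.3, -1.1, 0.2], dtype=np.float32)
    peak = max(np.max(np.abs(left_total)), np.max(np.abs(right_total)))  # 1.6
    if peak > 0.95:
        factor = 0.95 / peak      # 0.59375
        left_total *= factor      # new peak: exactly 0.95
        right_total *= factor     # same factor preserves the L/R ratio
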
+        # --- 7. Interleave channels for s16 format (samples arranged as [L, R, L, R, ...]) ---
         # Create interleaved array: [left[0], right[0], left[1], right[1], ...]
         interleaved = np.empty(self.samples_per_frame * 2, dtype=np.int16)
         interleaved[0::2] = left  # Even indices get left channel
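The rendered diff is cut off after the left-channel assignment. A typical completion under aiortc/PyAV conventions (an assumption about the missing lines, not text recovered from this commit; it presumes import av at module scope) fills the odd indices and packs the buffer into a frame:

    interleaved[1::2] = right  # Odd indices get right channel

    # PyAV expects packed s16 stereo as shape (1, samples * channels)
    frame = av.AudioFrame.from_ndarray(
        interleaved.reshape(1, -1), format="s16", layout="stereo"
    )
    frame.sample_rate = self.sample_rate
    frame.pts = pts
    frame.time_base = time_base
    return frame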