Messing with audio

James Ketr 2025-09-01 19:32:57 -07:00
parent fb0ce3f203
commit bf46a45f89


@@ -6,17 +6,36 @@ Contains AnimatedVideoTrack and SyntheticAudioTrack implementations ported from
"""
import numpy as np
import math
import cv2
import fractions
import time
import random
from typing import TypedDict
from av.audio.frame import AudioFrame
from asyncio import Queue, create_task, sleep
from typing import TypedDict, TYPE_CHECKING
from aiortc import MediaStreamTrack
from av import VideoFrame, AudioFrame
from av import VideoFrame
from logger import logger
if TYPE_CHECKING:
pass

# Shared clock
from time import perf_counter


class MediaClock:
    """Monotonic clock shared by the audio and video tracks"""

    def __init__(self):
        self.t0 = perf_counter()

    def now(self) -> float:
        return perf_counter() - self.t0
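

# Usage sketch: both tracks are constructed with the same MediaClock so that
# video frame pacing and audio bounce timestamps share one time origin, e.g.:
#
#     clock = MediaClock()
#     video = AnimatedVideoTrack(clock=clock, name="demo")
#     audio = SyntheticAudioTrack(clock=clock, video_track=video)
#     elapsed = clock.now()  # seconds since the shared origin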


class BounceEvent(TypedDict):
    """Type definition for bounce events"""

    type: str
    start_sample: int
    end_sample: int

@@ -35,6 +54,7 @@ class AnimatedVideoTrack(MediaStreamTrack):
    def __init__(
        self,
        clock: MediaClock,
        width: int = 320,
        height: int = 240,
        name: str = "",
@@ -44,6 +64,10 @@ class AnimatedVideoTrack(MediaStreamTrack):
        self.width = width
        self.height = height
        self.name = name
        self.clock = clock
        self.fps = 15
        self._next_frame_index = 0
        self.audio_track = audio_track  # Reference to the audio track
        self.remote_video_tracks: list[
            MediaStreamTrack
@@ -73,6 +97,10 @@ class AnimatedVideoTrack(MediaStreamTrack):
        self._start_time = time.time()
        self._last_frame_time = time.time()
        self.fps = 15  # Target frames per second
        self._remote_latest = {}  # track -> np.ndarray
        self._remote_tasks: list[
            tuple[MediaStreamTrack, object, Queue[np.ndarray]]
        ] = []

    def set_ball_speed(self, speed_mps: float):
        """Set the ball speed in meters per second"""
@@ -83,6 +111,19 @@ class AnimatedVideoTrack(MediaStreamTrack):
        if track.kind == "video":
            self.remote_video_tracks.append(track)
            logger.info(f"Added remote video track: {track}")

            # Latest-frame queue: maxsize=1, so a slow consumer never backs up
            # the producer; stale frames are dropped in favor of the newest.
            q: Queue[np.ndarray] = Queue(maxsize=1)

            async def pump():
                while True:
                    frame = await track.recv()
                    if isinstance(frame, VideoFrame):
                        img: np.ndarray = frame.to_ndarray(format="bgr24")
                        if q.full():
                            _ = q.get_nowait()
                        await q.put(img)

            t = create_task(pump())
            self._remote_tasks.append((track, t, q))

    def remove_remote_video_track(self, track: MediaStreamTrack):
        """Remove a remote video track"""
@@ -90,36 +131,16 @@
        self.remote_video_tracks.remove(track)
        logger.info(f"Removed remote video track: {track}")

    def _calculate_velocity_components(self, dt: float) -> tuple[float, float]:
        """
        Calculate dx and dy velocity components for a timestep of dt seconds.
        Frame width represents 1 meter, so pixels per second = width * speed_mps.
        """
        # Normalize the direction vector so speed stays consistent
        dir_x, dir_y = self.ball["direction_x"], self.ball["direction_y"]
        mag = np.hypot(dir_x, dir_y)
        if mag == 0:
            dir_x_norm, dir_y_norm = 1.0, 0.0
        else:
            dir_x_norm, dir_y_norm = dir_x / mag, dir_y / mag

        # Convert meters per second to pixels for this timestep
        pixels_per_second = self.width * self.ball["speed_mps"]
        pixels_this_frame = pixels_per_second * dt
        return pixels_this_frame * dir_x_norm, pixels_this_frame * dir_y_norm
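
    # Worked example (assumed defaults): width=320 and speed_mps=0.5 give
    # pixels_per_second = 320 * 0.5 = 160; with dt = 1/15 s, the ball moves
    # 160 / 15 ~ 10.7 px this frame along its normalized direction vector.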

    async def next_timestamp(self):
        """Returns (pts, time_base) for 15 FPS video"""

@@ -171,42 +192,36 @@
"""Generate video frames at 15 FPS"""
pts, time_base = await self.next_timestamp()
# Target timestamp for this frame (seconds since t0)
target_t = self._next_frame_index / self.fps
now = self.clock.now()
if target_t > now:
await sleep(target_t - now)
# Use constant dt tied to fps (prevents physics jitter)
dt = 1.0 / self.fps
dx, dy = self._calculate_velocity_components(dt)
# PTS derived from frame index, not wall clock
pts = int(self._next_frame_index * (90000 / self.fps))
time_base = 1 / 90000
self._next_frame_index += 1
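
        # Illustrative arithmetic: against the 90 kHz RTP clock each frame
        # advances pts by 90000 / 15 = 6000, so frame 30 carries
        # pts = 180000, i.e. exactly t = 2.0 s regardless of render jitter.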

        # Create black background
        frame_array = np.zeros((self.height, self.width, 3), dtype=np.uint8)

        # Process remote video tracks with edge detection
        for _track, _task, q in self._remote_tasks:
            try:
                # Latest frame from this track's pump task (non-blocking)
                img: np.ndarray = q.get_nowait()
            except Exception:
                # No fresh frame from this track; continue with the others
                continue
            edges = cv2.Canny(img, 100, 200)
            img_edges = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)
            # Resize to match our canvas size if needed
            if img_edges.shape[:2] != (self.height, self.width):
                img_edges = cv2.resize(img_edges, (self.width, self.height))
            # Blend with existing frame (additive blend for edge overlay)
            frame_array = cv2.addWeighted(frame_array, 0.7, img_edges, 0.3, 0.0)

        # Update ball position
        ball = self.ball

@@ -225,7 +240,7 @@ class AnimatedVideoTrack(MediaStreamTrack):
        # Trigger bounce sound if a bounce occurred
        if bounce_occurred and self.audio_track:
            logger.info("Video: Bounce detected, triggering audio event")
            self.audio_track.add_bounce_event_at(self.clock.now())

        # Keep ball in bounds
        ball["x"] = max(ball["radius"], min(self.width - ball["radius"], ball["x"]))

@@ -272,31 +287,61 @@


class SyntheticAudioTrack(MediaStreamTrack):
    """
    Synthetic audio track that generates a continuous tone based on ball
    position, plus bounce sound effects.

    The frequency of the continuous tone is mapped to the ball's Y position:
    - Top of screen (Y=0): 400Hz (high pitch)
    - Bottom of screen (Y=height): 200Hz (low pitch)

    Bounce events add temporary audio effects on top of the continuous tone.
    """

    kind = "audio"

    def __init__(
        self, clock: MediaClock, video_track: "AnimatedVideoTrack | None" = None
    ):
        super().__init__()
        self.sample_rate = 48000
        self.samples_per_frame = 960
        self._samples_generated = 0
        self._active_bounces: list[BounceEvent] = []  # Active bounce events
        self.video_track = video_track  # Reference for ball position
        self.clock = clock

    def add_bounce_event_at(self, bounce_time_s: float):
        """Schedule a 200ms bounce effect starting at the given clock time"""
        start_sample = int(bounce_time_s * self.sample_rate)
        duration = int(0.2 * self.sample_rate)  # 200ms
        self._active_bounces.append(
            {
                "type": "bounce",
                "start_sample": start_sample,
                "end_sample": start_sample + duration,
            }
        )
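
    # Worked example: a bounce at clock time 2.5 s maps to
    # start_sample = int(2.5 * 48000) = 120000 and
    # end_sample = 120000 + int(0.2 * 48000) = 129600, so the effect occupies
    # samples 120000-129599 on the shared sample timeline.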

    def _get_ball_frequency(self) -> float:
        """Get the current tone frequency from the ball's Y position"""
        if not self.video_track:
            return 440.0  # Default frequency if no video track

        # Normalize Y (0.0 at top, 1.0 at bottom) and map it to 200-400Hz,
        # inverted so top = high frequency, bottom = low frequency
        ball_y = self.video_track.ball["y"]
        height = self.video_track.height
        normalized_y = ball_y / height
        freq_min = 200.0
        freq_max = 400.0
        return freq_max - (normalized_y * (freq_max - freq_min))
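
    # Worked example: with height=240, a ball at y=60 gives
    # normalized_y = 0.25 and 400 - 0.25 * (400 - 200) = 350 Hz;
    # y=0 yields 400 Hz and y=240 yields 200 Hz.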

    def _generate_bounce_sample(self, t: float) -> float:
        """Generate a single bounce sample at time t"""
        if t < 0 or t > 0.2:

@@ -318,52 +363,58 @@ class SyntheticAudioTrack(MediaStreamTrack):
    async def recv(self):
        pts, time_base = await self.next_timestamp()

        # --- 1. Generate base tone based on ball Y position ---
        if self.video_track:
            base_freq = self._get_ball_frequency()
        else:
            base_freq = 440.0  # default if no video track
        t = (np.arange(self.samples_per_frame) + pts) / self.sample_rate
        samples = np.sin(2 * np.pi * base_freq * t).astype(np.float32)

        # --- 2. Add bounce sound effect if triggered ---
        if getattr(self, "just_bounced", False):
            logger.info("Audio: Generating bounce sound effect")
            tb = np.arange(self.samples_per_frame) / self.sample_rate
            bounce_freq = 600.0  # Hz
            bounce_env = np.exp(-tb * 20.0)  # fast exponential decay
            bounce_wave = 0.4 * np.sin(2 * np.pi * bounce_freq * tb) * bounce_env
            samples = samples + bounce_wave.astype(np.float32)
            self.just_bounced = False

        # --- 3. Stereo panning based on X position ---
        # Equal-power panning: pan=0.0 -> (L=1.0, R=0.0),
        # pan=0.5 -> (~0.707, ~0.707), pan=1.0 -> (L=0.0, R=1.0);
        # total power L^2 + R^2 stays 1.
        if self.video_track:
            pan = self.video_track.ball["x"] / self.video_track.width
        else:
            pan = 0.5  # center if no video
        left_gain = math.cos(pan * math.pi / 2)
        right_gain = math.sin(pan * math.pi / 2)

        # --- 4. Volume scaling based on Y position (louder near the top) ---
        if self.video_track:
            volume = (1.0 - (self.video_track.ball["y"] / self.video_track.height)) ** 2
        else:
            volume = 1.0

        # --- 5. Clip, apply gains, and convert to int16 ---
        samples = np.clip(samples, -1.0, 1.0)
        left = (samples * left_gain * volume * 32767).astype(np.int16)
        right = (samples * right_gain * volume * 32767).astype(np.int16)

        # --- 6. Interleave channels for s16 format ([L, R, L, R, ...]) ---
        interleaved = np.empty(self.samples_per_frame * 2, dtype=np.int16)
        interleaved[0::2] = left  # Even indices get left channel
        interleaved[1::2] = right  # Odd indices get right channel

        # Reshape to (1, samples*2) as expected by packed s16 stereo
        stereo = interleaved.reshape(1, -1)
        frame = AudioFrame.from_ndarray(stereo, format="s16", layout="stereo")
        frame.sample_rate = self.sample_rate
        frame.pts = pts
        frame.time_base = fractions.Fraction(time_base).limit_denominator(1000000)
        self._samples_generated += self.samples_per_frame
        return frame
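
# Note: add_bounce_event_at() schedules events in self._active_bounces, while
# recv() above keys the effect off the just_bounced flag. A vectorized mix-in
# of the scheduled events (a sketch under that assumption, not in this commit)
# could replace step 2:
#
#     start = self._samples_generated
#     for b in self._active_bounces:
#         lo = max(b["start_sample"] - start, 0)
#         hi = min(b["end_sample"] - start, self.samples_per_frame)
#         if lo < hi:
#             tb = (np.arange(lo, hi) + start - b["start_sample"]) / self.sample_rate
#             samples[lo:hi] += 0.4 * np.sin(2 * np.pi * 600.0 * tb) * np.exp(-tb * 20.0)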

@@ -378,13 +429,20 @@ def create_synthetic_tracks(session_name: str) -> dict[str, MediaStreamTrack]:
        Dictionary containing 'video' and 'audio' tracks

    Note:
        - To change ball speed, use tracks["video"].set_ball_speed(speed_in_mps),
          where speed_in_mps is meters per second (frame width = 1 meter)
        - Audio generates a continuous tone based on ball Y position (200-400Hz)
        - Bounce events add additional audio on top of the continuous tone
    """
    media_clock = MediaClock()

    # Create video track first
    video_track = AnimatedVideoTrack(name=session_name, clock=media_clock)

    # Create audio track with a reference to the video track for ball
    # position-based frequency
    audio_track = SyntheticAudioTrack(video_track=video_track, clock=media_clock)

    # Set the audio track reference on the video track for bounce events
    video_track.audio_track = audio_track

    return {"video": video_track, "audio": audio_track}
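

# A minimal usage sketch (assumption, not part of this commit): create the
# track pair and feed it to an aiortc peer connection.
if __name__ == "__main__":
    from aiortc import RTCPeerConnection

    pc = RTCPeerConnection()
    tracks = create_synthetic_tracks("demo")
    tracks["video"].set_ball_speed(0.5)  # half a frame-width per second
    pc.addTrack(tracks["video"])
    pc.addTrack(tracks["audio"])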