Messing with audio

James Ketr 2025-09-01 19:32:57 -07:00
parent fb0ce3f203
commit bf46a45f89


@@ -6,17 +6,36 @@ Contains AnimatedVideoTrack and SyntheticAudioTrack implementations ported from
"""
import numpy as np
import math
import cv2
import fractions
import time
import random
from av.audio.frame import AudioFrame
from asyncio import Queue, create_task, sleep
from typing import TypedDict, TYPE_CHECKING
from aiortc import MediaStreamTrack
from av import VideoFrame
from logger import logger

if TYPE_CHECKING:
    pass

# Shared clock
from time import perf_counter


class MediaClock:
    def __init__(self):
        self.t0 = perf_counter()

    def now(self) -> float:
        return perf_counter() - self.t0


class BounceEvent(TypedDict):
    """Type definition for bounce events"""

    type: str
    start_sample: int
    end_sample: int
@@ -35,6 +54,7 @@ class AnimatedVideoTrack(MediaStreamTrack):

    def __init__(
        self,
        clock: MediaClock,
        width: int = 320,
        height: int = 240,
        name: str = "",
@@ -44,6 +64,10 @@ class AnimatedVideoTrack(MediaStreamTrack):
        self.width = width
        self.height = height
        self.name = name
        self.clock = clock
        self.fps = 15
        self._next_frame_index = 0
        self.audio_track = audio_track  # Reference to the audio track
        self.remote_video_tracks: list[
            MediaStreamTrack
@@ -73,6 +97,10 @@ class AnimatedVideoTrack(MediaStreamTrack):
        self._start_time = time.time()
        self._last_frame_time = time.time()
        self.fps = 15  # Target frames per second
        self._remote_latest = {}  # track -> np.ndarray
        self._remote_tasks: list[
            tuple[MediaStreamTrack, object, Queue[np.ndarray]]
        ] = []

    def set_ball_speed(self, speed_mps: float):
        """Set the ball speed in meters per second"""
@@ -83,6 +111,19 @@ class AnimatedVideoTrack(MediaStreamTrack):
        if track.kind == "video":
            self.remote_video_tracks.append(track)
            logger.info(f"Added remote video track: {track}")

            # Single-slot queue: holds only the most recent decoded frame
            q: Queue[np.ndarray] = Queue(maxsize=1)

            async def pump():
                while True:
                    frame = await track.recv()
                    if isinstance(frame, VideoFrame):
                        img: np.ndarray = frame.to_ndarray(format="bgr24")
                        # Drop the stale frame so the queue never blocks
                        if q.full():
                            _ = q.get_nowait()
                        await q.put(img)

            t = create_task(pump())
            self._remote_tasks.append((track, t, q))

    def remove_remote_video_track(self, track: MediaStreamTrack):
        """Remove a remote video track"""
@@ -90,36 +131,16 @@ class AnimatedVideoTrack(MediaStreamTrack):
            self.remote_video_tracks.remove(track)
            logger.info(f"Removed remote video track: {track}")
    def _calculate_velocity_components(self, dt: float) -> tuple[float, float]:
        """Convert speed_mps into per-frame pixel deltas (frame width = 1 meter)."""
        dir_x, dir_y = self.ball["direction_x"], self.ball["direction_y"]
        mag = np.hypot(dir_x, dir_y)
        if mag == 0:
            dir_x_norm, dir_y_norm = 1.0, 0.0
        else:
            dir_x_norm, dir_y_norm = dir_x / mag, dir_y / mag
        pixels_per_second = self.width * self.ball["speed_mps"]
        pixels_this_frame = pixels_per_second * dt
        return pixels_this_frame * dir_x_norm, pixels_this_frame * dir_y_norm
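    # A quick sanity check with assumed values (not part of the commit):
    # frame width maps to 1 meter, so with width=320, speed_mps=0.5, dt=1/15:
    #   pixels_this_frame = 320 * 0.5 / 15 ≈ 10.67 px of travel per frame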
    async def next_timestamp(self):
        """Returns (pts, time_base) for 15 FPS video"""
@@ -171,42 +192,36 @@ class AnimatedVideoTrack(MediaStreamTrack):
        """Generate video frames at 15 FPS"""
        pts, time_base = await self.next_timestamp()

        # Target timestamp for this frame (seconds since t0)
        target_t = self._next_frame_index / self.fps
        now = self.clock.now()
        if target_t > now:
            await sleep(target_t - now)

        # Use constant dt tied to fps (prevents physics jitter)
        dt = 1.0 / self.fps
        dx, dy = self._calculate_velocity_components(dt)

        # PTS derived from frame index, not wall clock
        pts = int(self._next_frame_index * (90000 / self.fps))
        time_base = 1 / 90000
        self._next_frame_index += 1

        # Create black background
        frame_array = np.zeros((self.height, self.width, 3), dtype=np.uint8)

        # Process remote video tracks with edge detection
        for _track, _task, q in self._remote_tasks:
            try:
                img: np.ndarray = q.get_nowait()
            except Exception:
                continue
            edges = cv2.Canny(img, 100, 200)
            img_edges = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)
            if img_edges.shape[:2] != (self.height, self.width):
                img_edges = cv2.resize(img_edges, (self.width, self.height))
            frame_array = cv2.addWeighted(frame_array, 0.7, img_edges, 0.3, 0.0)

        # Update ball position
        ball = self.ball
@@ -225,7 +240,7 @@ class AnimatedVideoTrack(MediaStreamTrack):
        # Trigger bounce sound if a bounce occurred
        if bounce_occurred and self.audio_track:
            logger.info("Video: Bounce detected, triggering audio event")
            self.audio_track.add_bounce_event_at(self.clock.now())

        # Keep ball in bounds
        ball["x"] = max(ball["radius"], min(self.width - ball["radius"], ball["x"]))
@@ -272,31 +287,61 @@

class SyntheticAudioTrack(MediaStreamTrack):
    """
    Synthetic audio track that generates a continuous tone based on ball position
    plus additional bounce sound effects.

    The frequency of the continuous tone is mapped to the ball's Y position:
    - Top of screen (Y=0): 400Hz (high pitch)
    - Bottom of screen (Y=height): 200Hz (low pitch)

    Bounce events add temporary audio effects on top of the continuous tone.
    """

    kind = "audio"

    def __init__(
        self, clock: MediaClock, video_track: "AnimatedVideoTrack | None" = None
    ):
        super().__init__()
        self.sample_rate = 48000
        self.samples_per_frame = 960
        self._samples_generated = 0
        self._active_bounces: list[BounceEvent] = []  # List of active bounce events
        self.video_track = video_track  # Reference to video track for ball position
        self.clock = clock
    def add_bounce_event_at(self, bounce_time_s: float):
        """Schedule a bounce effect at a shared-clock timestamp (seconds)."""
        start_sample = int(bounce_time_s * self.sample_rate)
        duration = int(0.2 * self.sample_rate)  # 200ms
        self._active_bounces.append(
            {
                "type": "bounce",
                "start_sample": start_sample,
                "end_sample": start_sample + duration,
            }
        )
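    # Worked example (assumed values): a bounce at clock time 2.5 s gives
    #   start_sample = int(2.5 * 48000) = 120000
    #   end_sample   = 120000 + int(0.2 * 48000) = 129600  (9600 samples = 200 ms)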
    def _get_ball_frequency(self) -> float:
        """Get the current frequency based on ball Y position"""
        if not self.video_track:
            return 440.0  # Default frequency if no video track

        # Map ball Y position to frequency range (200Hz to 400Hz)
        ball_y = self.video_track.ball["y"]
        height = self.video_track.height

        # Normalize Y position (0.0 at top, 1.0 at bottom)
        normalized_y = ball_y / height

        # Invert so top = high frequency, bottom = low frequency
        freq_min = 200.0
        freq_max = 400.0
        frequency = freq_max - (normalized_y * (freq_max - freq_min))
        return frequency
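    # The mapping is linear; sample points (positions assumed for illustration):
    #   normalized_y = 0.0 (top)    -> 400.0 Hz
    #   normalized_y = 0.5 (middle) -> 300.0 Hz
    #   normalized_y = 1.0 (bottom) -> 200.0 Hz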
    def _generate_bounce_sample(self, t: float) -> float:
        """Generate a single bounce sample at time t"""
        if t < 0 or t > 0.2:
@@ -318,52 +363,58 @@ class SyntheticAudioTrack(MediaStreamTrack):

    async def recv(self):
        pts, time_base = await self.next_timestamp()

        # --- 1. Generate base tone based on ball Y position ---
        if self.video_track:
            base_freq = self._get_ball_frequency()
        else:
            base_freq = 440.0  # default if no video track
        t = (np.arange(self.samples_per_frame) + pts) / self.sample_rate
        samples = np.sin(2 * np.pi * base_freq * t).astype(np.float32)

        # --- 2. Mix in active bounce events (sample-accurate) ---
        frame_start = self._samples_generated
        frame_end = frame_start + self.samples_per_frame
        for bounce in self._active_bounces:
            if bounce["end_sample"] <= frame_start or bounce["start_sample"] >= frame_end:
                continue
            logger.info("Audio: Generating bounce sound effect")
            # Time of each output sample relative to the bounce start
            tb = (
                np.arange(self.samples_per_frame) + frame_start - bounce["start_sample"]
            ) / self.sample_rate
            bounce_freq = 600.0  # Hz
            # Fast exponential decay, silenced outside the 200ms bounce window
            bounce_env = np.where((tb >= 0) & (tb < 0.2), np.exp(-np.abs(tb) * 20.0), 0.0)
            bounce_wave = 0.4 * np.sin(2 * np.pi * bounce_freq * tb) * bounce_env
            samples = samples + bounce_wave.astype(np.float32)
        # Drop bounces that have finished playing
        self._active_bounces = [
            b for b in self._active_bounces if b["end_sample"] > frame_end
        ]

        # --- 3. Stereo panning based on X position ---
        if self.video_track:
            pan = self.video_track.ball["x"] / self.video_track.width
        else:
            pan = 0.5  # center if no video
        left_gain = math.cos(pan * math.pi / 2)
        right_gain = math.sin(pan * math.pi / 2)

        # --- 4. Volume scaling based on Y position ---
        if self.video_track:
            volume = (1.0 - (self.video_track.ball["y"] / self.video_track.height)) ** 2
        else:
            volume = 1.0

        # --- 5. Clip, apply gain and convert to int16 ---
        samples = np.clip(samples, -1.0, 1.0)  # tone + bounce can exceed ±1.0
        left = (samples * left_gain * volume * 32767).astype(np.int16)
        right = (samples * right_gain * volume * 32767).astype(np.int16)

        # --- 6. Interleave channels for s16 format (samples arranged as [L, R, L, R, ...]) ---
        interleaved = np.empty(self.samples_per_frame * 2, dtype=np.int16)
        interleaved[0::2] = left  # Even indices get left channel
        interleaved[1::2] = right  # Odd indices get right channel

        # Reshape to (1, samples*2) as expected by packed s16 stereo
        stereo = interleaved.reshape(1, -1)

        frame = AudioFrame.from_ndarray(stereo, format="s16", layout="stereo")
        frame.sample_rate = self.sample_rate
        frame.pts = pts
        frame.time_base = fractions.Fraction(time_base).limit_denominator(1000000)
        self._samples_generated += self.samples_per_frame
        return frame
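    # Note: the cos/sin pair in step 3 is a constant-power pan law, so
    # left_gain**2 + right_gain**2 == 1 for any pan in [0, 1] and loudness stays
    # steady as the ball crosses the screen. A quick check (illustrative only):
    #   for pan in (0.0, 0.25, 0.5, 1.0):
    #       lg, rg = math.cos(pan * math.pi / 2), math.sin(pan * math.pi / 2)
    #       assert abs(lg * lg + rg * rg - 1.0) < 1e-9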
@@ -378,13 +429,20 @@ def create_synthetic_tracks(session_name: str) -> dict[str, MediaStreamTrack]:
        Dictionary containing 'video' and 'audio' tracks

    Note:
        - To change ball speed, use: tracks["video"].set_ball_speed(speed_in_mps)
          where speed_in_mps is meters per second (frame width = 1 meter)
        - Audio generates a continuous tone based on ball Y position (200-400Hz)
        - Bounce events add additional audio on top of the continuous tone
    """
    media_clock = MediaClock()

    # Create video track first
    video_track = AnimatedVideoTrack(name=session_name, clock=media_clock)

    # Create audio track with reference to video track for ball position-based frequency
    audio_track = SyntheticAudioTrack(video_track=video_track, clock=media_clock)

    # Set the audio track reference on the video track for bounce events
    video_track.audio_track = audio_track

    return {"video": video_track, "audio": audio_track}