"""
Synthetic Media Tracks Module

This module provides synthetic audio and video track creation for WebRTC media streaming.
Contains AnimatedVideoTrack and SyntheticAudioTrack implementations ported from JavaScript.
"""

import asyncio
import fractions
import random
import time
from typing import TypedDict

import cv2
import numpy as np
from aiortc import MediaStreamTrack
from av import VideoFrame, AudioFrame

from logger import logger


class BounceEvent(TypedDict):
    """Type definition for bounce events"""
    type: str
    start_sample: int
    end_sample: int


class AnimatedVideoTrack(MediaStreamTrack):
    """
    Synthetic video track that generates animated content with a bouncing ball.

    Can also composite remote video tracks with edge detection overlay.
    Remote video tracks are processed through Canny edge detection and blended
    with the synthetic ball animation.
    """

    kind = "video"

    def __init__(
        self,
        width: int = 320,
        height: int = 240,
        name: str = "",
        audio_track: "SyntheticAudioTrack | None" = None,
    ):
        super().__init__()
        self.width = width
        self.height = height
        self.name = name
        self.audio_track = audio_track  # Reference to the audio track
        self.remote_video_tracks: list[
            MediaStreamTrack
        ] = []  # Store remote video tracks

        # Generate color from name hash (similar to JavaScript nameToColor)
        self.ball_color = (
            self._name_to_color(name) if name else (0, 255, 136)
        )  # Default green

        # Ball properties
        ball_radius = min(width, height) * 0.06
        self.ball = {
            "x": random.uniform(ball_radius, width - ball_radius),
            "y": random.uniform(ball_radius, height - ball_radius),
            "radius": ball_radius,
            "speed_mps": 0.5,  # Speed in meters per second (frame width = 1 meter)
            "direction_x": random.uniform(
                -1.0, 1.0
            ),  # Random direction x component (-1 to 1)
            "direction_y": random.uniform(
                -1.0, 1.0
            ),  # Random direction y component (-1 to 1)
        }

        self.frame_count = 0
        self._start_time = time.time()
        self._last_frame_time = time.time()
        self.fps = 15  # Target frames per second

    def set_ball_speed(self, speed_mps: float):
        """Set the ball speed in meters per second"""
        self.ball["speed_mps"] = speed_mps

    def add_remote_video_track(self, track: MediaStreamTrack):
        """Add a remote video track to be composited with edge detection"""
        if track.kind == "video":
            self.remote_video_tracks.append(track)
            logger.info(f"Added remote video track: {track}")

    def remove_remote_video_track(self, track: MediaStreamTrack):
        """Remove a remote video track"""
        if track in self.remote_video_tracks:
            self.remote_video_tracks.remove(track)
            logger.info(f"Removed remote video track: {track}")

    def _calculate_velocity_components(self) -> tuple[float, float]:
        """
        Calculate dx and dy velocity components based on speed in meters per second.

        Frame width represents 1 meter, so pixels per second = width * speed_mps.
        For example, at width=320 and 0.5 m/s the ball covers 160 px/s, roughly
        10.7 px per frame at 15 FPS.
        """
        # Calculate actual time delta since last frame
        current_time = time.time()
        dt = current_time - self._last_frame_time
        self._last_frame_time = current_time

        # Normalize direction vector to ensure consistent speed
        dir_x = self.ball["direction_x"]
        dir_y = self.ball["direction_y"]
        magnitude = np.sqrt(dir_x * dir_x + dir_y * dir_y)

        if magnitude > 0:
            dir_x_norm = dir_x / magnitude
            dir_y_norm = dir_y / magnitude
        else:
            dir_x_norm, dir_y_norm = 1.0, 0.0

        # Convert meters per second to pixels per actual time delta
        pixels_per_second = self.width * self.ball["speed_mps"]
        pixels_this_frame = pixels_per_second * dt

        # Apply normalized direction to get velocity components
        dx = pixels_this_frame * dir_x_norm
        dy = pixels_this_frame * dir_y_norm

        return dx, dy

    async def next_timestamp(self):
        """Return (pts, time_base) for ~15 FPS video, pacing frame generation"""
        # Sleep until the next frame is due so the track emits ~self.fps frames
        # per second instead of producing frames as fast as recv() is called
        wait = self._start_time + self.frame_count / self.fps - time.time()
        if wait > 0:
            await asyncio.sleep(wait)

        pts = int(self.frame_count * (90000 / self.fps))  # 90 kHz RTP video clock
        time_base = fractions.Fraction(1, 90000)
        return pts, time_base

    def _name_to_color(self, name: str) -> tuple[int, int, int]:
        """Convert name to an HSL color, then to a BGR tuple for OpenCV"""
        # Simple hash function (djb2)
        hash_value = 5381
        for char in name:
            hash_value = ((hash_value << 5) + hash_value + ord(char)) & 0xFFFFFFFF

        # Generate HSL color from hash
        hue = abs(hash_value) % 360
        sat = 60 + (abs(hash_value) % 30)  # 60-89%
        light = 45 + (abs(hash_value) % 30)  # 45-74%

        # Convert HSL to RGB
        h = hue / 360.0
        s = sat / 100.0
        lightness = light / 100.0

        c = (1 - abs(2 * lightness - 1)) * s  # Chroma
        x = c * (1 - abs((h * 6) % 2 - 1))  # Second-largest color component
        m = lightness - c / 2  # Added to each channel to match the lightness

        if h < 1 / 6:
            r, g, b = c, x, 0
        elif h < 2 / 6:
            r, g, b = x, c, 0
        elif h < 3 / 6:
            r, g, b = 0, c, x
        elif h < 4 / 6:
            r, g, b = 0, x, c
        elif h < 5 / 6:
            r, g, b = x, 0, c
        else:
            r, g, b = c, 0, x

        return (
            int((b + m) * 255),
            int((g + m) * 255),
            int((r + m) * 255),
        )  # BGR for OpenCV

    async def recv(self):
        """Generate video frames at the target frame rate (default 15 FPS)"""
        pts, time_base = await self.next_timestamp()

        # Create black background
        frame_array = np.zeros((self.height, self.width, 3), dtype=np.uint8)

        # Process remote video tracks with edge detection
        for track in self.remote_video_tracks:
            try:
                # Wait for the next frame from the remote track (this awaits the
                # track, so a stalled remote source can delay our own frame)
                remote_frame = await track.recv()
                if remote_frame and isinstance(remote_frame, VideoFrame):
                    # Convert to numpy array
                    img: np.ndarray = remote_frame.to_ndarray(format="bgr24")

                    # Apply edge detection
                    edges = cv2.Canny(img, 100, 200)
                    img_edges = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)

                    # Resize to match our canvas size if needed
                    if img_edges.shape[:2] != (self.height, self.width):
                        img_edges = cv2.resize(img_edges, (self.width, self.height))

                    # Blend with existing frame (weighted blend for the edge overlay)
                    frame_array = cv2.addWeighted(
                        frame_array.astype(np.uint8),
                        0.7,
                        img_edges.astype(np.uint8),
                        0.3,
                        0,
                    )

            except Exception as e:
                # If we can't get a frame from this track, continue with others
                logger.debug(f"Could not get frame from remote track: {e}")
                continue

        # Calculate velocity components based on current speed
        dx, dy = self._calculate_velocity_components()

        # Update ball position
        ball = self.ball
        ball["x"] += dx
        ball["y"] += dy

        # Bounce off walls and trigger audio events
        bounce_occurred = False
        if ball["x"] + ball["radius"] >= self.width or ball["x"] - ball["radius"] <= 0:
            ball["direction_x"] = -ball["direction_x"]
            bounce_occurred = True
        if ball["y"] + ball["radius"] >= self.height or ball["y"] - ball["radius"] <= 0:
            ball["direction_y"] = -ball["direction_y"]
            bounce_occurred = True

        # Trigger bounce sound if a bounce occurred
        if bounce_occurred and self.audio_track:
            logger.info("Video: Bounce detected, triggering audio event")
            self.audio_track.add_bounce_event("bounce")

        # Keep ball in bounds
        ball["x"] = max(ball["radius"], min(self.width - ball["radius"], ball["x"]))
        ball["y"] = max(ball["radius"], min(self.height - ball["radius"], ball["y"]))

        # Draw ball
        cv2.circle(
            frame_array,
            (int(ball["x"]), int(ball["y"])),
            int(ball["radius"]),
            self.ball_color,
            -1,
        )

        # Overlay a rolling millisecond counter and the current speed
        frame_text = f"Frame: {int(time.time() * 1000) % 10000}"
        speed_text = f"Speed: {ball['speed_mps']:.2f} m/s"
        cv2.putText(
            frame_array,
            frame_text,
            (10, 20),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (255, 255, 255),
            1,
        )
        cv2.putText(
            frame_array,
            speed_text,
            (10, 40),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (255, 255, 255),
            1,
        )

        # Convert to VideoFrame
        frame = VideoFrame.from_ndarray(frame_array.astype(np.uint8), format="bgr24")
        frame.pts = pts
        frame.time_base = fractions.Fraction(time_base).limit_denominator(1000000)

        self.frame_count += 1
        return frame
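

# Wiring sketch (illustrative only): incoming remote video can be fed into the
# edge-detection overlay from the application's aiortc track handler. `pc` (an
# RTCPeerConnection) and `video_track` (an AnimatedVideoTrack) are assumed to
# exist in the calling code; this is not part of this module's API.
#
#     @pc.on("track")
#     def on_track(track):
#         if track.kind == "video":
#             video_track.add_remote_video_track(track)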


class SyntheticAudioTrack(MediaStreamTrack):
    kind = "audio"

    def __init__(self):
        super().__init__()
        self.sample_rate = 48000
        self.samples_per_frame = 960
        self._samples_generated = 0
        self._active_bounces: list[BounceEvent] = []  # List of active bounce events

    def add_bounce_event(self, bounce_type: str = "bounce"):
        """Add a bounce event"""
        bounce_duration_samples = int(0.2 * self.sample_rate)  # 200ms

        # Add new bounce to the list (they can overlap)
        bounce_event: BounceEvent = {
            "start_sample": self._samples_generated,
            "end_sample": self._samples_generated + bounce_duration_samples,
            "type": bounce_type,
        }

        self._active_bounces.append(bounce_event)
        logger.info(
            f"Bounce event added - start: {bounce_event['start_sample']}, end: {bounce_event['end_sample']}"
        )

    def _generate_bounce_sample(self, t: float) -> float:
        """Generate a single bounce sample at time t"""
        if t < 0 or t > 0.2:
            return 0.0

        # Simple decay envelope
        decay = np.exp(-t * 10)

        # Clear, simple tone
        freq = 400
        sound = np.sin(2 * np.pi * freq * t) * decay

        return sound * 0.9

    async def next_timestamp(self) -> tuple[int, float]:
        pts = self._samples_generated
        time_base = 1 / self.sample_rate
        return pts, time_base

    async def recv(self):
        pts, time_base = await self.next_timestamp()
        samples = np.zeros((self.samples_per_frame,), dtype=np.float32)

        # Generate samples for this frame
        active_bounce_count = 0
        for i in range(self.samples_per_frame):
            current_sample = self._samples_generated + i
            sample_value = 0.0

            # Check all active bounces for this sample
            for bounce in self._active_bounces:
                if bounce["start_sample"] <= current_sample < bounce["end_sample"]:
                    # Calculate time within this bounce
                    sample_offset = current_sample - bounce["start_sample"]
                    t = sample_offset / self.sample_rate

                    # Add this bounce's contribution
                    sample_value += self._generate_bounce_sample(t)
                    active_bounce_count += 1

            samples[i] = sample_value

        # Clean up expired bounces
        self._active_bounces = [
            bounce
            for bounce in self._active_bounces
            if bounce["end_sample"] > self._samples_generated + self.samples_per_frame
        ]

        if active_bounce_count > 0:
            logger.info(
                f"Generated audio with {len(self._active_bounces)} active bounces"
            )

        self._samples_generated += self.samples_per_frame

        # Convert to audio frame
        samples = np.clip(samples, -1.0, 1.0)
        samples_s16 = (samples * 32767).astype(np.int16)

        # Duplicate the mono signal into interleaved stereo (L R L R ...) so the
        # frame really carries samples_per_frame stereo samples; packing the mono
        # buffer directly as "stereo" would halve the frame duration
        stereo_interleaved = np.repeat(samples_s16, 2).reshape(1, -1)
        frame = AudioFrame.from_ndarray(
            stereo_interleaved, format="s16", layout="stereo"
        )
        frame.sample_rate = self.sample_rate
        frame.pts = pts
        frame.time_base = fractions.Fraction(time_base).limit_denominator(1000000)

        return frame


def create_synthetic_tracks(session_name: str) -> dict[str, MediaStreamTrack]:
    """
    Create synthetic audio and video tracks for WebRTC streaming.

    Args:
        session_name: Name used to generate the video track's ball color

    Returns:
        Dictionary containing 'video' and 'audio' tracks

    Note:
        To change the ball speed, use tracks["video"].set_ball_speed(speed_in_mps),
        where speed_in_mps is in meters per second (frame width = 1 meter).
    """
    # Create audio track first
    audio_track = SyntheticAudioTrack()

    # Create video track with reference to audio track for bounce events
    video_track = AnimatedVideoTrack(name=session_name, audio_track=audio_track)

    return {"video": video_track, "audio": audio_track}
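

# Minimal local smoke test (an illustrative sketch, not part of the module API):
# pull a few frames from the synthetic tracks without any peer connection to
# verify that video and audio generation work end to end.
if __name__ == "__main__":

    async def _demo() -> None:
        tracks = create_synthetic_tracks("demo")
        tracks["video"].set_ball_speed(0.25)  # 0.25 m/s (frame width = 1 meter)
        for _ in range(5):
            video_frame = await tracks["video"].recv()
            audio_frame = await tracks["audio"].recv()
            logger.info(
                f"demo: video pts={video_frame.pts}, audio pts={audio_frame.pts}"
            )

    asyncio.run(_demo())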