From fb0ce3f20379a5c683a4f249038d518ad55537bf Mon Sep 17 00:00:00 2001
From: James Ketrenos
Date: Mon, 1 Sep 2025 18:55:35 -0700
Subject: [PATCH] Add remote video edge-detection overlay, randomized ball
 start, and sample-based bounce audio

---
 voicebot/main.py            |  22 +++-
 voicebot/synthetic_media.py | 193 ++++++++++++++++++++++++------------
 2 files changed, 150 insertions(+), 65 deletions(-)

diff --git a/voicebot/main.py b/voicebot/main.py
index 710e88a..04a4b2d 100644
--- a/voicebot/main.py
+++ b/voicebot/main.py
@@ -58,7 +58,7 @@ from aiortc import (
     MediaStreamTrack,
 )
 from logger import logger
-from synthetic_media import create_synthetic_tracks
+from synthetic_media import create_synthetic_tracks, AnimatedVideoTrack
 
 # import debug_aioice
 
@@ -1065,9 +1065,29 @@ async def main():
     async def on_peer_removed(peer: Peer):
         print(f"Peer removed: {peer.peer_name}")
 
+        # Remove any video tracks from this peer from our synthetic video track
+        if "video" in client.local_tracks:
+            synthetic_video_track = client.local_tracks["video"]
+            if isinstance(synthetic_video_track, AnimatedVideoTrack):
+                # There is no mapping from peers to the remote tracks they provided,
+                # so per-peer cleanup is left as a placeholder until the peer
+                # tracking records which tracks came from which peer.
+                logger.info(
+                    f"Peer {peer.peer_name} removed - may need to clean up video tracks"
+                )
+
     async def on_track_received(peer: Peer, track: MediaStreamTrack):
         print(f"Received {track.kind} track from {peer.peer_name}")
 
+        # If it's a video track, attach it to our synthetic video track for edge detection
+        if track.kind == "video" and "video" in client.local_tracks:
+            synthetic_video_track = client.local_tracks["video"]
+            if isinstance(synthetic_video_track, AnimatedVideoTrack):
+                synthetic_video_track.add_remote_video_track(track)
+                logger.info(
+                    f"Attached remote video track from {peer.peer_name} to synthetic video track"
+                )
+
     client.on_peer_added = on_peer_added
     client.on_peer_removed = on_peer_removed
     client.on_track_received = on_track_received
diff --git a/voicebot/synthetic_media.py b/voicebot/synthetic_media.py
index 415b7a4..e6c938e 100644
--- a/voicebot/synthetic_media.py
+++ b/voicebot/synthetic_media.py
@@ -9,23 +9,27 @@ import numpy as np
 import cv2
 import fractions
 import time
+import random
 from typing import TypedDict
 from aiortc import MediaStreamTrack
 from av import VideoFrame, AudioFrame
-
+from logger import logger
 
 class BounceEvent(TypedDict):
     """Type definition for bounce events"""
 
     type: str
-    start_time: float
-    end_time: float
+    start_sample: int
+    end_sample: int
 
 
 class AnimatedVideoTrack(MediaStreamTrack):
     """
     Synthetic video track that generates animated content with a bouncing ball.
-    Ported from JavaScript createAnimatedVideoTrack function.
+
+    Can also composite remote video tracks with edge detection overlay.
+    Remote video tracks are processed through Canny edge detection and blended
+    with the synthetic ball animation.
""" kind = "video" @@ -42,6 +45,9 @@ class AnimatedVideoTrack(MediaStreamTrack): self.height = height self.name = name self.audio_track = audio_track # Reference to the audio track + self.remote_video_tracks: list[ + MediaStreamTrack + ] = [] # Store remote video tracks # Generate color from name hash (similar to JavaScript nameToColor) self.ball_color = ( @@ -49,13 +55,18 @@ class AnimatedVideoTrack(MediaStreamTrack): ) # Default green # Ball properties + ball_radius = min(width, height) * 0.06 self.ball = { - "x": width / 2, - "y": height / 2, - "radius": min(width, height) * 0.06, + "x": random.uniform(ball_radius, width - ball_radius), + "y": random.uniform(ball_radius, height - ball_radius), + "radius": ball_radius, "speed_mps": 0.5, # Speed in meters per second (frame width = 1 meter) - "direction_x": 1.0, # Direction vector x component (-1 to 1) - "direction_y": 0.6, # Direction vector y component (-1 to 1) + "direction_x": random.uniform( + -1.0, 1.0 + ), # Random direction x component (-1 to 1) + "direction_y": random.uniform( + -1.0, 1.0 + ), # Random direction y component (-1 to 1) } self.frame_count = 0 @@ -67,6 +78,18 @@ class AnimatedVideoTrack(MediaStreamTrack): """Set the ball speed in meters per second""" self.ball["speed_mps"] = speed_mps + def add_remote_video_track(self, track: MediaStreamTrack): + """Add a remote video track to be composited with edge detection""" + if track.kind == "video": + self.remote_video_tracks.append(track) + logger.info(f"Added remote video track: {track}") + + def remove_remote_video_track(self, track: MediaStreamTrack): + """Remove a remote video track""" + if track in self.remote_video_tracks: + self.remote_video_tracks.remove(track) + logger.info(f"Removed remote video track: {track}") + def _calculate_velocity_components(self) -> tuple[float, float]: """ Calculate dx and dy velocity components based on speed in meters per second. 
@@ -151,6 +174,37 @@ class AnimatedVideoTrack(MediaStreamTrack):
 
         # Create black background
         frame_array = np.zeros((self.height, self.width, 3), dtype=np.uint8)
 
+        # Process remote video tracks with edge detection
+        for track in self.remote_video_tracks:
+            try:
+                # Await the next frame from the remote track (this blocks until one arrives)
+                remote_frame = await track.recv()
+                if remote_frame and isinstance(remote_frame, VideoFrame):
+                    # Convert to numpy array
+                    img: np.ndarray = remote_frame.to_ndarray(format="bgr24")
+
+                    # Apply edge detection
+                    edges = cv2.Canny(img, 100, 200)
+                    img_edges = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)
+
+                    # Resize to match our canvas size if needed
+                    if img_edges.shape[:2] != (self.height, self.width):
+                        img_edges = cv2.resize(img_edges, (self.width, self.height))
+
+                    # Blend with existing frame (weighted blend for the edge detection overlay)
+                    frame_array = cv2.addWeighted(
+                        frame_array.astype(np.uint8),
+                        0.7,
+                        img_edges.astype(np.uint8),
+                        0.3,
+                        0,
+                    )
+
+            except Exception as e:
+                # If we can't get a frame from this track, continue with others
+                logger.debug(f"Could not get frame from remote track: {e}")
+                continue
+
         # Calculate velocity components based on current speed
         dx, dy = self._calculate_velocity_components()
 
@@ -170,6 +224,7 @@ class AnimatedVideoTrack(MediaStreamTrack):
 
         # Trigger bounce sound if a bounce occurred
         if bounce_occurred and self.audio_track:
+            logger.info("Video: Bounce detected, triggering audio event")
             self.audio_track.add_bounce_event("bounce")
 
         # Keep ball in bounds
@@ -208,7 +263,7 @@ class AnimatedVideoTrack(MediaStreamTrack):
         )
 
         # Convert to VideoFrame
-        frame = VideoFrame.from_ndarray(frame_array, format="bgr24")
+        frame = VideoFrame.from_ndarray(frame_array.astype(np.uint8), format="bgr24")
         frame.pts = pts
         frame.time_base = fractions.Fraction(time_base).limit_denominator(1000000)
 
@@ -217,84 +272,93 @@
 
 
 class SyntheticAudioTrack(MediaStreamTrack):
-    """
-    Synthetic audio track that generates audio including bounce sounds.
-    Originally a silent audio track, now enhanced to generate synthetic audio effects.
- """ - kind = "audio" def __init__(self): super().__init__() self.sample_rate = 48000 - self.samples_per_frame = 960 # 20ms at 48kHz - self.bounce_queue: list[BounceEvent] = [] # Queue of bounce events to process - self.bounce_duration = 0.1 # 100ms bounce sound duration - self.bounce_amplitude = 0.3 # Amplitude of bounce sound + self.samples_per_frame = 960 + self._samples_generated = 0 + self._active_bounces: list[BounceEvent] = [] # List of active bounce events def add_bounce_event(self, bounce_type: str = "bounce"): - """Add a bounce event to the audio queue""" - current_time = time.time() - self.bounce_queue.append( - { - "type": bounce_type, - "start_time": current_time, - "end_time": current_time + self.bounce_duration, - } + """Add a bounce event""" + bounce_duration_samples = int(0.2 * self.sample_rate) # 200ms + + # Add new bounce to the list (they can overlap) + bounce_event: BounceEvent = { + "start_sample": self._samples_generated, + "end_sample": self._samples_generated + bounce_duration_samples, + "type": bounce_type, + } + + self._active_bounces.append(bounce_event) + logger.info( + f"Bounce event added - start: {bounce_event['start_sample']}, end: {bounce_event['end_sample']}" ) - def _generate_bounce_sound(self, t: float) -> float: - """Generate a simple bounce sound using a decaying sine wave""" - # Simple bounce sound: combination of two frequencies with decay - freq1 = 800 # Primary frequency - freq2 = 1200 # Secondary frequency - decay = np.exp(-t * 10) # Exponential decay + def _generate_bounce_sample(self, t: float) -> float: + """Generate a single bounce sample at time t""" + if t < 0 or t > 0.2: + return 0.0 - sound = ( - np.sin(2 * np.pi * freq1 * t) * 0.7 + np.sin(2 * np.pi * freq2 * t) * 0.3 - ) * decay - return sound * self.bounce_amplitude + # Simple decay envelope + decay = np.exp(-t * 10) - async def next_timestamp(self): - """Returns (pts, time_base) for 20ms audio frames at 48kHz""" - pts = int(time.time() * self.sample_rate) + # Clear, simple tone + freq = 400 + sound = np.sin(2 * np.pi * freq * t) * decay + + return sound * 0.9 + + async def next_timestamp(self) -> tuple[int, float]: + pts = self._samples_generated time_base = 1 / self.sample_rate return pts, time_base async def recv(self): - """Generate audio frames with bounce sounds""" pts, time_base = await self.next_timestamp() - current_time = time.time() - - # Create audio data samples = np.zeros((self.samples_per_frame,), dtype=np.float32) - # Check for active bounce events and generate sounds - active_bounces: list[BounceEvent] = [] - for bounce in self.bounce_queue: - if current_time < bounce["end_time"]: - # Calculate time within the bounce sound - t = current_time - bounce["start_time"] - if t >= 0: - # Generate bounce sound for this time frame - for i in range(self.samples_per_frame): - sample_time = t + (i / self.sample_rate) - if sample_time <= self.bounce_duration: - samples[i] += self._generate_bounce_sound(sample_time) - active_bounces.append(bounce) + # Generate samples for this frame + active_bounce_count = 0 + for i in range(self.samples_per_frame): + current_sample = self._samples_generated + i + sample_value = 0.0 - # Keep only active bounces - self.bounce_queue = active_bounces + # Check all active bounces for this sample + for bounce in self._active_bounces: + if bounce["start_sample"] <= current_sample < bounce["end_sample"]: + # Calculate time within this bounce + sample_offset = current_sample - bounce["start_sample"] + t = sample_offset / self.sample_rate - # Clamp samples 
+                    # Add this bounce's contribution
+                    sample_value += self._generate_bounce_sample(t)
+                    active_bounce_count += 1
+
+            samples[i] = sample_value
+
+        # Clean up expired bounces
+        self._active_bounces = [
+            bounce
+            for bounce in self._active_bounces
+            if bounce["end_sample"] > self._samples_generated + self.samples_per_frame
+        ]
+
+        if active_bounce_count > 0:
+            logger.info(
+                f"Generated audio with {len(self._active_bounces)} active bounces"
+            )
+
+        self._samples_generated += self.samples_per_frame
+
+        # Convert to an interleaved stereo s16 frame (the mono signal is duplicated to both channels)
         samples = np.clip(samples, -1.0, 1.0)
-
-        # Convert to s16 format (required by Opus encoder)
         samples_s16 = (samples * 32767).astype(np.int16)
 
-        # Convert to AudioFrame
         frame = AudioFrame.from_ndarray(
-            samples_s16.reshape(1, -1), format="s16", layout="mono"
+            np.repeat(samples_s16, 2).reshape(1, -1), format="s16", layout="stereo"
         )
         frame.sample_rate = self.sample_rate
         frame.pts = pts
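
The audio rewrite above replaces wall-clock timing with a monotonic sample counter: add_bounce_event() records a [start_sample, end_sample) window and recv() mixes every window that overlaps the current 960-sample frame, so overlapping bounces simply sum. A minimal standalone sketch of that mixing model, using the same 48 kHz rate, 20 ms frames, and 200 ms decaying 400 Hz tone as the patch (the names events, frames, and bounce_sample below are illustrative and not part of the patch):

import numpy as np

SAMPLE_RATE = 48000
FRAME = 960                              # 20 ms at 48 kHz
BOUNCE_SAMPLES = int(0.2 * SAMPLE_RATE)  # 200 ms bounce window


def bounce_sample(t: float) -> float:
    # 400 Hz tone with exponential decay, mirroring _generate_bounce_sample()
    return float(np.sin(2 * np.pi * 400 * t) * np.exp(-t * 10) * 0.9)


events = [(0, BOUNCE_SAMPLES), (480, 480 + BOUNCE_SAMPLES)]  # two overlapping bounces
frames: list[np.ndarray] = []
generated = 0
for _ in range(12):  # 12 frames = 240 ms, enough to cover both windows
    buf = np.zeros(FRAME, dtype=np.float32)
    for i in range(FRAME):
        s = generated + i
        for start, end in events:
            if start <= s < end:
                buf[i] += bounce_sample((s - start) / SAMPLE_RATE)
    frames.append(np.clip(buf, -1.0, 1.0))
    generated += FRAME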