Added more effects to test things out.

James Ketr 2025-09-01 18:55:35 -07:00
parent 35dd49e4ac
commit fb0ce3f203
2 changed files with 150 additions and 66 deletions

View File

@@ -58,7 +58,7 @@ from aiortc import (
     MediaStreamTrack,
 )
 from logger import logger
-from synthetic_media import create_synthetic_tracks
+from synthetic_media import create_synthetic_tracks, AnimatedVideoTrack

 # import debug_aioice
@@ -1065,9 +1065,29 @@ async def main():
     async def on_peer_removed(peer: Peer):
         print(f"Peer removed: {peer.peer_name}")
+        # Remove any video tracks from this peer from our synthetic video track
+        if "video" in client.local_tracks:
+            synthetic_video_track = client.local_tracks["video"]
+            if isinstance(synthetic_video_track, AnimatedVideoTrack):
+                # We need to identify and remove tracks from this specific peer
+                # Since we don't have a direct mapping, we'll need to track this differently
+                # For now, this is a placeholder - we might need to enhance the peer tracking
+                logger.info(
+                    f"Peer {peer.peer_name} removed - may need to clean up video tracks"
+                )
+
     async def on_track_received(peer: Peer, track: MediaStreamTrack):
         print(f"Received {track.kind} track from {peer.peer_name}")
+        # If it's a video track, attach it to our synthetic video track for edge detection
+        if track.kind == "video" and "video" in client.local_tracks:
+            synthetic_video_track = client.local_tracks["video"]
+            if isinstance(synthetic_video_track, AnimatedVideoTrack):
+                synthetic_video_track.add_remote_video_track(track)
+                logger.info(
+                    f"Attached remote video track from {peer.peer_name} to synthetic video track"
+                )
+
     client.on_peer_added = on_peer_added
     client.on_peer_removed = on_peer_removed
     client.on_track_received = on_track_received
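
The on_peer_removed handler above is still a placeholder because received tracks are not mapped back to the peer they came from. A minimal sketch of one way to close that gap, assuming the same main()-scoped client and handlers as in this diff; the peer_video_tracks dict is hypothetical and not part of this commit:

    peer_video_tracks: dict[str, list[MediaStreamTrack]] = {}  # hypothetical mapping

    async def on_track_received(peer: Peer, track: MediaStreamTrack):
        print(f"Received {track.kind} track from {peer.peer_name}")
        if track.kind == "video" and "video" in client.local_tracks:
            synthetic_video_track = client.local_tracks["video"]
            if isinstance(synthetic_video_track, AnimatedVideoTrack):
                synthetic_video_track.add_remote_video_track(track)
                # Remember which peer the track came from so it can be detached later
                peer_video_tracks.setdefault(peer.peer_name, []).append(track)

    async def on_peer_removed(peer: Peer):
        print(f"Peer removed: {peer.peer_name}")
        if "video" in client.local_tracks:
            synthetic_video_track = client.local_tracks["video"]
            if isinstance(synthetic_video_track, AnimatedVideoTrack):
                # Detach every track recorded for this peer
                for track in peer_video_tracks.pop(peer.peer_name, []):
                    synthetic_video_track.remove_remote_video_track(track)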

View File

@@ -9,23 +9,26 @@
 import numpy as np
 import cv2
 import fractions
 import time
+import random
 from typing import TypedDict
 from aiortc import MediaStreamTrack
 from av import VideoFrame, AudioFrame
+from logger import logger


 class BounceEvent(TypedDict):
     """Type definition for bounce events"""

     type: str
-    start_time: float
-    end_time: float
+    start_sample: int
+    end_sample: int


 class AnimatedVideoTrack(MediaStreamTrack):
     """
     Synthetic video track that generates animated content with a bouncing ball.
-    Ported from JavaScript createAnimatedVideoTrack function.
+    Can also composite remote video tracks with edge detection overlay.
+    Remote video tracks are processed through Canny edge detection and blended
+    with the synthetic ball animation.
     """

     kind = "video"
@@ -42,6 +45,9 @@ class AnimatedVideoTrack(MediaStreamTrack):
         self.height = height
         self.name = name
         self.audio_track = audio_track  # Reference to the audio track
+        self.remote_video_tracks: list[
+            MediaStreamTrack
+        ] = []  # Store remote video tracks

         # Generate color from name hash (similar to JavaScript nameToColor)
         self.ball_color = (
@@ -49,13 +55,18 @@ class AnimatedVideoTrack(MediaStreamTrack):
         )  # Default green

         # Ball properties
+        ball_radius = min(width, height) * 0.06
         self.ball = {
-            "x": width / 2,
-            "y": height / 2,
-            "radius": min(width, height) * 0.06,
+            "x": random.uniform(ball_radius, width - ball_radius),
+            "y": random.uniform(ball_radius, height - ball_radius),
+            "radius": ball_radius,
             "speed_mps": 0.5,  # Speed in meters per second (frame width = 1 meter)
-            "direction_x": 1.0,  # Direction vector x component (-1 to 1)
-            "direction_y": 0.6,  # Direction vector y component (-1 to 1)
+            "direction_x": random.uniform(
+                -1.0, 1.0
+            ),  # Random direction x component (-1 to 1)
+            "direction_y": random.uniform(
+                -1.0, 1.0
+            ),  # Random direction y component (-1 to 1)
         }

         self.frame_count = 0
@@ -67,6 +78,18 @@ class AnimatedVideoTrack(MediaStreamTrack):
         """Set the ball speed in meters per second"""
         self.ball["speed_mps"] = speed_mps

+    def add_remote_video_track(self, track: MediaStreamTrack):
+        """Add a remote video track to be composited with edge detection"""
+        if track.kind == "video":
+            self.remote_video_tracks.append(track)
+            logger.info(f"Added remote video track: {track}")
+
+    def remove_remote_video_track(self, track: MediaStreamTrack):
+        """Remove a remote video track"""
+        if track in self.remote_video_tracks:
+            self.remote_video_tracks.remove(track)
+            logger.info(f"Removed remote video track: {track}")
+
     def _calculate_velocity_components(self) -> tuple[float, float]:
         """
         Calculate dx and dy velocity components based on speed in meters per second.
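
The body of _calculate_velocity_components is not touched by this commit, so it is not shown here. As a rough sketch of what the "meters per second" convention implies (frame width = 1 meter), assuming a fixed frame rate of 15 fps, which is a guess and not taken from this diff:

    import math

    def calculate_velocity_components(
        ball: dict, width: int, fps: float = 15.0
    ) -> tuple[float, float]:
        """Convert speed in meters per second into per-frame pixel offsets."""
        # Frame width is defined as 1 meter, so meters/second maps to pixels/second
        pixels_per_frame = ball["speed_mps"] * width / fps
        # Normalize the (possibly random) direction vector so speed stays constant
        length = math.hypot(ball["direction_x"], ball["direction_y"]) or 1.0
        return (
            pixels_per_frame * ball["direction_x"] / length,
            pixels_per_frame * ball["direction_y"] / length,
        )

    # Example: 0.5 m/s in a 640-pixel-wide frame at 15 fps
    ball = {"speed_mps": 0.5, "direction_x": 1.0, "direction_y": 0.6}
    print(calculate_velocity_components(ball, width=640))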
@@ -151,6 +174,37 @@ class AnimatedVideoTrack(MediaStreamTrack):

         # Create black background
         frame_array = np.zeros((self.height, self.width, 3), dtype=np.uint8)

+        # Process remote video tracks with edge detection
+        for track in self.remote_video_tracks:
+            try:
+                # Get the latest frame from the remote track
+                remote_frame = await track.recv()
+                if remote_frame and isinstance(remote_frame, VideoFrame):
+                    # Convert to numpy array
+                    img: np.ndarray = remote_frame.to_ndarray(format="bgr24")
+
+                    # Apply edge detection
+                    edges = cv2.Canny(img, 100, 200)
+                    img_edges = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)
+
+                    # Resize to match our canvas size if needed
+                    if img_edges.shape[:2] != (self.height, self.width):
+                        img_edges = cv2.resize(img_edges, (self.width, self.height))
+
+                    # Blend with existing frame (additive blend for edge detection overlay)
+                    frame_array = cv2.addWeighted(
+                        frame_array.astype(np.uint8),
+                        0.7,
+                        img_edges.astype(np.uint8),
+                        0.3,
+                        0,
+                    )
+            except Exception as e:
+                # If we can't get a frame from this track, continue with others
+                logger.debug(f"Could not get frame from remote track: {e}")
+                continue
+
         # Calculate velocity components based on current speed
         dx, dy = self._calculate_velocity_components()
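
The compositing above can be exercised outside aiortc. A standalone sketch of the same Canny-plus-addWeighted blend, using random noise as a stand-in for the remote frame (the 0.7/0.3 weights and 100/200 thresholds mirror the hunk above):

    import numpy as np
    import cv2

    height, width = 240, 320
    canvas = np.zeros((height, width, 3), dtype=np.uint8)  # black background
    remote = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # stand-in frame

    edges = cv2.Canny(remote, 100, 200)                  # single-channel edge map
    edges_bgr = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)
    if edges_bgr.shape[:2] != (height, width):
        edges_bgr = cv2.resize(edges_bgr, (width, height))

    # 70% existing canvas, 30% edge overlay - same blend as the video track
    composite = cv2.addWeighted(canvas, 0.7, edges_bgr, 0.3, 0)
    print(composite.shape, composite.dtype)  # (240, 320, 3) uint8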
@@ -170,6 +224,7 @@ class AnimatedVideoTrack(MediaStreamTrack):

         # Trigger bounce sound if a bounce occurred
         if bounce_occurred and self.audio_track:
+            logger.info("Video: Bounce detected, triggering audio event")
             self.audio_track.add_bounce_event("bounce")

         # Keep ball in bounds
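
For context, the audio_track reference used here is handed to the video track when the pair is created. create_synthetic_tracks itself is outside this diff; a hypothetical sketch of the wiring, with the keyword arguments inferred from the attributes assigned in __init__ above and the 320x240 size guessed:

    def create_synthetic_tracks(name: str) -> dict[str, MediaStreamTrack]:
        # Hypothetical reconstruction - the real function is not shown in this commit
        audio_track = SyntheticAudioTrack()
        video_track = AnimatedVideoTrack(
            width=320, height=240, name=name, audio_track=audio_track
        )
        # On each wall hit the video track calls audio_track.add_bounce_event("bounce")
        return {"audio": audio_track, "video": video_track}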
@@ -208,7 +263,7 @@ class AnimatedVideoTrack(MediaStreamTrack):
         )

         # Convert to VideoFrame
-        frame = VideoFrame.from_ndarray(frame_array, format="bgr24")
+        frame = VideoFrame.from_ndarray(frame_array.astype(np.uint8), format="bgr24")
         frame.pts = pts
         frame.time_base = fractions.Fraction(time_base).limit_denominator(1000000)
@@ -217,84 +272,93 @@
 class SyntheticAudioTrack(MediaStreamTrack):
+    """
+    Synthetic audio track that generates audio including bounce sounds.
+    Originally a silent audio track, now enhanced to generate synthetic audio effects.
+    """
+
     kind = "audio"

     def __init__(self):
         super().__init__()
         self.sample_rate = 48000
-        self.samples_per_frame = 960  # 20ms at 48kHz
-        self.bounce_queue: list[BounceEvent] = []  # Queue of bounce events to process
-        self.bounce_duration = 0.1  # 100ms bounce sound duration
-        self.bounce_amplitude = 0.3  # Amplitude of bounce sound
+        self.samples_per_frame = 960
+        self._samples_generated = 0
+        self._active_bounces: list[BounceEvent] = []  # List of active bounce events

     def add_bounce_event(self, bounce_type: str = "bounce"):
-        """Add a bounce event to the audio queue"""
-        current_time = time.time()
-        self.bounce_queue.append(
-            {
-                "type": bounce_type,
-                "start_time": current_time,
-                "end_time": current_time + self.bounce_duration,
-            }
-        )
+        """Add a bounce event"""
+        bounce_duration_samples = int(0.2 * self.sample_rate)  # 200ms
+
+        # Add new bounce to the list (they can overlap)
+        bounce_event: BounceEvent = {
+            "start_sample": self._samples_generated,
+            "end_sample": self._samples_generated + bounce_duration_samples,
+            "type": bounce_type,
+        }
+        self._active_bounces.append(bounce_event)
+        logger.info(
+            f"Bounce event added - start: {bounce_event['start_sample']}, end: {bounce_event['end_sample']}"
+        )

-    def _generate_bounce_sound(self, t: float) -> float:
-        """Generate a simple bounce sound using a decaying sine wave"""
-        # Simple bounce sound: combination of two frequencies with decay
-        freq1 = 800  # Primary frequency
-        freq2 = 1200  # Secondary frequency
-        decay = np.exp(-t * 10)  # Exponential decay
-        sound = (
-            np.sin(2 * np.pi * freq1 * t) * 0.7 + np.sin(2 * np.pi * freq2 * t) * 0.3
-        ) * decay
-        return sound * self.bounce_amplitude
+    def _generate_bounce_sample(self, t: float) -> float:
+        """Generate a single bounce sample at time t"""
+        if t < 0 or t > 0.2:
+            return 0.0
+
+        # Simple decay envelope
+        decay = np.exp(-t * 10)
+
+        # Clear, simple tone
+        freq = 400
+        sound = np.sin(2 * np.pi * freq * t) * decay
+        return sound * 0.9

-    async def next_timestamp(self):
-        """Returns (pts, time_base) for 20ms audio frames at 48kHz"""
-        pts = int(time.time() * self.sample_rate)
+    async def next_timestamp(self) -> tuple[int, float]:
+        pts = self._samples_generated
         time_base = 1 / self.sample_rate
         return pts, time_base

     async def recv(self):
-        """Generate audio frames with bounce sounds"""
         pts, time_base = await self.next_timestamp()
-        current_time = time.time()
-        # Create audio data
         samples = np.zeros((self.samples_per_frame,), dtype=np.float32)

-        # Check for active bounce events and generate sounds
-        active_bounces: list[BounceEvent] = []
-        for bounce in self.bounce_queue:
-            if current_time < bounce["end_time"]:
-                # Calculate time within the bounce sound
-                t = current_time - bounce["start_time"]
-                if t >= 0:
-                    # Generate bounce sound for this time frame
-                    for i in range(self.samples_per_frame):
-                        sample_time = t + (i / self.sample_rate)
-                        if sample_time <= self.bounce_duration:
-                            samples[i] += self._generate_bounce_sound(sample_time)
-                active_bounces.append(bounce)
-
-        # Keep only active bounces
-        self.bounce_queue = active_bounces
-
-        # Clamp samples to prevent distortion
+        # Generate samples for this frame
+        active_bounce_count = 0
+        for i in range(self.samples_per_frame):
+            current_sample = self._samples_generated + i
+            sample_value = 0.0
+
+            # Check all active bounces for this sample
+            for bounce in self._active_bounces:
+                if bounce["start_sample"] <= current_sample < bounce["end_sample"]:
+                    # Calculate time within this bounce
+                    sample_offset = current_sample - bounce["start_sample"]
+                    t = sample_offset / self.sample_rate
+
+                    # Add this bounce's contribution
+                    sample_value += self._generate_bounce_sample(t)
+                    active_bounce_count += 1
+
+            samples[i] = sample_value
+
+        # Clean up expired bounces
+        self._active_bounces: list[BounceEvent] = [
+            bounce
+            for bounce in self._active_bounces
+            if bounce["end_sample"] > self._samples_generated + self.samples_per_frame
+        ]
+
+        if active_bounce_count > 0:
+            logger.info(
+                f"Generated audio with {len(self._active_bounces)} active bounces"
+            )
+
+        self._samples_generated += self.samples_per_frame
+
+        # Convert to audio frame
         samples = np.clip(samples, -1.0, 1.0)
-        # Convert to s16 format (required by Opus encoder)
         samples_s16 = (samples * 32767).astype(np.int16)
-        # Convert to AudioFrame
         frame = AudioFrame.from_ndarray(
-            samples_s16.reshape(1, -1), format="s16", layout="mono"
+            samples_s16.reshape(1, -1), format="s16", layout="stereo"
        )
         frame.sample_rate = self.sample_rate
         frame.pts = pts
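
The rewritten bounce tone can be sanity-checked without aiortc: the sketch below regenerates the 400 Hz, exp(-10t) decaying sine from _generate_bounce_sample and prints the peak level of each 20 ms (960-sample) frame over the 200 ms bounce window.

    import numpy as np

    sample_rate = 48000
    samples_per_frame = 960
    bounce_samples = int(0.2 * sample_rate)  # 200 ms, as in add_bounce_event

    t = np.arange(bounce_samples) / sample_rate
    tone = np.sin(2 * np.pi * 400 * t) * np.exp(-t * 10) * 0.9

    for frame_idx in range(bounce_samples // samples_per_frame):
        frame = tone[frame_idx * samples_per_frame:(frame_idx + 1) * samples_per_frame]
        print(f"frame {frame_idx:2d}: peak {np.max(np.abs(frame)):.3f}")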