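Added more effects to test things out: remote video edge-detection overlay, randomized ball start position and direction, and sample-counted bounce audio.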

2025-09-01 18:55:35 -07:00 · 2025-09-01 18:55:35 -07:00 · fb0ce3f203
commit fb0ce3f203
parent 35dd49e4ac
2 changed files with 150 additions and 66 deletions
--- a/voicebot/main.py
+++ b/voicebot/main.py
@ -58,7 +58,7 @@ from aiortc import (
    MediaStreamTrack,
 )
 from logger import logger
-from synthetic_media import create_synthetic_tracks
+from synthetic_media import create_synthetic_tracks, AnimatedVideoTrack

 # import debug_aioice

@ -1065,9 +1065,29 @@ async def main():
    async def on_peer_removed(peer: Peer):
+        """Log a peer's departure; remote-track cleanup is not implemented yet."""
        print(f"Peer removed: {peer.peer_name}")

+        # Only an AnimatedVideoTrack published as our local "video" track
+        # keeps a list of remote tracks, so bail out early otherwise.
+        if "video" not in client.local_tracks:
+            return
+        local_video = client.local_tracks["video"]
+        if not isinstance(local_video, AnimatedVideoTrack):
+            return
+
+        # Placeholder: there is no peer -> track mapping yet, so nothing is
+        # detached here; we only record that cleanup may be required.
+        logger.info(
+            f"Peer {peer.peer_name} removed - may need to clean up video tracks"
+        )
+
    async def on_track_received(peer: Peer, track: MediaStreamTrack):
+        """Announce an incoming track and feed remote video into the local synthetic track."""
        print(f"Received {track.kind} track from {peer.peer_name}")

+        # Nothing to attach unless this is video and we publish a local video track
+        if track.kind != "video" or "video" not in client.local_tracks:
+            return
+        local_video = client.local_tracks["video"]
+        if not isinstance(local_video, AnimatedVideoTrack):
+            return
+
+        # Hand the remote track over so it is composited via edge detection
+        local_video.add_remote_video_track(track)
+        logger.info(
+            f"Attached remote video track from {peer.peer_name} to synthetic video track"
+        )
+
    client.on_peer_added = on_peer_added
    client.on_peer_removed = on_peer_removed
    client.on_track_received = on_track_received
--- a/voicebot/synthetic_media.py
+++ b/voicebot/synthetic_media.py
@ -9,23 +9,26 @@ import numpy as np
 import cv2
 import fractions
 import time
+import random
 from typing import TypedDict
 from aiortc import MediaStreamTrack
 from av import VideoFrame, AudioFrame
-
+from logger import logger

 class BounceEvent(TypedDict):
    """Type definition for bounce events"""
-
+    # Label supplied by the caller of add_bounce_event (e.g. "bounce")
    type: str
-    start_time: float
-    end_time: float
+    # Index of the first audio sample (inclusive) covered by the bounce sound
+    start_sample: int
+    # Index of the audio sample (exclusive) at which the bounce sound ends
+    end_sample: int


 class AnimatedVideoTrack(MediaStreamTrack):
    """
    Synthetic video track that generates animated content with a bouncing ball.
-    Ported from JavaScript createAnimatedVideoTrack function.
+
+    Can also composite remote video tracks with edge detection overlay.
+    Remote video tracks are processed through Canny edge detection and blended
+    with the synthetic ball animation.
    """

    kind = "video"
@ -42,6 +45,9 @@ class AnimatedVideoTrack(MediaStreamTrack):
        self.height = height
        self.name = name
        self.audio_track = audio_track  # Reference to the audio track
+        self.remote_video_tracks: list[
+            MediaStreamTrack
+        ] = []  # Store remote video tracks

        # Generate color from name hash (similar to JavaScript nameToColor)
        self.ball_color = (
@ -49,13 +55,18 @@ class AnimatedVideoTrack(MediaStreamTrack):
        )  # Default green

        # Ball properties
+        ball_radius = min(width, height) * 0.06
        self.ball = {
-            "x": width / 2,
-            "y": height / 2,
-            "radius": min(width, height) * 0.06,
+            "x": random.uniform(ball_radius, width - ball_radius),
+            "y": random.uniform(ball_radius, height - ball_radius),
+            "radius": ball_radius,
            "speed_mps": 0.5,  # Speed in meters per second (frame width = 1 meter)
-            "direction_x": 1.0,  # Direction vector x component (-1 to 1)
-            "direction_y": 0.6,  # Direction vector y component (-1 to 1)
+            "direction_x": random.uniform(
+                -1.0, 1.0
+            ),  # Random direction x component (-1 to 1)
+            "direction_y": random.uniform(
+                -1.0, 1.0
+            ),  # Random direction y component (-1 to 1)
        }

        self.frame_count = 0
@ -67,6 +78,18 @@ class AnimatedVideoTrack(MediaStreamTrack):
        """Set the ball speed in meters per second"""
        self.ball["speed_mps"] = speed_mps

+    def add_remote_video_track(self, track: MediaStreamTrack):
+        """
+        Register a remote video track to be composited with edge detection.
+
+        Non-video tracks are ignored. A track that is already registered is
+        not added again: remove_remote_video_track() drops only one entry,
+        so a duplicate would survive removal and would also be awaited
+        twice for every generated frame.
+
+        Args:
+            track: Remote track whose frames should be overlaid.
+        """
+        if track.kind != "video":
+            logger.debug(f"Ignoring non-video remote track: {track}")
+            return
+        if track in self.remote_video_tracks:
+            logger.debug(f"Remote video track already added: {track}")
+            return
+        self.remote_video_tracks.append(track)
+        logger.info(f"Added remote video track: {track}")
+
+    def remove_remote_video_track(self, track: MediaStreamTrack):
+        """
+        Stop compositing a previously registered remote video track.
+
+        Unknown tracks are ignored silently.
+        """
+        try:
+            self.remote_video_tracks.remove(track)
+        except ValueError:
+            # The track was never registered, so there is nothing to undo
+            return
+        logger.info(f"Removed remote video track: {track}")
+
    def _calculate_velocity_components(self) -> tuple[float, float]:
        """
        Calculate dx and dy velocity components based on speed in meters per second.
@ -151,6 +174,37 @@ class AnimatedVideoTrack(MediaStreamTrack):
        # Create black background
        frame_array = np.zeros((self.height, self.width, 3), dtype=np.uint8)

+        # Process remote video tracks with edge detection
+        for track in self.remote_video_tracks:
+            try:
+                # Wait for the next frame from the remote track (recv() blocks until one arrives)
+                remote_frame = await track.recv()
+                if remote_frame and isinstance(remote_frame, VideoFrame):
+                    # Convert to numpy array
+                    img: np.ndarray = remote_frame.to_ndarray(format="bgr24")
+
+                    # Apply edge detection
+                    edges = cv2.Canny(img, 100, 200)
+                    img_edges = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)
+
+                    # Resize to match our canvas size if needed
+                    if img_edges.shape[:2] != (self.height, self.width):
+                        img_edges = cv2.resize(img_edges, (self.width, self.height))
+
+                    # Blend with existing frame (additive blend for edge detection overlay)
+                    frame_array = cv2.addWeighted(
+                        frame_array.astype(np.uint8),
+                        0.7,
+                        img_edges.astype(np.uint8),
+                        0.3,
+                        0,
+                    )
+
+            except Exception as e:
+                # If we can't get a frame from this track, continue with others
+                logger.debug(f"Could not get frame from remote track: {e}")
+                continue
+
        # Calculate velocity components based on current speed
        dx, dy = self._calculate_velocity_components()

@ -170,6 +224,7 @@ class AnimatedVideoTrack(MediaStreamTrack):

        # Trigger bounce sound if a bounce occurred
        if bounce_occurred and self.audio_track:
+            logger.info("Video: Bounce detected, triggering audio event")
            self.audio_track.add_bounce_event("bounce")

        # Keep ball in bounds
@ -208,7 +263,7 @@ class AnimatedVideoTrack(MediaStreamTrack):
        )

        # Convert to VideoFrame
-        frame = VideoFrame.from_ndarray(frame_array, format="bgr24")
+        frame = VideoFrame.from_ndarray(frame_array.astype(np.uint8), format="bgr24")
        frame.pts = pts
        frame.time_base = fractions.Fraction(time_base).limit_denominator(1000000)

@ -217,84 +272,93 @@ class AnimatedVideoTrack(MediaStreamTrack):


 class SyntheticAudioTrack(MediaStreamTrack):
-    """
-    Synthetic audio track that generates audio including bounce sounds.
-    Originally a silent audio track, now enhanced to generate synthetic audio effects.
-    """
-
    kind = "audio"

    def __init__(self):
+        """Set up the audio format and an empty bounce schedule."""
        super().__init__()
        self.sample_rate = 48000
-        self.samples_per_frame = 960  # 20ms at 48kHz
-        self.bounce_queue: list[BounceEvent] = []  # Queue of bounce events to process
-        self.bounce_duration = 0.1  # 100ms bounce sound duration
-        self.bounce_amplitude = 0.3  # Amplitude of bounce sound
+        # 960 samples at 48 kHz is 20 ms of audio per frame
+        self.samples_per_frame = 960
+        # Running count of samples produced so far; next_timestamp() reports
+        # it as the pts and bounce events are scheduled relative to it
+        self._samples_generated = 0
+        self._active_bounces: list[BounceEvent] = []  # List of active bounce events

    def add_bounce_event(self, bounce_type: str = "bounce"):
-        """Add a bounce event to the audio queue"""
-        current_time = time.time()
-        self.bounce_queue.append(
-            {
-                "type": bounce_type,
-                "start_time": current_time,
-                "end_time": current_time + self.bounce_duration,
-            }
+        """
+        Schedule a bounce sound on the audio sample timeline.
+
+        The event starts at the current sample counter, i.e. at the first
+        sample of the next frame recv() generates, and lasts 200 ms.
+        Events are appended without replacing earlier ones, so overlapping
+        bounces are summed when a frame is rendered.
+
+        Args:
+            bounce_type: Label stored in the event's "type" field.
+        """
+        # Keep this duration in step with the 0.2 s cutoff used by
+        # _generate_bounce_sample
+        bounce_duration_samples = int(0.2 * self.sample_rate)  # 200ms
+
+        # Add new bounce to the list (they can overlap)
+        bounce_event: BounceEvent = {
+            "start_sample": self._samples_generated,
+            "end_sample": self._samples_generated + bounce_duration_samples,
+            "type": bounce_type,
+        }
+
+        self._active_bounces.append(bounce_event)
+        logger.info(
+            f"Bounce event added - start: {bounce_event['start_sample']}, end: {bounce_event['end_sample']}"
        )

-    def _generate_bounce_sound(self, t: float) -> float:
-        """Generate a simple bounce sound using a decaying sine wave"""
-        # Simple bounce sound: combination of two frequencies with decay
-        freq1 = 800  # Primary frequency
-        freq2 = 1200  # Secondary frequency
-        decay = np.exp(-t * 10)  # Exponential decay
+    def _generate_bounce_sample(self, t: float) -> float:
+        """
+        Generate a single bounce sample at time t.
+
+        Args:
+            t: Seconds elapsed since the bounce started.
+
+        Returns:
+            0.0 when t lies outside the 0..0.2 s window, otherwise a 400 Hz
+            sine scaled by an exponential decay and a 0.9 gain (a NumPy
+            float scalar in that case).
+        """
+        # Outside the 200 ms bounce window the sound contributes nothing
+        if t < 0 or t > 0.2:
+            return 0.0

-        sound = (
-            np.sin(2 * np.pi * freq1 * t) * 0.7 + np.sin(2 * np.pi * freq2 * t) * 0.3
-        ) * decay
-        return sound * self.bounce_amplitude
+        # Simple decay envelope
+        decay = np.exp(-t * 10)

-    async def next_timestamp(self):
-        """Returns (pts, time_base) for 20ms audio frames at 48kHz"""
-        pts = int(time.time() * self.sample_rate)
+        # Clear, simple tone
+        freq = 400
+        sound = np.sin(2 * np.pi * freq * t) * decay
+
+        # 0.9 gain keeps a single bounce just below full scale
+        return sound * 0.9
+
+    async def next_timestamp(self) -> tuple[int, float]:
+        """Return (pts, time_base): pts is the running sample count, time_base is one sample period in seconds."""
+        # Sample-counted pts rather than wall-clock time
+        pts = self._samples_generated
        time_base = 1 / self.sample_rate
        return pts, time_base

    async def recv(self):
-        """Generate audio frames with bounce sounds"""
        pts, time_base = await self.next_timestamp()
-        current_time = time.time()
-
-        # Create audio data
        samples = np.zeros((self.samples_per_frame,), dtype=np.float32)

-        # Check for active bounce events and generate sounds
-        active_bounces: list[BounceEvent] = []
-        for bounce in self.bounce_queue:
-            if current_time < bounce["end_time"]:
-                # Calculate time within the bounce sound
-                t = current_time - bounce["start_time"]
-                if t >= 0:
-                    # Generate bounce sound for this time frame
-                    for i in range(self.samples_per_frame):
-                        sample_time = t + (i / self.sample_rate)
-                        if sample_time <= self.bounce_duration:
-                            samples[i] += self._generate_bounce_sound(sample_time)
-                active_bounces.append(bounce)
+        # Generate samples for this frame
+        active_bounce_count = 0
+        for i in range(self.samples_per_frame):
+            current_sample = self._samples_generated + i
+            sample_value = 0.0

-        # Keep only active bounces
-        self.bounce_queue = active_bounces
+            # Check all active bounces for this sample
+            for bounce in self._active_bounces:
+                if bounce["start_sample"] <= current_sample < bounce["end_sample"]:
+                    # Calculate time within this bounce
+                    sample_offset = current_sample - bounce["start_sample"]
+                    t = sample_offset / self.sample_rate

-        # Clamp samples to prevent distortion
+                    # Add this bounce's contribution
+                    sample_value += self._generate_bounce_sample(t)
+                    active_bounce_count += 1
+
+            samples[i] = sample_value
+
+        # Clean up expired bounces
+        self._active_bounces: list[BounceEvent] = [
+            bounce
+            for bounce in self._active_bounces
+            if bounce["end_sample"] > self._samples_generated + self.samples_per_frame
+        ]
+
+        if active_bounce_count > 0:
+            logger.info(
+                f"Generated audio with {len(self._active_bounces)} active bounces"
+            )
+
+        self._samples_generated += self.samples_per_frame
+
+        # Convert to audio frame
        samples = np.clip(samples, -1.0, 1.0)
-
-        # Convert to s16 format (required by Opus encoder)
        samples_s16 = (samples * 32767).astype(np.int16)

-        # Convert to AudioFrame
        frame = AudioFrame.from_ndarray(
-            samples_s16.reshape(1, -1), format="s16", layout="mono"
+            samples_s16.reshape(1, -1), format="s16", layout="stereo"
        )
        frame.sample_rate = self.sample_rate
        frame.pts = pts