Added bouncing

James Ketr 2025-09-01 16:16:59 -07:00
parent e64edf92ca
commit 35dd49e4ac
3 changed files with 162 additions and 37 deletions

View File

@@ -1252,7 +1252,7 @@ const MediaControl: React.FC<MediaControlProps> = ({ isSelf, peer, className })
</div>
{isValid ? (
peer.attributes?.srcObject && (
<Box sx={{ position: 'relative' }}>
<Box sx={{ position: "relative" }}>
<Video
key={`video-${peer.session_id}-${peer.attributes.srcObject.id}`}
className="Video"
@@ -1262,19 +1262,13 @@ const MediaControl: React.FC<MediaControlProps> = ({ isSelf, peer, className })
local={peer.local}
muted={peer.local || muted} // Pass muted state
/>
<WebRTCStatus
isNegotiating={peer.isNegotiating || false}
connectionState={peer.connectionState}
/>
<WebRTCStatus isNegotiating={peer.isNegotiating || false} connectionState={peer.connectionState} />
</Box>
)
) : (
<Box sx={{ position: 'relative' }}>
<div className="placeholder">Waiting for media</div>
<WebRTCStatus
isNegotiating={peer.isNegotiating || false}
connectionState={peer.connectionState}
/>
<Box sx={{ position: "relative" }}>
<div className="Video">Waiting for media</div>
<WebRTCStatus isNegotiating={peer.isNegotiating || false} connectionState={peer.connectionState} />
</Box>
)}
{/* <Moveable

View File

@@ -22,7 +22,7 @@ export PATH="$VIRTUAL_ENV/bin:$PATH"
if [ "$PRODUCTION" != "true" ]; then
echo "Starting voicebot in development mode with auto-reload..."
# Fix: Use single --watch argument with multiple paths instead of multiple --watch arguments
python3 -u scripts/reload_runner.py --watch . /shared --verbose --interval 0.5 -- uv run main.py \
python3 -u scripts/reload_runner.py --delay-restart 3 --watch . /shared --verbose --interval 0.5 -- uv run main.py \
--insecure \
--server-url https://ketrenos.com/ai-voicebot \
--lobby default \
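(For context: reload_runner.py's internals are not part of this diff, so the sketch below only illustrates what a --delay-restart debounce typically does — hold off restarting for a few seconds so a burst of file-change events triggers one restart rather than many. All names here are hypothetical.)

import subprocess
import time

def run_with_delayed_restart(cmd: list[str], delay: float = 3.0) -> None:
    """Illustrative only: rerun cmd after it exits, sleeping `delay` seconds
    first so rapid successive change events collapse into a single restart."""
    while True:
        proc = subprocess.Popen(cmd)
        proc.wait()
        time.sleep(delay)  # the debounce window a --delay-restart flag would set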

View File

@@ -2,30 +2,46 @@
Synthetic Media Tracks Module
This module provides synthetic audio and video track creation for WebRTC media streaming.
Contains AnimatedVideoTrack and SilentAudioTrack implementations ported from JavaScript.
Contains AnimatedVideoTrack and SyntheticAudioTrack implementations ported from JavaScript.
"""
import numpy as np
import cv2
import fractions
import time
from typing import TypedDict
from aiortc import MediaStreamTrack
from av import VideoFrame, AudioFrame
class BounceEvent(TypedDict):
"""Type definition for bounce events"""
type: str
start_time: float
end_time: float
class AnimatedVideoTrack(MediaStreamTrack):
"""
Synthetic video track that generates animated content with a bouncing ball.
Ported from JavaScript createAnimatedVideoTrack function.
"""
kind = "video"
def __init__(self, width: int = 320, height: int = 240, name: str = ""):
def __init__(
self,
width: int = 320,
height: int = 240,
name: str = "",
audio_track: "SyntheticAudioTrack | None" = None,
):
super().__init__()
self.width = width
self.height = height
self.name = name
self.audio_track = audio_track # Reference to the audio track
# Generate color from name hash (similar to JavaScript nameToColor)
self.ball_color = (
@@ -37,12 +53,50 @@ class AnimatedVideoTrack(MediaStreamTrack):
"x": width / 2,
"y": height / 2,
"radius": min(width, height) * 0.06,
"dx": 3.0,
"dy": 2.0,
"speed_mps": 0.5, # Speed in meters per second (frame width = 1 meter)
"direction_x": 1.0, # Direction vector x component (-1 to 1)
"direction_y": 0.6, # Direction vector y component (-1 to 1)
}
self.frame_count = 0
self._start_time = time.time()
self._last_frame_time = time.time()
self.fps = 15 # Target frames per second
def set_ball_speed(self, speed_mps: float):
"""Set the ball speed in meters per second"""
self.ball["speed_mps"] = speed_mps
def _calculate_velocity_components(self) -> tuple[float, float]:
"""
Calculate dx and dy velocity components based on speed in meters per second.
Frame width represents 1 meter, so pixels per second = width * speed_mps
"""
# Calculate actual time delta since last frame
current_time = time.time()
dt = current_time - self._last_frame_time
self._last_frame_time = current_time
# Normalize direction vector to ensure consistent speed
dir_x = self.ball["direction_x"]
dir_y = self.ball["direction_y"]
magnitude = np.sqrt(dir_x * dir_x + dir_y * dir_y)
if magnitude > 0:
dir_x_norm = dir_x / magnitude
dir_y_norm = dir_y / magnitude
else:
dir_x_norm, dir_y_norm = 1.0, 0.0
# Convert meters per second to pixels per actual time delta
pixels_per_second = self.width * self.ball["speed_mps"]
pixels_this_frame = pixels_per_second * dt
# Apply normalized direction to get velocity components
dx = pixels_this_frame * dir_x_norm
dy = pixels_this_frame * dir_y_norm
return dx, dy
async def next_timestamp(self):
"""Returns (pts, time_base) for 15 FPS video"""
@@ -97,16 +151,26 @@ class AnimatedVideoTrack(MediaStreamTrack):
# Create black background
frame_array = np.zeros((self.height, self.width, 3), dtype=np.uint8)
# Calculate velocity components based on current speed
dx, dy = self._calculate_velocity_components()
# Update ball position
ball = self.ball
ball["x"] += ball["dx"]
ball["y"] += ball["dy"]
ball["x"] += dx
ball["y"] += dy
# Bounce off walls
# Bounce off walls and trigger audio events
bounce_occurred = False
if ball["x"] + ball["radius"] >= self.width or ball["x"] - ball["radius"] <= 0:
ball["dx"] = -ball["dx"]
ball["direction_x"] = -ball["direction_x"]
bounce_occurred = True
if ball["y"] + ball["radius"] >= self.height or ball["y"] - ball["radius"] <= 0:
ball["dy"] = -ball["dy"]
ball["direction_y"] = -ball["direction_y"]
bounce_occurred = True
# Trigger bounce sound if a bounce occurred
if bounce_occurred and self.audio_track:
self.audio_track.add_bounce_event("bounce")
# Keep ball in bounds
ball["x"] = max(ball["radius"], min(self.width - ball["radius"], ball["x"]))
@@ -121,8 +185,9 @@ class AnimatedVideoTrack(MediaStreamTrack):
-1,
)
# Add frame counter text
# Add frame counter and speed text
frame_text = f"Frame: {int(time.time() * 1000) % 10000}"
speed_text = f"Speed: {ball['speed_mps']:.2f} m/s"
cv2.putText(
frame_array,
frame_text,
@@ -132,6 +197,15 @@ class AnimatedVideoTrack(MediaStreamTrack):
(255, 255, 255),
1,
)
cv2.putText(
frame_array,
speed_text,
(10, 40),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(255, 255, 255),
1,
)
# Convert to VideoFrame
frame = VideoFrame.from_ndarray(frame_array, format="bgr24")
@@ -142,18 +216,44 @@ class AnimatedVideoTrack(MediaStreamTrack):
return frame
class SilentAudioTrack(MediaStreamTrack):
class SyntheticAudioTrack(MediaStreamTrack):
"""
Synthetic audio track that generates silence.
Ported from JavaScript createSilentAudioTrack function.
Synthetic audio track that generates audio including bounce sounds.
Originally a silent audio track, now enhanced to generate synthetic audio effects.
"""
kind = "audio"
def __init__(self):
super().__init__()
self.sample_rate = 48000
self.samples_per_frame = 960 # 20ms at 48kHz
self.bounce_queue: list[BounceEvent] = [] # Queue of bounce events to process
self.bounce_duration = 0.1 # 100ms bounce sound duration
self.bounce_amplitude = 0.3 # Amplitude of bounce sound
def add_bounce_event(self, bounce_type: str = "bounce"):
"""Add a bounce event to the audio queue"""
current_time = time.time()
self.bounce_queue.append(
{
"type": bounce_type,
"start_time": current_time,
"end_time": current_time + self.bounce_duration,
}
)
def _generate_bounce_sound(self, t: float) -> float:
"""Generate a simple bounce sound using a decaying sine wave"""
# Simple bounce sound: combination of two frequencies with decay
freq1 = 800 # Primary frequency
freq2 = 1200 # Secondary frequency
decay = np.exp(-t * 10) # Exponential decay
sound = (
np.sin(2 * np.pi * freq1 * t) * 0.7 + np.sin(2 * np.pi * freq2 * t) * 0.3
) * decay
return sound * self.bounce_amplitude
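(The same two-tone decay can be sampled for the whole 100 ms window in one vectorized pass — the numpy calls in _generate_bounce_sound already accept arrays. A standalone sketch using the constants above:)

import numpy as np

sample_rate = 48_000
duration = 0.1                                    # bounce_duration
t = np.arange(int(sample_rate * duration)) / sample_rate
envelope = np.exp(-t * 10)                        # exponential decay
tone = 0.7 * np.sin(2 * np.pi * 800 * t) + 0.3 * np.sin(2 * np.pi * 1200 * t)
bounce = 0.3 * tone * envelope                    # bounce_amplitude = 0.3
print(bounce.shape)                               # (4800,): 100 ms at 48 kHz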
async def next_timestamp(self):
"""Returns (pts, time_base) for 20ms audio frames at 48kHz"""
@@ -162,15 +262,39 @@ class SilentAudioTrack(MediaStreamTrack):
return pts, time_base
async def recv(self):
"""Generate silent audio frames"""
"""Generate audio frames with bounce sounds"""
pts, time_base = await self.next_timestamp()
current_time = time.time()
# Create silent audio data in s16 format (required by Opus encoder)
samples = np.zeros((self.samples_per_frame,), dtype=np.int16)
# Create audio data
samples = np.zeros((self.samples_per_frame,), dtype=np.float32)
# Check for active bounce events and generate sounds
active_bounces: list[BounceEvent] = []
for bounce in self.bounce_queue:
if current_time < bounce["end_time"]:
# Calculate time within the bounce sound
t = current_time - bounce["start_time"]
if t >= 0:
# Generate bounce sound for this time frame
for i in range(self.samples_per_frame):
sample_time = t + (i / self.sample_rate)
if sample_time <= self.bounce_duration:
samples[i] += self._generate_bounce_sound(sample_time)
active_bounces.append(bounce)
# Keep only active bounces
self.bounce_queue = active_bounces
# Clamp samples to prevent distortion
samples = np.clip(samples, -1.0, 1.0)
# Convert to s16 format (required by Opus encoder)
samples_s16 = (samples * 32767).astype(np.int16)
# Convert to AudioFrame
frame = AudioFrame.from_ndarray(
samples.reshape(1, -1), format="s16", layout="mono"
samples_s16.reshape(1, -1), format="s16", layout="mono"
)
frame.sample_rate = self.sample_rate
frame.pts = pts
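(recv() above mixes bounce audio one sample at a time in a Python loop; since _generate_bounce_sound's math vectorizes over arrays, the same mix can be done per frame with numpy indexing. A sketch, not the commit's code:)

import numpy as np

def mix_bounce(samples: np.ndarray, t0: float, duration: float,
               sample_rate: int, gen) -> None:
    """Add a bounce that started t0 seconds ago into this frame's samples."""
    t = t0 + np.arange(len(samples)) / sample_rate
    mask = t <= duration              # only samples inside the bounce window
    samples[mask] += gen(t[mask])     # gen: a vectorized bounce generator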
@@ -182,14 +306,21 @@ class SilentAudioTrack(MediaStreamTrack):
def create_synthetic_tracks(session_name: str) -> dict[str, MediaStreamTrack]:
"""
Create synthetic audio and video tracks for WebRTC streaming.
Args:
session_name: Name to use for generating video track colors
Returns:
Dictionary containing 'video' and 'audio' tracks
Note:
To change ball speed, use: tracks["video"].set_ball_speed(speed_in_mps)
where speed_in_mps is meters per second (frame width = 1 meter)
"""
return {
"video": AnimatedVideoTrack(name=session_name),
"audio": SilentAudioTrack()
}
# Create audio track first
audio_track = SyntheticAudioTrack()
# Create video track with reference to audio track for bounce events
video_track = AnimatedVideoTrack(name=session_name, audio_track=audio_track)
return {"video": video_track, "audio": audio_track}