Improved UI with timestamp

James Ketr 2025-09-13 19:55:42 -07:00
parent 1e13a75709
commit 394d3f349c


@@ -724,6 +724,9 @@ _send_chat_func: Optional[Callable[[str], Awaitable[None]]] = None
 _model_loading_status: str = "Not loaded"
 _model_loading_progress: float = 0.0
 
+# Raw audio buffer for immediate graphing
+_raw_audio_buffer: Dict[str, npt.NDArray[np.float32]] = {}
+
 def _ensure_model_loaded(device: str = _device) -> OpenVINOWhisperModel:
     """Ensure the global model is loaded."""
@@ -1434,13 +1437,29 @@ class WaveformVideoTrack(MediaStreamTrack):
                 3,
             )
-        # Select the most active processor (highest RMS) and draw its waveform
+        # Draw clock in lower right corner, right justified
+        current_time = time.strftime("%H:%M:%S")
+        (text_width, text_height), _ = cv2.getTextSize(
+            current_time, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 2
+        )
+        clock_x = self.width - text_width - 10  # 10px margin from right edge
+        clock_y = self.height - 10  # 10px margin from bottom
+        cv2.putText(
+            frame_array,
+            current_time,
+            (clock_x, clock_y),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            1.0,
+            (255, 255, 255),
+            2,
+        )
+
+        # Select the most active audio buffer (highest RMS) and draw its waveform
         best_proc = None
         best_rms = 0.0
         try:
-            for pname, proc in _audio_processors.items():
+            for pname, arr in _raw_audio_buffer.items():
                 try:
-                    arr = getattr(proc, "current_phrase_audio", None)
                     if arr is None or len(arr) == 0:
                         continue
                     rms = float(np.sqrt(np.mean(arr**2)))
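For reference, the clock overlay in isolation. This self-contained sketch uses the same OpenCV calls as the hunk; `draw_clock` is an illustrative name, not a function in the module:

import time

import cv2
import numpy as np

def draw_clock(frame: np.ndarray) -> np.ndarray:
    """Draw the current HH:MM:SS right-justified in the lower-right corner."""
    height, width = frame.shape[:2]
    current_time = time.strftime("%H:%M:%S")
    # getTextSize returns ((width, height), baseline); only width is needed here.
    (text_width, _text_height), _baseline = cv2.getTextSize(
        current_time, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 2
    )
    # Same 10px margins from the right and bottom edges as the diff uses.
    cv2.putText(
        frame,
        current_time,
        (width - text_width - 10, height - 10),
        cv2.FONT_HERSHEY_SIMPLEX,
        1.0,
        (255, 255, 255),
        2,
    )
    return frame

# Usage on a blank 640x480 BGR frame:
stamped = draw_clock(np.zeros((480, 640, 3), dtype=np.uint8))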
@@ -1455,13 +1474,15 @@ class WaveformVideoTrack(MediaStreamTrack):
         if best_proc is not None:
             pname, arr = best_proc
-            # Use the entire current phrase audio (from the start of the ongoing recording)
-            # This ensures the waveform shows audio from when recording began until it is processed.
+            # Use the last 2 seconds of audio data, padded with zeros if less
+            samples_needed = SAMPLE_RATE * 2  # 2 seconds
             if len(arr) <= 0:
-                arr_segment = np.zeros(1, dtype=np.float32)
+                arr_segment = np.zeros(samples_needed, dtype=np.float32)
+            elif len(arr) >= samples_needed:
+                arr_segment = arr[-samples_needed:].copy()
             else:
-                # Copy the buffer so downstream operations (resizing/bucketing) are safe
-                arr_segment = arr.copy()
+                # Pad with zeros at the beginning
+                arr_segment = np.concatenate([np.zeros(samples_needed - len(arr), dtype=np.float32), arr])
 
             # Assume arr_segment is already in [-1, 1]
             norm = arr_segment
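Taken together, buffer selection and windowing amount to the following. This is a minimal sketch assuming 16 kHz mono float32 audio; `pick_loudest_tail` and the literal sample-rate value are illustrative, the module defines its own `SAMPLE_RATE`:

from typing import Dict, Optional, Tuple

import numpy as np

SAMPLE_RATE = 16000  # assumed for illustration

def pick_loudest_tail(
    buffers: Dict[str, np.ndarray], seconds: int = 2
) -> Optional[Tuple[str, np.ndarray]]:
    """Pick the highest-RMS buffer and return its last `seconds` of audio,
    left-padded with zeros when the buffer is shorter."""
    best: Optional[Tuple[str, np.ndarray]] = None
    best_rms = 0.0
    for name, arr in buffers.items():
        if arr is None or len(arr) == 0:
            continue
        rms = float(np.sqrt(np.mean(arr**2)))
        if best is None or rms > best_rms:
            best, best_rms = (name, arr), rms
    if best is None:
        return None
    name, arr = best
    needed = SAMPLE_RATE * seconds
    if len(arr) >= needed:
        segment = arr[-needed:].copy()
    else:
        # Zero-pad at the front so the waveform always spans `needed` samples.
        segment = np.concatenate([np.zeros(needed - len(arr), dtype=np.float32), arr])
    return name, segment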
@@ -1527,12 +1548,16 @@ class WaveformVideoTrack(MediaStreamTrack):
 async def handle_track_received(peer: Peer, track: MediaStreamTrack) -> None:
     """Handle incoming audio tracks from WebRTC peers."""
-    global _audio_processors, _send_chat_func
+    global _audio_processors, _send_chat_func, _raw_audio_buffer
 
     if track.kind != "audio":
         logger.info(f"Ignoring non-audio track from {peer.peer_name}: {track.kind}")
         return
 
+    # Initialize raw audio buffer for immediate graphing
+    if peer.peer_name not in _raw_audio_buffer:
+        _raw_audio_buffer[peer.peer_name] = np.array([], dtype=np.float32)
+
     if peer.peer_name not in _audio_processors:
         if _send_chat_func is None:
             logger.error(
@@ -1615,7 +1640,7 @@ async def handle_track_received(peer: Peer, track: MediaStreamTrack) -> None:
                 continue
 
         asyncio.create_task(init_processor())
-        return  # Exit early, processing is handled in background
+        return  # Exit early, processor is handled in the background
 
     # If processor already exists, just continue processing
     audio_processor = _audio_processors[peer.peer_name]
@@ -1623,13 +1648,17 @@ async def handle_track_received(peer: Peer, track: MediaStreamTrack) -> None:
     try:
         frame_count = 0
+        logger.info(f"Entering frame processing loop for {peer.peer_name}")
         while True:
             try:
+                logger.debug(f"Waiting for frame from {peer.peer_name}")
                 frame = await track.recv()
                 frame_count += 1
-                if frame_count % 100 == 0:
-                    logger.debug(f"Received {frame_count} frames from {peer.peer_name}")
+                if frame_count == 1:
+                    logger.info(f"Received first frame from {peer.peer_name}")
+                elif frame_count % 50 == 0:
+                    logger.info(f"Received {frame_count} frames from {peer.peer_name}")
 
             except MediaStreamError as e:
                 logger.info(f"Audio stream ended for {peer.peer_name}: {e}")
@@ -1638,7 +1667,6 @@ async def handle_track_received(peer: Peer, track: MediaStreamTrack) -> None:
                 logger.error(f"Error receiving frame from {peer.peer_name}: {e}")
                 break
 
-            logger.info(f"Processing frame {frame_count} from {peer.peer_name}")
             if isinstance(frame, AudioFrame):
                 try:
                     # Convert frame to numpy array
@@ -1656,7 +1684,16 @@ async def handle_track_received(peer: Peer, track: MediaStreamTrack) -> None:
                     # Convert to float32
                     audio_data_float32 = audio_data.astype(np.float32)
-                    # Process with optimized processor
+                    logger.debug(f"Processed audio frame {frame_count} from {peer.peer_name}: {len(audio_data_float32)} samples")
+
+                    # Update raw buffer for graphing
+                    _raw_audio_buffer[peer.peer_name] = np.concatenate([_raw_audio_buffer[peer.peer_name], audio_data_float32])
+                    # Limit buffer size to last 10 seconds
+                    max_samples = SAMPLE_RATE * 10
+                    if len(_raw_audio_buffer[peer.peer_name]) > max_samples:
+                        _raw_audio_buffer[peer.peer_name] = _raw_audio_buffer[peer.peer_name][-max_samples:]
+
+                    # Process with optimized processor if available
                     audio_processor.add_audio_data(audio_data_float32)
 
                 except Exception as e:
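The update pattern above, append then trim to a fixed tail, as a standalone sketch. `append_and_trim` and the 16 kHz rate are assumptions for illustration:

import numpy as np

SAMPLE_RATE = 16000  # assumed; the module defines its own SAMPLE_RATE
MAX_SECONDS = 10

def append_and_trim(buffer: np.ndarray, new_samples: np.ndarray) -> np.ndarray:
    """Append new samples and keep only the trailing 10 seconds (hypothetical helper)."""
    buffer = np.concatenate([buffer, new_samples.astype(np.float32)])
    max_samples = SAMPLE_RATE * MAX_SECONDS
    if len(buffer) > max_samples:
        buffer = buffer[-max_samples:]
    return buffer

# Usage: append 100 ms of silence to an empty buffer.
buf = append_and_trim(np.array([], dtype=np.float32), np.zeros(1600, dtype=np.float32))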
@@ -1797,7 +1834,7 @@ def bind_send_chat_function(send_chat_func: Callable[[str], Awaitable[None]]) ->
 def cleanup_peer_processor(peer_name: str) -> None:
     """Clean up processor for disconnected peer."""
-    global _audio_processors
+    global _audio_processors, _raw_audio_buffer
 
     if peer_name in _audio_processors:
         logger.info(f"Cleaning up processor for {peer_name}")
@@ -1806,6 +1843,9 @@ def cleanup_peer_processor(peer_name: str) -> None:
         del _audio_processors[peer_name]
         logger.info(f"Processor cleanup complete for {peer_name}")
 
+    if peer_name in _raw_audio_buffer:
+        del _raw_audio_buffer[peer_name]
+
 def get_active_processors() -> Dict[str, OptimizedAudioProcessor]:
     """Get active processors for debugging."""