Improved UI with timestamp

parent 1e13a75709
commit 394d3f349c
@@ -724,6 +724,9 @@ _send_chat_func: Optional[Callable[[str], Awaitable[None]]] = None
 _model_loading_status: str = "Not loaded"
 _model_loading_progress: float = 0.0
 
+# Raw audio buffer for immediate graphing
+_raw_audio_buffer: Dict[str, npt.NDArray[np.float32]] = {}
+
 
 def _ensure_model_loaded(device: str = _device) -> OpenVINOWhisperModel:
     """Ensure the global model is loaded."""
@@ -1434,13 +1437,29 @@ class WaveformVideoTrack(MediaStreamTrack):
                 3,
             )
 
-        # Select the most active processor (highest RMS) and draw its waveform
+        # Draw clock in lower right corner, right justified
+        current_time = time.strftime("%H:%M:%S")
+        (text_width, text_height), _ = cv2.getTextSize(
+            current_time, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 2
+        )
+        clock_x = self.width - text_width - 10  # 10px margin from right edge
+        clock_y = self.height - 10  # 10px margin from bottom
+        cv2.putText(
+            frame_array,
+            current_time,
+            (clock_x, clock_y),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            1.0,
+            (255, 255, 255),
+            2,
+        )
+
+        # Select the most active audio buffer (highest RMS) and draw its waveform
         best_proc = None
         best_rms = 0.0
        try:
-            for pname, proc in _audio_processors.items():
+            for pname, arr in _raw_audio_buffer.items():
                 try:
-                    arr = getattr(proc, "current_phrase_audio", None)
                     if arr is None or len(arr) == 0:
                         continue
                     rms = float(np.sqrt(np.mean(arr**2)))
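An aside on the placement math above: cv2.getTextSize returns the rendered text's pixel box, so right justification is just frame width minus text width minus the margin, and cv2.putText takes the text's bottom-left corner as its origin. A minimal standalone sketch; the 640×480 frame size is assumed for illustration (the track uses self.width/self.height):

import time

import cv2
import numpy as np

width, height = 640, 480  # assumed frame size for illustration
frame = np.zeros((height, width, 3), dtype=np.uint8)

clock_text = time.strftime("%H:%M:%S")
(text_w, text_h), baseline = cv2.getTextSize(
    clock_text, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 2
)

# Right-justified: start text_w pixels left of the right edge (10 px margin).
# putText's origin is the bottom-left of the text, so height - 10 keeps the
# glyphs 10 px above the bottom edge.
origin = (width - text_w - 10, height - 10)
cv2.putText(frame, clock_text, origin, cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2)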
@@ -1455,13 +1474,15 @@ class WaveformVideoTrack(MediaStreamTrack):
             if best_proc is not None:
                 pname, arr = best_proc
 
-                # Use the entire current phrase audio (from the start of the ongoing recording)
-                # This ensures the waveform shows audio from when recording began until it is processed.
+                # Use the last 2 second of audio data, padded with zeros if less
+                samples_needed = SAMPLE_RATE * 2  # 2 second(s)
                 if len(arr) <= 0:
-                    arr_segment = np.zeros(1, dtype=np.float32)
+                    arr_segment = np.zeros(samples_needed, dtype=np.float32)
+                elif len(arr) >= samples_needed:
+                    arr_segment = arr[-samples_needed:].copy()
                 else:
-                    # Copy the buffer so downstream operations (resizing/bucketing) are safe
-                    arr_segment = arr.copy()
+                    # Pad with zeros at the beginning
+                    arr_segment = np.concatenate([np.zeros(samples_needed - len(arr), dtype=np.float32), arr])
 
                 # Assume arr_segment is already in [-1, 1]
                 norm = arr_segment
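The branching above enforces one invariant: the segment handed to the renderer is always exactly two seconds long, taken from the tail of the buffer and left-padded with silence when the buffer is shorter. A minimal sketch of that invariant, assuming SAMPLE_RATE = 16000 (Whisper's expected input rate; the actual constant lives elsewhere in this file):

import numpy as np

SAMPLE_RATE = 16000  # assumed; Whisper expects 16 kHz input

def last_window(arr: np.ndarray, seconds: int = 2) -> np.ndarray:
    """Return exactly seconds * SAMPLE_RATE samples: the buffer's tail,
    left-padded with silence when the buffer is shorter."""
    needed = SAMPLE_RATE * seconds
    if len(arr) >= needed:
        return arr[-needed:].copy()
    pad = np.zeros(needed - len(arr), dtype=np.float32)
    return np.concatenate([pad, arr])

# Empty and short buffers both come back at the full window length.
assert len(last_window(np.zeros(0, dtype=np.float32))) == 2 * SAMPLE_RATE
assert len(last_window(np.ones(100, dtype=np.float32))) == 2 * SAMPLE_RATE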
@@ -1527,12 +1548,16 @@ class WaveformVideoTrack(MediaStreamTrack):
 
 async def handle_track_received(peer: Peer, track: MediaStreamTrack) -> None:
     """Handle incoming audio tracks from WebRTC peers."""
-    global _audio_processors, _send_chat_func
+    global _audio_processors, _send_chat_func, _raw_audio_buffer
 
     if track.kind != "audio":
         logger.info(f"Ignoring non-audio track from {peer.peer_name}: {track.kind}")
         return
 
+    # Initialize raw audio buffer for immediate graphing
+    if peer.peer_name not in _raw_audio_buffer:
+        _raw_audio_buffer[peer.peer_name] = np.array([], dtype=np.float32)
+
     if peer.peer_name not in _audio_processors:
         if _send_chat_func is None:
             logger.error(
@@ -1615,7 +1640,7 @@ async def handle_track_received(peer: Peer, track: MediaStreamTrack) -> None:
                 continue
 
         asyncio.create_task(init_processor())
-        return  # Exit early, processing is handled in background
+        return  # Exit early, processor is handled in the background
 
     # If processor already exists, just continue processing
     audio_processor = _audio_processors[peer.peer_name]
@@ -1623,13 +1648,17 @@ async def handle_track_received(peer: Peer, track: MediaStreamTrack) -> None:
 
     try:
         frame_count = 0
+        logger.info(f"Entering frame processing loop for {peer.peer_name}")
         while True:
             try:
+                logger.debug(f"Waiting for frame from {peer.peer_name}")
                 frame = await track.recv()
                 frame_count += 1
 
-                if frame_count % 100 == 0:
-                    logger.debug(f"Received {frame_count} frames from {peer.peer_name}")
+                if frame_count == 1:
+                    logger.info(f"Received first frame from {peer.peer_name}")
+                elif frame_count % 50 == 0:
+                    logger.info(f"Received {frame_count} frames from {peer.peer_name}")
 
             except MediaStreamError as e:
                 logger.info(f"Audio stream ended for {peer.peer_name}: {e}")
@@ -1638,7 +1667,6 @@ async def handle_track_received(peer: Peer, track: MediaStreamTrack) -> None:
                 logger.error(f"Error receiving frame from {peer.peer_name}: {e}")
                 break
 
-            logger.info(f"Processing frame {frame_count} from {peer.peer_name}")
             if isinstance(frame, AudioFrame):
                 try:
                     # Convert frame to numpy array
@@ -1656,7 +1684,16 @@ async def handle_track_received(peer: Peer, track: MediaStreamTrack) -> None:
                     # Convert to float32
                     audio_data_float32 = audio_data.astype(np.float32)
 
-                    # Process with optimized processor
+                    logger.debug(f"Processed audio frame {frame_count} from {peer.peer_name}: {len(audio_data_float32)} samples")
+
+                    # Update raw buffer for graphing
+                    _raw_audio_buffer[peer.peer_name] = np.concatenate([_raw_audio_buffer[peer.peer_name], audio_data_float32])
+                    # Limit buffer size to last 10 seconds
+                    max_samples = SAMPLE_RATE * 10
+                    if len(_raw_audio_buffer[peer.peer_name]) > max_samples:
+                        _raw_audio_buffer[peer.peer_name] = _raw_audio_buffer[peer.peer_name][-max_samples:]
+
+                    # Process with optimized processor if available
                     audio_processor.add_audio_data(audio_data_float32)
 
                 except Exception as e:
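The append-then-trim above is a simple rolling window; capping it at ten seconds also bounds the cost of the full-buffer copy that np.concatenate performs on every frame. A sketch of the pattern, with the constants assumed as in the diff:

import numpy as np

SAMPLE_RATE = 16000      # assumed sample rate
MAX_SECONDS = 10         # retain only the most recent ten seconds

def append_rolling(buffer: np.ndarray, chunk: np.ndarray) -> np.ndarray:
    """Append chunk, then drop samples older than MAX_SECONDS."""
    buffer = np.concatenate([buffer, chunk])
    max_samples = SAMPLE_RATE * MAX_SECONDS
    if len(buffer) > max_samples:
        buffer = buffer[-max_samples:]
    return buffer

buf = np.zeros(0, dtype=np.float32)
for _ in range(1200):                      # ~12 s of 10 ms frames at 16 kHz
    buf = append_rolling(buf, np.zeros(160, dtype=np.float32))
assert len(buf) == SAMPLE_RATE * MAX_SECONDS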
@@ -1797,7 +1834,7 @@ def bind_send_chat_function(send_chat_func: Callable[[str], Awaitable[None]]) ->
 
 def cleanup_peer_processor(peer_name: str) -> None:
     """Clean up processor for disconnected peer."""
-    global _audio_processors
+    global _audio_processors, _raw_audio_buffer
 
     if peer_name in _audio_processors:
         logger.info(f"Cleaning up processor for {peer_name}")
@@ -1806,6 +1843,9 @@ def cleanup_peer_processor(peer_name: str) -> None:
         del _audio_processors[peer_name]
         logger.info(f"Processor cleanup complete for {peer_name}")
 
+    if peer_name in _raw_audio_buffer:
+        del _raw_audio_buffer[peer_name]
+
 
 def get_active_processors() -> Dict[str, OptimizedAudioProcessor]:
     """Get active processors for debugging."""