Improved UI with timestamp
parent 1e13a75709
commit 394d3f349c
@@ -724,6 +724,9 @@ _send_chat_func: Optional[Callable[[str], Awaitable[None]]] = None
 _model_loading_status: str = "Not loaded"
 _model_loading_progress: float = 0.0
 
+# Raw audio buffer for immediate graphing
+_raw_audio_buffer: Dict[str, npt.NDArray[np.float32]] = {}
+
 
 def _ensure_model_loaded(device: str = _device) -> OpenVINOWhisperModel:
     """Ensure the global model is loaded."""
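
The new module-level `_raw_audio_buffer` maps each peer name to a growing float32 sample array, so the video track can graph audio before the heavier processor has produced anything. A minimal sketch of the pattern, assuming the same `numpy`/`numpy.typing` imports the module already uses and mono float32 samples at the module-wide SAMPLE_RATE; `_append_samples` is a hypothetical helper, not part of the diff:

    from typing import Dict

    import numpy as np
    import numpy.typing as npt

    # One raw PCM buffer per peer, keyed by peer name.
    _raw_audio_buffer: Dict[str, npt.NDArray[np.float32]] = {}

    def _append_samples(peer: str, samples: npt.NDArray[np.float32]) -> None:
        # Hypothetical helper: append by copy-and-replace on the dict entry.
        buf = _raw_audio_buffer.get(peer, np.array([], dtype=np.float32))
        _raw_audio_buffer[peer] = np.concatenate([buf, samples])
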
@@ -1434,13 +1437,29 @@ class WaveformVideoTrack(MediaStreamTrack):
             3,
         )
 
-        # Select the most active processor (highest RMS) and draw its waveform
+        # Draw clock in lower right corner, right justified
+        current_time = time.strftime("%H:%M:%S")
+        (text_width, text_height), _ = cv2.getTextSize(
+            current_time, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 2
+        )
+        clock_x = self.width - text_width - 10  # 10px margin from right edge
+        clock_y = self.height - 10  # 10px margin from bottom
+        cv2.putText(
+            frame_array,
+            current_time,
+            (clock_x, clock_y),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            1.0,
+            (255, 255, 255),
+            2,
+        )
+
+        # Select the most active audio buffer (highest RMS) and draw its waveform
         best_proc = None
         best_rms = 0.0
         try:
-            for pname, proc in _audio_processors.items():
+            for pname, arr in _raw_audio_buffer.items():
                 try:
-                    arr = getattr(proc, "current_phrase_audio", None)
                     if arr is None or len(arr) == 0:
                         continue
                     rms = float(np.sqrt(np.mean(arr**2)))
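
For the clock overlay, `cv2.getTextSize` returns `((width, height), baseline)`, and `cv2.putText` takes the bottom-left corner of the text as its origin; right justification therefore means subtracting the measured text width (plus a margin) from the frame width. A self-contained sketch of the same technique, with a hypothetical 640x480 BGR frame standing in for `frame_array`:

    import time

    import cv2
    import numpy as np

    frame = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in for frame_array
    text = time.strftime("%H:%M:%S")
    # Measure the rendered size for this font, scale, and thickness.
    (w, h), _baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 2)
    origin = (640 - w - 10, 480 - 10)  # bottom-left origin, 10 px margins
    cv2.putText(frame, text, origin, cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2)
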
@@ -1455,13 +1474,15 @@ class WaveformVideoTrack(MediaStreamTrack):
         if best_proc is not None:
             pname, arr = best_proc
 
-            # Use the entire current phrase audio (from the start of the ongoing recording)
-            # This ensures the waveform shows audio from when recording began until it is processed.
+            # Use the last 2 second of audio data, padded with zeros if less
+            samples_needed = SAMPLE_RATE * 2  # 2 second(s)
             if len(arr) <= 0:
-                arr_segment = np.zeros(1, dtype=np.float32)
+                arr_segment = np.zeros(samples_needed, dtype=np.float32)
+            elif len(arr) >= samples_needed:
+                arr_segment = arr[-samples_needed:].copy()
             else:
-                # Copy the buffer so downstream operations (resizing/bucketing) are safe
-                arr_segment = arr.copy()
+                # Pad with zeros at the beginning
+                arr_segment = np.concatenate([np.zeros(samples_needed - len(arr), dtype=np.float32), arr])
 
             # Assume arr_segment is already in [-1, 1]
             norm = arr_segment
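
Together, the loop above and this block reduce to: pick the loudest buffer by RMS, then materialize a fixed-length two-second tail, left-padded with silence while the buffer is still short, so the waveform always spans a full window. A standalone sketch under the assumption that SAMPLE_RATE is 16000, as is typical for Whisper pipelines:

    from typing import Dict, Optional, Tuple

    import numpy as np

    SAMPLE_RATE = 16_000  # assumption: matches the module-level constant

    def pick_loudest(buffers: Dict[str, np.ndarray]) -> Optional[Tuple[str, np.ndarray]]:
        best, best_rms = None, 0.0
        for name, arr in buffers.items():
            if arr is None or len(arr) == 0:
                continue
            rms = float(np.sqrt(np.mean(arr ** 2)))  # root-mean-square energy
            if rms > best_rms:
                best, best_rms = (name, arr), rms
        return best

    def last_two_seconds(arr: np.ndarray) -> np.ndarray:
        n = SAMPLE_RATE * 2
        if len(arr) >= n:
            return arr[-n:].copy()
        # Left-pad with zeros so a short (or empty) buffer still fills the window.
        return np.concatenate([np.zeros(n - len(arr), dtype=np.float32), arr])
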
@@ -1527,12 +1548,16 @@ class WaveformVideoTrack(MediaStreamTrack):
 
 async def handle_track_received(peer: Peer, track: MediaStreamTrack) -> None:
     """Handle incoming audio tracks from WebRTC peers."""
-    global _audio_processors, _send_chat_func
+    global _audio_processors, _send_chat_func, _raw_audio_buffer
 
     if track.kind != "audio":
         logger.info(f"Ignoring non-audio track from {peer.peer_name}: {track.kind}")
         return
 
+    # Initialize raw audio buffer for immediate graphing
+    if peer.peer_name not in _raw_audio_buffer:
+        _raw_audio_buffer[peer.peer_name] = np.array([], dtype=np.float32)
+
     if peer.peer_name not in _audio_processors:
         if _send_chat_func is None:
             logger.error(
@@ -1615,7 +1640,7 @@ async def handle_track_received(peer: Peer, track: MediaStreamTrack) -> None:
                     continue
 
         asyncio.create_task(init_processor())
-        return  # Exit early, processing is handled in background
+        return  # Exit early, processor is handled in the background
 
     # If processor already exists, just continue processing
     audio_processor = _audio_processors[peer.peer_name]
@@ -1623,13 +1648,17 @@ async def handle_track_received(peer: Peer, track: MediaStreamTrack) -> None:
 
     try:
         frame_count = 0
+        logger.info(f"Entering frame processing loop for {peer.peer_name}")
         while True:
             try:
+                logger.debug(f"Waiting for frame from {peer.peer_name}")
                 frame = await track.recv()
                 frame_count += 1
 
-                if frame_count % 100 == 0:
-                    logger.debug(f"Received {frame_count} frames from {peer.peer_name}")
+                if frame_count == 1:
+                    logger.info(f"Received first frame from {peer.peer_name}")
+                elif frame_count % 50 == 0:
+                    logger.info(f"Received {frame_count} frames from {peer.peer_name}")
 
             except MediaStreamError as e:
                 logger.info(f"Audio stream ended for {peer.peer_name}: {e}")
@@ -1638,7 +1667,6 @@ async def handle_track_received(peer: Peer, track: MediaStreamTrack) -> None:
                 logger.error(f"Error receiving frame from {peer.peer_name}: {e}")
                 break
 
-            logger.info(f"Processing frame {frame_count} from {peer.peer_name}")
             if isinstance(frame, AudioFrame):
                 try:
                     # Convert frame to numpy array
@@ -1656,7 +1684,16 @@ async def handle_track_received(peer: Peer, track: MediaStreamTrack) -> None:
                     # Convert to float32
                     audio_data_float32 = audio_data.astype(np.float32)
 
-                    # Process with optimized processor
+                    logger.debug(f"Processed audio frame {frame_count} from {peer.peer_name}: {len(audio_data_float32)} samples")
+
+                    # Update raw buffer for graphing
+                    _raw_audio_buffer[peer.peer_name] = np.concatenate([_raw_audio_buffer[peer.peer_name], audio_data_float32])
+                    # Limit buffer size to last 10 seconds
+                    max_samples = SAMPLE_RATE * 10
+                    if len(_raw_audio_buffer[peer.peer_name]) > max_samples:
+                        _raw_audio_buffer[peer.peer_name] = _raw_audio_buffer[peer.peer_name][-max_samples:]
+
+                    # Process with optimized processor if available
                     audio_processor.add_audio_data(audio_data_float32)
 
                 except Exception as e:
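
The per-frame buffer update is an append-and-trim pattern: concatenate the new samples, then keep at most the last ten seconds so memory stays bounded regardless of call duration. A sketch of the idea in isolation, with SAMPLE_RATE again assumed to be the module constant:

    import numpy as np

    SAMPLE_RATE = 16_000            # assumption: module-level constant
    MAX_SAMPLES = SAMPLE_RATE * 10  # 10-second cap, as in the diff

    def append_and_trim(buf: np.ndarray, new: np.ndarray) -> np.ndarray:
        buf = np.concatenate([buf, new])
        if len(buf) > MAX_SAMPLES:
            buf = buf[-MAX_SAMPLES:]  # drop the oldest samples
        return buf

Each append copies the whole buffer, so this is O(buffer length) per frame; at ten seconds of 16 kHz mono float32 (about 640 KB) that copy is cheap, but a preallocated ring buffer would avoid it if the cap grew.
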
@@ -1797,7 +1834,7 @@ def bind_send_chat_function(send_chat_func: Callable[[str], Awaitable[None]]) ->
 
 def cleanup_peer_processor(peer_name: str) -> None:
     """Clean up processor for disconnected peer."""
-    global _audio_processors
+    global _audio_processors, _raw_audio_buffer
 
     if peer_name in _audio_processors:
         logger.info(f"Cleaning up processor for {peer_name}")
@@ -1806,6 +1843,9 @@ def cleanup_peer_processor(peer_name: str) -> None:
         del _audio_processors[peer_name]
         logger.info(f"Processor cleanup complete for {peer_name}")
 
+    if peer_name in _raw_audio_buffer:
+        del _raw_audio_buffer[peer_name]
+
 
 def get_active_processors() -> Dict[str, OptimizedAudioProcessor]:
     """Get active processors for debugging."""