#!/usr/bin/env python3
"""
Force transcription debug - processes any accumulated audio immediately.

Run this to force the whisper agent to attempt transcription of the current
audio buffer.
"""

import sys
import os
import asyncio

import numpy as np

# Add the voicebot directory to the path
sys.path.append(os.path.dirname(os.path.abspath(__file__)))


def force_transcription():
    """Force transcription of any accumulated audio."""
    try:
        from bots.whisper import _audio_processors

        if not _audio_processors:
            print(
                "āŒ No audio processors found. Whisper agent may not be running or no peers connected."
            )
            return

        print(f"šŸ” Found {len(_audio_processors)} active audio processors:")

        for peer_name, audio_processor in _audio_processors.items():
            print(f"\nšŸ‘¤ {peer_name}:")
            print(f"  - Running: {audio_processor.is_running}")
            print(f"  - Buffer size: {len(audio_processor.audio_buffer)} frames")
            print(f"  - Queue size: {audio_processor.processing_queue.qsize()}")
            print(
                f"  - Current phrase length: {len(audio_processor.current_phrase_audio)} samples"
            )

            # Force processing of the current buffer
            if len(audio_processor.audio_buffer) > 0:
                print(
                    f"šŸ”„ Forcing processing of {len(audio_processor.audio_buffer)} buffered frames for {peer_name}..."
                )
                audio_processor._queue_for_processing()
            else:
                print(f"šŸ“­ No audio in buffer to process for {peer_name}")

            # If we have a current phrase, try to transcribe it
            if len(audio_processor.current_phrase_audio) > 0:
                phrase_duration = (
                    len(audio_processor.current_phrase_audio)
                    / audio_processor.sample_rate
                )
                phrase_rms = np.sqrt(np.mean(audio_processor.current_phrase_audio**2))
                print(
                    f"šŸŽ¤ Current phrase for {peer_name}: {phrase_duration:.2f}s, RMS: {phrase_rms:.6f}"
                )

                if phrase_duration > 0.3:  # Minimum duration
                    print(
                        f"šŸš€ Forcing transcription of current phrase for {peer_name}..."
                    )

                    # Create an event loop if none exists
                    try:
                        loop = asyncio.get_event_loop()
                    except RuntimeError:
                        loop = asyncio.new_event_loop()
                        asyncio.set_event_loop(loop)

                    # Force transcription of the accumulated phrase audio
                    async def force_transcribe():
                        await audio_processor._transcribe_and_send(
                            audio_processor.current_phrase_audio.copy(), is_final=True
                        )

                    loop.run_until_complete(force_transcribe())
                    print(f"āœ… Forced transcription completed for {peer_name}")
                else:
                    print(
                        f"ā±ļø Current phrase too short for {peer_name} ({phrase_duration:.2f}s < 0.3s)"
                    )
            else:
                print(f"🤐 No current phrase to transcribe for {peer_name}")

    except ImportError:
        print(
            "āŒ Could not import whisper components. Make sure the whisper agent is loaded."
        )
    except Exception as e:
        print(f"āŒ Error: {e}")


def show_audio_stats():
    """Show detailed audio statistics."""
    try:
        from bots.whisper import _audio_processors

        if not _audio_processors:
            print("āŒ No audio processors found")
            return

        print(
            f"\nšŸ“Š Detailed Audio Statistics for {len(_audio_processors)} processors:"
        )

        for peer_name, audio_processor in _audio_processors.items():
            print(f"\nšŸ‘¤ {peer_name}:")
            print(f"Sample rate: {audio_processor.sample_rate}Hz")
            print(f"Samples per frame: {audio_processor.samples_per_frame}")
            print(f"Phrase timeout: {audio_processor.phrase_timeout}s")
            print(f"Buffer max length: {audio_processor.audio_buffer.maxlen}")
            print(f"Current buffer size: {len(audio_processor.audio_buffer)}")
            print(f"Processing queue size: {audio_processor.processing_queue.qsize()}")

            if len(audio_processor.current_phrase_audio) > 0:
                phrase_duration = (
                    len(audio_processor.current_phrase_audio)
                    / audio_processor.sample_rate
                )
                phrase_rms = np.sqrt(np.mean(audio_processor.current_phrase_audio**2))
                phrase_peak = np.max(np.abs(audio_processor.current_phrase_audio))
                print("  Current phrase:")
                print(f"    Duration: {phrase_duration:.2f}s")
                print(f"    Samples: {len(audio_processor.current_phrase_audio)}")
                print(f"    RMS: {phrase_rms:.6f}")
                print(f"    Peak: {phrase_peak:.6f}")

            if len(audio_processor.audio_buffer) > 0:
                combined = np.concatenate(list(audio_processor.audio_buffer))
                buffer_duration = len(combined) / audio_processor.sample_rate
                buffer_rms = np.sqrt(np.mean(combined**2))
                buffer_peak = np.max(np.abs(combined))
                print("  Buffer contents:")
                print(f"    Duration: {buffer_duration:.2f}s")
                print(f"    Samples: {len(combined)}")
                print(f"    RMS: {buffer_rms:.6f}")
                print(f"    Peak: {buffer_peak:.6f}")

    except Exception as e:
        print(f"āŒ Error getting stats: {e}")


if __name__ == "__main__":
    if len(sys.argv) > 1 and sys.argv[1] == "stats":
        show_audio_stats()
    else:
        force_transcription()
        show_audio_stats()
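
# Example invocations (a sketch: assumes this script lives in the voicebot directory,
# is saved under an illustrative name such as force_transcription_debug.py, and runs
# in an environment where bots.whisper and its _audio_processors registry are importable):
#
#   python force_transcription_debug.py          # force transcription, then show stats
#   python force_transcription_debug.py stats    # show detailed audio statistics only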