ai-voicebot/voicebot/force_transcription.py

149 lines
5.9 KiB
Python

#!/usr/bin/env python3
"""
Force transcription debug - processes any accumulated audio immediately.
Run this to force the whisper agent to attempt transcription of current audio buffer.
"""
import sys
import os
import asyncio
import numpy as np
# Add the voicebot directory to the path
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
def _get_or_create_event_loop():
    """Return a usable event loop for this thread, installing a new one if needed.

    A fresh loop is created and set as current when ``asyncio.get_event_loop()``
    raises ``RuntimeError`` or returns a loop that is already closed, because
    ``run_until_complete`` cannot be called on a closed loop.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = None
    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    return loop


def _force_peer_transcription(peer_name, audio_processor, min_phrase_duration):
    """Print one processor's state, queue its buffer, and transcribe its open phrase.

    Args:
        peer_name: Key of the processor in ``_audio_processors``; only printed.
        audio_processor: Processor object exposing the attributes read below.
        min_phrase_duration: Phrases must be strictly longer than this many
            seconds (samples / sample_rate) to be transcribed.
    """
    print(f"\n👤 {peer_name}:")
    print(f" - Running: {audio_processor.is_running}")
    print(f" - Buffer size: {len(audio_processor.audio_buffer)} frames")
    print(f" - Queue size: {audio_processor.processing_queue.qsize()}")
    print(
        f" - Current phrase length: {len(audio_processor.current_phrase_audio)} samples"
    )

    # Force processing of whatever is sitting in the frame buffer.
    buffered_frames = len(audio_processor.audio_buffer)
    if buffered_frames > 0:
        print(
            f"🔄 Forcing processing of {buffered_frames} buffered frames for {peer_name}..."
        )
        audio_processor._queue_for_processing()
    else:
        print(f"📭 No audio in buffer to process for {peer_name}")

    # Take one reference to the phrase (after the queueing step above) so the
    # statistics printed and the audio sent refer to the same samples.
    phrase = audio_processor.current_phrase_audio
    if len(phrase) == 0:
        print(f"🤐 No current phrase to transcribe for {peer_name}")
        return

    phrase_duration = len(phrase) / audio_processor.sample_rate
    # Square in float64 so the result cannot wrap around if the samples happen
    # to be stored as integers.
    phrase_rms = np.sqrt(np.mean(np.square(np.asarray(phrase, dtype=np.float64))))
    print(
        f"🎤 Current phrase for {peer_name}: {phrase_duration:.2f}s, RMS: {phrase_rms:.6f}"
    )

    if phrase_duration > min_phrase_duration:
        print(f"🚀 Forcing transcription of current phrase for {peer_name}...")
        loop = _get_or_create_event_loop()
        # Hand the processor a copy of the phrase and block until it finishes.
        loop.run_until_complete(
            audio_processor._transcribe_and_send(phrase.copy(), is_final=True)
        )
        print(f"✅ Forced transcription completed for {peer_name}")
    else:
        print(
            f"⏱️ Current phrase too short for {peer_name} ({phrase_duration:.2f}s < {min_phrase_duration}s)"
        )


def force_transcription(min_phrase_duration: float = 0.3) -> None:
    """Force transcription of any accumulated audio.

    Imports ``_audio_processors`` from ``bots.whisper`` and, for every entry,
    prints its state, queues any buffered frames for processing, and runs
    ``_transcribe_and_send(..., is_final=True)`` on the current phrase when it
    is long enough. Progress and errors are reported with ``print``; nothing
    is raised to the caller.

    Args:
        min_phrase_duration: Minimum phrase length in seconds; the current
            phrase is transcribed only when strictly longer. Defaults to 0.3.
    """
    try:
        from bots.whisper import _audio_processors
    except ImportError:
        print(
            "❌ Could not import whisper components. Make sure the whisper agent is loaded."
        )
        return
    except Exception as e:
        # Importing the module may fail for reasons other than a missing module.
        print(f"❌ Error: {e}")
        return

    if not _audio_processors:
        print(
            "❌ No audio processors found. Whisper agent may not be running or no peers connected."
        )
        return

    print(f"🔍 Found {len(_audio_processors)} active audio processors:")
    # Iterate over a snapshot so a registry change mid-loop cannot break iteration.
    for peer_name, audio_processor in list(_audio_processors.items()):
        try:
            _force_peer_transcription(peer_name, audio_processor, min_phrase_duration)
        except Exception as e:
            # Keep going: one broken processor should not hide the others.
            print(f"❌ Error processing {peer_name}: {e}")
def _print_signal_stats(label, samples, sample_rate):
    """Print duration, sample count, RMS and peak for one array of samples.

    Args:
        label: Heading printed above the figures.
        samples: Non-empty array-like of audio samples.
        sample_rate: Samples per second, used to convert length to duration.
    """
    # Work on a float64 view so squaring and abs() cannot wrap around if the
    # samples happen to be stored as integers.
    as_float = np.asarray(samples, dtype=np.float64)
    duration = len(samples) / sample_rate
    rms = np.sqrt(np.mean(np.square(as_float)))
    peak = np.max(np.abs(as_float))
    print(f" {label}:")
    print(f" Duration: {duration:.2f}s")
    print(f" Samples: {len(samples)}")
    print(f" RMS: {rms:.6f}")
    print(f" Peak: {peak:.6f}")


def show_audio_stats() -> None:
    """Show detailed audio statistics.

    Imports ``_audio_processors`` from ``bots.whisper`` and prints, for each
    entry, its configuration values, buffer and queue sizes, and signal
    statistics for the current phrase and for the concatenated frame buffer
    when they are non-empty. Errors are reported with ``print``; nothing is
    raised to the caller.
    """
    try:
        from bots.whisper import _audio_processors

        if not _audio_processors:
            print("❌ No audio processors found")
            return

        print(
            f"\n📊 Detailed Audio Statistics for {len(_audio_processors)} processors:"
        )
        # Snapshot the registry so a change mid-loop cannot break iteration.
        processors = list(_audio_processors.items())
    except Exception as e:
        print(f"❌ Error getting stats: {e}")
        return

    for peer_name, audio_processor in processors:
        try:
            print(f"\n👤 {peer_name}:")
            print(f"Sample rate: {audio_processor.sample_rate}Hz")
            print(f"Samples per frame: {audio_processor.samples_per_frame}")
            print(f"Phrase timeout: {audio_processor.phrase_timeout}s")
            print(f"Buffer max length: {audio_processor.audio_buffer.maxlen}")
            print(f"Current buffer size: {len(audio_processor.audio_buffer)}")
            print(f"Processing queue size: {audio_processor.processing_queue.qsize()}")

            phrase = audio_processor.current_phrase_audio
            if len(phrase) > 0:
                _print_signal_stats(
                    "Current phrase", phrase, audio_processor.sample_rate
                )

            # Copy the frames first: checking the length and then re-reading
            # the buffer could hand np.concatenate an empty list.
            frames = list(audio_processor.audio_buffer)
            if frames:
                _print_signal_stats(
                    "Buffer contents",
                    np.concatenate(frames),
                    audio_processor.sample_rate,
                )
        except Exception as e:
            # Keep going: one broken processor should not hide the others.
            print(f"❌ Error getting stats for {peer_name}: {e}")
if __name__ == "__main__":
    # `stats` as the first CLI argument means "report only"; anything else
    # (including no argument) forces a transcription pass before reporting.
    stats_only = sys.argv[1:2] == ["stats"]
    if not stats_only:
        force_transcription()
    show_audio_stats()