#!/usr/bin/env python3
"""
Force transcription debug - processes any accumulated audio immediately.

Run this to force the whisper agent to attempt transcription of current audio buffer.
"""

import sys
import os
import asyncio
import numpy as np

# Add the voicebot directory to the path so that `bots.whisper` (and any
# sibling modules) resolve when this file is executed directly as a script.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
def force_transcription():
    """Force transcription of any accumulated audio.

    Walks every active whisper ``AudioProcessor``: queues any buffered
    frames for processing, and — when the current phrase exceeds the
    minimum duration — runs its transcription coroutine to completion
    synchronously via ``asyncio.run``.

    Returns:
        None. All results and failures are reported via ``print``.
    """
    try:
        from bots.whisper import _audio_processors

        if not _audio_processors:
            print(
                "❌ No audio processors found. Whisper agent may not be running or no peers connected."
            )
            return

        print(f"🔍 Found {len(_audio_processors)} active audio processors:")

        for peer_name, audio_processor in _audio_processors.items():
            print(f"\n👤 {peer_name}:")
            print(f" - Running: {audio_processor.is_running}")
            print(f" - Buffer size: {len(audio_processor.audio_buffer)} frames")
            print(f" - Queue size: {audio_processor.processing_queue.qsize()}")
            print(
                f" - Current phrase length: {len(audio_processor.current_phrase_audio)} samples"
            )

            # Force processing of current buffer
            if len(audio_processor.audio_buffer) > 0:
                print(
                    f"🔄 Forcing processing of {len(audio_processor.audio_buffer)} buffered frames for {peer_name}..."
                )
                audio_processor._queue_for_processing()
            else:
                print(f"📭 No audio in buffer to process for {peer_name}")

            # If we have a current phrase, try to transcribe it
            if len(audio_processor.current_phrase_audio) > 0:
                phrase_duration = (
                    len(audio_processor.current_phrase_audio)
                    / audio_processor.sample_rate
                )
                phrase_rms = np.sqrt(np.mean(audio_processor.current_phrase_audio**2))
                print(
                    f"🎤 Current phrase for {peer_name}: {phrase_duration:.2f}s, RMS: {phrase_rms:.6f}"
                )

                if phrase_duration > 0.3:  # Minimum duration
                    print(
                        f"🚀 Forcing transcription of current phrase for {peer_name}..."
                    )

                    # asyncio.run() creates a fresh event loop, runs the
                    # coroutine to completion, and tears the loop down.
                    # This replaces the deprecated get_event_loop() /
                    # new_event_loop() / run_until_complete pattern, which
                    # warns since Python 3.10 when no loop is running and
                    # could leave a manually-created loop unclosed.
                    asyncio.run(
                        audio_processor._transcribe_and_send(
                            audio_processor.current_phrase_audio.copy(), is_final=True
                        )
                    )
                    print(f"✅ Forced transcription completed for {peer_name}")
                else:
                    print(
                        f"⏱️ Current phrase too short for {peer_name} ({phrase_duration:.2f}s < 0.3s)"
                    )
            else:
                print(f"🤐 No current phrase to transcribe for {peer_name}")

    except ImportError:
        print(
            "❌ Could not import whisper components. Make sure the whisper agent is loaded."
        )
    except Exception as e:
        # Debug utility: report rather than crash, so remaining peers /
        # follow-up commands still run.
        print(f"❌ Error: {e}")
def show_audio_stats():
    """Show detailed audio statistics.

    For each active whisper audio processor, prints its configuration
    (sample rate, frame size, phrase timeout, buffer limits) followed by
    duration/RMS/peak measurements of the current phrase and of the raw
    frame buffer. Any failure — including the whisper module being
    unavailable — is printed rather than raised.
    """
    try:
        from bots.whisper import _audio_processors

        if not _audio_processors:
            print("❌ No audio processors found")
            return

        print(
            f"\n📊 Detailed Audio Statistics for {len(_audio_processors)} processors:"
        )

        for peer, proc in _audio_processors.items():
            print(f"\n👤 {peer}:")
            print(f"Sample rate: {proc.sample_rate}Hz")
            print(f"Samples per frame: {proc.samples_per_frame}")
            print(f"Phrase timeout: {proc.phrase_timeout}s")
            print(f"Buffer max length: {proc.audio_buffer.maxlen}")
            print(f"Current buffer size: {len(proc.audio_buffer)}")
            print(f"Processing queue size: {proc.processing_queue.qsize()}")

            # Stats for the phrase currently being accumulated, if any.
            phrase = proc.current_phrase_audio
            if len(phrase) > 0:
                print(" Current phrase:")
                print(f" Duration: {len(phrase) / proc.sample_rate:.2f}s")
                print(f" Samples: {len(phrase)}")
                print(f" RMS: {np.sqrt(np.mean(phrase**2)):.6f}")
                print(f" Peak: {np.max(np.abs(phrase)):.6f}")

            # Stats for the raw frame buffer, flattened into one array.
            if len(proc.audio_buffer) > 0:
                joined = np.concatenate(list(proc.audio_buffer))
                print(" Buffer contents:")
                print(f" Duration: {len(joined) / proc.sample_rate:.2f}s")
                print(f" Samples: {len(joined)}")
                print(f" RMS: {np.sqrt(np.mean(joined**2)):.6f}")
                print(f" Peak: {np.max(np.abs(joined)):.6f}")

    except Exception as e:
        print(f"❌ Error getting stats: {e}")
if __name__ == "__main__":
    # "stats" shows statistics only; any other invocation forces a
    # transcription pass first, then prints the statistics.
    args = sys.argv[1:]
    if args and args[0] == "stats":
        show_audio_stats()
    else:
        force_transcription()
        show_audio_stats()