Fixing transcode
This commit is contained in:
parent
825544002e
commit
b2169da4cd
@ -8,6 +8,7 @@ import asyncio
|
||||
import threading
|
||||
import uuid
|
||||
import importlib
|
||||
import inspect
|
||||
import pkgutil
|
||||
import sys
|
||||
import os
|
||||
@ -346,7 +347,10 @@ async def bot_join(bot_name: str, req: JoinRequest):
|
||||
if req.config_values and config_handler:
|
||||
try:
|
||||
logger.info(f"Applying existing configuration to bot {bot_name}")
|
||||
success = await config_handler(req.lobby_id, req.config_values)
|
||||
if inspect.iscoroutinefunction(config_handler):
|
||||
success = await config_handler(req.lobby_id, req.config_values)
|
||||
else:
|
||||
success = config_handler(req.lobby_id, req.config_values)
|
||||
if success:
|
||||
logger.info(f"Successfully applied existing configuration to bot {bot_name}")
|
||||
else:
|
||||
@ -451,7 +455,10 @@ async def update_bot_config(bot_name: str, config_data: dict[str, Any]) -> dict[
|
||||
raise HTTPException(status_code=400, detail="lobby_id is required")
|
||||
|
||||
# Call the bot's configuration handler
|
||||
success = await config_handler(lobby_id, config_values)
|
||||
if inspect.iscoroutinefunction(config_handler):
|
||||
success = await config_handler(lobby_id, config_values)
|
||||
else:
|
||||
success = config_handler(lobby_id, config_values)
|
||||
|
||||
if success:
|
||||
return {"success": True, "message": "Configuration updated successfully"}
|
||||
|
@ -1115,6 +1115,7 @@ class OptimizedAudioProcessor:
|
||||
self.transcription_history: List[TranscriptionHistoryItem] = []
|
||||
self.last_activity_time = time.time()
|
||||
self.last_audio_time = time.time() # Track when any audio chunk is received
|
||||
self.final_transcription_pending = False # Flag to prevent accumulating audio during final transcription
|
||||
|
||||
# Current transcription message for refinements
|
||||
self.current_message: Optional[ChatMessageModel] = None
|
||||
@ -1172,6 +1173,7 @@ class OptimizedAudioProcessor:
|
||||
if is_speech:
|
||||
self.silence_frames = 0
|
||||
self.last_activity_time = time.time()
|
||||
self.final_transcription_pending = False # Reset flag when new speech is detected
|
||||
self._add_to_circular_buffer(audio_data)
|
||||
elif (len(self.current_phrase_audio) > 0 and
|
||||
self.silence_frames < self.max_trailing_silence_frames):
|
||||
@ -1262,6 +1264,7 @@ class OptimizedAudioProcessor:
|
||||
): # At least 0.5 seconds
|
||||
if self.main_loop:
|
||||
logger.info(f"Queueing final transcription for {self.peer_name}")
|
||||
self.final_transcription_pending = True
|
||||
asyncio.create_task(
|
||||
self._transcribe_and_send(
|
||||
self.current_phrase_audio.copy(), is_final=True
|
||||
@ -1282,24 +1285,25 @@ class OptimizedAudioProcessor:
|
||||
)
|
||||
|
||||
# Add to current phrase
|
||||
self.current_phrase_audio = np.concatenate(
|
||||
[self.current_phrase_audio, audio_item.audio]
|
||||
)
|
||||
|
||||
# Check if we should transcribe
|
||||
phrase_duration = len(self.current_phrase_audio) / self.sample_rate
|
||||
|
||||
if phrase_duration >= 1.0: # Transcribe every 1 second
|
||||
logger.info(f"Transcribing for {self.peer_name} (1s interval)")
|
||||
await self._transcribe_and_send(
|
||||
self.current_phrase_audio.copy(), is_final=False
|
||||
if not self.final_transcription_pending:
|
||||
self.current_phrase_audio = np.concatenate(
|
||||
[self.current_phrase_audio, audio_item.audio]
|
||||
)
|
||||
|
||||
# Check if we should transcribe
|
||||
phrase_duration = len(self.current_phrase_audio) / self.sample_rate
|
||||
|
||||
if phrase_duration >= 1.0: # Transcribe every 1 second
|
||||
logger.info(f"Transcribing for {self.peer_name} (1s interval)")
|
||||
await self._transcribe_and_send(
|
||||
self.current_phrase_audio.copy(), is_final=False
|
||||
)
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
# Check for final transcription on timeout
|
||||
if (
|
||||
len(self.current_phrase_audio) > 0
|
||||
and time.time() - self.last_audio_time > 2.0
|
||||
and time.time() - self.last_activity_time > 2.0
|
||||
):
|
||||
logger.info(
|
||||
f"Final transcription timeout for {self.peer_name} (asyncio.TimeoutError)"
|
||||
@ -1308,6 +1312,7 @@ class OptimizedAudioProcessor:
|
||||
self.current_phrase_audio.copy(), is_final=True
|
||||
)
|
||||
self.current_phrase_audio = np.array([], dtype=np.float32)
|
||||
self.final_transcription_pending = False # Reset the flag
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error in async processing loop for {self.peer_name}: {e}"
|
||||
@ -1325,30 +1330,31 @@ class OptimizedAudioProcessor:
|
||||
audio_item = self._threading_queue.get(timeout=1.0)
|
||||
|
||||
# Add to current phrase
|
||||
self.current_phrase_audio = np.concatenate(
|
||||
[self.current_phrase_audio, audio_item.audio]
|
||||
)
|
||||
if not self.final_transcription_pending:
|
||||
self.current_phrase_audio = np.concatenate(
|
||||
[self.current_phrase_audio, audio_item.audio]
|
||||
)
|
||||
|
||||
# Check if we should transcribe
|
||||
phrase_duration = len(self.current_phrase_audio) / self.sample_rate
|
||||
# Check if we should transcribe
|
||||
phrase_duration = len(self.current_phrase_audio) / self.sample_rate
|
||||
|
||||
if phrase_duration >= 1.0:
|
||||
if self.main_loop:
|
||||
logger.info(
|
||||
f"Transcribing from thread for {self.peer_name} (_thread_processing_loop > 1s interval)"
|
||||
)
|
||||
asyncio.run_coroutine_threadsafe(
|
||||
self._transcribe_and_send(
|
||||
self.current_phrase_audio.copy(), is_final=False
|
||||
),
|
||||
self.main_loop,
|
||||
)
|
||||
if phrase_duration >= 1.0:
|
||||
if self.main_loop:
|
||||
logger.info(
|
||||
f"Transcribing from thread for {self.peer_name} (_thread_processing_loop > 1s interval)"
|
||||
)
|
||||
asyncio.run_coroutine_threadsafe(
|
||||
self._transcribe_and_send(
|
||||
self.current_phrase_audio.copy(), is_final=False
|
||||
),
|
||||
self.main_loop,
|
||||
)
|
||||
|
||||
except Empty:
|
||||
# Check for final transcription
|
||||
if (
|
||||
len(self.current_phrase_audio) > 0
|
||||
and time.time() - self.last_audio_time > 2.0
|
||||
and time.time() - self.last_activity_time > 2.0
|
||||
):
|
||||
if self.main_loop:
|
||||
logger.info(
|
||||
@ -1361,6 +1367,7 @@ class OptimizedAudioProcessor:
|
||||
self.main_loop,
|
||||
)
|
||||
self.current_phrase_audio = np.array([], dtype=np.float32)
|
||||
self.final_transcription_pending = False # Reset the flag
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error in thread processing loop for {self.peer_name}: {e}"
|
||||
@ -1380,6 +1387,10 @@ class OptimizedAudioProcessor:
|
||||
if audio_array.ndim != 1:
|
||||
raise ValueError("Expected mono audio as a 1D numpy array.")
|
||||
|
||||
# Reset the flag for final transcriptions to allow new processing
|
||||
if is_final:
|
||||
self.final_transcription_pending = False
|
||||
|
||||
transcription_start = time.time()
|
||||
transcription_type = "final" if is_final else "streaming"
|
||||
|
||||
@ -2047,7 +2058,396 @@ def _resample_audio(
|
||||
|
||||
# Public API functions
|
||||
def agent_info() -> Dict[str, str]:
|
||||
return {"name": AGENT_NAME, "description": AGENT_DESCRIPTION, "has_media": "true"}
|
||||
return {"name": AGENT_NAME, "description": AGENT_DESCRIPTION, "has_media": "true", "configurable": "true"}
|
||||
|
||||
|
||||
def get_config_schema() -> Dict[str, Any]:
|
||||
"""Get the configuration schema for the Whisper bot"""
|
||||
return {
|
||||
"bot_name": AGENT_NAME,
|
||||
"version": "1.0",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "model_id",
|
||||
"type": "select",
|
||||
"label": "Whisper Model",
|
||||
"description": "The Whisper model to use for transcription",
|
||||
"default_value": _model_id,
|
||||
"required": True,
|
||||
"options": [
|
||||
{"value": "distil-whisper/distil-large-v2", "label": "Distil-Whisper Large v2 (Fast)"},
|
||||
{"value": "distil-whisper/distil-medium.en", "label": "Distil-Whisper Medium EN"},
|
||||
{"value": "distil-whisper/distil-small.en", "label": "Distil-Whisper Small EN"},
|
||||
{"value": "openai/whisper-large-v3", "label": "OpenAI Whisper Large v3"},
|
||||
{"value": "openai/whisper-large-v2", "label": "OpenAI Whisper Large v2"},
|
||||
{"value": "openai/whisper-large", "label": "OpenAI Whisper Large"},
|
||||
{"value": "openai/whisper-medium", "label": "OpenAI Whisper Medium"},
|
||||
{"value": "openai/whisper-small", "label": "OpenAI Whisper Small"},
|
||||
{"value": "openai/whisper-base", "label": "OpenAI Whisper Base"},
|
||||
{"value": "openai/whisper-tiny", "label": "OpenAI Whisper Tiny"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "device",
|
||||
"type": "select",
|
||||
"label": "Inference Device",
|
||||
"description": "The device to run inference on",
|
||||
"default_value": _device,
|
||||
"required": True,
|
||||
"options": [
|
||||
{"value": "GPU.1", "label": "Intel Arc GPU (GPU.1)"},
|
||||
{"value": "GPU", "label": "GPU"},
|
||||
{"value": "CPU", "label": "CPU"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "enable_quantization",
|
||||
"type": "boolean",
|
||||
"label": "Enable Quantization",
|
||||
"description": "Enable INT8 quantization for faster inference",
|
||||
"default_value": _ov_config.enable_quantization,
|
||||
"required": False
|
||||
},
|
||||
{
|
||||
"name": "throughput_streams",
|
||||
"type": "range",
|
||||
"label": "Throughput Streams",
|
||||
"description": "Number of parallel inference streams",
|
||||
"default_value": _ov_config.throughput_streams,
|
||||
"required": False,
|
||||
"min_value": 1,
|
||||
"max_value": 8,
|
||||
"step": 1
|
||||
},
|
||||
{
|
||||
"name": "max_threads",
|
||||
"type": "range",
|
||||
"label": "Max CPU Threads",
|
||||
"description": "Maximum number of CPU threads for inference",
|
||||
"default_value": _ov_config.max_threads,
|
||||
"required": False,
|
||||
"min_value": 1,
|
||||
"max_value": 16,
|
||||
"step": 1
|
||||
},
|
||||
{
|
||||
"name": "sample_rate",
|
||||
"type": "number",
|
||||
"label": "Sample Rate",
|
||||
"description": "Audio sample rate in Hz",
|
||||
"default_value": SAMPLE_RATE,
|
||||
"required": True,
|
||||
"min_value": 8000,
|
||||
"max_value": 48000
|
||||
},
|
||||
{
|
||||
"name": "chunk_duration_ms",
|
||||
"type": "range",
|
||||
"label": "Chunk Duration (ms)",
|
||||
"description": "Duration of audio chunks in milliseconds",
|
||||
"default_value": CHUNK_DURATION_MS,
|
||||
"required": False,
|
||||
"min_value": 50,
|
||||
"max_value": 500,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"name": "vad_threshold",
|
||||
"type": "range",
|
||||
"label": "VAD Threshold",
|
||||
"description": "Voice activity detection threshold",
|
||||
"default_value": VAD_THRESHOLD,
|
||||
"required": False,
|
||||
"min_value": 0.001,
|
||||
"max_value": 0.1,
|
||||
"step": 0.001
|
||||
},
|
||||
{
|
||||
"name": "max_silence_frames",
|
||||
"type": "range",
|
||||
"label": "Max Silence Frames",
|
||||
"description": "Maximum frames of silence before stopping",
|
||||
"default_value": MAX_SILENCE_FRAMES,
|
||||
"required": False,
|
||||
"min_value": 10,
|
||||
"max_value": 100,
|
||||
"step": 5
|
||||
},
|
||||
{
|
||||
"name": "max_trailing_silence_frames",
|
||||
"type": "range",
|
||||
"label": "Max Trailing Silence Frames",
|
||||
"description": "Maximum trailing silence frames to include",
|
||||
"default_value": MAX_TRAILING_SILENCE_FRAMES,
|
||||
"required": False,
|
||||
"min_value": 1,
|
||||
"max_value": 20,
|
||||
"step": 1
|
||||
},
|
||||
{
|
||||
"name": "vad_energy_threshold",
|
||||
"type": "range",
|
||||
"label": "VAD Energy Threshold",
|
||||
"description": "Energy threshold for voice activity detection",
|
||||
"default_value": 0.005,
|
||||
"required": False,
|
||||
"min_value": 0.001,
|
||||
"max_value": 0.05,
|
||||
"step": 0.001
|
||||
},
|
||||
{
|
||||
"name": "vad_zcr_min",
|
||||
"type": "range",
|
||||
"label": "VAD ZCR Min",
|
||||
"description": "Minimum zero-crossing rate for speech",
|
||||
"default_value": 0.02,
|
||||
"required": False,
|
||||
"min_value": 0.01,
|
||||
"max_value": 0.5,
|
||||
"step": 0.01
|
||||
},
|
||||
{
|
||||
"name": "vad_zcr_max",
|
||||
"type": "range",
|
||||
"label": "VAD ZCR Max",
|
||||
"description": "Maximum zero-crossing rate for speech",
|
||||
"default_value": 0.8,
|
||||
"required": False,
|
||||
"min_value": 0.1,
|
||||
"max_value": 1.0,
|
||||
"step": 0.05
|
||||
},
|
||||
{
|
||||
"name": "vad_spectral_centroid_min",
|
||||
"type": "range",
|
||||
"label": "VAD Spectral Centroid Min",
|
||||
"description": "Minimum spectral centroid for speech",
|
||||
"default_value": 200,
|
||||
"required": False,
|
||||
"min_value": 50,
|
||||
"max_value": 1000,
|
||||
"step": 50
|
||||
},
|
||||
{
|
||||
"name": "vad_spectral_centroid_max",
|
||||
"type": "range",
|
||||
"label": "VAD Spectral Centroid Max",
|
||||
"description": "Maximum spectral centroid for speech",
|
||||
"default_value": 4000,
|
||||
"required": False,
|
||||
"min_value": 1000,
|
||||
"max_value": 8000,
|
||||
"step": 500
|
||||
},
|
||||
{
|
||||
"name": "vad_spectral_rolloff_threshold",
|
||||
"type": "range",
|
||||
"label": "VAD Spectral Rolloff Threshold",
|
||||
"description": "Spectral rolloff threshold for speech detection",
|
||||
"default_value": 3000,
|
||||
"required": False,
|
||||
"min_value": 1000,
|
||||
"max_value": 10000,
|
||||
"step": 500
|
||||
},
|
||||
{
|
||||
"name": "vad_minimum_duration",
|
||||
"type": "range",
|
||||
"label": "VAD Minimum Duration",
|
||||
"description": "Minimum duration for speech segments",
|
||||
"default_value": 0.2,
|
||||
"required": False,
|
||||
"min_value": 0.1,
|
||||
"max_value": 1.0,
|
||||
"step": 0.1
|
||||
},
|
||||
{
|
||||
"name": "vad_max_history",
|
||||
"type": "range",
|
||||
"label": "VAD Max History",
|
||||
"description": "Maximum history frames for temporal consistency",
|
||||
"default_value": 8,
|
||||
"required": False,
|
||||
"min_value": 4,
|
||||
"max_value": 20,
|
||||
"step": 1
|
||||
},
|
||||
{
|
||||
"name": "vad_noise_floor_energy",
|
||||
"type": "range",
|
||||
"label": "VAD Noise Floor Energy",
|
||||
"description": "Initial noise floor energy level",
|
||||
"default_value": 0.001,
|
||||
"required": False,
|
||||
"min_value": 0.0001,
|
||||
"max_value": 0.01,
|
||||
"step": 0.0001
|
||||
},
|
||||
{
|
||||
"name": "vad_adaptation_rate",
|
||||
"type": "range",
|
||||
"label": "VAD Adaptation Rate",
|
||||
"description": "Rate of noise floor adaptation",
|
||||
"default_value": 0.05,
|
||||
"required": False,
|
||||
"min_value": 0.01,
|
||||
"max_value": 0.2,
|
||||
"step": 0.01
|
||||
},
|
||||
{
|
||||
"name": "vad_harmonic_threshold",
|
||||
"type": "range",
|
||||
"label": "VAD Harmonic Threshold",
|
||||
"description": "Threshold for harmonic content detection",
|
||||
"default_value": 0.15,
|
||||
"required": False,
|
||||
"min_value": 0.05,
|
||||
"max_value": 0.5,
|
||||
"step": 0.05
|
||||
}
|
||||
],
|
||||
"categories": [
|
||||
{"Model Settings": ["model_id", "device", "enable_quantization"]},
|
||||
{"Performance Settings": ["throughput_streams", "max_threads"]},
|
||||
{"Audio Settings": ["sample_rate", "chunk_duration_ms"]},
|
||||
{"Voice Activity Detection": ["vad_threshold", "max_silence_frames", "max_trailing_silence_frames", "vad_energy_threshold", "vad_zcr_min", "vad_zcr_max", "vad_spectral_centroid_min", "vad_spectral_centroid_max", "vad_spectral_rolloff_threshold", "vad_minimum_duration", "vad_max_history", "vad_noise_floor_energy", "vad_adaptation_rate", "vad_harmonic_threshold"]}
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def handle_config_update(lobby_id: str, config_values: Dict[str, Any]) -> bool:
|
||||
"""Handle configuration update for a specific lobby"""
|
||||
global _model_id, _device, _ov_config, SAMPLE_RATE, CHUNK_DURATION_MS, VAD_THRESHOLD
|
||||
global MAX_SILENCE_FRAMES, MAX_TRAILING_SILENCE_FRAMES
|
||||
|
||||
try:
|
||||
logger.info(f"Updating Whisper config for lobby {lobby_id}: {config_values}")
|
||||
|
||||
config_applied = False
|
||||
|
||||
# Update model configuration
|
||||
if "model_id" in config_values:
|
||||
new_model_id = config_values["model_id"]
|
||||
if new_model_id in [model for models in model_ids.values() for model in models]:
|
||||
_model_id = new_model_id
|
||||
config_applied = True
|
||||
logger.info(f"Updated model_id to: {_model_id}")
|
||||
else:
|
||||
logger.warning(f"Invalid model_id: {new_model_id}")
|
||||
|
||||
# Update device configuration
|
||||
if "device" in config_values:
|
||||
new_device = config_values["device"]
|
||||
available_devices = [d["name"] for d in get_available_devices()]
|
||||
if new_device in available_devices or new_device in ["CPU", "GPU", "GPU.1"]:
|
||||
_device = new_device
|
||||
_ov_config.device = new_device
|
||||
config_applied = True
|
||||
logger.info(f"Updated device to: {_device}")
|
||||
else:
|
||||
logger.warning(f"Invalid device: {new_device}, available: {available_devices}")
|
||||
|
||||
# Update OpenVINO configuration
|
||||
if "enable_quantization" in config_values:
|
||||
_ov_config.enable_quantization = bool(config_values["enable_quantization"])
|
||||
config_applied = True
|
||||
logger.info(f"Updated quantization to: {_ov_config.enable_quantization}")
|
||||
|
||||
if "throughput_streams" in config_values:
|
||||
streams = int(config_values["throughput_streams"])
|
||||
if 1 <= streams <= 8:
|
||||
_ov_config.throughput_streams = streams
|
||||
config_applied = True
|
||||
logger.info(f"Updated throughput_streams to: {_ov_config.throughput_streams}")
|
||||
|
||||
if "max_threads" in config_values:
|
||||
threads = int(config_values["max_threads"])
|
||||
if 1 <= threads <= 16:
|
||||
_ov_config.max_threads = threads
|
||||
config_applied = True
|
||||
logger.info(f"Updated max_threads to: {_ov_config.max_threads}")
|
||||
|
||||
# Update audio processing parameters
|
||||
if "sample_rate" in config_values:
|
||||
rate = int(config_values["sample_rate"])
|
||||
if 8000 <= rate <= 48000:
|
||||
SAMPLE_RATE = rate
|
||||
config_applied = True
|
||||
logger.info(f"Updated sample_rate to: {SAMPLE_RATE}")
|
||||
|
||||
if "chunk_duration_ms" in config_values:
|
||||
duration = int(config_values["chunk_duration_ms"])
|
||||
if 50 <= duration <= 500:
|
||||
CHUNK_DURATION_MS = duration
|
||||
config_applied = True
|
||||
logger.info(f"Updated chunk_duration_ms to: {CHUNK_DURATION_MS}")
|
||||
|
||||
if "vad_threshold" in config_values:
|
||||
threshold = float(config_values["vad_threshold"])
|
||||
if 0.001 <= threshold <= 0.1:
|
||||
VAD_THRESHOLD = threshold
|
||||
config_applied = True
|
||||
logger.info(f"Updated vad_threshold to: {VAD_THRESHOLD}")
|
||||
|
||||
if "max_silence_frames" in config_values:
|
||||
frames = int(config_values["max_silence_frames"])
|
||||
if 10 <= frames <= 100:
|
||||
MAX_SILENCE_FRAMES = frames
|
||||
config_applied = True
|
||||
logger.info(f"Updated max_silence_frames to: {MAX_SILENCE_FRAMES}")
|
||||
|
||||
if "max_trailing_silence_frames" in config_values:
|
||||
frames = int(config_values["max_trailing_silence_frames"])
|
||||
if 1 <= frames <= 20:
|
||||
MAX_TRAILING_SILENCE_FRAMES = frames
|
||||
config_applied = True
|
||||
logger.info(f"Updated max_trailing_silence_frames to: {MAX_TRAILING_SILENCE_FRAMES}")
|
||||
|
||||
# Update VAD configuration (this would require updating existing processors)
|
||||
vad_updates = {}
|
||||
if "vad_energy_threshold" in config_values:
|
||||
vad_updates["energy_threshold"] = float(config_values["vad_energy_threshold"])
|
||||
if "vad_zcr_min" in config_values:
|
||||
vad_updates["zcr_min"] = float(config_values["vad_zcr_min"])
|
||||
if "vad_zcr_max" in config_values:
|
||||
vad_updates["zcr_max"] = float(config_values["vad_zcr_max"])
|
||||
if "vad_spectral_centroid_min" in config_values:
|
||||
vad_updates["spectral_centroid_min"] = float(config_values["vad_spectral_centroid_min"])
|
||||
if "vad_spectral_centroid_max" in config_values:
|
||||
vad_updates["spectral_centroid_max"] = float(config_values["vad_spectral_centroid_max"])
|
||||
if "vad_spectral_rolloff_threshold" in config_values:
|
||||
vad_updates["spectral_rolloff_threshold"] = float(config_values["vad_spectral_rolloff_threshold"])
|
||||
if "vad_minimum_duration" in config_values:
|
||||
vad_updates["minimum_duration"] = float(config_values["vad_minimum_duration"])
|
||||
if "vad_max_history" in config_values:
|
||||
vad_updates["max_history"] = int(config_values["vad_max_history"])
|
||||
if "vad_noise_floor_energy" in config_values:
|
||||
vad_updates["noise_floor_energy"] = float(config_values["vad_noise_floor_energy"])
|
||||
if "vad_adaptation_rate" in config_values:
|
||||
vad_updates["adaptation_rate"] = float(config_values["vad_adaptation_rate"])
|
||||
if "vad_harmonic_threshold" in config_values:
|
||||
vad_updates["harmonic_threshold"] = float(config_values["vad_harmonic_threshold"])
|
||||
|
||||
if vad_updates:
|
||||
# Update VAD_CONFIG global
|
||||
VAD_CONFIG.update(vad_updates)
|
||||
config_applied = True
|
||||
logger.info(f"Updated VAD config: {vad_updates}")
|
||||
|
||||
# Note: Existing processors would need to be recreated to pick up VAD changes
|
||||
# For now, we'll log that a restart may be needed
|
||||
logger.info("VAD configuration updated - existing processors may need restart to take effect")
|
||||
|
||||
if config_applied:
|
||||
logger.info(f"Configuration update completed for lobby {lobby_id}")
|
||||
else:
|
||||
logger.warning(f"No valid configuration changes applied for lobby {lobby_id}")
|
||||
|
||||
return config_applied
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to apply Whisper config update: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def create_agent_tracks(session_name: str) -> Dict[str, MediaStreamTrack]:
|
||||
|
Loading…
x
Reference in New Issue
Block a user