diff --git a/client/src/MediaControl.tsx b/client/src/MediaControl.tsx index 7a0561c..23d7925 100644 --- a/client/src/MediaControl.tsx +++ b/client/src/MediaControl.tsx @@ -762,10 +762,14 @@ const MediaAgent = (props: MediaAgentProps) => { if (localPeer) { if (t.kind === "audio") { t.enabled = !localPeer.muted; - console.log(`media-agent - addPeer:${peer.peer_name} Audio track ${t.id} enabled: ${t.enabled} (local muted: ${localPeer.muted})`); + console.log( + `media-agent - addPeer:${peer.peer_name} Audio track ${t.id} enabled: ${t.enabled} (local muted: ${localPeer.muted})` + ); } else if (t.kind === "video") { t.enabled = localPeer.video_on; - console.log(`media-agent - addPeer:${peer.peer_name} Video track ${t.id} enabled: ${t.enabled} (local video_on: ${localPeer.video_on})`); + console.log( + `media-agent - addPeer:${peer.peer_name} Video track ${t.id} enabled: ${t.enabled} (local video_on: ${localPeer.video_on})` + ); } } @@ -778,14 +782,39 @@ const MediaAgent = (props: MediaAgentProps) => { id: t.id, }); - // Enable tracks for bots that need audio/video input (whisper, synthetic media, etc.) + // Do NOT force-enable the original local tracks for bot peers. + // For privacy and principle of least surprise, respect the local user's + // muted/video state. If a bot is added and needs media, mark a pending + // consent on the local peer so the UI can prompt the user. if (peer.peer_name.includes("-bot")) { - if (t.kind === "audio" || t.kind === "video") { - t.enabled = true; - console.log(`media-agent - addPeer:${peer.peer_name} Force enabled ${t.kind} track for bot`); + try { + const localPeerId = session.id; + if (updatedPeers[localPeerId]) { + const existingPending = + (updatedPeers[localPeerId].attributes && updatedPeers[localPeerId].attributes.pendingBotConsent) || []; + const botName = peer.peer_name; + if (!existingPending.includes(botName)) { + const newPending = [...existingPending, botName]; + updatedPeers[localPeerId] = { + ...updatedPeers[localPeerId], + attributes: { + ...updatedPeers[localPeerId].attributes, + pendingBotConsent: newPending, + }, + }; + console.log( + `media-agent - addPeer:${peer.peer_name} Marked pending bot consent for local user:`, + newPending + ); + } + } + } catch (e) { + console.warn(`media-agent - addPeer: failed to mark pending bot consent`, e); } } + // Add the existing track to the connection and rely on the track.enabled + // value which was set above based on the local peer's muted/video state. connection.addTrack(t, media); }); } else if (!localUserHasMedia) { @@ -992,6 +1021,17 @@ const MediaAgent = (props: MediaAgentProps) => { updatedPeers[peer_id].connection = undefined; } + // Also clear any pending bot consent references for this peer name on the local peer + try { + const localPeer = updatedPeers[session.id]; + if (localPeer && localPeer.attributes && localPeer.attributes.pendingBotConsent) { + const pending = localPeer.attributes.pendingBotConsent.filter((b: string) => b !== peer_name); + localPeer.attributes.pendingBotConsent = pending; + } + } catch (e) { + // session may not be available in this closure; ignore safely + } + setPeers(updatedPeers); }, [peers, setPeers] @@ -1339,7 +1379,10 @@ const MediaControl: React.FC = ({ isSelf, peer, className }) const [isDragging, setIsDragging] = useState(false); useEffect(() => { - console.log(`media-agent - MediaControl mounted for peer ${peer?.peer_name}, local=${peer?.local}, hasSrcObject=${!!peer?.attributes?.srcObject}`); + console.log( + `media-agent - MediaControl mounted for peer ${peer?.peer_name}, local=${peer?.local}, hasSrcObject=${!!peer + ?.attributes?.srcObject}` + ); if (!peer) return; console.log(`media-agent - MediaControl peer changed for ${peer.peer_name}, updating state`); setMuted(peer.muted); @@ -1435,7 +1478,10 @@ const MediaControl: React.FC = ({ isSelf, peer, className }) useEffect(() => { if (!peer || peer.dead || !peer.attributes?.srcObject) { - console.log(`media-agent - Audio track control: skipping for ${peer?.peer_name} (dead=${peer?.dead}, hasSrcObject=${!!peer?.attributes?.srcObject})`); + console.log( + `media-agent - Audio track control: skipping for ${peer?.peer_name} (dead=${peer?.dead}, hasSrcObject=${!!peer + ?.attributes?.srcObject})` + ); return; } @@ -1478,29 +1524,77 @@ const MediaControl: React.FC = ({ isSelf, peer, className }) } }, [peer?.session_id]); - const toggleMute = useCallback((e: React.MouseEvent | React.TouchEvent) => { - e.stopPropagation(); - if (peer) { - const newMutedState = !muted; - // Update local state first - setMuted(newMutedState); - // Update peer object (this should trigger re-renders in parent components) - peer.muted = newMutedState; - console.log(`media-agent - toggleMute: ${peer.peer_name} muted=${newMutedState}`); - } - }, [peer, muted]); + const toggleMute = useCallback( + (e: React.MouseEvent | React.TouchEvent) => { + e.stopPropagation(); + if (peer) { + const newMutedState = !muted; + // Update local state first + setMuted(newMutedState); + // Update peer object (this should trigger re-renders in parent components) + peer.muted = newMutedState; + console.log(`media-agent - toggleMute: ${peer.peer_name} muted=${newMutedState}`); + } + }, + [peer, muted] + ); - const toggleVideo = useCallback((e: React.MouseEvent | React.TouchEvent) => { - e.stopPropagation(); - if (peer) { - const newVideoState = !videoOn; - // Update local state first - setVideoOn(newVideoState); - // Update peer object (this should trigger re-renders in parent components) - peer.video_on = newVideoState; - console.log(`media-agent - toggleVideo: ${peer.peer_name} video_on=${newVideoState}`); - } - }, [peer, videoOn]); + const toggleVideo = useCallback( + (e: React.MouseEvent | React.TouchEvent) => { + e.stopPropagation(); + if (peer) { + const newVideoState = !videoOn; + // Update local state first + setVideoOn(newVideoState); + // Update peer object (this should trigger re-renders in parent components) + peer.video_on = newVideoState; + console.log(`media-agent - toggleVideo: ${peer.peer_name} video_on=${newVideoState}`); + } + }, + [peer, videoOn] + ); + + // Handlers for bot consent prompts (local user only) + const handleAllowBot = useCallback( + (botName: string) => { + if (!peer || !peer.local) return; + + // Clear pending for this bot and unmute/share media + const pending = (peer.attributes && peer.attributes.pendingBotConsent) || []; + const remaining = pending.filter((b: string) => b !== botName); + + peer.attributes = { + ...(peer.attributes || {}), + pendingBotConsent: remaining, + }; + + // Share: unmute and enable video (you might want different behaviour) + setMuted(false); + setVideoOn(true); + peer.muted = false; + peer.video_on = true; + + console.log(`media-agent - User allowed ${botName} to receive media`); + // Note: MediaAgent.addPeer uses the current media.getTracks().enabled state when adding tracks; + // existing connections won't be retroactively changed here. For a nicer UX we could trigger + // renegotiation for that bot connection, but keep this minimal for now. + }, + [peer] + ); + + const handleDenyBot = useCallback( + (botName: string) => { + if (!peer || !peer.local) return; + const pending = (peer.attributes && peer.attributes.pendingBotConsent) || []; + const remaining = pending.filter((b: string) => b !== botName); + peer.attributes = { + ...(peer.attributes || {}), + pendingBotConsent: remaining, + }; + console.log(`media-agent - User denied ${botName} access to media`); + }, + [peer] + ); // Snap-back functionality const checkSnapBack = (x: number, y: number) => { @@ -1606,6 +1700,44 @@ const MediaControl: React.FC = ({ isSelf, peer, className }) muted={peer.local || muted} /> + {/* If this is the local user's UI and there are pending bot consent requests, show them */} + {peer.local && peer.attributes?.pendingBotConsent && peer.attributes.pendingBotConsent.length > 0 && ( +
+
Bots requesting media access:
+ {peer.attributes.pendingBotConsent.map((bot: string) => ( +
+
{bot}
+ + +
+ ))} +
+ )} ) ) : ( diff --git a/voicebot/bots/whisper.py b/voicebot/bots/whisper.py index a4a1172..d2627d7 100644 --- a/voicebot/bots/whisper.py +++ b/voicebot/bots/whisper.py @@ -1318,6 +1318,15 @@ class OptimizedAudioProcessor: f"Error in async processing loop for {self.peer_name}: {e}" ) + # Final transcription for any remaining audio + if len(self.current_phrase_audio) > 0 and not self.final_transcription_pending: + logger.info(f"Final transcription for remaining audio in async loop for {self.peer_name}") + await self._transcribe_and_send( + self.current_phrase_audio.copy(), is_final=True + ) + self.current_phrase_audio = np.array([], dtype=np.float32) + self.final_transcription_pending = False + logger.info(f"Async processing loop ended for {self.peer_name}") def _thread_processing_loop(self) -> None: @@ -1373,6 +1382,19 @@ class OptimizedAudioProcessor: f"Error in thread processing loop for {self.peer_name}: {e}" ) + # Final transcription for any remaining audio + if len(self.current_phrase_audio) > 0 and not self.final_transcription_pending: + if self.main_loop: + logger.info(f"Final transcription for remaining audio in thread loop for {self.peer_name}") + asyncio.run_coroutine_threadsafe( + self._transcribe_and_send( + self.current_phrase_audio.copy(), is_final=True + ), + self.main_loop, + ) + self.current_phrase_audio = np.array([], dtype=np.float32) + self.final_transcription_pending = False + async def _transcribe_and_send( self, audio_array: AudioArray, is_final: bool, language: str = "en" ) -> None: @@ -1858,28 +1880,35 @@ async def handle_track_received(peer: Peer, track: MediaStreamTrack) -> None: # Start background task to load model and create processor async def init_processor(): global _model_loading_status, _model_loading_progress - # Load model asynchronously to avoid blocking frame reading - _model_loading_status = "Initializing model loading..." - _model_loading_progress = 0.0 + try: + # Load model asynchronously to avoid blocking frame reading + _model_loading_status = "Initializing model loading..." + _model_loading_progress = 0.0 - loop = asyncio.get_event_loop() - await loop.run_in_executor(None, _ensure_model_loaded) + loop = asyncio.get_event_loop() + await loop.run_in_executor(None, _ensure_model_loaded) - _model_loading_status = "Model loaded, creating processor..." - _model_loading_progress = 0.8 + _model_loading_status = "Model loaded, creating processor..." + _model_loading_progress = 0.8 - logger.info(f"Creating OptimizedAudioProcessor for {peer.peer_name}") - if _send_chat_func is None or _create_chat_message_func is None: - logger.error(f"No send function available for {peer.peer_name}") - _model_loading_status = "Error: No send function available" - return - _audio_processors[peer.peer_name] = OptimizedAudioProcessor( - peer_name=peer.peer_name, send_chat_func=_send_chat_func, create_chat_message_func=_create_chat_message_func - ) + logger.info(f"Creating OptimizedAudioProcessor for {peer.peer_name}") + if _send_chat_func is None or _create_chat_message_func is None: + logger.error(f"No send function available for {peer.peer_name}") + _model_loading_status = "Error: No send function available" + _model_loading_progress = 1.0 # Hide progress bar on error + return + _audio_processors[peer.peer_name] = OptimizedAudioProcessor( + peer_name=peer.peer_name, send_chat_func=_send_chat_func, create_chat_message_func=_create_chat_message_func + ) - _model_loading_status = "Ready for transcription" - _model_loading_progress = 1.0 - logger.info(f"OptimizedAudioProcessor ready for {peer.peer_name}") + _model_loading_status = "Ready for transcription" + _model_loading_progress = 1.0 + logger.info(f"OptimizedAudioProcessor ready for {peer.peer_name}") + + except Exception as e: + logger.error(f"Failed to initialize processor for {peer.peer_name}: {e}") + _model_loading_status = f"Error: {str(e)[:50]}..." + _model_loading_progress = 1.0 # Hide progress bar on error if peer.peer_name not in _audio_processors: if _send_chat_func is None or _create_chat_message_func is None: