This commit is contained in:
James Ketr 2025-09-08 13:02:57 -07:00
parent 09ed1ec447
commit 08f2583ebc
37 changed files with 269 additions and 290 deletions

View File

@ -13,7 +13,7 @@ import WebRTCStatus from "./WebRTCStatus";
const debug = true; const debug = true;
// When true, do not send host candidates to the signaling server. Keeps TURN relays preferred. // When true, do not send host candidates to the signaling server. Keeps TURN relays preferred.
const FILTER_HOST_CANDIDATES = true; const FILTER_HOST_CANDIDATES = false; // Temporarily disabled to test direct connections
/* ---------- Synthetic Tracks Helpers ---------- */ /* ---------- Synthetic Tracks Helpers ---------- */
@ -319,6 +319,9 @@ const MediaAgent = (props: MediaAgentProps) => {
const [media, setMedia] = useState<MediaStream | null>(null); const [media, setMedia] = useState<MediaStream | null>(null);
const [pendingPeers, setPendingPeers] = useState<AddPeerConfig[]>([]); const [pendingPeers, setPendingPeers] = useState<AddPeerConfig[]>([]);
// Extract has_media value to avoid unnecessary effect re-runs when session object changes
const localUserHasMedia = session?.has_media !== false; // Default to true for backward compatibility
// Refs for singleton resources // Refs for singleton resources
const mediaStreamRef = useRef<MediaStream | null>(null); const mediaStreamRef = useRef<MediaStream | null>(null);
const connectionsRef = useRef<Map<string, RTCPeerConnection>>(new Map()); const connectionsRef = useRef<Map<string, RTCPeerConnection>>(new Map());
@ -454,7 +457,7 @@ const MediaAgent = (props: MediaAgentProps) => {
// Create RTCPeerConnection // Create RTCPeerConnection
const connection = new RTCPeerConnection({ const connection = new RTCPeerConnection({
iceTransportPolicy: "relay", iceTransportPolicy: "all", // Allow both direct and relay connections
iceServers: [ iceServers: [
{ urls: "stun:ketrenos.com:3478" }, { urls: "stun:ketrenos.com:3478" },
{ {
@ -462,6 +465,9 @@ const MediaAgent = (props: MediaAgentProps) => {
username: "ketra", username: "ketra",
credential: "ketran", credential: "ketran",
}, },
// DO NOT add Google's public STUN server as fallback; if ketrenos.com STUN/TURN fails,
// this is an infrastructure failure that must be resolved (not silently bypassed).
// { urls: "stun:stun.l.google.com:19302" },
], ],
}); });
@ -552,13 +558,42 @@ const MediaAgent = (props: MediaAgentProps) => {
updatePeerConnectionState(peer_id, connection.connectionState); updatePeerConnectionState(peer_id, connection.connectionState);
if (connection.connectionState === "failed") { if (connection.connectionState === "failed") {
console.error(`media-agent - addPeer:${peer.peer_name} Connection failed for`, peer.peer_name);
// Immediate cleanup of failed connection
connectionsRef.current.delete(peer_id);
makingOfferRef.current.delete(peer_id);
isNegotiatingRef.current.delete(peer_id);
initiatedOfferRef.current.delete(peer_id);
// Clean up the peer from state
setPeers((prevPeers) => {
const updated = { ...prevPeers };
delete updated[peer_id];
return updated;
});
// Close the connection
try {
connection.close();
} catch (e) {
console.warn(`media-agent - Error closing failed connection:`, e);
}
} else if (connection.connectionState === "disconnected") {
console.warn(`media-agent - addPeer:${peer.peer_name} Connection disconnected for`, peer.peer_name, "- may recover");
// Set a timeout for disconnected state recovery
setTimeout(() => { setTimeout(() => {
if (connection.connectionState === "failed") { if (connection.connectionState === "disconnected" || connection.connectionState === "failed") {
console.log(`media-agent - addPeer:${peer.peer_name} Cleaning up failed connection for`, peer.peer_name); console.log(`media-agent - addPeer:${peer.peer_name} Connection did not recover, cleaning up`);
connectionsRef.current.delete(peer_id); connectionsRef.current.delete(peer_id);
// You might want to trigger a reconnection attempt here
} }
}, 5000); }, 10000); // Give 10 seconds for recovery
} else if (connection.connectionState === "connected") {
console.log(`media-agent - addPeer:${peer.peer_name} Connection established successfully`);
// Clear any negotiation flags on successful connection
isNegotiatingRef.current.set(peer_id, false);
makingOfferRef.current.set(peer_id, false);
} }
}); });
@ -677,8 +712,36 @@ const MediaAgent = (props: MediaAgentProps) => {
connection.iceConnectionState, connection.iceConnectionState,
event event
); );
if (connection.iceConnectionState === "failed") { if (connection.iceConnectionState === "failed") {
console.log("media-agent - ICE connection failed for", peer.peer_name); console.error(`media-agent - ICE connection failed for ${peer.peer_name}`);
// Log ICE candidate gathering stats for debugging
connection
.getStats()
.then((stats) => {
const candidateStats: any[] = [];
stats.forEach((report) => {
if (report.type === "local-candidate" || report.type === "remote-candidate") {
candidateStats.push({
type: report.type,
candidateType: (report as any).candidateType,
protocol: (report as any).protocol,
address: (report as any).address,
port: (report as any).port,
});
}
});
console.error(`media-agent - ICE candidates for failed connection to ${peer.peer_name}:`, candidateStats);
})
.catch((e) => console.log("Could not get stats:", e));
} else if (connection.iceConnectionState === "disconnected") {
console.warn(`media-agent - ICE connection disconnected for ${peer.peer_name}, may recover`);
} else if (connection.iceConnectionState === "connected") {
console.log(`media-agent - ICE connection established successfully for ${peer.peer_name}`);
} else if (connection.iceConnectionState === "checking") {
console.log(`media-agent - ICE connection checking for ${peer.peer_name}`);
} else if (connection.iceConnectionState === "completed") {
console.log(`media-agent - ICE connection completed for ${peer.peer_name}`);
} }
}; };
@ -746,7 +809,7 @@ const MediaAgent = (props: MediaAgentProps) => {
} }
} }
}, },
[peers, setPeers, media, sendJsonMessage, session?.has_media, updatePeerConnectionState] [peers, setPeers, media, sendJsonMessage, localUserHasMedia, updatePeerConnectionState]
); );
// Process queued peers when media becomes available // Process queued peers when media becomes available
@ -1174,8 +1237,6 @@ const MediaAgent = (props: MediaAgentProps) => {
useEffect(() => { useEffect(() => {
mountedRef.current = true; mountedRef.current = true;
const localUserHasMedia = session?.has_media !== false; // Default to true for backward compatibility
if (mediaStreamRef.current || readyState !== ReadyState.OPEN) return; if (mediaStreamRef.current || readyState !== ReadyState.OPEN) return;
// Capture the connections at effect setup time // Capture the connections at effect setup time
@ -1222,7 +1283,7 @@ const MediaAgent = (props: MediaAgentProps) => {
connectionsToCleanup.forEach((connection) => connection.close()); connectionsToCleanup.forEach((connection) => connection.close());
connectionsToCleanup.clear(); connectionsToCleanup.clear();
}; };
}, [readyState, setup_local_media, session]); }, [readyState, setup_local_media, localUserHasMedia]);
return null; return null;
}; };

View File

@ -19,7 +19,7 @@ from shared.models import (
AdminMetricsResponse, AdminMetricsResponse,
AdminMetricsConfig, AdminMetricsConfig,
) )
from logger import logger from shared.logger import logger
if TYPE_CHECKING: if TYPE_CHECKING:
from core.session_manager import SessionManager, SessionConfig from core.session_manager import SessionManager, SessionConfig

View File

@ -9,7 +9,7 @@ from typing import Dict, Any
from fastapi import APIRouter, HTTPException, BackgroundTasks, WebSocket from fastapi import APIRouter, HTTPException, BackgroundTasks, WebSocket
from core.bot_manager import BotManager from core.bot_manager import BotManager
from logger import logger from shared.logger import logger
from core.bot_config_manager import BotConfigManager from core.bot_config_manager import BotConfigManager
# Import WebSocket handler base class # Import WebSocket handler base class

View File

@ -20,7 +20,7 @@ from shared.models import (
ChatMessagesResponse, ChatMessagesResponse,
) )
from logger import logger from shared.logger import logger
if TYPE_CHECKING: if TYPE_CHECKING:
from ..core.session_manager import SessionManager from ..core.session_manager import SessionManager

View File

@ -18,7 +18,7 @@ from typing import Optional
from fastapi import APIRouter, HTTPException, Query from fastapi import APIRouter, HTTPException, Query
from datetime import datetime from datetime import datetime
from logger import logger from shared.logger import logger
# Import monitoring components # Import monitoring components
try: try:

View File

@ -14,7 +14,7 @@ import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
from shared.models import SessionResponse, HealthResponse, LobbyModel from shared.models import SessionResponse, HealthResponse, LobbyModel
from logger import logger from shared.logger import logger
if TYPE_CHECKING: if TYPE_CHECKING:
from ..core.session_manager import SessionManager from ..core.session_manager import SessionManager

View File

@ -31,7 +31,7 @@ except ImportError:
"Cannot import shared.models. Ensure the project is run as a package or PYTHONPATH is set." "Cannot import shared.models. Ensure the project is run as a package or PYTHONPATH is set."
) )
from logger import logger from shared.logger import logger
class AuthManager: class AuthManager:

View File

@ -14,7 +14,7 @@ import httpx
from typing import Dict, List, Optional, Any from typing import Dict, List, Optional, Any
from pathlib import Path from pathlib import Path
from logger import logger from shared.logger import logger
# Import shared models # Import shared models
import sys import sys

View File

@ -9,7 +9,7 @@ import json
import asyncio import asyncio
from typing import Dict, List, Optional from typing import Dict, List, Optional
from pydantic import ValidationError from pydantic import ValidationError
from logger import logger from shared.logger import logger
# Import shared models # Import shared models
import sys import sys

View File

@ -20,7 +20,7 @@ from typing import Any, Dict, Optional, Callable, TypeVar
from collections import OrderedDict from collections import OrderedDict
from dataclasses import dataclass from dataclasses import dataclass
from logger import logger from shared.logger import logger
T = TypeVar('T') T = TypeVar('T')

View File

@ -15,7 +15,7 @@ from functools import wraps
from dataclasses import dataclass from dataclasses import dataclass
from fastapi import WebSocket from fastapi import WebSocket
from logger import logger from shared.logger import logger
T = TypeVar('T') T = TypeVar('T')

View File

@ -18,7 +18,7 @@ from datetime import datetime, timedelta
from typing import Dict, Any, List, Optional, NamedTuple from typing import Dict, Any, List, Optional, NamedTuple
from enum import Enum from enum import Enum
from logger import logger from shared.logger import logger
class HealthStatus(Enum): class HealthStatus(Enum):

View File

@ -28,7 +28,7 @@ except ImportError:
f"Failed to import shared models: {e}. Ensure shared/models.py is accessible and PYTHONPATH is correctly set." f"Failed to import shared models: {e}. Ensure shared/models.py is accessible and PYTHONPATH is correctly set."
) )
from logger import logger from shared.logger import logger
# Use try/except for importing events to handle both relative and absolute imports # Use try/except for importing events to handle both relative and absolute imports
try: try:

View File

@ -22,7 +22,7 @@ from dataclasses import dataclass, field
from collections import defaultdict, deque from collections import defaultdict, deque
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from logger import logger from shared.logger import logger
@dataclass @dataclass

View File

@ -45,7 +45,7 @@ except ImportError:
) )
from core.lobby_manager import Lobby from core.lobby_manager import Lobby
from logger import logger from shared.logger import logger
# Import WebRTC signaling for peer management # Import WebRTC signaling for peer management
from websocket.webrtc_signaling import WebRTCSignalingHandlers from websocket.webrtc_signaling import WebRTCSignalingHandlers
@ -140,6 +140,10 @@ class Session:
self.bot_instance_id: Optional[str] = None # Bot instance ID for tracking self.bot_instance_id: Optional[str] = None # Bot instance ID for tracking
self.session_lock = threading.RLock() # Instance-level lock self.session_lock = threading.RLock() # Instance-level lock
def is_bot(self) -> bool:
"""Check if this session represents a bot"""
return bool(self.bot_run_id or self.bot_provider_id or self.bot_instance_id)
def getName(self) -> str: def getName(self) -> str:
with self.session_lock: with self.session_lock:
return f"{self.short}:{self.name if self.name else '[ ---- ]'}" return f"{self.short}:{self.name if self.name else '[ ---- ]'}"
@ -401,6 +405,10 @@ class SessionManager:
with self.lock: with self.lock:
sessions_list: List[SessionSaved] = [] sessions_list: List[SessionSaved] = []
for s in self._instances: for s in self._instances:
# Skip bot sessions - they should not be persisted
# Bot sessions are managed by the voicebot service lifecycle
if s.bot_instance_id is not None or s.bot_run_id is not None or s.bot_provider_id is not None:
continue
sessions_list.append(s.to_saved()) sessions_list.append(s.to_saved())
# Note: We'll need to handle name_passwords separately or inject it # Note: We'll need to handle name_passwords separately or inject it

View File

@ -1,81 +0,0 @@
import os
from typing import Any
import warnings
import logging
logging_level = os.getenv("LOGGING_LEVEL", "INFO").upper()
class RelativePathFormatter(logging.Formatter):
def __init__(
self, fmt: Any = None, datefmt: Any = None, remove_prefix: str | None = None
):
super().__init__(fmt, datefmt)
self.remove_prefix = remove_prefix or os.getcwd()
# Ensure the prefix ends with a separator
if not self.remove_prefix.endswith(os.sep):
self.remove_prefix += os.sep
def format(self, record: logging.LogRecord):
# Make a copy of the record to avoid modifying the original
record = logging.makeLogRecord(record.__dict__)
# Remove the prefix from pathname
if record.pathname.startswith(self.remove_prefix):
record.pathname = record.pathname[len(self.remove_prefix) :]
return super().format(record)
def _setup_logging(level: Any = logging_level) -> logging.Logger:
os.environ["TORCH_CPP_LOG_LEVEL"] = "ERROR"
warnings.filterwarnings(
"ignore", message="Overriding a previously registered kernel"
)
warnings.filterwarnings("ignore", message="Warning only once for all operators")
warnings.filterwarnings("ignore", message=".*Couldn't find ffmpeg or avconv.*")
warnings.filterwarnings("ignore", message="'force_all_finite' was renamed to")
warnings.filterwarnings("ignore", message="n_jobs value 1 overridden")
warnings.filterwarnings("ignore", message=".*websocket.*is deprecated")
numeric_level = getattr(logging, level.upper(), None)
if not isinstance(numeric_level, int):
raise ValueError(f"Invalid log level: {level}")
# Create a custom formatter
formatter = RelativePathFormatter(
fmt="%(levelname)s - %(pathname)s:%(lineno)d - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
# Create a handler (e.g., StreamHandler for console output)
handler = logging.StreamHandler()
handler.setFormatter(formatter)
# Configure root logger
logging.basicConfig(
level=numeric_level,
handlers=[handler], # Use only your handler
force=True,
)
# Set levels for noisy loggers
for noisy_logger in (
"uvicorn",
"uvicorn.error",
"uvicorn.access",
"fastapi",
"starlette",
"httpx",
):
logger = logging.getLogger(noisy_logger)
logger.setLevel(logging.WARNING)
logger.handlers = [] # Remove default handlers
logger.addHandler(handler) # Add your custom handler
logger = logging.getLogger(__name__)
return logger
logger = _setup_logging(level=logging_level)
logger.debug(f"Logging initialized with level: {logging_level}")

View File

@ -57,7 +57,7 @@ except ImportError:
from api.bots import create_bot_router from api.bots import create_bot_router
from api.bot_config import create_bot_config_router, setup_websocket_config_handlers from api.bot_config import create_bot_config_router, setup_websocket_config_handlers
from logger import logger from shared.logger import logger
# Import performance monitoring components # Import performance monitoring components
try: try:

View File

@ -5,7 +5,7 @@ Event system for decoupled communication between server components.
from abc import ABC from abc import ABC
from typing import Protocol, Dict, List from typing import Protocol, Dict, List
import asyncio import asyncio
from logger import logger from shared.logger import logger
class Event(ABC): class Event(ABC):

View File

@ -4,10 +4,11 @@ WebSocket connection management.
This module handles WebSocket connections and integrates with the message router. This module handles WebSocket connections and integrates with the message router.
""" """
import asyncio
from typing import Dict, Any, Optional, TYPE_CHECKING from typing import Dict, Any, Optional, TYPE_CHECKING
from fastapi import WebSocket, WebSocketDisconnect from fastapi import WebSocket, WebSocketDisconnect
from logger import logger from shared.logger import logger
from .message_handlers import MessageRouter from .message_handlers import MessageRouter
if TYPE_CHECKING: if TYPE_CHECKING:
@ -100,7 +101,23 @@ class WebSocketConnectionManager:
if session.id in lobby.sessions: if session.id in lobby.sessions:
logger.info(f"{session.getName()} - Stale session in lobby {lobby.getName()}. Re-joining.") logger.info(f"{session.getName()} - Stale session in lobby {lobby.getName()}. Re-joining.")
try: try:
# Leave the lobby to clean up peer connections
await session.leave_lobby(lobby) await session.leave_lobby(lobby)
# Verify the session has been properly removed before proceeding
while session.id in lobby.sessions:
logger.debug(
f"Waiting for session {session.getName()} to be fully removed from lobby"
)
# Brief yield to allow cleanup to complete
await asyncio.sleep(0.01)
# Safety check to prevent infinite loop
if session.id in lobby.sessions:
logger.warning(
f"Force removing stale session {session.getName()} from lobby"
)
with lobby.lock:
lobby.sessions.pop(session.id, None)
break
except Exception as e: except Exception as e:
logger.warning(f"Error cleaning up stale session: {e}") logger.warning(f"Error cleaning up stale session: {e}")

View File

@ -9,7 +9,7 @@ from abc import ABC, abstractmethod
from typing import Dict, Any, TYPE_CHECKING from typing import Dict, Any, TYPE_CHECKING
from fastapi import WebSocket from fastapi import WebSocket
from logger import logger from shared.logger import logger
from .webrtc_signaling import WebRTCSignalingHandlers from .webrtc_signaling import WebRTCSignalingHandlers
from core.error_handling import ( from core.error_handling import (
error_handler, error_handler,

View File

@ -8,7 +8,7 @@ Handles ICE candidate relay and session description exchange between peers.
from typing import Any, Dict, TYPE_CHECKING from typing import Any, Dict, TYPE_CHECKING
from fastapi import WebSocket from fastapi import WebSocket
from logger import logger from shared.logger import logger
from core.error_handling import with_webrtc_error_handling from core.error_handling import with_webrtc_error_handling
if TYPE_CHECKING: if TYPE_CHECKING:
@ -216,6 +216,9 @@ class WebRTCSignalingHandlers:
peer_session: The existing peer session in the lobby peer_session: The existing peer session in the lobby
lobby: The lobby context lobby: The lobby context
""" """
logger.info(
f"[TRACE] handle_add_peer called: session={session.getName()} (id={session.id}), peer_session={peer_session.getName()} (id={peer_session.id}), lobby={lobby.getName()}"
)
# Only establish WebRTC connections if at least one has media # Only establish WebRTC connections if at least one has media
if session.has_media or peer_session.has_media: if session.has_media or peer_session.has_media:
# Add peer_session to session's peer list # Add peer_session to session's peer list
@ -229,11 +232,23 @@ class WebRTCSignalingHandlers:
if lobby.id not in peer_session.lobby_peers: if lobby.id not in peer_session.lobby_peers:
peer_session.lobby_peers[lobby.id] = [] peer_session.lobby_peers[lobby.id] = []
peer_session.lobby_peers[lobby.id].append(session.id) peer_session.lobby_peers[lobby.id].append(session.id)
logger.info(
f"[TRACE] {session.getName()} lobby_peers after add: {session.lobby_peers}"
)
# Notify existing peer about new peer (they should not create offer) # Determine offer roles: bots should never create offers
# When a bot joins: existing humans create offers to the bot
# When a human joins: human creates offers to existing bots, bots don't create offers to human
# Determine who should create the offer based on timing
# In our polite peer implementation, the newer peer typically creates the offer
existing_peer_should_offer = False
new_session_should_offer = True
# Notify existing peer about new peer
logger.info( logger.info(
f"{session.getName()} -> {peer_session.getName()}:addPeer(" f"{session.getName()} -> {peer_session.getName()}:addPeer("
f"{session.getName()}, {lobby.getName()}, should_create_offer=False, " f"{session.getName()}, {lobby.getName()}, should_create_offer={existing_peer_should_offer}, "
f"has_media={session.has_media})" f"has_media={session.has_media})"
) )
try: try:
@ -245,7 +260,7 @@ class WebRTCSignalingHandlers:
"peer_id": session.id, "peer_id": session.id,
"peer_name": session.name, "peer_name": session.name,
"has_media": session.has_media, "has_media": session.has_media,
"should_create_offer": False, "should_create_offer": existing_peer_should_offer,
}, },
} }
) )
@ -254,10 +269,10 @@ class WebRTCSignalingHandlers:
f"Failed to send addPeer to {peer_session.getName()}: {e}" f"Failed to send addPeer to {peer_session.getName()}: {e}"
) )
# Notify new session about existing peer (they should create offer) # Notify new session about existing peer
logger.info( logger.info(
f"{session.getName()} -> {session.getName()}:addPeer(" f"{session.getName()} -> {session.getName()}:addPeer("
f"{peer_session.getName()}, {lobby.getName()}, should_create_offer=True, " f"{peer_session.getName()}, {lobby.getName()}, should_create_offer={new_session_should_offer}, "
f"has_media={peer_session.has_media})" f"has_media={peer_session.has_media})"
) )
try: try:
@ -269,7 +284,7 @@ class WebRTCSignalingHandlers:
"peer_id": peer_session.id, "peer_id": peer_session.id,
"peer_name": peer_session.name, "peer_name": peer_session.name,
"has_media": peer_session.has_media, "has_media": peer_session.has_media,
"should_create_offer": True, "should_create_offer": new_session_should_offer,
}, },
} }
) )
@ -297,6 +312,9 @@ class WebRTCSignalingHandlers:
peer_session: The peer session to disconnect from peer_session: The peer session to disconnect from
lobby: The lobby context lobby: The lobby context
""" """
logger.info(
f"[TRACE] handle_remove_peer called: session={session.getName()} (id={session.id}), peer_session={peer_session.getName()} (id={peer_session.id}), lobby={lobby.getName()}"
)
# Notify peer about session removal # Notify peer about session removal
if peer_session.ws: if peer_session.ws:
logger.info( logger.info(

View File

@ -14,7 +14,7 @@ from enum import Enum
from dataclasses import dataclass from dataclasses import dataclass
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from logger import logger from shared.logger import logger
class AIProviderType(str, Enum): class AIProviderType(str, Enum):

View File

@ -21,7 +21,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import uvicorn import uvicorn
from fastapi import FastAPI, HTTPException from fastapi import FastAPI, HTTPException
from logger import logger from shared.logger import logger
from voicebot.models import JoinRequest from voicebot.models import JoinRequest
from voicebot.webrtc_signaling import WebRTCSignalingClient from voicebot.webrtc_signaling import WebRTCSignalingClient

View File

@ -17,7 +17,7 @@ from aiortc import MediaStreamTrack
import sys import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from logger import logger from shared.logger import logger
from shared.models import ChatMessageModel from shared.models import ChatMessageModel
# Import advanced bot management modules # Import advanced bot management modules

View File

@ -7,7 +7,7 @@ rather than media streams.
from typing import Dict, Optional, Callable, Awaitable from typing import Dict, Optional, Callable, Awaitable
import time import time
import random import random
from logger import logger from shared.logger import logger
from aiortc import MediaStreamTrack from aiortc import MediaStreamTrack
# Import shared models for chat functionality # Import shared models for chat functionality

View File

@ -20,7 +20,7 @@ from typing import TypedDict, TYPE_CHECKING, Tuple, List, Optional, Any
import numpy.typing as npt import numpy.typing as npt
from aiortc import MediaStreamTrack from aiortc import MediaStreamTrack
from av import VideoFrame from av import VideoFrame
from logger import logger from shared.logger import logger
if TYPE_CHECKING: if TYPE_CHECKING:
pass pass

View File

@ -20,7 +20,7 @@ from pydantic import BaseModel, Field, ConfigDict
# Core dependencies # Core dependencies
import librosa import librosa
from logger import logger from shared.logger import logger
from aiortc import MediaStreamTrack from aiortc import MediaStreamTrack
from aiortc.mediastreams import MediaStreamError from aiortc.mediastreams import MediaStreamError
from av import AudioFrame from av import AudioFrame

View File

@ -16,7 +16,7 @@ from typing import Optional
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from fastapi import FastAPI from fastapi import FastAPI
from logger import logger from shared.logger import logger
# Import shared models # Import shared models
from shared.models import ClientStatusResponse from shared.models import ClientStatusResponse

View File

@ -7,7 +7,7 @@ This module contains the main client functionality and entry points.
import asyncio import asyncio
import sys import sys
import os import os
from logger import logger from shared.logger import logger
from voicebot.bots.synthetic_media import AnimatedVideoTrack from voicebot.bots.synthetic_media import AnimatedVideoTrack
# Add the parent directory to sys.path to allow absolute imports # Add the parent directory to sys.path to allow absolute imports

View File

@ -13,7 +13,7 @@ from dataclasses import dataclass, field
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from collections import defaultdict from collections import defaultdict
from logger import logger from shared.logger import logger
@dataclass @dataclass

View File

@ -32,9 +32,6 @@ if [ "$PRODUCTION" != "true" ]; then
exec uv run uvicorn main:uvicorn_app \ exec uv run uvicorn main:uvicorn_app \
--host 0.0.0.0 \ --host 0.0.0.0 \
--port 8788 \ --port 8788 \
--reload \
--reload-dir /voicebot \
--reload-dir /shared \
--log-level info --log-level info
else else
echo "Running as client (connecting to lobby)..." echo "Running as client (connecting to lobby)..."

View File

@ -1,125 +0,0 @@
import os
import warnings
import logging
import time
from typing import Optional, Tuple
logging_level = os.getenv("LOGGING_LEVEL", "INFO").upper()
#logging_level = os.getenv("LOGGING_LEVEL", "DEBUG").upper()
class RelativePathFormatter(logging.Formatter):
def __init__(
self,
fmt: Optional[str] = None,
datefmt: Optional[str] = None,
remove_prefix: Optional[str] = None,
) -> None:
super().__init__(fmt, datefmt)
self.remove_prefix = remove_prefix or os.getcwd()
# Ensure the prefix ends with a separator
if not self.remove_prefix.endswith(os.sep):
self.remove_prefix += os.sep
def format(self, record: logging.LogRecord) -> str:
"""Create a shallow copy of the record and rewrite the pathname
to be relative to the configured prefix. Defensive checks are used
to satisfy static type checkers.
"""
# Make a copy of the record dict so we don't mutate the caller's record
record_dict = record.__dict__.copy()
new_record = logging.makeLogRecord(record_dict)
# Remove the prefix from pathname if present
pathname = getattr(new_record, "pathname", "")
if pathname.startswith(self.remove_prefix):
new_record.pathname = pathname[len(self.remove_prefix) :]
return super().format(new_record)
def _setup_logging(level: str=logging_level) -> logging.Logger:
os.environ["TORCH_CPP_LOG_LEVEL"] = "ERROR"
warnings.filterwarnings(
"ignore", message="Overriding a previously registered kernel"
)
warnings.filterwarnings("ignore", message="Warning only once for all operators")
warnings.filterwarnings("ignore", message=".*Couldn't find ffmpeg or avconv.*")
warnings.filterwarnings("ignore", message="'force_all_finite' was renamed to")
warnings.filterwarnings("ignore", message="n_jobs value 1 overridden")
warnings.filterwarnings("ignore", message=".*websocket.*is deprecated")
logging.getLogger("aiortc").setLevel(logging.WARNING)
logging.getLogger("aioice").setLevel(logging.WARNING)
logging.getLogger("asyncio").setLevel(logging.WARNING)
numeric_level = getattr(logging, level.upper(), None)
if not isinstance(numeric_level, int):
raise ValueError(f"Invalid log level: {level}")
# Create a custom formatter
formatter = RelativePathFormatter(
fmt="%(levelname)s - %(pathname)s:%(lineno)d - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
# Create a handler (e.g., StreamHandler for console output)
handler = logging.StreamHandler()
handler.setFormatter(formatter)
# Simple repeat-suppression filter: if the exact same message (level+text)
# appears repeatedly within a short window, drop duplicates. This keeps
# the first occurrence for diagnostics but avoids log flooding from
# recurring asyncio/aioice stack traces.
class _RepeatFilter(logging.Filter):
def __init__(self, interval: float = 5.0) -> None:
super().__init__()
self._interval = interval
self._last: Optional[Tuple[int, str]] = None
self._last_time: float = 0.0
def filter(self, record: logging.LogRecord) -> bool:
try:
msg = record.getMessage()
except Exception:
# Fallback to a string representation if getMessage fails
msg = str(record)
key: Tuple[int, str] = (getattr(record, "levelno", 0), msg)
now = time.time()
if self._last == key and (now - self._last_time) < self._interval:
return False
self._last = key
self._last_time = now
return True
handler.addFilter(_RepeatFilter())
# Configure root logger
logging.basicConfig(
level=numeric_level,
handlers=[handler], # Use only your handler
force=True,
)
# Set levels for noisy loggers
for noisy_logger in (
"uvicorn",
"uvicorn.error",
"uvicorn.access",
"fastapi",
"starlette",
):
logger = logging.getLogger(noisy_logger)
logger.setLevel(logging.WARNING)
logger.handlers = [] # Remove default handlers
logger.addHandler(handler) # Add your custom handler
logger = logging.getLogger(__name__)
return logger
logger = _setup_logging(level=logging_level)
logger.debug(f"Logging initialized with level: {logging_level}")

View File

@ -14,7 +14,7 @@ import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Import logger for reload debugging # Import logger for reload debugging
from logger import logger from shared.logger import logger
from voicebot.models import VoicebotArgs, VoicebotMode from voicebot.models import VoicebotArgs, VoicebotMode
from voicebot.client_main import main_with_args, start_client_with_reload from voicebot.client_main import main_with_args, start_client_with_reload

View File

@ -11,7 +11,7 @@ from typing import Dict, List, Optional, Any
from enum import Enum from enum import Enum
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from logger import logger from shared.logger import logger
class PersonalityTrait(str, Enum): class PersonalityTrait(str, Enum):

View File

@ -11,7 +11,7 @@ import os
# Add the voicebot directory to the path # Add the voicebot directory to the path
sys.path.append(os.path.dirname(os.path.abspath(__file__))) sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from logger import logger from shared.logger import logger
def set_debug_logging(): def set_debug_logging():

View File

@ -38,7 +38,7 @@ def create_ssl_context(insecure: bool = False) -> ssl.SSLContext | None:
def log_network_info(): def log_network_info():
"""Log network information for debugging.""" """Log network information for debugging."""
from logger import logger from shared.logger import logger
try: try:
import socket import socket

View File

@ -53,7 +53,7 @@ from shared.models import (
ChatMessageModel, ChatMessageModel,
) )
from logger import logger from shared.logger import logger
from voicebot.bots.synthetic_media import create_synthetic_tracks from voicebot.bots.synthetic_media import create_synthetic_tracks
from voicebot.models import Peer, MessageData from voicebot.models import Peer, MessageData
from voicebot.utils import create_ssl_context, log_network_info from voicebot.utils import create_ssl_context, log_network_info
@ -442,8 +442,8 @@ class WebRTCSignalingClient:
async def _setup_local_media(self): async def _setup_local_media(self):
"""Create local media tracks""" """Create local media tracks"""
# If a bot provided a create_tracks callable, use it to create tracks. # Always clear out old tracks to avoid reusing stopped or stale tracks
# Otherwise, use default synthetic tracks. self.local_tracks.clear()
try: try:
if self.create_tracks: if self.create_tracks:
tracks = self.create_tracks(self.session_name) tracks = self.create_tracks(self.session_name)
@ -679,26 +679,66 @@ class WebRTCSignalingClient:
logger.info( logger.info(
f"Adding peer: {peer_name} (should_create_offer: {should_create_offer})" f"Adding peer: {peer_name} (should_create_offer: {should_create_offer})"
) )
logger.info(f"_handle_add_peer: Current peer_connections count: {len(self.peer_connections)}")
logger.info(f"_handle_add_peer: Current active peers: {list(self.peer_connections.keys())}")
logger.debug( logger.debug(
f"_handle_add_peer: peer_id={peer_id}, peer_name={peer_name}, should_create_offer={should_create_offer}" f"_handle_add_peer: peer_id={peer_id}, peer_name={peer_name}, should_create_offer={should_create_offer}"
) )
# Check if peer already exists # Check if peer already exists and clean up stale connections
if peer_id in self.peer_connections: if peer_id in self.peer_connections:
pc = self.peer_connections[peer_id] pc = self.peer_connections[peer_id]
logger.debug( logger.info(f"_handle_add_peer: Found existing connection for {peer_name} (state: {pc.connectionState})")
f"_handle_add_peer: Existing connection state: {pc.connectionState}" logger.info("_handle_add_peer: Cleaning up existing connection to ensure fresh start")
)
if pc.connectionState in ["new", "connected", "connecting"]: # Always clean up existing connection for fresh start
logger.info(f"Peer connection already exists for {peer_name}") # This prevents stale connection issues during page refresh
return try:
else:
# Clean up stale connection
logger.debug(
f"_handle_add_peer: Closing stale connection for {peer_name}"
)
await pc.close() await pc.close()
del self.peer_connections[peer_id] except Exception as e:
logger.warning(f"Error closing existing connection for {peer_name}: {e}")
# Clean up all associated state
self.is_negotiating.pop(peer_id, None)
self.making_offer.pop(peer_id, None)
self.initiated_offer.discard(peer_id)
self.pending_ice_candidates.pop(peer_id, None)
self.peers.pop(peer_id, None) # Also clean up peers dictionary
del self.peer_connections[peer_id]
# Give the connection a moment to close, but don't block indefinitely
# If it doesn't close quickly, proceed anyway to avoid blocking other operations
try:
await asyncio.wait_for(
self._wait_for_connection_close(pc, peer_name),
timeout=1.0
)
except asyncio.TimeoutError:
logger.warning(f"Connection to {peer_name} did not close within timeout, proceeding anyway")
# Also check for any existing connections with the same peer name but different ID
# This handles cases where the frontend gets a new session ID after refresh
connections_to_remove: list[str] = []
for existing_peer_id, existing_peer in self.peers.items():
if existing_peer.peer_name == peer_name and existing_peer_id != peer_id:
logger.info(f"_handle_add_peer: Found existing connection for peer name {peer_name} with different ID {existing_peer_id}, cleaning up")
connections_to_remove.append(existing_peer_id)
for old_peer_id in connections_to_remove:
if old_peer_id in self.peer_connections:
pc = self.peer_connections[old_peer_id]
try:
await pc.close()
except Exception as e:
logger.warning(f"Error closing old connection for {peer_name}: {e}")
# Clean up all associated state for old connection
self.is_negotiating.pop(old_peer_id, None)
self.making_offer.pop(old_peer_id, None)
self.initiated_offer.discard(old_peer_id)
self.pending_ice_candidates.pop(old_peer_id, None)
self.peers.pop(old_peer_id, None)
del self.peer_connections[old_peer_id]
# Create new peer # Create new peer
peer = Peer(session_id=peer_id, peer_name=peer_name, local=False) peer = Peer(session_id=peer_id, peer_name=peer_name, local=False)
@ -713,8 +753,6 @@ class WebRTCSignalingClient:
username="ketra", username="ketra",
credential="ketran", credential="ketran",
), ),
# Add Google's public STUN server as fallback
RTCIceServer(urls="stun:stun.l.google.com:19302"),
], ],
) )
logger.debug( logger.debug(
@ -734,12 +772,39 @@ class WebRTCSignalingClient:
# Add connection state change handler # Add connection state change handler
def on_connection_state_change() -> None: def on_connection_state_change() -> None:
logger.info(f"Connection state: {pc.connectionState}") logger.info(f"🔄 Connection state changed for {peer_name}: {pc.connectionState}")
# Log additional details for debugging connection closure/failure
logger.info(f" ICE connection state: {pc.iceConnectionState}")
logger.info(f" Signaling state: {pc.signalingState}")
# If available, log the last ICE candidate pair and any error details
selected_pair = None
try:
if hasattr(pc, 'sctp') and pc.sctp:
selected_pair = getattr(pc.sctp.transport, 'iceTransport', None)
elif hasattr(pc, 'iceTransport'):
selected_pair = pc.iceTransport
except Exception as e:
logger.warning(f"Error accessing selected ICE pair: {e}")
if selected_pair:
logger.info(f" Selected ICE candidate pair: {getattr(selected_pair, 'selectedCandidatePair', None)}")
# Handle failed connections - could be due to network issues during refresh
if pc.connectionState == "failed":
logger.warning(f"❌ Connection to {peer_name} failed - this might be due to page refresh")
elif pc.connectionState == "disconnected":
logger.info(f"⚠️ Connection to {peer_name} disconnected")
elif pc.connectionState == "connected":
logger.info(f"✅ Connection to {peer_name} established successfully")
elif pc.connectionState == "closed":
logger.info(f"🔒 Connection to {peer_name} closed")
pc.on("connectionstatechange")(on_connection_state_change) pc.on("connectionstatechange")(on_connection_state_change)
logger.info(f"📝 Storing peer connection for {peer_name} (ID: {peer_id})")
self.peer_connections[peer_id] = pc self.peer_connections[peer_id] = pc
peer.connection = pc peer.connection = pc
logger.info(f"📊 Total peer connections now: {len(self.peer_connections)}")
# Set up event handlers # Set up event handlers
def on_track(track: MediaStreamTrack) -> None: def on_track(track: MediaStreamTrack) -> None:
@ -1043,19 +1108,33 @@ class WebRTCSignalingClient:
# Close peer connection # Close peer connection
if peer_id in self.peer_connections: if peer_id in self.peer_connections:
pc = self.peer_connections[peer_id] pc = self.peer_connections[peer_id]
await pc.close() try:
await pc.close()
# Wait for connection to be properly closed
while pc.connectionState not in ["closed", "failed"]:
logger.debug(f"Waiting for connection to {peer_name} to close (current state: {pc.connectionState})")
# Brief yield to allow cleanup
await asyncio.sleep(0.01)
except Exception as e:
logger.warning(f"Error closing peer connection for {peer_name}: {e}")
del self.peer_connections[peer_id] del self.peer_connections[peer_id]
# Clean up state # Extra forced cleanup: remove all state for this peer_id
self.is_negotiating.pop(peer_id, None) for state_dict in [self.is_negotiating, self.making_offer, self.pending_ice_candidates]:
self.making_offer.pop(peer_id, None) if peer_id in state_dict:
logger.debug(f"Force removing {peer_id} from {state_dict}")
state_dict.pop(peer_id, None)
self.initiated_offer.discard(peer_id) self.initiated_offer.discard(peer_id)
self.pending_ice_candidates.pop(peer_id, None)
# Remove peer # Remove peer
peer = self.peers.pop(peer_id, None) peer = self.peers.pop(peer_id, None)
if peer and self.on_peer_removed: if peer and self.on_peer_removed:
await self.on_peer_removed(peer) try:
await self.on_peer_removed(peer)
except Exception as e:
logger.warning(f"Error in on_peer_removed callback for {peer_name}: {e}")
logger.info(f"Completed removing peer: {peer_name} (all state reset)")
async def _handle_session_description(self, data: SessionDescriptionModel): async def _handle_session_description(self, data: SessionDescriptionModel):
"""Handle sessionDescription message""" """Handle sessionDescription message"""
@ -1084,12 +1163,12 @@ class WebRTCSignalingClient:
) )
we_initiated = peer_id in self.initiated_offer we_initiated = peer_id in self.initiated_offer
# For bots, be more polite - always yield to human users in collision # For bots, be more polite - accept remote offers even in collision scenarios
# Bots should generally be the polite peer # Only ignore if we're in the middle of sending our own offer AND it's actually conflicting
ignore_offer = offer_collision and we_initiated ignore_offer = offer_collision and we_initiated and pc.signalingState != "stable"
if ignore_offer: if ignore_offer:
logger.info(f"Ignoring offer from {peer_name} due to collision (bot being polite)") logger.info(f"Ignoring offer from {peer_name} due to active collision (bot being polite)")
# Reset our offer state to allow the remote offer to proceed # Reset our offer state to allow the remote offer to proceed
if peer_id in self.initiated_offer: if peer_id in self.initiated_offer:
self.initiated_offer.remove(peer_id) self.initiated_offer.remove(peer_id)
@ -1101,7 +1180,11 @@ class WebRTCSignalingClient:
await asyncio.sleep(1.0) # Wait 1 second await asyncio.sleep(1.0) # Wait 1 second
if pc.connectionState not in ["connected", "closed", "failed"]: if pc.connectionState not in ["connected", "closed", "failed"]:
logger.info(f"Retrying connection setup for {peer_name} after collision") logger.info(f"Retrying connection setup for {peer_name} after collision")
# Don't create offer, let the remote peer drive # Reset state to allow the remote peer to retry
self.making_offer[peer_id] = False
self.is_negotiating[peer_id] = False
if peer_id in self.initiated_offer:
self.initiated_offer.remove(peer_id)
asyncio.create_task(retry_connection()) asyncio.create_task(retry_connection())
return return
@ -1212,7 +1295,7 @@ class WebRTCSignalingClient:
peer_name = data.peer_name peer_name = data.peer_name
candidate_data = data.candidate candidate_data = data.candidate
logger.info(f"Received ICE candidate from {peer_name}") logger.info(f"Received ICE candidate from {peer_name}: {candidate_data}")
pc = self.peer_connections.get(peer_id) pc = self.peer_connections.get(peer_id)
if not pc: if not pc:
@ -1250,6 +1333,7 @@ class WebRTCSignalingClient:
except Exception: except Exception:
cand_type = "unknown" cand_type = "unknown"
logger.info(f"Attempting to add ICE candidate for {peer_name}: type={cand_type}, sdp='{sdp_part}'")
try: try:
rtc_candidate = candidate_from_sdp(sdp_part) rtc_candidate = candidate_from_sdp(sdp_part)
rtc_candidate.sdpMid = candidate_data.sdpMid rtc_candidate.sdpMid = candidate_data.sdpMid
@ -1257,7 +1341,7 @@ class WebRTCSignalingClient:
# aiortc expects an object with attributes (RTCIceCandidate) # aiortc expects an object with attributes (RTCIceCandidate)
await pc.addIceCandidate(rtc_candidate) await pc.addIceCandidate(rtc_candidate)
logger.info(f"ICE candidate added for {peer_name}: type={cand_type}") logger.info(f"ICE candidate successfully added for {peer_name}: type={cand_type}")
except Exception as e: except Exception as e:
logger.error( logger.error(
f"Failed to add ICE candidate for {peer_name}: type={cand_type} error={e} sdp='{sdp_part}'", f"Failed to add ICE candidate for {peer_name}: type={cand_type} error={e} sdp='{sdp_part}'",