James Ketr 2025-09-07 21:56:08 -07:00
parent 9089edaeea
commit 795e9b1d67
3 changed files with 84 additions and 30 deletions

View File

@@ -688,7 +688,23 @@ const MediaAgent = (props: MediaAgentProps) => {
       );
       if (media && localUserHasMedia) {
         media.getTracks().forEach((t) => {
-          console.log(`media-agent - addPeer:${peer.peer_name} Adding track:`, t.kind, t.enabled);
+          console.log(`media-agent - addPeer:${peer.peer_name} Adding track:`, {
+            kind: t.kind,
+            enabled: t.enabled,
+            muted: t.muted,
+            readyState: t.readyState,
+            label: t.label,
+            id: t.id,
+          });
+          // Enable tracks for bots that need audio/video input (whisper, synthetic media, etc.)
+          if (peer.peer_name.includes("-bot")) {
+            if (t.kind === "audio" || t.kind === "video") {
+              t.enabled = true;
+              console.log(`media-agent - addPeer:${peer.peer_name} Force enabled ${t.kind} track for bot`);
+            }
+          }
           connection.addTrack(t, media);
         });
       } else if (!localUserHasMedia) {
@@ -824,10 +840,11 @@ const MediaAgent = (props: MediaAgentProps) => {
           await pc.addIceCandidate(candidateInit);
           console.log(`media-agent - sessionDescription:${peer_name} - Queued ICE candidate added`);
         } catch (err) {
-          console.error(
-            `media-agent - sessionDescription:${peer_name} - Failed to add queued ICE candidate:`,
-            { candidateInit, err }
-          );
+          console.error(`media-agent - sessionDescription:${peer_name} - Failed to add queued ICE candidate:`, {
+            candidateInit,
+            rawCandidate: candidate,
+            err,
+          });
         }
       }
     } catch (err) {
@@ -969,7 +986,13 @@ const MediaAgent = (props: MediaAgentProps) => {
         .then(() =>
           console.log(`media-agent - iceCandidate::${peer_name} - ICE candidate added for ${peer.peer_name}`)
         )
-        .catch((err) => console.error(`media-agent - iceCandidate::${peer_name} - Failed to add ICE candidate:`, { candidateInit, err }));
+        .catch((err) =>
+          console.error(`media-agent - iceCandidate::${peer_name} - Failed to add ICE candidate:`, {
+            candidateInit,
+            rawCandidate: candidate,
+            err,
+          })
+        );
       }
     },
     [peers]
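
Note on the two error-logging hunks above: addIceCandidate fails when it runs before the remote description has been applied, which is why candidates that arrive early are buffered and replayed after setRemoteDescription (the "Queued ICE candidate added" log). A minimal sketch of the same buffer-and-drain pattern on the Python side, assuming aiortc; CandidateBuffer and its method names are illustrative, not code from this repo:

    from aiortc import RTCPeerConnection, RTCSessionDescription
    from aiortc.sdp import candidate_from_sdp

    class CandidateBuffer:
        # Illustrative sketch: buffer remote ICE candidates that arrive
        # before the remote description is set, then drain them.
        def __init__(self, pc: RTCPeerConnection) -> None:
            self.pc = pc
            self.pending = []

        async def on_remote_candidate(self, candidate_sdp: str, sdp_mid: str, mline_index: int) -> None:
            # candidate_sdp is the "candidate:..." string relayed by signaling;
            # aiortc's parser wants the part after the "candidate:" prefix.
            candidate = candidate_from_sdp(candidate_sdp.split(":", 1)[1])
            candidate.sdpMid = sdp_mid
            candidate.sdpMLineIndex = mline_index
            if self.pc.remoteDescription is None:
                self.pending.append(candidate)  # too early: queue for later
                return
            await self.pc.addIceCandidate(candidate)

        async def on_remote_description(self, sdp: str, sdp_type: str) -> None:
            await self.pc.setRemoteDescription(RTCSessionDescription(sdp=sdp, type=sdp_type))
            for candidate in self.pending:  # replay everything queued above
                await self.pc.addIceCandidate(candidate)
            self.pending.clear()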
@@ -1102,9 +1125,16 @@ const MediaAgent = (props: MediaAgentProps) => {
     const videoTracks = media.getVideoTracks();
     if (audioTracks.length > 0) {
-      tracks.push(audioTracks[0]);
+      const audioTrack = audioTracks[0];
+      tracks.push(audioTrack);
       hasRealAudio = true;
-      console.log("media-agent - Using real audio");
+      console.log("media-agent - Using real audio:", {
+        enabled: audioTrack.enabled,
+        muted: audioTrack.muted,
+        readyState: audioTrack.readyState,
+        label: audioTrack.label,
+        id: audioTrack.id,
+      });
     }
     if (videoTracks.length > 0) {
@@ -1130,7 +1160,13 @@ const MediaAgent = (props: MediaAgentProps) => {
     }
     const finalMedia = new MediaStream(tracks);
-    console.log(`media-agent - Media setup complete`);
+    console.log(`media-agent - Media setup complete:`, {
+      totalTracks: finalMedia.getTracks().length,
+      audioTracks: finalMedia.getAudioTracks().length,
+      videoTracks: finalMedia.getVideoTracks().length,
+      hasRealAudio,
+      hasRealVideo,
+    });
     return finalMedia;
   }, [session.name]);

View File

@@ -480,20 +480,13 @@ async def handle_track_received(peer: Peer, track: MediaStreamTrack):
             # Receive audio frame
             frame = await track.recv()
             if isinstance(frame, AudioFrame):
-                frame_info = (
-                    f"{frame.sample_rate}Hz, {frame.format.name}, {frame.layout.name}"
-                )
-                logger.debug(
-                    f"Received audio frame from {peer.peer_name}: {frame_info}"
-                )
                 # Convert AudioFrame to numpy array
                 audio_data = frame.to_ndarray()
                 original_shape = audio_data.shape
                 original_dtype = audio_data.dtype
                 logger.debug(
-                    f"Audio frame data: shape={original_shape}, dtype={original_dtype}"
+                    f"Audio frame data: shape={original_shape}, dtype={original_dtype}, samples={frame.samples if hasattr(frame, 'samples') else 'unknown'}"
                 )
                 # Handle different audio formats
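
For context on the conversion step: aiortc's track.recv() yields PyAV AudioFrame objects, and to_ndarray() returns an array whose dtype and shape depend on the frame's format and layout (s16 gives int16; planar layouts put channels on the first axis). A sketch, under those assumptions, of the normalization to float32 in [-1.0, 1.0) that the handler's format handling amounts to; channel de-interleaving is deliberately omitted:

    import numpy as np
    from av import AudioFrame  # PyAV; aiortc's track.recv() returns these for audio

    def frame_to_float32(frame: AudioFrame) -> np.ndarray:
        # Sketch only: the repo's actual format handling is more involved.
        data = frame.to_ndarray()      # dtype/shape depend on frame.format/layout
        samples = data.reshape(-1).astype(np.float32)
        if data.dtype == np.int16:
            samples /= 32768.0         # scale s16 to [-1.0, 1.0)
        return samples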
@@ -528,17 +521,46 @@ async def handle_track_received(peer: Peer, track: MediaStreamTrack):
                 frame_rms = np.sqrt(np.mean(audio_data_float32**2))
                 frame_peak = np.max(np.abs(audio_data_float32))
-                # Only log full frame details every 20 frames to reduce noise
+                # Track frame count and audio state
                 frame_count = getattr(peer, "_whisper_frame_count", 0) + 1
                 setattr(peer, "_whisper_frame_count", frame_count)
-                if frame_count % 20 == 0:
+                # Track if we've seen audio before (to detect start of speech)
+                had_audio = getattr(peer, "_whisper_had_audio", False)
+                # Define thresholds for "real audio" detection
+                audio_threshold = 0.001  # RMS threshold for detecting speech
+                has_audio = frame_rms > audio_threshold
+                # Log important audio events
+                if has_audio and not had_audio:
+                    # Started receiving audio
                     frame_info = f"{frame.sample_rate}Hz, {frame.format.name}, {frame.layout.name}"
                     logger.info(
-                        f"Audio frame #{frame_count} from {peer.peer_name}: {frame_info}, {len(audio_data_float32)} samples, RMS: {frame_rms:.4f}, Peak: {frame_peak:.4f}"
+                        f"🎤 AUDIO DETECTED from {peer.peer_name}! Frame #{frame_count}: {frame_info}, RMS: {frame_rms:.4f}, Peak: {frame_peak:.4f}"
                     )
-                else:
+                    setattr(peer, "_whisper_had_audio", True)
+                    setattr(peer, "_whisper_last_audio_frame", frame_count)
+                elif not has_audio and had_audio:
+                    # Stopped receiving audio
+                    last_audio_frame = getattr(peer, "_whisper_last_audio_frame", 0)
+                    logger.info(
+                        f"🔇 Audio stopped from {peer.peer_name} at frame #{frame_count} (last audio was frame #{last_audio_frame})"
+                    )
+                    setattr(peer, "_whisper_had_audio", False)
+                elif has_audio:
+                    # Continue receiving audio - update last audio frame but don't spam logs
+                    setattr(peer, "_whisper_last_audio_frame", frame_count)
+                    # Only log every 100 frames when continuously receiving audio
+                    if frame_count % 100 == 0:
+                        logger.info(
+                            f"🎤 Audio continuing from {peer.peer_name}: Frame #{frame_count}, RMS: {frame_rms:.4f}"
+                        )
+                # Log connection info much less frequently (every 200 frames when silent)
+                if not has_audio and frame_count % 200 == 0:
                     logger.debug(
-                        f"Audio frame #{frame_count}: RMS: {frame_rms:.4f}, Peak: {frame_peak:.4f}"
+                        f"Connection active from {peer.peer_name}: Frame #{frame_count} (silent, RMS: {frame_rms:.6f})"
                     )
                 # Send to audio processor
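
The event-driven logging above is in effect a two-state voice-activity detector with a single RMS threshold (0.001, from the diff) and per-peer state stashed on the Peer object. Extracted into a standalone class for clarity, the same state machine might look like this sketch; the class and its names are illustrative, not the repo's code:

    import numpy as np

    class AudioActivityTracker:
        # Sketch of the start/stop detection in handle_track_received.
        def __init__(self, threshold: float = 0.001) -> None:
            self.threshold = threshold  # RMS threshold from the diff
            self.had_audio = False
            self.frame_count = 0
            self.last_audio_frame = 0

        def update(self, samples: np.ndarray) -> str | None:
            # Returns "started"/"stopped" on transitions, else None.
            self.frame_count += 1
            rms = float(np.sqrt(np.mean(samples.astype(np.float32) ** 2)))
            has_audio = rms > self.threshold
            if has_audio:
                self.last_audio_frame = self.frame_count
            if has_audio and not self.had_audio:
                self.had_audio = True
                return "started"
            if not has_audio and self.had_audio:
                self.had_audio = False
                return "stopped"
            return None

    # Silence sits below the threshold; even a quiet tone clears it easily.
    silence = np.zeros(960, dtype=np.float32)  # one 20 ms frame at 48 kHz
    tone = (0.1 * np.sin(2 * np.pi * 440 * np.arange(960) / 48000)).astype(np.float32)
    tracker = AudioActivityTracker()
    assert tracker.update(silence) is None
    assert tracker.update(tone) == "started"
    assert tracker.update(silence) == "stopped"

This also ties back to the first file: a track the sender leaves disabled is transmitted as silence under WebRTC's enabled/muted semantics, so without the bot force-enable it would never cross this threshold.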

View File

@@ -795,7 +795,7 @@ class WebRTCSignalingClient:
             peer_id=peer_id, peer_name=peer_name, candidate=candidate_model
         )
         logger.info(
-            f"on_ice_candidate: Sending relayICECandidate for {peer_name}: {candidate_model}"
+            f"on_ice_candidate: Sending relayICECandidate for {peer_name}: candidate='{candidate_model.candidate}' sdpMid={candidate_model.sdpMid} sdpMLineIndex={candidate_model.sdpMLineIndex}"
         )
         asyncio.ensure_future(
             self._send_message("relayICECandidate", payload_model.model_dump())
@@ -971,11 +971,7 @@ class WebRTCSignalingClient:
                 current_section_mid = str(current_media_index)
             elif line.startswith("a=candidate:"):
-                candidate_sdp = line[2:]  # Remove 'a=' prefix
-                # Ensure candidate has the proper SDP format
-                if candidate_sdp and not candidate_sdp.startswith("candidate:"):
-                    candidate_sdp = f"candidate:{candidate_sdp}"
+                candidate_sdp = line[2:]  # Remove 'a=' prefix, keeping "candidate:..."
                 # Clean up any extra spaces
                 if candidate_sdp:
@@ -995,7 +991,7 @@ class WebRTCSignalingClient:
                 )
                 logger.debug(
-                    f"_extract_and_send_candidates: Sending ICE candidate for {peer_name} (mid={current_section_mid}, idx={current_media_index}): {candidate_sdp[:60]}..."
+                    f"_extract_and_send_candidates: Sending ICE candidate for {peer_name} (mid={current_section_mid}, idx={current_media_index}): candidate='{candidate_sdp}'"
                 )
                 await self._send_message(
                     "relayICECandidate", payload_candidate.model_dump()