diff --git a/voicebot/bots/whisper.py b/voicebot/bots/whisper.py index 5bb9967..03d2e32 100644 --- a/voicebot/bots/whisper.py +++ b/voicebot/bots/whisper.py @@ -1052,17 +1052,17 @@ class WaveformVideoTrack(MediaStreamTrack): status_text = _model_loading_status progress = _model_loading_progress - # Draw status background - cv2.rectangle(frame_array, (0, 0), (self.width, 60), (0, 0, 0), -1) + # Draw status background (increased height for larger text) + cv2.rectangle(frame_array, (0, 0), (self.width, 80), (0, 0, 0), -1) # Draw progress bar if loading if progress < 1.0 and "Ready" not in status_text: bar_width = int(progress * (self.width - 40)) - cv2.rectangle(frame_array, (20, 40), (20 + bar_width, 50), (0, 255, 0), -1) - cv2.rectangle(frame_array, (20, 40), (self.width - 20, 50), (255, 255, 255), 1) + cv2.rectangle(frame_array, (20, 55), (20 + bar_width, 70), (0, 255, 0), -1) + cv2.rectangle(frame_array, (20, 55), (self.width - 20, 70), (255, 255, 255), 2) - # Draw status text - cv2.putText(frame_array, f"Status: {status_text}", (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + # Draw status text (larger font) + cv2.putText(frame_array, f"Status: {status_text}", (10, 35), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (255, 255, 255), 3) # Select the most active processor (highest RMS) and draw its waveform best_proc = None @@ -1113,16 +1113,16 @@ class WaveformVideoTrack(MediaStreamTrack): points: list[tuple[int, int]] = [] for x in range(self.width): v = float(norm[x]) if x < norm.size and not np.isnan(norm[x]) else 0.0 - y = int((1.0 - ((v + 1.0) / 2.0)) * (self.height - 70)) + 60 # Offset below status bar - points.append((x, max(60, min(self.height - 1, y)))) + y = int((1.0 - ((v + 1.0) / 2.0)) * (self.height - 90)) + 80 # Offset below taller status bar + points.append((x, max(80, min(self.height - 1, y)))) if len(points) > 1: pts_np = np.array(points, dtype=np.int32) cv2.polylines(frame_array, [pts_np], isClosed=False, color=(0, 200, 80), thickness=2) - cv2.putText(frame_array, f"Waveform: {pname}", (10, self.height - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) + cv2.putText(frame_array, f"Waveform: {pname}", (10, self.height - 15), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2) else: - cv2.putText(frame_array, "No audio", (10, self.height - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (200, 200, 200), 1) + cv2.putText(frame_array, "No audio", (10, self.height - 15), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (200, 200, 200), 2) frame = VideoFrame.from_ndarray(frame_array, format="bgr24") frame.pts = pts