Before claude rewrite
This commit is contained in:
parent
ed8967f394
commit
6620c0ac74
@ -12,3 +12,4 @@
|
|||||||
**/*.key
|
**/*.key
|
||||||
**/package-lock.json
|
**/package-lock.json
|
||||||
**/*.pyc
|
**/*.pyc
|
||||||
|
**/VibeVoice
|
||||||
|
2
.github/copilot-instructions.md
vendored
2
.github/copilot-instructions.md
vendored
@ -15,7 +15,7 @@
|
|||||||
- Always run tests inside the appropriate Docker containers using `docker compose exec`
|
- Always run tests inside the appropriate Docker containers using `docker compose exec`
|
||||||
- Use `uv run` for Python commands in voicebot and server containers
|
- Use `uv run` for Python commands in voicebot and server containers
|
||||||
- Tests should be placed in the `tests/` directory (bind mounted to `/tests` in containers)
|
- Tests should be placed in the `tests/` directory (bind mounted to `/tests` in containers)
|
||||||
- Use proper PYTHONPATH when running Python code: `PYTHONPATH=/shared:/voicebot` for voicebot, `PYTHONPATH=/shared:/server` for server
|
- Use proper PYTHONPATH when running Python code: `PYTHONPATH=/:/voicebot` for voicebot, `PYTHONPATH=/:/server` for server
|
||||||
- Check container logs with `docker compose logs --since 10m SERVICE_NAME` for debugging
|
- Check container logs with `docker compose logs --since 10m SERVICE_NAME` for debugging
|
||||||
|
|
||||||
### Voicebot Testing (Python with uv)
|
### Voicebot Testing (Python with uv)
|
||||||
|
@ -2,228 +2,3 @@ body {
|
|||||||
font-family: 'Droid Sans', 'Arial Narrow', Arial, sans-serif;
|
font-family: 'Droid Sans', 'Arial Narrow', Arial, sans-serif;
|
||||||
overflow: hidden;
|
overflow: hidden;
|
||||||
}
|
}
|
||||||
|
|
||||||
#root {
|
|
||||||
width: 100vw;
|
|
||||||
/* height: 100vh; breaks on mobile -- not needed */
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table {
|
|
||||||
display: flex;
|
|
||||||
position: absolute;
|
|
||||||
top: 0;
|
|
||||||
left: 0;
|
|
||||||
width: 100%;
|
|
||||||
bottom: 0;
|
|
||||||
flex-direction: row;
|
|
||||||
/* background-image: url("./assets/tabletop.png"); */
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .Dialogs {
|
|
||||||
z-index: 10000;
|
|
||||||
display: flex;
|
|
||||||
justify-content: space-around;
|
|
||||||
align-items: center;
|
|
||||||
position: absolute;
|
|
||||||
top: 0;
|
|
||||||
left: 0;
|
|
||||||
bottom: 0;
|
|
||||||
right: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .Dialogs .Dialog {
|
|
||||||
display: flex;
|
|
||||||
position: absolute;
|
|
||||||
flex-shrink: 1;
|
|
||||||
flex-direction: column;
|
|
||||||
padding: 0.25rem;
|
|
||||||
left: 0;
|
|
||||||
right: 0;
|
|
||||||
top: 0;
|
|
||||||
bottom: 0;
|
|
||||||
justify-content: space-around;
|
|
||||||
align-items: center;
|
|
||||||
z-index: 60000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .Dialogs .Dialog > div {
|
|
||||||
display: flex;
|
|
||||||
padding: 1rem;
|
|
||||||
flex-direction: column;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .Dialogs .Dialog > div > div:first-child {
|
|
||||||
padding: 1rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .Dialogs .TurnNoticeDialog {
|
|
||||||
background-color: #7a680060;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .Dialogs .ErrorDialog {
|
|
||||||
background-color: #40000060;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .Dialogs .WarningDialog {
|
|
||||||
background-color: #00000060;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .Game {
|
|
||||||
position: relative;
|
|
||||||
display: flex;
|
|
||||||
flex-direction: column;
|
|
||||||
flex-grow: 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .Board {
|
|
||||||
display: flex;
|
|
||||||
position: relative;
|
|
||||||
flex-grow: 1;
|
|
||||||
z-index: 500;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .PlayersStatus {
|
|
||||||
z-index: 500; /* Under Hand */
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .PlayersStatus.ActivePlayer {
|
|
||||||
z-index: 1500; /* On top of Hand */
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .Hand {
|
|
||||||
display: flex;
|
|
||||||
position: relative;
|
|
||||||
height: 11rem;
|
|
||||||
z-index: 10000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .Sidebar {
|
|
||||||
display: flex;
|
|
||||||
flex-direction: column;
|
|
||||||
justify-content: space-between;
|
|
||||||
width: 25rem;
|
|
||||||
max-width: 25rem;
|
|
||||||
overflow: hidden;
|
|
||||||
z-index: 5000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .Sidebar .Chat {
|
|
||||||
display: flex;
|
|
||||||
position: relative;
|
|
||||||
flex-grow: 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .Trade {
|
|
||||||
display: flex;
|
|
||||||
position: relative;
|
|
||||||
z-index: 25000;
|
|
||||||
align-self: center;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .Dialogs {
|
|
||||||
position: absolute;
|
|
||||||
display: flex;
|
|
||||||
top: 0;
|
|
||||||
bottom: 0;
|
|
||||||
right: 0;
|
|
||||||
left: 0;
|
|
||||||
justify-content: space-around;
|
|
||||||
align-items: center;
|
|
||||||
z-index: 20000;
|
|
||||||
pointer-events: none;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .Dialogs > * {
|
|
||||||
pointer-events: all;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .ViewCard {
|
|
||||||
display: flex;
|
|
||||||
position: absolute;
|
|
||||||
top: 0;
|
|
||||||
left: 0;
|
|
||||||
right: 0;
|
|
||||||
bottom: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .Winner {
|
|
||||||
display: flex;
|
|
||||||
position: absolute;
|
|
||||||
top: 0;
|
|
||||||
left: 0;
|
|
||||||
right: 0;
|
|
||||||
bottom: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
.Table .HouseRules {
|
|
||||||
display: flex;
|
|
||||||
position: absolute;
|
|
||||||
top: 0;
|
|
||||||
left: 0;
|
|
||||||
right: 0;
|
|
||||||
bottom: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .ChooseCard {
|
|
||||||
display: flex;
|
|
||||||
position: relative;
|
|
||||||
top: 0;
|
|
||||||
left: 0;
|
|
||||||
right: 0;
|
|
||||||
bottom: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table button {
|
|
||||||
margin: 0.25rem;
|
|
||||||
background-color: white;
|
|
||||||
border: 1px solid black; /* why !important */
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .MuiButton-text {
|
|
||||||
padding: 0.25rem 0.55rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table button:disabled {
|
|
||||||
opacity: 0.5;
|
|
||||||
border: 1px solid #ccc; /* why !important */
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .ActivitiesBox {
|
|
||||||
display: flex;
|
|
||||||
flex-direction: column;
|
|
||||||
position: absolute;
|
|
||||||
left: 1em;
|
|
||||||
top: 1em;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .DiceRoll {
|
|
||||||
display: flex;
|
|
||||||
flex-direction: column;
|
|
||||||
position: relative;
|
|
||||||
/*
|
|
||||||
left: 1rem;
|
|
||||||
top: 5rem;*/
|
|
||||||
flex-wrap: wrap;
|
|
||||||
justify-content: left;
|
|
||||||
align-items: left;
|
|
||||||
z-index: 1000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .DiceRoll div:not(:last-child) {
|
|
||||||
border: 1px solid black;
|
|
||||||
background-color: white;
|
|
||||||
padding: 0.25rem 0.5rem;
|
|
||||||
border-radius: 0.25rem;
|
|
||||||
}
|
|
||||||
.Table .DiceRoll div:last-child {
|
|
||||||
display: flex;
|
|
||||||
flex-direction: row;
|
|
||||||
}
|
|
||||||
|
|
||||||
.Table .DiceRoll .Dice {
|
|
||||||
margin: 0.25rem;
|
|
||||||
width: 2.75rem;
|
|
||||||
height: 2.75rem;
|
|
||||||
border-radius: 0.5rem;
|
|
||||||
}
|
|
@ -191,8 +191,8 @@ const LobbyView: React.FC<LobbyProps> = (props: LobbyProps) => {
|
|||||||
sx={{
|
sx={{
|
||||||
p: { xs: 1, sm: 2 },
|
p: { xs: 1, sm: 2 },
|
||||||
m: { xs: 0, sm: 2 },
|
m: { xs: 0, sm: 2 },
|
||||||
width: { xs: "100%", sm: "fit-content" },
|
// width: { xs: "100%", sm: "fit-content" },
|
||||||
maxWidth: { xs: "100%", sm: 600 },
|
// maxWidth: { xs: "100%", sm: 600 },
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
{readyState !== ReadyState.OPEN || !session ? (
|
{readyState !== ReadyState.OPEN || !session ? (
|
||||||
@ -299,7 +299,7 @@ const App = () => {
|
|||||||
<Box
|
<Box
|
||||||
sx={{
|
sx={{
|
||||||
p: { xs: 1, sm: 2 },
|
p: { xs: 1, sm: 2 },
|
||||||
maxWidth: { xs: "100%", sm: 800 },
|
// maxWidth: { xs: "100%", sm: 800 },
|
||||||
margin: "0 auto",
|
margin: "0 auto",
|
||||||
height: "100vh",
|
height: "100vh",
|
||||||
overflowY: "auto",
|
overflowY: "auto",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
.lobby-chat {
|
.lobby-chat {
|
||||||
min-width: 300px;
|
min-width: 300px;
|
||||||
max-width: 400px;
|
max-width: 100%;
|
||||||
}
|
}
|
||||||
|
|
||||||
.chat-messages {
|
.chat-messages {
|
||||||
@ -35,9 +35,9 @@
|
|||||||
border-bottom-left-radius: 4px !important;
|
border-bottom-left-radius: 4px !important;
|
||||||
}
|
}
|
||||||
|
|
||||||
@media (max-width: 768px) {
|
/* @media (max-width: 768px) {
|
||||||
.lobby-chat {
|
.lobby-chat {
|
||||||
min-width: 250px;
|
min-width: 250px;
|
||||||
max-width: 300px;
|
max-width: 300px;
|
||||||
}
|
}
|
||||||
}
|
} */
|
||||||
|
@ -1,32 +1,13 @@
|
|||||||
/*@media only screen and (max-height: 512px) {
|
|
||||||
html {
|
|
||||||
font-size: 6.75px;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@media only screen and (min-height: 513px) and (max-height: 800px) {*/
|
|
||||||
html {
|
|
||||||
font-size: 2vh;/*10px;*/
|
|
||||||
}
|
|
||||||
/*}
|
|
||||||
|
|
||||||
@media only screen and (min-height: 2000px) {
|
|
||||||
html {
|
|
||||||
font-size: 30px;
|
|
||||||
}
|
|
||||||
}*/
|
|
||||||
|
|
||||||
html {
|
html {
|
||||||
height: 100%;
|
height: 100dvh;
|
||||||
width: 100%;
|
width: 100%;
|
||||||
margin: 0;
|
margin: 0;
|
||||||
padding: 0;
|
padding: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
body {
|
body {
|
||||||
display: flex;
|
display: block;
|
||||||
position: relative;
|
position: relative;
|
||||||
height: 100%;
|
|
||||||
width: 100%;
|
width: 100%;
|
||||||
height: 100dvh;
|
height: 100dvh;
|
||||||
padding: 0;
|
padding: 0;
|
||||||
|
1574
voicebot/bots/vibevoice.py
Normal file
1574
voicebot/bots/vibevoice.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -58,13 +58,153 @@ AudioArray = npt.NDArray[np.float32]
|
|||||||
ModelConfig = Dict[str, Union[str, int, bool]]
|
ModelConfig = Dict[str, Union[str, int, bool]]
|
||||||
CalibrationData = List[Dict[str, Any]]
|
CalibrationData = List[Dict[str, Any]]
|
||||||
|
|
||||||
_device = "GPU.1" # Default to Intel Arc B580 GPU
|
|
||||||
|
|
||||||
# Global lock to serialize calls into the OpenVINO model.generate/decode
|
# Global lock to serialize calls into the OpenVINO model.generate/decode
|
||||||
# since some backends are not safe for concurrent generate calls.
|
# since some backends are not safe for concurrent generate calls.
|
||||||
_generate_global_lock = threading.Lock()
|
_generate_global_lock = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def _do_generate_once(model, *args, **kwargs):
    """Run ``model.generate`` a single time through the serialized worker.

    The bound ``generate`` method is handed to ``_submit_generate_to_worker``
    together with the caller's positional and keyword arguments, and the
    value it produces is returned unchanged.

    Raises:
        Whatever exception the underlying ``generate`` call raised.
    """
    bound_generate = model.generate
    return _submit_generate_to_worker(bound_generate, *args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_generate_with_retries(model, *args, max_retries: int = 20, initial_delay: float = 0.05, **kwargs):
|
||||||
|
"""Call model.generate while handling OpenVINO 'Infer Request is busy' by retrying.
|
||||||
|
|
||||||
|
This helper retries on RuntimeError containing 'Infer Request is busy' with
|
||||||
|
exponential backoff. It raises the last exception if retries are exhausted.
|
||||||
|
"""
|
||||||
|
delay = initial_delay
|
||||||
|
last_exc = None
|
||||||
|
for attempt in range(1, max_retries + 1):
|
||||||
|
try:
|
||||||
|
# Submit the actual blocking generate to the serialized worker
|
||||||
|
return _do_generate_once(model, *args, **kwargs)
|
||||||
|
except RuntimeError as e:
|
||||||
|
last_exc = e
|
||||||
|
msg = str(e)
|
||||||
|
# Match the specific OpenVINO busy error message
|
||||||
|
if "Infer Request is busy" in msg:
|
||||||
|
logger.warning(
|
||||||
|
f"OpenVINO infer busy (attempt {attempt}/{max_retries}), retrying after {delay:.3f}s..."
|
||||||
|
)
|
||||||
|
time.sleep(delay)
|
||||||
|
delay = min(delay * 2.0, 1.0)
|
||||||
|
continue
|
||||||
|
# Not the busy error - re-raise immediately
|
||||||
|
raise
|
||||||
|
except Exception:
|
||||||
|
raise
|
||||||
|
# Retries exhausted
|
||||||
|
logger.error(f"OpenVINO generate retries exhausted ({max_retries}) - raising last error: {last_exc}")
|
||||||
|
raise last_exc
|
||||||
|
|
||||||
|
|
||||||
|
# Global serialized generate worker to ensure OpenVINO infer requests are not
|
||||||
|
# called concurrently across threads. Some OpenVINO backends will error with
|
||||||
|
# "Infer Request is busy" if multiple infer calls overlap on the same
|
||||||
|
# compiled model; queueing here serializes calls at the process level.
|
||||||
|
_generate_queue = Queue()
|
||||||
|
_generate_worker_started = False
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_worker() -> None:
|
||||||
|
while True:
|
||||||
|
fn, args, kwargs, ev, out = _generate_queue.get()
|
||||||
|
try:
|
||||||
|
# Perform internal retries if OpenVINO reports the request as busy.
|
||||||
|
delay = 0.02
|
||||||
|
max_inner_retries = 20
|
||||||
|
last_exc = None
|
||||||
|
for attempt in range(1, max_inner_retries + 1):
|
||||||
|
try:
|
||||||
|
res = fn(*args, **kwargs)
|
||||||
|
out['result'] = res
|
||||||
|
out['exc'] = None
|
||||||
|
break
|
||||||
|
except RuntimeError as e:
|
||||||
|
last_exc = e
|
||||||
|
msg = str(e)
|
||||||
|
if "Infer Request is busy" in msg:
|
||||||
|
# log at debug to avoid noise but keep visibility
|
||||||
|
logger.debug(f"Worker: infer busy (attempt {attempt}/{max_inner_retries}), sleeping {delay:.3f}s")
|
||||||
|
time.sleep(delay)
|
||||||
|
delay = min(delay * 2.0, 1.0)
|
||||||
|
continue
|
||||||
|
# not a busy error - surface immediately
|
||||||
|
out['result'] = None
|
||||||
|
out['exc'] = e
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
out['result'] = None
|
||||||
|
out['exc'] = e
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
# exhausted retries
|
||||||
|
out['result'] = None
|
||||||
|
out['exc'] = last_exc
|
||||||
|
finally:
|
||||||
|
try:
|
||||||
|
ev.set()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_generate_worker() -> None:
|
||||||
|
global _generate_worker_started
|
||||||
|
if _generate_worker_started:
|
||||||
|
return
|
||||||
|
t = threading.Thread(target=_generate_worker, daemon=True)
|
||||||
|
t.start()
|
||||||
|
_generate_worker_started = True
|
||||||
|
|
||||||
|
|
||||||
|
def _submit_generate_to_worker(fn, *args, **kwargs):
|
||||||
|
"""Submit a blocking generate fn to the serialized worker and wait for result."""
|
||||||
|
_ensure_generate_worker()
|
||||||
|
ev = threading.Event()
|
||||||
|
out: Dict[str, Any] = {}
|
||||||
|
_generate_queue.put((fn, args, kwargs, ev, out))
|
||||||
|
ev.wait()
|
||||||
|
if out.get('exc'):
|
||||||
|
raise out['exc']
|
||||||
|
return out.get('result')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
async def _safe_generate_with_retries_async(model, *args, max_retries: int = 20, initial_delay: float = 0.05, **kwargs):
|
||||||
|
"""Async variant of the generate retry helper that uses asyncio.sleep.
|
||||||
|
|
||||||
|
Should be awaited from asynchronous contexts to avoid blocking the event loop.
|
||||||
|
"""
|
||||||
|
delay = initial_delay
|
||||||
|
last_exc = None
|
||||||
|
for attempt in range(1, max_retries + 1):
|
||||||
|
try:
|
||||||
|
# Delegate to the serialized worker in an executor so the event loop
|
||||||
|
# isn't blocked waiting on the worker event.
|
||||||
|
loop = asyncio.get_running_loop()
|
||||||
|
return await loop.run_in_executor(None, lambda: _do_generate_once(model, *args, **kwargs))
|
||||||
|
except RuntimeError as e:
|
||||||
|
last_exc = e
|
||||||
|
msg = str(e)
|
||||||
|
if "Infer Request is busy" in msg:
|
||||||
|
logger.warning(
|
||||||
|
f"OpenVINO infer busy (async attempt {attempt}/{max_retries}), retrying after {delay:.3f}s..."
|
||||||
|
)
|
||||||
|
await asyncio.sleep(delay)
|
||||||
|
delay = min(delay * 2.0, 1.0)
|
||||||
|
continue
|
||||||
|
raise
|
||||||
|
except Exception:
|
||||||
|
raise
|
||||||
|
logger.error(f"OpenVINO async generate retries exhausted ({max_retries}) - raising last error: {last_exc}")
|
||||||
|
raise last_exc
|
||||||
|
|
||||||
|
|
||||||
def get_available_devices() -> list[dict[str, Any]]:
|
def get_available_devices() -> list[dict[str, Any]]:
|
||||||
"""List available OpenVINO devices with their properties."""
|
"""List available OpenVINO devices with their properties."""
|
||||||
try:
|
try:
|
||||||
@ -125,9 +265,27 @@ def print_available_devices(device: str | None = None):
|
|||||||
logger.info(f" Type: {d.get('type')}")
|
logger.info(f" Type: {d.get('type')}")
|
||||||
|
|
||||||
|
|
||||||
|
def find_best_device(preferred_type: str = "DISCRETE") -> str:
    """Pick an OpenVINO device name, preferring devices of ``preferred_type``.

    The comparison is case-insensitive and ignores any dotted prefix, so
    ``"DISCRETE"``, ``"discrete"`` and ``"Type.DISCRETE"`` all select the same
    devices. Previously the match was exact, which meant the default value
    and the ``"Type.DISCRETE"`` form used at module level could not both
    match the same reported type string.

    Args:
        preferred_type: Device type to prefer, compared against each
            device's ``"type"`` entry as returned by ``get_available_devices``.

    Returns:
        The ``"name"`` of the first device of the preferred type; otherwise
        the name of the first available device; ``"CPU"`` when no devices are
        reported or the chosen entry has no name.
    """
    devices = get_available_devices()
    if not devices:
        logger.warning("No OpenVINO devices found, defaulting to CPU")
        return "CPU"

    # Keep only the part after the last dot: "Type.DISCRETE" -> "DISCRETE".
    wanted = str(preferred_type).upper().rsplit(".", 1)[-1]
    for d in devices:
        device_type = str(d.get("type", "")).upper().rsplit(".", 1)[-1]
        if device_type == wanted:
            name = d.get("name", "CPU")
            # Log the device actually chosen, not just the requested type.
            logger.info(f"Using preferred device: {name} (type {preferred_type})")
            return name

    logger.info("Preferred device not found, using first available device")
    return devices[0].get("name", "CPU")
|
||||||
|
|
||||||
|
_device = find_best_device(preferred_type="Type.DISCRETE")
|
||||||
|
|
||||||
print_available_devices(_device)
|
print_available_devices(_device)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class AudioQueueItem(BaseModel):
|
class AudioQueueItem(BaseModel):
|
||||||
"""Audio data with timestamp for processing queue."""
|
"""Audio data with timestamp for processing queue."""
|
||||||
|
|
||||||
@ -536,11 +694,39 @@ class OpenVINOWhisperModel:
|
|||||||
logger.info("Whisper processor loaded successfully")
|
logger.info("Whisper processor loaded successfully")
|
||||||
|
|
||||||
# Export the model to OpenVINO IR if not already converted
|
# Export the model to OpenVINO IR if not already converted
|
||||||
self.ov_model = OVModelForSpeechSeq2Seq.from_pretrained( # type: ignore
|
try:
|
||||||
self.model_id, export=True, device=self.device
|
self.ov_model = OVModelForSpeechSeq2Seq.from_pretrained( # type: ignore
|
||||||
) # type: ignore
|
self.model_id, export=True, device=self.device
|
||||||
|
) # type: ignore
|
||||||
|
logger.info("Whisper model exported as OpenVINO IR")
|
||||||
|
except Exception as export_e:
|
||||||
|
logger.warning(f"Initial OpenVINO export failed: {export_e}")
|
||||||
|
# Retry using processor-derived example_inputs if possible
|
||||||
|
try:
|
||||||
|
if self.processor is None:
|
||||||
|
self.processor = WhisperProcessor.from_pretrained(self.model_id, use_fast=True) # type: ignore
|
||||||
|
dummy_audio = np.random.randn(16000).astype(np.float32)
|
||||||
|
try:
|
||||||
|
example_inputs = self.processor(# type: ignore
|
||||||
|
dummy_audio, sampling_rate=16000, return_tensors="pt"
|
||||||
|
).input_features # type: ignore
|
||||||
|
except Exception as ex_inputs:
|
||||||
|
logger.warning(f"Failed to generate example_inputs for export retry: {ex_inputs}")
|
||||||
|
example_inputs = None
|
||||||
|
|
||||||
logger.info("Whisper model exported as OpenVINO IR")
|
if example_inputs is not None:
|
||||||
|
self.ov_model = OVModelForSpeechSeq2Seq.from_pretrained( # type: ignore
|
||||||
|
self.model_id, export=True, device=self.device, example_inputs=example_inputs
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.ov_model = OVModelForSpeechSeq2Seq.from_pretrained( # type: ignore
|
||||||
|
self.model_id, export=True, device=self.device
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info("Whisper model exported as OpenVINO IR (retry with example_inputs)")
|
||||||
|
except Exception as retry_export_e:
|
||||||
|
logger.error(f"Export retry failed: {retry_export_e}")
|
||||||
|
raise
|
||||||
|
|
||||||
# # Try to load quantized model first if it exists
|
# # Try to load quantized model first if it exists
|
||||||
# if self.config.enable_quantization and self.quantized_model_path.exists():
|
# if self.config.enable_quantization and self.quantized_model_path.exists():
|
||||||
@ -599,6 +785,60 @@ class OpenVINOWhisperModel:
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Model conversion failed: {e}")
|
logger.error(f"Model conversion failed: {e}")
|
||||||
|
# If conversion failed due to example_input / tracing mismatch
|
||||||
|
# try converting again by providing a correctly-shaped example
|
||||||
|
# input derived from the Whisper processor. This can resolve
|
||||||
|
# mismatches between the default example and model signatures.
|
||||||
|
try:
|
||||||
|
logger.info("Retrying conversion with processor-derived example_inputs...")
|
||||||
|
if self.processor is None:
|
||||||
|
# Ensure processor is available
|
||||||
|
self.processor = WhisperProcessor.from_pretrained(self.model_id, use_fast=True) # type: ignore
|
||||||
|
|
||||||
|
# Create a short dummy audio (1s) to produce input_features
|
||||||
|
try:
|
||||||
|
dummy_audio = np.random.randn(16000).astype(np.float32)
|
||||||
|
example_inputs = self.processor(# type: ignore
|
||||||
|
dummy_audio, sampling_rate=16000, return_tensors="pt"
|
||||||
|
).input_features # type: ignore
|
||||||
|
except Exception as ex_inputs:
|
||||||
|
logger.warning(f"Failed to generate example_inputs from processor: {ex_inputs}")
|
||||||
|
example_inputs = None
|
||||||
|
|
||||||
|
# Attempt conversion again, supplying example_inputs if available
|
||||||
|
if example_inputs is not None:
|
||||||
|
ov_model = OVModelForSpeechSeq2Seq.from_pretrained( # type: ignore
|
||||||
|
self.model_id,
|
||||||
|
ov_config=self.config.to_ov_config(),
|
||||||
|
export=True,
|
||||||
|
compile=False,
|
||||||
|
example_inputs=example_inputs,
|
||||||
|
load_in_8bit=False,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
ov_model = OVModelForSpeechSeq2Seq.from_pretrained( # type: ignore
|
||||||
|
self.model_id,
|
||||||
|
ov_config=self.config.to_ov_config(),
|
||||||
|
export=True,
|
||||||
|
compile=False,
|
||||||
|
load_in_8bit=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
if hasattr(ov_model, 'half'):
|
||||||
|
ov_model.half() # type: ignore
|
||||||
|
ov_model.save_pretrained(self.model_path) # type: ignore
|
||||||
|
logger.info("Model converted and saved in FP16 format (retry with example_inputs)")
|
||||||
|
self.ov_model = ov_model # type: ignore
|
||||||
|
self._compile_model()
|
||||||
|
return
|
||||||
|
except TypeError as te:
|
||||||
|
# from_pretrained may not accept example_inputs in some versions
|
||||||
|
logger.warning(f"Conversion retry with example_inputs not supported: {te}")
|
||||||
|
except Exception as retry_e:
|
||||||
|
logger.warning(f"Retry conversion with example_inputs failed: {retry_e}")
|
||||||
|
|
||||||
|
# If all conversion attempts fail, propagate to fallback path
|
||||||
|
logger.warning("Falling back to basic conversion without advanced export options")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def _convert_model_basic(self) -> None:
|
def _convert_model_basic(self) -> None:
|
||||||
@ -816,8 +1056,8 @@ class OpenVINOWhisperModel:
|
|||||||
) # type: ignore
|
) # type: ignore
|
||||||
|
|
||||||
# Run inference to collect calibration data
|
# Run inference to collect calibration data
|
||||||
_ = self.ov_model.generate( # type: ignore
|
_ = _safe_generate_with_retries( # type: ignore
|
||||||
inputs.input_features, max_new_tokens=10 # type: ignore
|
self.ov_model, inputs.input_features, max_new_tokens=10
|
||||||
)
|
)
|
||||||
|
|
||||||
if i % 5 == 0:
|
if i % 5 == 0:
|
||||||
@ -957,7 +1197,7 @@ class OpenVINOWhisperModel:
|
|||||||
|
|
||||||
# Run warmup iterations
|
# Run warmup iterations
|
||||||
for i in range(3):
|
for i in range(3):
|
||||||
_ = self.ov_model.generate(dummy_features, max_new_tokens=10)# type: ignore
|
_ = _safe_generate_with_retries(self.ov_model, dummy_features, max_new_tokens=10) # type: ignore
|
||||||
if i == 0:
|
if i == 0:
|
||||||
logger.debug("First warmup iteration completed")
|
logger.debug("First warmup iteration completed")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -1482,9 +1722,7 @@ class OptimizedAudioProcessor:
|
|||||||
# Serialize access to the underlying OpenVINO generation call
|
# Serialize access to the underlying OpenVINO generation call
|
||||||
# to avoid concurrency problems with the OpenVINO runtime.
|
# to avoid concurrency problems with the OpenVINO runtime.
|
||||||
with _generate_global_lock:
|
with _generate_global_lock:
|
||||||
gen_out = ov_model.ov_model.generate(# type: ignore
|
gen_out = _safe_generate_with_retries(ov_model.ov_model, input_features, generation_config=gen_cfg) # type: ignore
|
||||||
input_features, generation_config=gen_cfg# type: ignore
|
|
||||||
)
|
|
||||||
|
|
||||||
# Try to extract sequences if present
|
# Try to extract sequences if present
|
||||||
if hasattr(gen_out, "sequences"): # type: ignore
|
if hasattr(gen_out, "sequences"): # type: ignore
|
||||||
@ -1886,9 +2124,8 @@ class OptimizedAudioProcessor:
|
|||||||
logger.info(f"{self.peer_name}: calling model.generate (async lock) (final)")
|
logger.info(f"{self.peer_name}: calling model.generate (async lock) (final)")
|
||||||
else:
|
else:
|
||||||
logger.debug(f"{self.peer_name}: calling model.generate (async lock)")
|
logger.debug(f"{self.peer_name}: calling model.generate (async lock)")
|
||||||
generation_output = ov_model.ov_model.generate( # type: ignore
|
# Use async-safe retry wrapper to avoid blocking event loop
|
||||||
input_features, generation_config=generation_config
|
generation_output = await _safe_generate_with_retries_async(ov_model.ov_model, input_features, generation_config=generation_config) # type: ignore
|
||||||
)
|
|
||||||
finally:
|
finally:
|
||||||
self._generate_lock.release()
|
self._generate_lock.release()
|
||||||
elif hasattr(self, "_generate_lock") and isinstance(self._generate_lock, threading.Lock):
|
elif hasattr(self, "_generate_lock") and isinstance(self._generate_lock, threading.Lock):
|
||||||
@ -1897,17 +2134,13 @@ class OptimizedAudioProcessor:
|
|||||||
logger.info(f"{self.peer_name}: calling model.generate (thread lock) (final)")
|
logger.info(f"{self.peer_name}: calling model.generate (thread lock) (final)")
|
||||||
else:
|
else:
|
||||||
logger.debug(f"{self.peer_name}: calling model.generate (thread lock)")
|
logger.debug(f"{self.peer_name}: calling model.generate (thread lock)")
|
||||||
generation_output = ov_model.ov_model.generate( # type: ignore
|
generation_output = _safe_generate_with_retries(ov_model.ov_model, input_features, generation_config=generation_config) # type: ignore
|
||||||
input_features, generation_config=generation_config
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
if is_final:
|
if is_final:
|
||||||
logger.info(f"{self.peer_name}: calling model.generate (no lock) (final)")
|
logger.info(f"{self.peer_name}: calling model.generate (no lock) (final)")
|
||||||
else:
|
else:
|
||||||
logger.debug(f"{self.peer_name}: calling model.generate (no lock)")
|
logger.debug(f"{self.peer_name}: calling model.generate (no lock)")
|
||||||
generation_output = ov_model.ov_model.generate( # type: ignore
|
generation_output = _safe_generate_with_retries(ov_model.ov_model, input_features, generation_config=generation_config) # type: ignore
|
||||||
input_features, generation_config=generation_config
|
|
||||||
)
|
|
||||||
|
|
||||||
if is_final:
|
if is_final:
|
||||||
logger.info(f"{self.peer_name}: model.generate complete (final) (type={type(generation_output)})")
|
logger.info(f"{self.peer_name}: model.generate complete (final) (type={type(generation_output)})")
|
||||||
@ -2686,7 +2919,7 @@ def get_config_schema() -> Dict[str, Any]:
|
|||||||
"default_value": _device,
|
"default_value": _device,
|
||||||
"required": True,
|
"required": True,
|
||||||
"options": [
|
"options": [
|
||||||
{"value": "GPU.1", "label": "Intel Arc GPU (GPU.1)"},
|
# {"value": "GPU.1", "label": "Intel Arc GPU (GPU.1)"},
|
||||||
{"value": "GPU", "label": "GPU"},
|
{"value": "GPU", "label": "GPU"},
|
||||||
{"value": "CPU", "label": "CPU"}
|
{"value": "CPU", "label": "CPU"}
|
||||||
]
|
]
|
||||||
@ -2959,7 +3192,7 @@ def handle_config_update(lobby_id: str, config_values: Dict[str, Any]) -> bool:
|
|||||||
if "device" in config_values:
|
if "device" in config_values:
|
||||||
new_device = config_values["device"] # type: ignore
|
new_device = config_values["device"] # type: ignore
|
||||||
available_devices = [d["name"] for d in get_available_devices()]
|
available_devices = [d["name"] for d in get_available_devices()]
|
||||||
if new_device in available_devices or new_device in ["CPU", "GPU", "GPU.1"]:
|
if new_device in available_devices or new_device in ["CPU", "GPU"]:#, "GPU.1"]:
|
||||||
_device = new_device
|
_device = new_device
|
||||||
_ov_config.device = new_device
|
_ov_config.device = new_device
|
||||||
config_applied = True
|
config_applied = True
|
||||||
|
@ -1,175 +1,183 @@
|
|||||||
about-time
|
about-time==4.2.1
|
||||||
aiofiles
|
absl-py==2.3.1
|
||||||
aiohappyeyeballs
|
accelerate==1.6.0
|
||||||
aiohttp
|
aiofiles==24.1.0
|
||||||
aioice
|
aiohappyeyeballs==2.6.1
|
||||||
aiortc
|
aiohttp==3.12.15
|
||||||
aiosignal
|
aioice==0.10.1
|
||||||
alive-progress
|
aiortc==1.13.0
|
||||||
annotated-types
|
aiosignal==1.4.0
|
||||||
anthropic
|
alive-progress==3.2.0
|
||||||
anyio
|
annotated-types==0.7.0
|
||||||
attrs
|
anthropic==0.67.0
|
||||||
audioread
|
anyio==4.10.0
|
||||||
autograd
|
attrs==25.3.0
|
||||||
av
|
audioread==3.0.1
|
||||||
brotli
|
autograd==1.8.0
|
||||||
certifi
|
av==14.4.0
|
||||||
cffi
|
brotli==1.1.0
|
||||||
charset-normalizer
|
certifi==2025.8.3
|
||||||
click
|
cffi==2.0.0
|
||||||
cma
|
charset-normalizer==3.4.3
|
||||||
contourpy
|
click==8.2.1
|
||||||
cryptography
|
cma==4.3.0
|
||||||
cycler
|
contourpy==1.3.3
|
||||||
datasets
|
cryptography==45.0.7
|
||||||
decorator
|
cycler==0.12.1
|
||||||
deprecated
|
datasets==4.1.0
|
||||||
dill
|
decorator==5.2.1
|
||||||
distro
|
deprecated==1.2.18
|
||||||
dnspython
|
diffusers==0.35.1
|
||||||
fastapi
|
dill==0.4.0
|
||||||
ffmpy
|
distro==1.9.0
|
||||||
filelock
|
dnspython==2.8.0
|
||||||
fonttools
|
fastapi==0.116.1
|
||||||
frozenlist
|
ffmpy==0.6.1
|
||||||
fsspec
|
filelock==3.19.1
|
||||||
google-crc32c
|
fonttools==4.59.2
|
||||||
gradio
|
frozenlist==1.7.0
|
||||||
gradio-client
|
fsspec==2025.9.0
|
||||||
grapheme
|
google-crc32c==1.7.1
|
||||||
graphemeu
|
gradio==5.45.0
|
||||||
groovy
|
gradio-client==1.13.0
|
||||||
h11
|
grapheme==0.6.0
|
||||||
hf-xet
|
graphemeu==0.8.0
|
||||||
httpcore
|
groovy==0.1.2
|
||||||
httpx
|
h11==0.16.0
|
||||||
huggingface-hub
|
hf-xet==1.1.10
|
||||||
idna
|
httpcore==1.0.9
|
||||||
ifaddr
|
httpx==0.28.1
|
||||||
iniconfig
|
huggingface-hub==0.34.5
|
||||||
jinja2
|
idna==3.10
|
||||||
jiter
|
ifaddr==0.2.0
|
||||||
jiwer
|
importlib-metadata==8.7.0
|
||||||
joblib
|
iniconfig==2.1.0
|
||||||
jsonschema
|
jinja2==3.1.6
|
||||||
jsonschema-specifications
|
jiter==0.11.0
|
||||||
kiwisolver
|
jiwer==4.0.0
|
||||||
lazy-loader
|
joblib==1.5.2
|
||||||
librosa
|
jsonschema==4.25.1
|
||||||
llvmlite
|
jsonschema-specifications==2025.9.1
|
||||||
markdown-it-py
|
kiwisolver==1.4.9
|
||||||
markupsafe
|
lazy-loader==0.4
|
||||||
matplotlib
|
librosa==0.11.0
|
||||||
mdurl
|
llvmlite==0.44.0
|
||||||
ml-dtypes
|
markdown-it-py==4.0.0
|
||||||
more-itertools
|
markupsafe==3.0.2
|
||||||
mpmath
|
matplotlib==3.10.6
|
||||||
msgpack
|
mdurl==0.1.2
|
||||||
multidict
|
ml-collections==1.1.0
|
||||||
multiprocess
|
ml-dtypes==0.5.3
|
||||||
natsort
|
more-itertools==10.8.0
|
||||||
networkx
|
mpmath==1.3.0
|
||||||
ninja
|
msgpack==1.1.1
|
||||||
nncf
|
multidict==6.6.4
|
||||||
numba
|
multiprocess==0.70.16
|
||||||
numpy
|
natsort==8.4.0
|
||||||
nvidia-cublas-cu12
|
networkx==3.4.2
|
||||||
nvidia-cuda-cupti-cu12
|
ninja==1.13.0
|
||||||
nvidia-cuda-nvrtc-cu12
|
nncf==2.18.0
|
||||||
nvidia-cuda-runtime-cu12
|
numba==0.61.2
|
||||||
nvidia-cudnn-cu12
|
numpy==2.2.6
|
||||||
nvidia-cufft-cu12
|
nvidia-cublas-cu12==12.8.4.1
|
||||||
nvidia-cufile-cu12
|
nvidia-cuda-cupti-cu12==12.8.90
|
||||||
nvidia-curand-cu12
|
nvidia-cuda-nvrtc-cu12==12.8.93
|
||||||
nvidia-cusolver-cu12
|
nvidia-cuda-runtime-cu12==12.8.90
|
||||||
nvidia-cusparse-cu12
|
nvidia-cudnn-cu12==9.10.2.21
|
||||||
nvidia-cusparselt-cu12
|
nvidia-cufft-cu12==11.3.3.83
|
||||||
nvidia-nccl-cu12
|
nvidia-cufile-cu12==1.13.1.3
|
||||||
nvidia-nvjitlink-cu12
|
nvidia-curand-cu12==10.3.9.90
|
||||||
nvidia-nvtx-cu12
|
nvidia-cusolver-cu12==11.7.3.90
|
||||||
onnx
|
nvidia-cusparse-cu12==12.5.8.93
|
||||||
openai
|
nvidia-cusparselt-cu12==0.7.1
|
||||||
|
nvidia-nccl-cu12==2.27.3
|
||||||
|
nvidia-nvjitlink-cu12==12.8.93
|
||||||
|
nvidia-nvtx-cu12==12.8.90
|
||||||
|
onnx==1.19.0
|
||||||
|
openai==1.107.2
|
||||||
openai-whisper @ git+https://github.com/openai/whisper.git@c0d2f624c09dc18e709e37c2ad90c039a4eb72a2
|
openai-whisper @ git+https://github.com/openai/whisper.git@c0d2f624c09dc18e709e37c2ad90c039a4eb72a2
|
||||||
opencv-python
|
opencv-python==4.12.0.88
|
||||||
openvino
|
openvino==2025.3.0
|
||||||
openvino-genai
|
openvino-genai==2025.3.0.0
|
||||||
openvino-telemetry
|
openvino-telemetry==2025.2.0
|
||||||
openvino-tokenizers
|
openvino-tokenizers==2025.3.0.0
|
||||||
optimum
|
optimum==1.27.0
|
||||||
optimum-intel @ git+https://github.com/huggingface/optimum-intel.git@b9c151fec6b414d9ca78be8643d08e267b133bfc
|
optimum-intel @ git+https://github.com/huggingface/optimum-intel.git@b9c151fec6b414d9ca78be8643d08e267b133bfc
|
||||||
orjson
|
orjson==3.11.3
|
||||||
packaging
|
packaging==25.0
|
||||||
pandas
|
pandas==2.3.2
|
||||||
pillow
|
peft==0.17.1
|
||||||
platformdirs
|
pillow==11.3.0
|
||||||
pluggy
|
platformdirs==4.4.0
|
||||||
pooch
|
pluggy==1.6.0
|
||||||
propcache
|
pooch==1.8.2
|
||||||
protobuf
|
propcache==0.3.2
|
||||||
psutil
|
protobuf==6.32.1
|
||||||
pyarrow
|
psutil==7.0.0
|
||||||
pycparser
|
pyarrow==21.0.0
|
||||||
pydantic
|
pycparser==2.23
|
||||||
pydantic-core
|
pydantic==2.11.9
|
||||||
pydot
|
pydantic-core==2.33.2
|
||||||
pydub
|
pydot==3.0.4
|
||||||
pyee
|
pydub==0.25.1
|
||||||
pygments
|
pyee==13.0.0
|
||||||
pylibsrtp
|
pygments==2.19.2
|
||||||
pymoo
|
pylibsrtp==0.12.0
|
||||||
pyopencl
|
pymoo==0.6.1.5
|
||||||
pyopenssl
|
pyopencl==2025.2.6
|
||||||
pyparsing
|
pyopenssl==25.2.0
|
||||||
pytest
|
pyparsing==3.2.4
|
||||||
pytest-asyncio
|
pytest==8.4.2
|
||||||
python-dateutil
|
pytest-asyncio==1.2.0
|
||||||
python-ffmpeg
|
python-dateutil==2.9.0.post0
|
||||||
python-multipart
|
python-ffmpeg==2.0.12
|
||||||
pytools
|
python-multipart==0.0.20
|
||||||
pytz
|
pytools==2025.2.4
|
||||||
pyyaml
|
pytz==2025.2
|
||||||
rapidfuzz
|
pyyaml==6.0.2
|
||||||
referencing
|
rapidfuzz==3.14.1
|
||||||
regex
|
referencing==0.36.2
|
||||||
requests
|
regex==2025.9.1
|
||||||
resampy
|
requests==2.32.5
|
||||||
rich
|
resampy==0.4.3
|
||||||
rpds-py
|
rich==14.1.0
|
||||||
ruff
|
rpds-py==0.27.1
|
||||||
safehttpx
|
ruff==0.13.0
|
||||||
safetensors
|
safehttpx==0.1.6
|
||||||
scikit-learn
|
safetensors==0.6.2
|
||||||
scipy
|
scikit-learn==1.7.2
|
||||||
semantic-version
|
scipy==1.16.2
|
||||||
setuptools
|
semantic-version==2.10.0
|
||||||
shellingham
|
setuptools==80.9.0
|
||||||
siphash24
|
shellingham==1.5.4
|
||||||
six
|
siphash24==1.8
|
||||||
sniffio
|
six==1.17.0
|
||||||
soundfile
|
sniffio==1.3.1
|
||||||
soxr
|
soundfile==0.13.1
|
||||||
speechrecognition
|
soxr==1.0.0
|
||||||
starlette
|
speechrecognition==3.14.3
|
||||||
sympy
|
starlette==0.47.3
|
||||||
tabulate
|
sympy==1.14.0
|
||||||
threadpoolctl
|
tabulate==0.9.0
|
||||||
tiktoken
|
threadpoolctl==3.6.0
|
||||||
tokenizers
|
tiktoken==0.11.0
|
||||||
tomlkit
|
tokenizers==0.21.4
|
||||||
torch
|
tomlkit==0.13.3
|
||||||
torchvision
|
torch==2.8.0
|
||||||
tqdm
|
torchvision==0.23.0
|
||||||
transformers
|
tqdm==4.67.1
|
||||||
triton
|
transformers==4.53.3
|
||||||
typer
|
triton==3.4.0
|
||||||
typing-extensions
|
typer==0.17.4
|
||||||
typing-inspection
|
typing-extensions==4.15.0
|
||||||
tzdata
|
typing-inspection==0.4.1
|
||||||
urllib3
|
tzdata==2025.2
|
||||||
uvicorn
|
urllib3==2.5.0
|
||||||
watchdog
|
uvicorn==0.35.0
|
||||||
websockets
|
-e file:///voicebot/VibeVoice
|
||||||
wrapt
|
watchdog==6.0.0
|
||||||
xxhash
|
websockets==15.0.1
|
||||||
yarl
|
wrapt==1.17.3
|
||||||
|
xxhash==3.5.0
|
||||||
|
yarl==1.20.1
|
||||||
|
zipp==3.23.0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user