"""JSON-lines worker that transcribes PCM16 audio with faster-whisper.

Protocol (one JSON object per line on stdin, one per line on stdout):

* ``{"id": ..., "method": "ping"}`` -> model/device/compute-type info.
* ``{"id": ..., "method": "transcribe", "params": {"pcm16_base64": "..."}}``
  -> ``{"id": ..., "result": {"text": "..."}}``.

Failures are reported as ``{"id": ..., "error": "<Type>: <message>"}`` and the
traceback is printed to stderr. Diagnostics only ever go to stderr so stdout
stays a clean response stream.
"""

import base64
import json
import os
import sys
import site
import traceback
from typing import Any
from pathlib import Path

import numpy as np
from faster_whisper import WhisperModel

DEFAULT_MODEL_NAME = "large-v3-turbo"


def configure_windows_dll_search_paths() -> list[str]:
    """Register pip-installed NVIDIA cuBLAS/cuDNN ``bin`` folders for DLL lookup.

    On non-Windows platforms this is a no-op. On Windows, each existing
    candidate directory is passed to ``os.add_dll_directory`` and prepended to
    ``PATH`` if it is not already one of its entries.

    Returns:
        The resolved directories that were registered, in registration order.
    """
    if sys.platform != "win32":
        return []

    nvidia_subdirs = (
        ("nvidia", "cublas", "bin"),
        ("nvidia", "cudnn", "bin"),
    )

    # The interpreter's own venv first, then every reported site-packages dir.
    venv_root = Path(sys.executable).resolve().parent.parent
    package_roots = [venv_root / "Lib" / "site-packages"]
    package_roots.extend(Path(package_path) for package_path in site.getsitepackages())

    added: list[str] = []
    seen: set[str] = set()
    for root in package_roots:
        for parts in nvidia_subdirs:
            resolved = root.joinpath(*parts).resolve()
            normalized = str(resolved)
            if normalized in seen:
                continue
            seen.add(normalized)
            if not resolved.is_dir():
                continue

            os.add_dll_directory(normalized)

            # Compare against individual PATH entries; a plain substring test
            # would treat "C:\x\bin" as present when PATH only has "C:\x\bin2".
            current_path = os.environ.get("PATH", "")
            if normalized not in current_path.split(os.pathsep):
                os.environ["PATH"] = normalized + os.pathsep + current_path
            added.append(normalized)
    return added


os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS_WARNING", "1")
CONFIGURED_DLL_PATHS = configure_windows_dll_search_paths()
if CONFIGURED_DLL_PATHS:
    print(
        f"configured CUDA DLL search paths: {', '.join(CONFIGURED_DLL_PATHS)}",
        file=sys.stderr,
        flush=True,
    )


def resolve_model() -> WhisperModel:
    """Load the Whisper model, falling back across device/compute-type pairs.

    Configuration is read from the environment:

    * ``WHISPER_MODEL`` (default ``large-v3-turbo``)
    * ``WHISPER_DEVICE`` (default ``auto``: try CUDA first, then CPU)
    * ``WHISPER_COMPUTE_TYPE`` (default ``auto``)

    The returned model carries ``_resolved_device`` and
    ``_resolved_compute_type`` attributes describing the attempt that worked.

    Raises:
        Exception: the error from the last attempt when every attempt fails.
    """
    model_name = os.environ.get("WHISPER_MODEL", DEFAULT_MODEL_NAME)
    requested_device = os.environ.get("WHISPER_DEVICE", "auto")
    requested_compute = os.environ.get("WHISPER_COMPUTE_TYPE", "auto")

    attempts: list[tuple[str, str]]
    if requested_device == "auto":
        if requested_compute == "auto":
            attempts = [
                ("cuda", "float16"),
                ("cuda", "int8_float16"),
                ("cpu", "int8"),
                ("cpu", "float32"),
            ]
        else:
            attempts = [
                ("cuda", requested_compute),
                ("cpu", requested_compute),
            ]
    else:
        if requested_compute == "auto":
            compute = "float16" if requested_device == "cuda" else "int8"
        else:
            compute = requested_compute
        attempts = [(requested_device, compute)]

    last_error: Exception | None = None
    for device, compute_type in attempts:
        try:
            model = WhisperModel(model_name, device=device, compute_type=compute_type)
        except Exception as error:  # noqa: BLE001
            # Keep going, but leave a trace of why this combination was skipped.
            print(
                f"model load failed on {device}/{compute_type}: "
                f"{type(error).__name__}: {error}",
                file=sys.stderr,
                flush=True,
            )
            last_error = error
            continue
        setattr(model, "_resolved_device", device)
        setattr(model, "_resolved_compute_type", compute_type)
        return model

    # ``attempts`` is never empty, so an error is always recorded here; raise
    # explicitly rather than via ``assert`` (which is stripped under ``-O``).
    if last_error is None:
        raise RuntimeError("no model load attempts were made")
    raise last_error


MODEL = resolve_model()
LANGUAGE = os.environ.get("WHISPER_LANGUAGE", "ko")
BEAM_SIZE = int(os.environ.get("WHISPER_BEAM_SIZE", "1"))


def write(payload: dict[str, Any]) -> None:
    """Emit *payload* as a single JSON line on stdout and flush immediately."""
    # NOTE(review): with ensure_ascii=False the output depends on stdout's
    # encoding; confirm the parent process and this one agree (e.g. UTF-8).
    sys.stdout.write(json.dumps(payload, ensure_ascii=False) + "\n")
    sys.stdout.flush()


def transcribe_pcm16_base64(pcm16_base64: str) -> str:
    """Transcribe base64-encoded 16-bit PCM samples and return the joined text.

    The decoded bytes are interpreted as native-endian ``int16`` samples and
    scaled to ``float32`` in ``[-1.0, 1.0)`` before being passed to the model.
    An empty payload yields an empty string without invoking the model.

    Raises:
        ValueError: if the decoded byte count is not a multiple of two
            (raised by ``numpy.frombuffer``).
    """
    audio_bytes = base64.b64decode(pcm16_base64)
    samples = np.frombuffer(audio_bytes, dtype=np.int16)
    if samples.size == 0:
        return ""
    audio = samples.astype(np.float32) / 32768.0

    segments, _info = MODEL.transcribe(
        audio,
        language=LANGUAGE,
        task="transcribe",
        beam_size=BEAM_SIZE,
        condition_on_previous_text=False,
        vad_filter=False,
        without_timestamps=True,
        word_timestamps=False,
        temperature=0.0,
    )

    # Strip each segment and drop the ones that end up empty.
    stripped = (segment.text.strip() for segment in segments if segment.text)
    return " ".join(part for part in stripped if part)


def _handle_line(line: str) -> None:
    """Parse one request line, dispatch it, and write exactly one response.

    Everything, including JSON parsing and required-key lookup, happens inside
    the error boundary so a malformed request produces an error response
    (with ``id`` set to ``null`` if it could not be determined) instead of
    terminating the worker.
    """
    request_id: Any = None
    try:
        request = json.loads(line)
        if not isinstance(request, dict):
            raise TypeError("request must be a JSON object")
        request_id = request["id"]
        method = request["method"]
        params = request.get("params", {})

        if method == "ping":
            result: dict[str, Any] = {
                "model": os.environ.get("WHISPER_MODEL", DEFAULT_MODEL_NAME),
                "device": getattr(MODEL, "_resolved_device", "unknown"),
                "compute_type": getattr(MODEL, "_resolved_compute_type", "unknown"),
            }
        elif method == "transcribe":
            result = {"text": transcribe_pcm16_base64(params["pcm16_base64"])}
        else:
            raise RuntimeError(f"unknown method: {method}")
    except Exception as error:  # noqa: BLE001
        traceback.print_exc(file=sys.stderr)
        write(
            {
                "id": request_id,
                "error": f"{type(error).__name__}: {error}",
            }
        )
        return

    write({"id": request_id, "result": result})


# Serve requests until stdin is closed; blank lines are ignored.
for raw_line in sys.stdin:
    line = raw_line.strip()
    if not line:
        continue
    _handle_line(line)