diff --git a/.env.example b/.env.example index 4f98eb8..06ad168 100644 --- a/.env.example +++ b/.env.example @@ -12,6 +12,7 @@ LOCAL_AI_CACHE_DIR=.local-ai/cache # Windows면 `python` 또는 `py -3` LOCAL_AI_PYTHON= LOCAL_STT_MODEL=tiny +# CUDA dll 오류가 나면 `cpu` LOCAL_STT_DEVICE=auto LOCAL_STT_COMPUTE_TYPE=auto LOCAL_STT_BEAM_SIZE=1 diff --git a/README.md b/README.md index 776d6b0..2b604e2 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,14 @@ Discord 모드에서만 필수: - `DEBUG_TEXT_EVENTS` - `true`면 transcript/reply를 콘솔에 같이 출력 +Windows에서 STT 시작 시 `cublas64_12.dll` 같은 CUDA 오류가 나면: + +```env +LOCAL_STT_DEVICE=cpu +``` + +로 두면 바로 우회됩니다. 최신 코드는 `auto`일 때도 가능한 경우 CPU로 자동 fallback 합니다. + ## 속도 우선 기본값 - STT 기본 모델은 `tiny` diff --git a/python/local_stt_worker.py b/python/local_stt_worker.py index a94783f..fd910d3 100644 --- a/python/local_stt_worker.py +++ b/python/local_stt_worker.py @@ -58,14 +58,36 @@ class SttWorker: from faster_whisper import WhisperModel self.model_name = os.environ.get("LOCAL_STT_MODEL", "tiny").strip() or "tiny" - self.device = resolve_device() - self.compute_type = resolve_compute_type(self.device) + requested_device = resolve_device() + requested_compute_type = resolve_compute_type(requested_device) self.beam_size = int(os.environ.get("LOCAL_STT_BEAM_SIZE", "1")) - self.model = WhisperModel( - self.model_name, - device=self.device, - compute_type=self.compute_type, - ) + auto_requested = os.environ.get("LOCAL_STT_DEVICE", "auto").strip().lower() in {"", "auto"} + + try: + self.model = WhisperModel( + self.model_name, + device=requested_device, + compute_type=requested_compute_type, + ) + self.device = requested_device + self.compute_type = requested_compute_type + except RuntimeError as exc: + lowered = str(exc).lower() + should_fallback = auto_requested and requested_device == "cuda" and any( + token in lowered for token in ("cublas", "cudnn", "cuda") + ) + if not should_fallback: + raise + + log("CUDA runtime is incomplete; falling back to CPU STT") + self.model = WhisperModel( + self.model_name, + device="cpu", + compute_type=resolve_compute_type("cpu"), + ) + self.device = "cpu" + self.compute_type = resolve_compute_type("cpu") + log( f"local-stt ready model={self.model_name} device={self.device} compute={self.compute_type} beam={self.beam_size}" )