Fall back to CPU when CUDA STT runtime is missing

This commit is contained in:
2026-04-30 17:52:59 +09:00
parent ab4e0b38b0
commit 60dce65b0f
3 changed files with 38 additions and 7 deletions

View File

@@ -12,6 +12,7 @@ LOCAL_AI_CACHE_DIR=.local-ai/cache
# Windows면 `python` 또는 `py -3` # Windows면 `python` 또는 `py -3`
LOCAL_AI_PYTHON= LOCAL_AI_PYTHON=
LOCAL_STT_MODEL=tiny LOCAL_STT_MODEL=tiny
# CUDA DLL 로드 오류가 나면 아래 값을 `cpu`로 설정
LOCAL_STT_DEVICE=auto LOCAL_STT_DEVICE=auto
LOCAL_STT_COMPUTE_TYPE=auto LOCAL_STT_COMPUTE_TYPE=auto
LOCAL_STT_BEAM_SIZE=1 LOCAL_STT_BEAM_SIZE=1

View File

@@ -118,6 +118,14 @@ Discord 모드에서만 필수:
- `DEBUG_TEXT_EVENTS` - `DEBUG_TEXT_EVENTS`
- `true`면 transcript/reply를 콘솔에 같이 출력 - `true`면 transcript/reply를 콘솔에 같이 출력
Windows에서 STT 시작 시 `cublas64_12.dll` 같은 CUDA 오류가 나면:
```env
LOCAL_STT_DEVICE=cpu
```
로 두면 바로 우회됩니다. 최신 코드는 `LOCAL_STT_DEVICE`가 `auto`(또는 빈 값)이고 CUDA가 선택된 상태에서 모델 로드 중 CUDA 관련 런타임 오류(`cublas`, `cudnn` 등)가 발생하면 CPU로 자동 fallback 합니다.
## 속도 우선 기본값 ## 속도 우선 기본값
- STT 기본 모델은 `tiny` - STT 기본 모델은 `tiny`

View File

@@ -58,14 +58,36 @@ class SttWorker:
from faster_whisper import WhisperModel from faster_whisper import WhisperModel
self.model_name = os.environ.get("LOCAL_STT_MODEL", "tiny").strip() or "tiny" self.model_name = os.environ.get("LOCAL_STT_MODEL", "tiny").strip() or "tiny"
self.device = resolve_device() requested_device = resolve_device()
self.compute_type = resolve_compute_type(self.device) requested_compute_type = resolve_compute_type(requested_device)
self.beam_size = int(os.environ.get("LOCAL_STT_BEAM_SIZE", "1")) self.beam_size = int(os.environ.get("LOCAL_STT_BEAM_SIZE", "1"))
auto_requested = os.environ.get("LOCAL_STT_DEVICE", "auto").strip().lower() in {"", "auto"}
try:
self.model = WhisperModel( self.model = WhisperModel(
self.model_name, self.model_name,
device=self.device, device=requested_device,
compute_type=self.compute_type, compute_type=requested_compute_type,
) )
self.device = requested_device
self.compute_type = requested_compute_type
except RuntimeError as exc:
lowered = str(exc).lower()
should_fallback = auto_requested and requested_device == "cuda" and any(
token in lowered for token in ("cublas", "cudnn", "cuda")
)
if not should_fallback:
raise
log("CUDA runtime is incomplete; falling back to CPU STT")
self.model = WhisperModel(
self.model_name,
device="cpu",
compute_type=resolve_compute_type("cpu"),
)
self.device = "cpu"
self.compute_type = resolve_compute_type("cpu")
log( log(
f"local-stt ready model={self.model_name} device={self.device} compute={self.compute_type} beam={self.beam_size}" f"local-stt ready model={self.model_name} device={self.device} compute={self.compute_type} beam={self.beam_size}"
) )