Fall back to CPU when CUDA STT runtime is missing
This commit is contained in:
@@ -58,14 +58,36 @@ class SttWorker:
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
self.model_name = os.environ.get("LOCAL_STT_MODEL", "tiny").strip() or "tiny"
|
||||
self.device = resolve_device()
|
||||
self.compute_type = resolve_compute_type(self.device)
|
||||
requested_device = resolve_device()
|
||||
requested_compute_type = resolve_compute_type(requested_device)
|
||||
self.beam_size = int(os.environ.get("LOCAL_STT_BEAM_SIZE", "1"))
|
||||
self.model = WhisperModel(
|
||||
self.model_name,
|
||||
device=self.device,
|
||||
compute_type=self.compute_type,
|
||||
)
|
||||
auto_requested = os.environ.get("LOCAL_STT_DEVICE", "auto").strip().lower() in {"", "auto"}
|
||||
|
||||
try:
|
||||
self.model = WhisperModel(
|
||||
self.model_name,
|
||||
device=requested_device,
|
||||
compute_type=requested_compute_type,
|
||||
)
|
||||
self.device = requested_device
|
||||
self.compute_type = requested_compute_type
|
||||
except RuntimeError as exc:
|
||||
lowered = str(exc).lower()
|
||||
should_fallback = auto_requested and requested_device == "cuda" and any(
|
||||
token in lowered for token in ("cublas", "cudnn", "cuda")
|
||||
)
|
||||
if not should_fallback:
|
||||
raise
|
||||
|
||||
log("CUDA runtime is incomplete; falling back to CPU STT")
|
||||
self.model = WhisperModel(
|
||||
self.model_name,
|
||||
device="cpu",
|
||||
compute_type=resolve_compute_type("cpu"),
|
||||
)
|
||||
self.device = "cpu"
|
||||
self.compute_type = resolve_compute_type("cpu")
|
||||
|
||||
log(
|
||||
f"local-stt ready model={self.model_name} device={self.device} compute={self.compute_type} beam={self.beam_size}"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user