Improve local startup checks and Korean STT defaults

This commit is contained in:
2026-04-30 18:08:38 +09:00
parent 4c7cef8c18
commit 645a5109a2
6 changed files with 92 additions and 31 deletions

View File

@@ -11,11 +11,11 @@ LOCAL_AI_VENV_PATH=.local-ai/.venv
LOCAL_AI_CACHE_DIR=.local-ai/cache LOCAL_AI_CACHE_DIR=.local-ai/cache
# Windows면 `python` 또는 `py -3` # Windows면 `python` 또는 `py -3`
LOCAL_AI_PYTHON= LOCAL_AI_PYTHON=
LOCAL_STT_MODEL=tiny LOCAL_STT_MODEL=small
# CUDA dll 오류가 나면 `cpu` # CUDA dll 오류가 나면 `cpu`
LOCAL_STT_DEVICE=auto LOCAL_STT_DEVICE=auto
LOCAL_STT_COMPUTE_TYPE=auto LOCAL_STT_COMPUTE_TYPE=auto
LOCAL_STT_BEAM_SIZE=1 LOCAL_STT_BEAM_SIZE=3
LOCAL_TTS_MODEL_PATH=.local-ai/models/kokoro-v1.0.onnx LOCAL_TTS_MODEL_PATH=.local-ai/models/kokoro-v1.0.onnx
LOCAL_TTS_VOICES_PATH=.local-ai/models/voices-v1.0.bin LOCAL_TTS_VOICES_PATH=.local-ai/models/voices-v1.0.bin
LOCAL_TTS_LANGUAGE=ko LOCAL_TTS_LANGUAGE=ko

View File

@@ -122,15 +122,23 @@ Windows에서 GPU STT를 쓰려면 `LOCAL_STT_DEVICE=auto` 그대로 두고 `bun
## 속도 우선 기본값 ## 속도 우선 기본값
- STT 기본 모델은 `tiny` - STT 기본 권장 모델은 `small`
- LLM 기본 모델은 `qwen3:0.6b` - LLM 기본 모델은 `qwen3:0.6b`
- TTS 기본 보이스는 `af_heart` - TTS 기본 보이스는 `af_heart`
- TTS 기본 속도는 `1.12` - TTS 기본 속도는 `1.12`
더 빠르게 돌리고 싶으면:
```env
LOCAL_STT_MODEL=tiny
LOCAL_STT_BEAM_SIZE=1
```
정확도가 아쉬우면: 정확도가 아쉬우면:
```env ```env
LOCAL_STT_MODEL=small LOCAL_STT_MODEL=small
LOCAL_STT_BEAM_SIZE=3
OLLAMA_MODEL=qwen3:1.7b OLLAMA_MODEL=qwen3:1.7b
``` ```

View File

@@ -22,10 +22,10 @@ const envSchema = z.object({
LOCAL_AI_VENV_PATH: z.string().min(1).default(".local-ai/.venv"), LOCAL_AI_VENV_PATH: z.string().min(1).default(".local-ai/.venv"),
LOCAL_AI_CACHE_DIR: z.string().min(1).default(".local-ai/cache"), LOCAL_AI_CACHE_DIR: z.string().min(1).default(".local-ai/cache"),
LOCAL_AI_PYTHON: emptyToUndefined, LOCAL_AI_PYTHON: emptyToUndefined,
LOCAL_STT_MODEL: z.string().min(1).default("tiny"), LOCAL_STT_MODEL: z.string().min(1).default("small"),
LOCAL_STT_DEVICE: z.string().min(1).default("auto"), LOCAL_STT_DEVICE: z.string().min(1).default("auto"),
LOCAL_STT_COMPUTE_TYPE: z.string().min(1).default("auto"), LOCAL_STT_COMPUTE_TYPE: z.string().min(1).default("auto"),
LOCAL_STT_BEAM_SIZE: z.coerce.number().int().min(1).max(8).default(1), LOCAL_STT_BEAM_SIZE: z.coerce.number().int().min(1).max(8).default(3),
LOCAL_TTS_MODEL_PATH: z.string().min(1).default(".local-ai/models/kokoro-v1.0.onnx"), LOCAL_TTS_MODEL_PATH: z.string().min(1).default(".local-ai/models/kokoro-v1.0.onnx"),
LOCAL_TTS_VOICES_PATH: z.string().min(1).default(".local-ai/models/voices-v1.0.bin"), LOCAL_TTS_VOICES_PATH: z.string().min(1).default(".local-ai/models/voices-v1.0.bin"),
LOCAL_TTS_LANGUAGE: z.string().min(1).default("ko"), LOCAL_TTS_LANGUAGE: z.string().min(1).default("ko"),

View File

@@ -77,6 +77,13 @@ export async function runLocalAssistant(config: AssistantRuntimeConfig, logger:
await stt.warmup(); await stt.warmup();
await tts.warmup(); await tts.warmup();
await llm.warmup?.();
if (config.BOT_DEFAULT_LANGUAGE.startsWith("ko") && config.LOCAL_STT_MODEL === "tiny") {
logger.warn(
"LOCAL_STT_MODEL=tiny 는 한국어 인식률이 낮을 수 있습니다. GPU 환경이면 small 이상을 권장합니다.",
);
}
const session = new LocalVoiceSession({ const session = new LocalVoiceSession({
config, config,

View File

@@ -1,5 +1,6 @@
import type { ConversationMemory, UserUtterance } from "./conversation.js"; import type { ConversationMemory, UserUtterance } from "./conversation.js";
export interface LlmService { export interface LlmService {
warmup?(): Promise<void>;
generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string>; generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string>;
} }

View File

@@ -21,6 +21,13 @@ interface OllamaChatResponse {
error?: string; error?: string;
} }
/**
 * Subset of the JSON body returned by Ollama's `/api/tags` endpoint.
 *
 * Only the two identifier fields that the startup check compares against
 * the configured model are declared. Everything is optional because the
 * body is parsed without validation.
 */
interface OllamaTagsResponse {
  /** Entries reported by the server; absent when the body could not be parsed. */
  models?: {
    /** One identifier of the entry, compared against the configured model. */
    name?: string;
    /** A second identifier of the entry, also compared against the configured model. */
    model?: string;
  }[];
}
function normalizeReply(text: string): string { function normalizeReply(text: string): string {
const strippedThink = text.replace(/<think>[\s\S]*?<\/think>/gi, " "); const strippedThink = text.replace(/<think>[\s\S]*?<\/think>/gi, " ");
const compact = strippedThink.replace(/\s+/g, " ").trim(); const compact = strippedThink.replace(/\s+/g, " ").trim();
@@ -39,9 +46,42 @@ function normalizeReply(text: string): string {
export class OllamaLlmService implements LlmService { export class OllamaLlmService implements LlmService {
constructor(private readonly config: AssistantRuntimeConfig) {} constructor(private readonly config: AssistantRuntimeConfig) {}
/**
 * Startup check: verifies that the Ollama server answers on
 * `OLLAMA_BASE_URL` and that the configured `OLLAMA_MODEL` is installed.
 *
 * Model names are compared after applying Ollama's implicit `:latest`
 * tag, so a configured `llama3` matches a listed `llama3:latest`.
 *
 * @throws Error when the server cannot be reached, when `/api/tags`
 *   responds with a non-2xx status, or when the configured model is not
 *   in the returned list.
 */
async warmup(): Promise<void> {
  const url = new URL("/api/tags", this.config.OLLAMA_BASE_URL);

  let response: Response;
  try {
    response = await fetch(url);
  } catch {
    // fetch rejected before any HTTP response arrived; replace the
    // low-level error with an actionable hint for the operator.
    throw new Error(
      `Ollama 서버에 연결할 수 없습니다. ${this.config.OLLAMA_BASE_URL} 확인 후 Ollama 앱 또는 \`ollama serve\` 를 실행하고 \`ollama pull ${this.config.OLLAMA_MODEL}\` 까지 끝내 주세요.`,
    );
  }

  // A body that is not valid JSON is treated as an empty object so the
  // status check below still produces a meaningful error.
  const body = (await response.json().catch(() => ({}))) as OllamaTagsResponse & { error?: string };
  if (!response.ok) {
    throw new Error(body.error ?? `Ollama 상태 확인 실패: HTTP ${response.status}`);
  }

  // A name without a tag refers to the `latest` tag. Only the last path
  // segment is inspected, so a registry host with a port
  // (`host:5000/ns/model`) is not mistaken for a tagged name.
  const withDefaultTag = (name: string): string => {
    const lastSegment = name.slice(name.lastIndexOf("/") + 1);
    return lastSegment.includes(":") ? name : `${name}:latest`;
  };

  const wanted = withDefaultTag(this.config.OLLAMA_MODEL);
  const exists = (body.models ?? []).some((entry) =>
    [entry.name, entry.model].some((candidate) => {
      const trimmed = candidate?.trim();
      return trimmed !== undefined && trimmed !== "" && withDefaultTag(trimmed) === wanted;
    }),
  );

  if (!exists) {
    throw new Error(
      `Ollama 모델 ${this.config.OLLAMA_MODEL} 이 없습니다. \`ollama pull ${this.config.OLLAMA_MODEL}\` 를 먼저 실행해 주세요.`,
    );
  }
}
async generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string> { async generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string> {
const url = new URL("/api/chat", this.config.OLLAMA_BASE_URL); const url = new URL("/api/chat", this.config.OLLAMA_BASE_URL);
const response = await fetch(url, { let response: Response;
try {
response = await fetch(url, {
method: "POST", method: "POST",
headers: { headers: {
"Content-Type": "application/json", "Content-Type": "application/json",
@@ -68,6 +108,11 @@ export class OllamaLlmService implements LlmService {
}, },
}), }),
}); });
} catch {
throw new Error(
`Ollama 서버에 연결할 수 없습니다. ${this.config.OLLAMA_BASE_URL} 확인 후 Ollama 앱 또는 \`ollama serve\` 를 실행해 주세요.`,
);
}
const body = (await response.json().catch(() => ({}))) as OllamaChatResponse; const body = (await response.json().catch(() => ({}))) as OllamaChatResponse;