From 645a5109a26b72876a141dc2e01ec24ed12dc991 Mon Sep 17 00:00:00 2001 From: claude-bot Date: Thu, 30 Apr 2026 18:08:38 +0900 Subject: [PATCH] Improve local startup checks and Korean STT defaults --- .env.example | 4 +- README.md | 10 +++- src/config.ts | 4 +- src/local-main.ts | 7 +++ src/services/llm.ts | 1 + src/services/ollama-llm.ts | 97 ++++++++++++++++++++++++++++---------- 6 files changed, 92 insertions(+), 31 deletions(-) diff --git a/.env.example b/.env.example index 06ad168..44979bf 100644 --- a/.env.example +++ b/.env.example @@ -11,11 +11,11 @@ LOCAL_AI_VENV_PATH=.local-ai/.venv LOCAL_AI_CACHE_DIR=.local-ai/cache # Windows면 `python` 또는 `py -3` LOCAL_AI_PYTHON= -LOCAL_STT_MODEL=tiny +LOCAL_STT_MODEL=small # CUDA dll 오류가 나면 `cpu` LOCAL_STT_DEVICE=auto LOCAL_STT_COMPUTE_TYPE=auto -LOCAL_STT_BEAM_SIZE=1 +LOCAL_STT_BEAM_SIZE=3 LOCAL_TTS_MODEL_PATH=.local-ai/models/kokoro-v1.0.onnx LOCAL_TTS_VOICES_PATH=.local-ai/models/voices-v1.0.bin LOCAL_TTS_LANGUAGE=ko diff --git a/README.md b/README.md index 14447a8..687885c 100644 --- a/README.md +++ b/README.md @@ -122,15 +122,23 @@ Windows에서 GPU STT를 쓰려면 `LOCAL_STT_DEVICE=auto` 그대로 두고 `bun ## 속도 우선 기본값 -- STT 기본 모델은 `tiny` +- STT 기본 권장 모델은 `small` - LLM 기본 모델은 `qwen3:0.6b` - TTS 기본 보이스는 `af_heart` - TTS 기본 속도는 `1.12` +더 빠르게 돌리고 싶으면: + +```env +LOCAL_STT_MODEL=tiny +LOCAL_STT_BEAM_SIZE=1 +``` + 정확도가 아쉬우면: ```env LOCAL_STT_MODEL=small +LOCAL_STT_BEAM_SIZE=3 OLLAMA_MODEL=qwen3:1.7b ``` diff --git a/src/config.ts b/src/config.ts index 6259d4c..4e20dfb 100644 --- a/src/config.ts +++ b/src/config.ts @@ -22,10 +22,10 @@ const envSchema = z.object({ LOCAL_AI_VENV_PATH: z.string().min(1).default(".local-ai/.venv"), LOCAL_AI_CACHE_DIR: z.string().min(1).default(".local-ai/cache"), LOCAL_AI_PYTHON: emptyToUndefined, - LOCAL_STT_MODEL: z.string().min(1).default("tiny"), + LOCAL_STT_MODEL: z.string().min(1).default("small"), LOCAL_STT_DEVICE: z.string().min(1).default("auto"), LOCAL_STT_COMPUTE_TYPE: 
z.string().min(1).default("auto"), - LOCAL_STT_BEAM_SIZE: z.coerce.number().int().min(1).max(8).default(1), + LOCAL_STT_BEAM_SIZE: z.coerce.number().int().min(1).max(8).default(3), LOCAL_TTS_MODEL_PATH: z.string().min(1).default(".local-ai/models/kokoro-v1.0.onnx"), LOCAL_TTS_VOICES_PATH: z.string().min(1).default(".local-ai/models/voices-v1.0.bin"), LOCAL_TTS_LANGUAGE: z.string().min(1).default("ko"), diff --git a/src/local-main.ts b/src/local-main.ts index c35a9c6..7199c72 100644 --- a/src/local-main.ts +++ b/src/local-main.ts @@ -77,6 +77,13 @@ export async function runLocalAssistant(config: AssistantRuntimeConfig, logger: await stt.warmup(); await tts.warmup(); + await llm.warmup?.(); + + if (config.BOT_DEFAULT_LANGUAGE.startsWith("ko") && config.LOCAL_STT_MODEL === "tiny") { + logger.warn( + "LOCAL_STT_MODEL=tiny 는 한국어 인식률이 낮을 수 있습니다. GPU 환경이면 small 이상을 권장합니다.", + ); + } const session = new LocalVoiceSession({ config, diff --git a/src/services/llm.ts b/src/services/llm.ts index 5c01e73..0d2b8af 100644 --- a/src/services/llm.ts +++ b/src/services/llm.ts @@ -1,5 +1,6 @@ import type { ConversationMemory, UserUtterance } from "./conversation.js"; export interface LlmService { + warmup?(): Promise<void>; generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string>; } diff --git a/src/services/ollama-llm.ts b/src/services/ollama-llm.ts index 58992d6..a93ad2b 100644 --- a/src/services/ollama-llm.ts +++ b/src/services/ollama-llm.ts @@ -21,6 +21,13 @@ interface OllamaChatResponse { error?: string; } +interface OllamaTagsResponse { + models?: Array<{ + name?: string; + model?: string; + }>; +} + function normalizeReply(text: string): string { const strippedThink = text.replace(/<think>[\s\S]*?<\/think>/gi, " "); const compact = strippedThink.replace(/\s+/g, " ").trim(); @@ -39,35 +46,73 @@ function normalizeReply(text: string): string { export class OllamaLlmService implements LlmService { constructor(private readonly config: AssistantRuntimeConfig) {} + async 
warmup(): Promise<void> { + const url = new URL("/api/tags", this.config.OLLAMA_BASE_URL); + let response: Response; + + try { + response = await fetch(url); + } catch { + throw new Error( + `Ollama 서버에 연결할 수 없습니다. ${this.config.OLLAMA_BASE_URL} 확인 후 Ollama 앱 또는 \`ollama serve\` 를 실행하고 \`ollama pull ${this.config.OLLAMA_MODEL}\` 까지 끝내 주세요.`, + ); + } + + const body = (await response.json().catch(() => ({}))) as OllamaTagsResponse & { error?: string }; + if (!response.ok) { + throw new Error(body.error ?? `Ollama 상태 확인 실패: HTTP ${response.status}`); + } + + const models = body.models ?? []; + const exists = models.some((model) => { + const name = model.name?.trim(); + const alias = model.model?.trim(); + return name === this.config.OLLAMA_MODEL || alias === this.config.OLLAMA_MODEL; + }); + + if (!exists) { + throw new Error( + `Ollama 모델 ${this.config.OLLAMA_MODEL} 이 없습니다. \`ollama pull ${this.config.OLLAMA_MODEL}\` 를 먼저 실행해 주세요.`, + ); + } + } + async generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string> { const url = new URL("/api/chat", this.config.OLLAMA_BASE_URL); - const response = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - model: this.config.OLLAMA_MODEL, - messages: [ - { - role: "system", - content: ASSISTANT_INSTRUCTIONS, - }, - { - role: "user", - content: memory.buildPrompt(utterance), - }, - ], - think: false, - stream: false, - keep_alive: this.config.OLLAMA_KEEP_ALIVE, - options: { - num_ctx: this.config.OLLAMA_NUM_CTX, - temperature: 0.4, - num_predict: 120, + let response: Response; + try { + response = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", }, - }), - }); + body: JSON.stringify({ + model: this.config.OLLAMA_MODEL, + messages: [ + { + role: "system", + content: ASSISTANT_INSTRUCTIONS, + }, + { + role: "user", + content: memory.buildPrompt(utterance), + }, + ], + think: false, + stream: false, + keep_alive: 
this.config.OLLAMA_KEEP_ALIVE, + options: { + num_ctx: this.config.OLLAMA_NUM_CTX, + temperature: 0.4, + num_predict: 120, + }, + }), + }); + } catch { + throw new Error( + `Ollama 서버에 연결할 수 없습니다. ${this.config.OLLAMA_BASE_URL} 확인 후 Ollama 앱 또는 \`ollama serve\` 를 실행해 주세요.`, + ); + } const body = (await response.json().catch(() => ({}))) as OllamaChatResponse;