import type { AssistantRuntimeConfig } from "../config.js";
import type { ConversationMemory, UserUtterance } from "./conversation.js";
import type { LlmService } from "./llm.js";

/** System prompt sent as the `system` message of every `/api/chat` request. */
const ASSISTANT_INSTRUCTIONS = [
  "너는 디스코드 음성 채널 또는 로컬 마이크 테스트에서 동작하는 한국어 음성 비서다.",
  "답변은 짧고 실용적으로 한다.",
  "기본은 한 문장, 길어도 두 문장을 넘기지 않는다.",
  "말투는 자연스러운 한국어로 유지한다.",
  "speaker_id와 speaker_name은 화자 구분용이므로 필요할 때만 자연스럽게 반영한다.",
  "잘 못 들었거나 의미가 불명확하면 짧게 다시 물어본다.",
  "목록, 마크다운, 코드블록은 쓰지 않는다.",
  "생각 과정을 드러내지 말고 최종 답변만 말한다.",
].join(" ");

/** Upper bound, in UTF-16 code units, on the reply returned by `normalizeReply`. */
const MAX_REPLY_LENGTH = 180;

/** Number of leading sentences kept when a reply exceeds `MAX_REPLY_LENGTH`. */
const MAX_REPLY_SENTENCES = 2;

/** Reply returned when the model produced no usable text. */
const EMPTY_REPLY_FALLBACK = "잘 못 들었습니다. 한 번만 다시 말씀해 주세요.";

/** Subset of the `/api/chat` response body that this module reads. */
interface OllamaChatResponse {
  message?: {
    content?: string;
    thinking?: string;
  };
  error?: string;
}

/** Subset of the `/api/tags` response body that this module reads. */
interface OllamaTagsResponse {
  models?: Array<{
    name?: string;
    model?: string;
  }>;
}

/**
 * Removes model "thinking" output from a reply.
 *
 * Paired `<think>…</think>` blocks are removed wherever they occur, so text
 * before and after a block is preserved. If a closing tag is left without an
 * opener, everything up to and including the last such tag is dropped.
 */
function stripThinking(text: string): string {
  const withoutBlocks = text.replace(/<think>[\s\S]*?<\/think>/gi, " ");
  // Greedy on purpose: consume through the LAST dangling close tag.
  return withoutBlocks.replace(/^[\s\S]*<\/think>/i, " ");
}

/**
 * Cuts `text` to at most `maxLength` UTF-16 code units and trims the result.
 * Backs off by one unit when the cut would separate a surrogate pair, so the
 * result never ends in a lone high surrogate.
 */
function truncate(text: string, maxLength: number): string {
  if (text.length <= maxLength) {
    return text.trim();
  }
  const lastUnit = text.charCodeAt(maxLength - 1);
  const splitsSurrogatePair = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return text.slice(0, splitsSurrogatePair ? maxLength - 1 : maxLength).trim();
}

/**
 * Turns raw model output into a short single-line reply.
 *
 * Thinking output is stripped, whitespace runs are collapsed to single spaces,
 * and replies longer than `MAX_REPLY_LENGTH` are reduced to their first
 * `MAX_REPLY_SENTENCES` sentences (split on `.`, `!`, `?`) and then truncated.
 *
 * @param text Raw `message.content` from the model.
 * @returns The normalized reply; may be an empty string.
 */
function normalizeReply(text: string): string {
  const compact = stripThinking(text).replace(/\s+/g, " ").trim();
  if (compact.length <= MAX_REPLY_LENGTH) {
    return compact;
  }

  const sentences = compact.match(/[^.!?]+[.!?]?/g);
  if (sentences === null) {
    // Nothing but sentence punctuation: fall back to a plain cut.
    return truncate(compact, MAX_REPLY_LENGTH);
  }

  // Fragments after the first carry a leading space; trim each one so the
  // join below does not produce double spaces.
  const leading = sentences
    .slice(0, MAX_REPLY_SENTENCES)
    .map((sentence) => sentence.trim())
    .filter((sentence) => sentence.length > 0)
    .join(" ");
  return truncate(leading, MAX_REPLY_LENGTH);
}

/**
 * Appends Ollama's default `:latest` tag when `modelName` has no tag.
 * Only the segment after the last `/` is inspected, so a `host:port/` prefix
 * is not mistaken for a tag.
 */
function withDefaultTag(modelName: string): string {
  const trimmed = modelName.trim();
  const lastSegment = trimmed.slice(trimmed.lastIndexOf("/") + 1);
  return lastSegment.includes(":") ? trimmed : `${trimmed}:latest`;
}

/** Reports whether an installed model name refers to the same model as `wanted`. */
function isSameModel(installed: string | undefined, wanted: string): boolean {
  if (installed === undefined) {
    return false;
  }
  return withDefaultTag(installed) === withDefaultTag(wanted);
}

/**
 * Parses a response body as JSON.
 * Returns `{}` when the body is not valid JSON or is not a JSON object.
 */
async function readJsonObject(response: Response): Promise<unknown> {
  try {
    const parsed: unknown = await response.json();
    return typeof parsed === "object" && parsed !== null ? parsed : {};
  } catch {
    return {};
  }
}

/** Builds the shared "cannot reach the Ollama server" message for `baseUrl`. */
function connectionErrorMessage(baseUrl: string): string {
  return `Ollama 서버에 연결할 수 없습니다. ${baseUrl} 확인 후 Ollama 앱이 실행 중인지 확인해 주세요. Windows에서는 \`localhost\` 대신 \`http://127.0.0.1:11434\` 를 권장합니다.`;
}

/** `LlmService` implementation that talks to an Ollama server over HTTP. */
export class OllamaLlmService implements LlmService {
  constructor(private readonly config: AssistantRuntimeConfig) {}

  /**
   * Checks that the Ollama server is reachable and that the configured model
   * is installed, by querying `/api/tags`.
   *
   * @throws Error when the request fails, the server answers with a non-OK
   *   status, or the configured model is not in the returned list.
   */
  async warmup(): Promise<void> {
    const url = new URL("/api/tags", this.config.OLLAMA_BASE_URL);

    let response: Response;
    try {
      response = await fetch(url);
    } catch {
      throw new Error(
        `${connectionErrorMessage(this.config.OLLAMA_BASE_URL)} 모델이 없으면 \`ollama pull ${this.config.OLLAMA_MODEL}\` 를 먼저 실행하세요.`,
      );
    }

    const body = (await readJsonObject(response)) as OllamaTagsResponse & { error?: string };
    if (!response.ok) {
      throw new Error(body.error ?? `Ollama 상태 확인 실패: HTTP ${response.status}`);
    }

    const models = Array.isArray(body.models) ? body.models : [];
    const wanted = this.config.OLLAMA_MODEL;
    // Compare with the default tag applied to both sides so that a configured
    // "name" matches an installed "name:latest".
    const exists = models.some(
      (model) => isSameModel(model.name, wanted) || isSameModel(model.model, wanted),
    );
    if (!exists) {
      throw new Error(
        `Ollama 모델 ${this.config.OLLAMA_MODEL} 이 없습니다. \`ollama pull ${this.config.OLLAMA_MODEL}\` 를 먼저 실행해 주세요.`,
      );
    }
  }

  /**
   * Sends the prompt built from `memory` and `utterance` to `/api/chat`
   * (non-streaming) and returns the normalized reply.
   *
   * @param memory Conversation memory used to build the user prompt.
   * @param utterance The utterance to answer.
   * @returns The normalized reply, or a fixed "please repeat" message when the
   *   model produced no usable text.
   * @throws Error when the request fails or the server answers with a non-OK
   *   status.
   */
  async generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string> {
    const url = new URL("/api/chat", this.config.OLLAMA_BASE_URL);

    let response: Response;
    try {
      response = await fetch(url, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: this.config.OLLAMA_MODEL,
          messages: [
            {
              role: "system",
              content: ASSISTANT_INSTRUCTIONS,
            },
            {
              role: "user",
              content: memory.buildPrompt(utterance),
            },
          ],
          think: false,
          stream: false,
          keep_alive: this.config.OLLAMA_KEEP_ALIVE,
          options: {
            num_ctx: this.config.OLLAMA_NUM_CTX,
            temperature: 0.4,
            num_predict: 120,
          },
        }),
      });
    } catch {
      throw new Error(connectionErrorMessage(this.config.OLLAMA_BASE_URL));
    }

    const body = (await readJsonObject(response)) as OllamaChatResponse;
    if (!response.ok) {
      throw new Error(body.error ?? `Ollama request failed with status ${response.status}`);
    }

    // Normalize first: content made up only of thinking output normalizes to
    // an empty string and must fall back as well.
    const reply = normalizeReply(body.message?.content ?? "");
    return reply.length > 0 ? reply : EMPTY_REPLY_FALLBACK;
  }
}