feat: switch llm to local ollama qwen3

2026-04-30 02:53:00 +09:00
parent 23bc534b23
commit 24aa58fc42
12 changed files with 136 additions and 91 deletions
--- a/src/services/llm.ts
+++ b/src/services/llm.ts
@@ -0,0 +1,5 @@
+import type { ConversationMemory, UserUtterance } from "./conversation.js";
+
+/**
+ * Contract for a service that turns the current conversation state and a new
+ * user utterance into a reply string.
+ */
+export interface LlmService {
+  /**
+   * Produces a reply for the given utterance.
+   *
+   * @param memory Conversation memory used to build the prompt.
+   * @param utterance The user utterance to answer.
+   * @returns A promise that resolves to the reply text.
+   */
+  generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string>;
+}
--- a/src/services/ollama-llm.ts
+++ b/src/services/ollama-llm.ts
@@ -0,0 +1,85 @@
+import type { AssistantRuntimeConfig } from "../config.js";
+import type { ConversationMemory, UserUtterance } from "./conversation.js";
+import type { LlmService } from "./llm.js";
+
+/**
+ * Sentences of the Korean system prompt. In order, they tell the model that it
+ * is a Korean voice assistant for a Discord voice channel or a local microphone
+ * test, to answer briefly and practically, to use one sentence (two at most),
+ * to keep a natural Korean tone, to use speaker_id / speaker_name only when
+ * helpful, to ask again briefly when the input is unclear, to avoid lists,
+ * markdown and code blocks, and to output only the final answer without its
+ * reasoning.
+ */
+const ASSISTANT_INSTRUCTION_SENTENCES: readonly string[] = [
+  "너는 디스코드 음성 채널 또는 로컬 마이크 테스트에서 동작하는 한국어 음성 비서다.",
+  "답변은 짧고 실용적으로 한다.",
+  "기본은 한 문장, 길어도 두 문장을 넘기지 않는다.",
+  "말투는 자연스러운 한국어로 유지한다.",
+  "speaker_id와 speaker_name은 화자 구분용이므로 필요할 때만 자연스럽게 반영한다.",
+  "잘 못 들었거나 의미가 불명확하면 짧게 다시 물어본다.",
+  "목록, 마크다운, 코드블록은 쓰지 않는다.",
+  "생각 과정을 드러내지 말고 최종 답변만 말한다.",
+];
+
+/** The system prompt as a single line: the sentences above separated by one space. */
+const ASSISTANT_INSTRUCTIONS = ASSISTANT_INSTRUCTION_SENTENCES.join(" ");
+
+/** Assistant message object carried inside a chat response body. */
+interface OllamaChatMessage {
+  /** Reply text; this is the field the service reads to build its answer. */
+  content?: string;
+  /**
+   * Declared but not read anywhere in this file.
+   * NOTE(review): presumably the model's reasoning text - confirm against the Ollama API.
+   */
+  thinking?: string;
+}
+
+/**
+ * Subset of the chat endpoint's JSON body that this module consumes. Every
+ * field is optional because the parsed body is cast to this type, not validated.
+ */
+interface OllamaChatResponse {
+  message?: OllamaChatMessage;
+  /** Error text; used as the thrown error message when the HTTP status is not OK. */
+  error?: string;
+}
+
+/**
+ * Cleans raw model output so it can be used as a short spoken reply.
+ *
+ * Steps:
+ * 1. Removes `<think>...</think>` blocks, and also an unterminated `<think>`
+ *    block that runs to the end of the text (e.g. when generation was cut off).
+ * 2. Collapses every whitespace run to a single space and trims.
+ * 3. If the result is longer than 180 characters, keeps at most the first two
+ *    sentences (split on `.`, `!`, `?`) and hard-caps the result at 180 characters.
+ *
+ * @param text Raw model output.
+ * @returns The cleaned reply; may be an empty string when `text` contains
+ *   nothing but thinking blocks or whitespace.
+ */
+function normalizeReply(text: string): string {
+  const maxLength = 180;
+
+  const withoutThinking = text
+    .replace(/<think>[\s\S]*?<\/think>/gi, " ")
+    // A `<think>` that is never closed would otherwise be returned as the reply.
+    .replace(/<think>[\s\S]*$/i, " ");
+  const compact = withoutThinking.replace(/\s+/g, " ").trim();
+  if (compact.length <= maxLength) {
+    return compact;
+  }
+
+  const sentences = compact.match(/[^.!?]+[.!?]?/g);
+  if (!sentences || sentences.length === 0) {
+    // No sentence-like content (e.g. punctuation only): fall back to a plain cut.
+    return compact.slice(0, maxLength).trim();
+  }
+
+  // Every match after the first keeps the space that followed the previous
+  // sentence, so trim each one before joining to avoid a double space.
+  return sentences
+    .slice(0, 2)
+    .map((sentence) => sentence.trim())
+    .join(" ")
+    .slice(0, maxLength)
+    .trim();
+}
+
+/**
+ * `LlmService` implementation that asks an Ollama server for a reply through
+ * its `/api/chat` endpoint.
+ */
+export class OllamaLlmService implements LlmService {
+  /**
+   * @param config Runtime configuration; this class reads `OLLAMA_BASE_URL`,
+   *   `OLLAMA_MODEL`, `OLLAMA_KEEP_ALIVE` and `OLLAMA_NUM_CTX` from it.
+   */
+  constructor(private readonly config: AssistantRuntimeConfig) {}
+
+  /**
+   * Sends the system prompt plus the prompt built from `memory` and `utterance`
+   * as a single non-streaming chat request and returns the normalized reply.
+   *
+   * @param memory Conversation memory; its `buildPrompt` output becomes the user message.
+   * @param utterance The user utterance to answer.
+   * @returns The normalized reply, or a fixed Korean "please say that again"
+   *   sentence when the model produced no usable text.
+   * @throws Error when the HTTP response status is not OK; the message is the
+   *   body's `error` field if present, otherwise a message containing the status.
+   *   Rejections from `fetch` itself are not caught here and propagate to the caller.
+   */
+  async generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string> {
+    const fallbackReply = "잘 못 들었습니다. 한 번만 다시 말씀해 주세요.";
+
+    // An absolute path replaces any path component of the configured base URL.
+    const url = new URL("/api/chat", this.config.OLLAMA_BASE_URL);
+    const response = await fetch(url, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        model: this.config.OLLAMA_MODEL,
+        messages: [
+          {
+            role: "system",
+            content: ASSISTANT_INSTRUCTIONS,
+          },
+          {
+            role: "user",
+            content: memory.buildPrompt(utterance),
+          },
+        ],
+        think: false,
+        stream: false,
+        keep_alive: this.config.OLLAMA_KEEP_ALIVE,
+        options: {
+          num_ctx: this.config.OLLAMA_NUM_CTX,
+          temperature: 0.4,
+          num_predict: 120,
+        },
+      }),
+    });
+
+    // A body that is not valid JSON is treated as an empty object so that the
+    // status check below can still report the failure.
+    const body = (await response.json().catch(() => ({}))) as OllamaChatResponse;
+
+    if (!response.ok) {
+      throw new Error(body.error ?? `Ollama request failed with status ${response.status}`);
+    }
+
+    // Normalize before the emptiness check: stripping `<think>` blocks can leave
+    // nothing behind, and an empty string must not be returned as the reply.
+    const reply = normalizeReply(body.message?.content ?? "");
+    if (!reply) {
+      return fallbackReply;
+    }
+
+    return reply;
+  }
+}
--- a/src/services/openai-llm.ts
+++ b/src/services/openai-llm.ts
@@ -1,64 +0,0 @@
-import OpenAI from "openai";
-
-import type { AssistantRuntimeConfig } from "../config.js";
-import type { ConversationMemory, UserUtterance } from "./conversation.js";
-
-const ASSISTANT_INSTRUCTIONS = [
-  "너는 디스코드 음성 채널에서 동작하는 한국어 음성 비서다.",
-  "답변은 짧고 실용적으로 한다.",
-  "기본은 한 문장, 길어도 두 문장을 넘기지 않는다.",
-  "말투는 자연스러운 한국어로 유지한다.",
-  "speaker_id와 speaker_name은 화자 구분용이므로 필요할 때만 자연스럽게 반영한다.",
-  "잘 못 들었거나 의미가 불명확하면 짧게 다시 물어본다.",
-  "목록, 마크다운, 코드블록은 쓰지 않는다.",
-].join(" ");
-
-function normalizeReply(text: string): string {
-  const compact = text.replace(/\s+/g, " ").trim();
-  if (compact.length <= 180) {
-    return compact;
-  }
-
-  const sentences = compact.match(/[^.!?]+[.!?]?/g);
-  if (!sentences || sentences.length === 0) {
-    return compact.slice(0, 180).trim();
-  }
-
-  return sentences.slice(0, 2).join(" ").trim().slice(0, 180).trim();
-}
-
-export class OpenAiLlmService {
-  private readonly client: OpenAI;
-
-  constructor(private readonly config: AssistantRuntimeConfig) {
-    this.client = new OpenAI({
-      apiKey: this.config.OPENAI_API_KEY,
-    });
-  }
-
-  async generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string> {
-    const response = await this.client.responses.create({
-      model: this.config.OPENAI_MODEL,
-      instructions: ASSISTANT_INSTRUCTIONS,
-      input: [
-        {
-          role: "user",
-          content: [
-            {
-              type: "input_text",
-              text: memory.buildPrompt(utterance),
-            },
-          ],
-        },
-      ],
-      max_output_tokens: 120,
-    });
-
-    const output = response.output_text?.trim();
-    if (!output) {
-      return "잘 못 들었습니다. 한 번만 다시 말씀해 주세요.";
-    }
-
-    return normalizeReply(output);
-  }
-}