Integrate LLM into STT flow with reply gating

2026-05-03 01:00:44 +09:00
parent b28f163217
commit c53dcc853d
3 changed files with 171 additions and 9 deletions
--- a/src/services/ollama-llm.ts
+++ b/src/services/ollama-llm.ts
@@ -42,6 +42,16 @@ interface OllamaToolResultMessage {
  content: string;
 }

+/** Optional hooks accepted by `generateReply` and forwarded to the agent loop. */
+interface GenerateReplyOptions {
+  /**
+   * Receives a short user-facing status message while a reply is being produced.
+   * The agent loop in this file calls it at most once per reply, just before
+   * executing the first tool call that has a progress message.
+   */
+  onProgress?: (message: string) => void;
+}
+
+/** Verdict produced by `assessReplyNeed` about a single piece of user text. */
+export interface ReplyAssessment {
+  /** True when the assistant should answer the text; false when it should stay silent. */
+  shouldReply: boolean;
+  /** True when answering probably requires fresh information or fact checking. */
+  likelyNeedsLookup: boolean;
+  /**
+   * Short explanation of the verdict: either a fixed label set by this service
+   * ("empty", "filler", "too_short", "fallback", "parsed") or text supplied by the model.
+   */
+  reason: string;
+}
+
 const SYSTEM_PROMPT =
  "너는 한국어로 짧고 자연스럽게 답하는 로컬 음성 비서다. 사용자의 말에 바로 답하고, 군더더기 없는 1~3문장으로 답해라. 정확한 시간, 설정 확인, 계산이 필요하면 도구를 우선 사용해라. 최신 정보, 오늘/최근 정보, 뉴스, 검색 요청, 사실 확인, 외부 웹페이지 내용이 필요한 경우에만 web_search 와 fetch_url 을 사용해라. 내부 지식만으로 충분한 일반 대화에는 웹 도구를 쓰지 마라. 너는 도구 호출 루프 안에 있으며 필요하면 여러 번 도구를 호출할 수 있다.";

@@ -158,14 +168,40 @@ export class OllamaLlmService {
    this.logger.info("LLM warmup finished", { model: this.config.OLLAMA_MODEL, reply: reply.content });
  }

-  async generateReply(userText: string): Promise<string> {
+  /**
+   * Decides whether a piece of user text deserves a reply at all.
+   *
+   * Cheap local heuristics run first; only when they are inconclusive is the
+   * model asked (with tools disabled) to classify the text as JSON. If the
+   * model's answer cannot be parsed, the method errs on the side of replying.
+   *
+   * @param userText - The text to judge.
+   * @returns The assessment from the heuristics, the model, or the fallback.
+   */
+  async assessReplyNeed(userText: string): Promise<ReplyAssessment> {
+    const quickVerdict = this.assessReplyNeedHeuristically(userText);
+    if (quickVerdict !== null) {
+      return quickVerdict;
+    }
+
+    const gatePrompt =
+      '다음 텍스트에 로컬 비서가 실제로 대답해야 하는지 판정해라. 의미 없는 감탄사, 중얼거림, 문맥 없는 짧은 파편, 노래 가사 조각, 잡음성 문장은 false. 질문, 요청, 확인, 명령, 대화 시도는 true. 최신 정보나 사실 확인이 필요하면 likely_needs_lookup 를 true 로 해라. JSON만 출력: {"should_reply":true,"likely_needs_lookup":false,"reason":"짧게"}';
+
+    const gateMessages: Array<OllamaChatMessage | OllamaToolResultMessage> = [
+      { role: "system", content: gatePrompt },
+      { role: "user", content: userText },
+    ];
+    const modelAnswer = await this.chat(gateMessages, { enableTools: false });
+
+    // Unparseable model output must not silence the assistant, so default to replying.
+    return (
+      this.parseAssessment(modelAnswer.content) ?? {
+        shouldReply: true,
+        likelyNeedsLookup: this.mightNeedLookup(userText),
+        reason: "fallback",
+      }
+    );
+  }
+
+  async generateReply(userText: string, options?: GenerateReplyOptions): Promise<string> {
    const messages: Array<OllamaChatMessage | OllamaToolResultMessage> = [
      { role: "system", content: SYSTEM_PROMPT },
      ...this.history,
      { role: "user", content: userText },
    ];

-    const reply = await this.runAgentLoop(messages);
+    const reply = await this.runAgentLoop(messages, options);

    this.history.push({ role: "user", content: userText });
    this.history.push({ role: "assistant", content: reply });
@@ -186,9 +222,14 @@ export class OllamaLlmService {
    this.history = this.history.slice(-maxMessages);
  }

-  private async runAgentLoop(messages: Array<OllamaChatMessage | OllamaToolResultMessage>): Promise<string> {
+  private async runAgentLoop(
+    messages: Array<OllamaChatMessage | OllamaToolResultMessage>,
+    options?: GenerateReplyOptions,
+  ): Promise<string> {
+    let progressEmitted = false;
+
    for (let step = 0; step < 6; step += 1) {
-      const response = await this.chat(messages);
+      const response = await this.chat(messages, { enableTools: true });
      const toolCalls = response.toolCalls ?? [];

      messages.push({
@@ -202,6 +243,13 @@ export class OllamaLlmService {
      }

      for (const call of toolCalls) {
+        if (!progressEmitted) {
+          const progressMessage = this.getProgressMessage(call.function.name);
+          if (progressMessage) {
+            options?.onProgress?.(progressMessage);
+            progressEmitted = true;
+          }
+        }
        const result = await this.executeTool(call);
        this.logger.info("LLM tool call", {
          name: call.function.name,
@@ -221,6 +269,7 @@ export class OllamaLlmService {

  private async chat(
    messages: Array<OllamaChatMessage | OllamaToolResultMessage>,
+    options?: { enableTools: boolean },
  ): Promise<{ content: string; toolCalls: OllamaToolCall[] }> {
    const response = await fetch(`${this.config.OLLAMA_BASE_URL}/api/chat`, {
      method: "POST",
@@ -230,7 +279,7 @@ export class OllamaLlmService {
      body: JSON.stringify({
        model: this.config.OLLAMA_MODEL,
        messages,
-        tools: TOOL_DEFINITIONS,
+        tools: options?.enableTools ? TOOL_DEFINITIONS : undefined,
        stream: false,
        think: false,
        keep_alive: this.config.OLLAMA_KEEP_ALIVE,
@@ -365,4 +414,66 @@ export class OllamaLlmService {
    }
    return fallback;
  }
+
+  /**
+   * Maps a tool name to the status message announced before that tool runs.
+   *
+   * @param toolName - Name of the tool the model asked to call.
+   * @returns The message for web lookups (`web_search`, `fetch_url`), or `null`
+   *   for every other tool.
+   */
+  private getProgressMessage(toolName: string): string | null {
+    const isWebLookup = toolName === "web_search" || toolName === "fetch_url";
+    return isWebLookup ? "검색해볼게요." : null;
+  }
+
+  /**
+   * Extracts a {@link ReplyAssessment} from the raw text of the model's reply.
+   *
+   * The reply is expected to contain a JSON object with `should_reply`,
+   * `likely_needs_lookup` and `reason` (camelCase keys are accepted as well;
+   * the snake_case key wins when both are present).
+   *
+   * @param content - Raw model output, possibly with text around the JSON.
+   * @returns The parsed assessment, or `null` when no JSON object with a usable
+   *   reply flag is found. Returning `null` rather than `shouldReply: false`
+   *   lets the caller apply its own fallback instead of silently muting the
+   *   assistant on malformed output.
+   */
+  private parseAssessment(content: string): ReplyAssessment | null {
+    // Models sometimes emit booleans as strings ("true"/"false"); accept both forms.
+    const readFlag = (value: unknown): boolean | null => {
+      if (typeof value === "boolean") {
+        return value;
+      }
+      if (typeof value === "string") {
+        const lowered = value.trim().toLowerCase();
+        if (lowered === "true") {
+          return true;
+        }
+        if (lowered === "false") {
+          return false;
+        }
+      }
+      return null;
+    };
+
+    // Try the widest brace span first (keeps nested objects intact), then the
+    // shortest one, which still works when the model appends text containing "}".
+    const widestSpan = content.match(/\{[\s\S]*\}/)?.[0];
+    const shortestSpan = content.match(/\{[\s\S]*?\}/)?.[0];
+    const candidates = [widestSpan, shortestSpan].filter(
+      (candidate, index, all): candidate is string =>
+        candidate !== undefined && all.indexOf(candidate) === index,
+    );
+
+    for (const candidate of candidates) {
+      let decoded: unknown;
+      try {
+        decoded = JSON.parse(candidate);
+      } catch {
+        // Not valid JSON; fall through to the next candidate span.
+        continue;
+      }
+
+      if (typeof decoded !== "object" || decoded === null) {
+        continue;
+      }
+
+      const record = decoded as Record<string, unknown>;
+      const shouldReply = readFlag(record.should_reply) ?? readFlag(record.shouldReply);
+      if (shouldReply === null) {
+        // The decision flag is missing or unusable, so this object is not an assessment.
+        continue;
+      }
+
+      return {
+        shouldReply,
+        likelyNeedsLookup:
+          readFlag(record.likely_needs_lookup) ?? readFlag(record.likelyNeedsLookup) ?? false,
+        reason: typeof record.reason === "string" ? record.reason : "parsed",
+      };
+    }
+
+    return null;
+  }
+
+  /**
+   * Rejects obviously non-actionable text without calling the model.
+   *
+   * @param userText - The text to judge.
+   * @returns A "do not reply" assessment whose reason is `"empty"` (blank after
+   *   trimming), `"filler"` (matches the interjection pattern) or `"too_short"`
+   *   (two characters or fewer with no question mark); `null` when none of
+   *   these rules applies and the decision must be made elsewhere.
+   */
+  private assessReplyNeedHeuristically(userText: string): ReplyAssessment | null {
+    const trimmed = userText.trim();
+
+    let skipReason: string | null = null;
+    if (trimmed.length === 0) {
+      skipReason = "empty";
+    } else if (/^(아+|어+|음+|으+|흠+|엉+|어어+|음음+|하+|호+|와+|오+|응+|네+|예+|끝\.?)$/u.test(trimmed)) {
+      skipReason = "filler";
+    } else if (trimmed.length <= 2 && !/[?？]/.test(trimmed)) {
+      skipReason = "too_short";
+    }
+
+    if (skipReason === null) {
+      return null;
+    }
+
+    return {
+      shouldReply: false,
+      likelyNeedsLookup: false,
+      reason: skipReason,
+    };
+  }
+
+  /**
+   * Keyword check for text that hints at needing fresh or external information.
+   *
+   * @param text - The text to scan.
+   * @returns True when the text contains any of the lookup-related keywords.
+   */
+  private mightNeedLookup(text: string): boolean {
+    const lookupCue = /(최신|오늘|최근|뉴스|검색|찾아|알아봐|확인|업데이트|가격|날씨|현재|실시간)/u;
+    return lookupCue.test(text);
+  }
 }