Integrate LLM into STT flow with reply gating

2026-05-03 01:00:44 +09:00
parent b28f163217
commit c53dcc853d
3 changed files with 171 additions and 9 deletions
--- a/src/index.ts
+++ b/src/index.ts
@@ -14,6 +14,7 @@ async function runSttTest(): Promise<void> {
  const config = loadConfig();
  const logger = new Logger(config.DEBUG ? config.LOG_LEVEL : "error");
  const stt = new FasterWhisperSttService(config, logger);
+  const llm = new OllamaLlmService(config, logger);
  let capture = null as ReturnType<typeof spawnLoopbackCapture> | null;
  let shuttingDown: Promise<void> | null = null;
  let receivedChunks = 0;
@@ -70,6 +71,8 @@ async function runSttTest(): Promise<void> {

  await stt.warmup();
  logger.info("STT warmup finished");
+  await llm.warmup();
+  logger.info("LLM warmup finished");

  const transcriptionQueue: Array<{ pcm16: Buffer; queuedAt: number; index: number }> = [];
  let transcribing = false;
@@ -102,11 +105,52 @@ async function runSttTest(): Promise<void> {
            console.log(`\n[text] ${text}\n`);
          }
        } else {
-          console.log(text);
+          console.log(`사용자> ${text}`);
+        }
+
+        const assessmentStartedAt = Date.now();
+        const assessment = await llm.assessReplyNeed(text);
+        logger.info("Reply assessment", {
+          index: next.index,
+          should_reply: assessment.shouldReply,
+          likely_needs_lookup: assessment.likelyNeedsLookup,
+          reason: assessment.reason,
+          assessment_ms: Date.now() - assessmentStartedAt,
+        });
+
+        if (!assessment.shouldReply) {
+          if (config.DEBUG) {
+            console.log(`[skip] ${assessment.reason}\n`);
+          }
+          return;
+        }
+
+        const llmStartedAt = Date.now();
+        const reply = await llm.generateReply(text, {
+          onProgress: (message) => {
+            if (config.DEBUG) {
+              console.log(`[assistant] ${message}`);
+              return;
+            }
+            console.log(`답변> ${message}`);
+          },
+        });
+        logger.info("LLM latency", {
+          index: next.index,
+          llm_ms: Date.now() - llmStartedAt,
+        });
+        logger.info("LLM reply", { index: next.index, text: reply });
+
+        if (config.DEBUG) {
+          if (config.DEBUG_TRANSCRIPTS) {
+            console.log(`[assistant] ${reply}\n`);
+          }
+        } else {
+          console.log(`답변> ${reply}`);
        }
      }
    } catch (error) {
-      logger.error("STT failed", error);
+      logger.error("STT/LLM failed", error);
    } finally {
      transcribing = false;
      void runNext();
@@ -254,7 +298,11 @@ async function runLlmCli(): Promise<void> {

    try {
      const startedAt = Date.now();
-      const reply = await llm.generateReply(text);
+      const reply = await llm.generateReply(text, {
+        onProgress: (message) => {
+          console.log(`assistant> ${message}`);
+        },
+      });
      logger.info("LLM latency", {
        llm_ms: Date.now() - startedAt,
      });
--- a/src/services/ollama-llm.ts
+++ b/src/services/ollama-llm.ts
@@ -42,6 +42,16 @@ interface OllamaToolResultMessage {
  content: string;
 }

+interface GenerateReplyOptions { // Optional hooks a caller may pass to generateReply.
+  onProgress?: (message: string) => void; // Called at most once per reply with a short status line, just before the first web_search/fetch_url tool call runs.
+}
+
+export interface ReplyAssessment { // Verdict returned by assessReplyNeed for one piece of user text.
+  shouldReply: boolean; // When false, the STT loop skips reply generation for this utterance.
+  likelyNeedsLookup: boolean; // Hint that answering probably needs fresh information or fact checking.
+  reason: string; // Short explanation of the verdict; logged, and printed after "[skip]" in DEBUG mode.
+}
+
 const SYSTEM_PROMPT =
  "너는 한국어로 짧고 자연스럽게 답하는 로컬 음성 비서다. 사용자의 말에 바로 답하고, 군더더기 없는 1~3문장으로 답해라. 정확한 시간, 설정 확인, 계산이 필요하면 도구를 우선 사용해라. 최신 정보, 오늘/최근 정보, 뉴스, 검색 요청, 사실 확인, 외부 웹페이지 내용이 필요한 경우에만 web_search 와 fetch_url 을 사용해라. 내부 지식만으로 충분한 일반 대화에는 웹 도구를 쓰지 마라. 너는 도구 호출 루프 안에 있으며 필요하면 여러 번 도구를 호출할 수 있다.";

@@ -158,14 +168,40 @@ export class OllamaLlmService {
    this.logger.info("LLM warmup finished", { model: this.config.OLLAMA_MODEL, reply: reply.content });
  }

-  async generateReply(userText: string): Promise<string> {
+  /**
+   * Decides whether a piece of user text deserves an assistant reply.
+   *
+   * Cheap local heuristics run first; only when they are inconclusive is the
+   * model asked (with tools disabled) to classify the text as JSON. If the
+   * model output cannot be turned into an assessment, the method errs on the
+   * side of replying and guesses the lookup flag from keywords.
+   *
+   * @param userText - Text to classify, e.g. a transcript from the STT service.
+   * @returns The assessment from the heuristics, the model, or the fallback.
+   */
+  async assessReplyNeed(userText: string): Promise<ReplyAssessment> {
+    const quickVerdict = this.assessReplyNeedHeuristically(userText);
+    if (quickVerdict !== null) {
+      return quickVerdict;
+    }
+
+    const classifierPrompt =
+      '다음 텍스트에 로컬 비서가 실제로 대답해야 하는지 판정해라. 의미 없는 감탄사, 중얼거림, 문맥 없는 짧은 파편, 노래 가사 조각, 잡음성 문장은 false. 질문, 요청, 확인, 명령, 대화 시도는 true. 최신 정보나 사실 확인이 필요하면 likely_needs_lookup 를 true 로 해라. JSON만 출력: {"should_reply":true,"likely_needs_lookup":false,"reason":"짧게"}';
+    const conversation: Array<OllamaChatMessage | OllamaToolResultMessage> = [
+      { role: "system", content: classifierPrompt },
+      { role: "user", content: userText },
+    ];
+
+    // Classification is a plain completion, so no tool definitions are sent.
+    const { content } = await this.chat(conversation, { enableTools: false });
+
+    // The fallback object is only built (and the keyword scan only run) when parsing fails.
+    return (
+      this.parseAssessment(content) ?? {
+        shouldReply: true,
+        likelyNeedsLookup: this.mightNeedLookup(userText),
+        reason: "fallback",
+      }
+    );
+  }
+
+  async generateReply(userText: string, options?: GenerateReplyOptions): Promise<string> {
    const messages: Array<OllamaChatMessage | OllamaToolResultMessage> = [
      { role: "system", content: SYSTEM_PROMPT },
      ...this.history,
      { role: "user", content: userText },
    ];

-    const reply = await this.runAgentLoop(messages);
+    const reply = await this.runAgentLoop(messages, options);

    this.history.push({ role: "user", content: userText });
    this.history.push({ role: "assistant", content: reply });
@@ -186,9 +222,14 @@ export class OllamaLlmService {
    this.history = this.history.slice(-maxMessages);
  }

-  private async runAgentLoop(messages: Array<OllamaChatMessage | OllamaToolResultMessage>): Promise<string> {
+  private async runAgentLoop(
+    messages: Array<OllamaChatMessage | OllamaToolResultMessage>,
+    options?: GenerateReplyOptions,
+  ): Promise<string> {
+    let progressEmitted = false;
+
    for (let step = 0; step < 6; step += 1) {
-      const response = await this.chat(messages);
+      const response = await this.chat(messages, { enableTools: true });
      const toolCalls = response.toolCalls ?? [];

      messages.push({
@@ -202,6 +243,13 @@ export class OllamaLlmService {
      }

      for (const call of toolCalls) {
+        if (!progressEmitted) {
+          const progressMessage = this.getProgressMessage(call.function.name);
+          if (progressMessage) {
+            options?.onProgress?.(progressMessage);
+            progressEmitted = true;
+          }
+        }
        const result = await this.executeTool(call);
        this.logger.info("LLM tool call", {
          name: call.function.name,
@@ -221,6 +269,7 @@ export class OllamaLlmService {

  private async chat(
    messages: Array<OllamaChatMessage | OllamaToolResultMessage>,
+    options?: { enableTools: boolean },
  ): Promise<{ content: string; toolCalls: OllamaToolCall[] }> {
    const response = await fetch(`${this.config.OLLAMA_BASE_URL}/api/chat`, {
      method: "POST",
@@ -230,7 +279,7 @@ export class OllamaLlmService {
      body: JSON.stringify({
        model: this.config.OLLAMA_MODEL,
        messages,
-        tools: TOOL_DEFINITIONS,
+        tools: options?.enableTools ? TOOL_DEFINITIONS : undefined,
        stream: false,
        think: false,
        keep_alive: this.config.OLLAMA_KEEP_ALIVE,
@@ -365,4 +414,66 @@ export class OllamaLlmService {
    }
    return fallback;
  }
+
+  /**
+   * Maps a tool name to the status line announced before that tool runs.
+   *
+   * @param toolName - Name of the tool the model asked to call.
+   * @returns The progress text for the web tools (`web_search`, `fetch_url`),
+   *   or `null` for every other tool.
+   */
+  private getProgressMessage(toolName: string): string | null {
+    const usesWeb = toolName === "web_search" || toolName === "fetch_url";
+    return usesWeb ? "검색해볼게요." : null;
+  }
+
+  /**
+   * Extracts a reply assessment from raw model output.
+   *
+   * The text between the first `{` and the last `}` is parsed as JSON, so
+   * surrounding chatter or code fences are tolerated. Both snake_case and
+   * camelCase keys are accepted.
+   *
+   * @param content - Raw text returned by the model.
+   * @returns The parsed assessment, or `null` when there is no JSON object,
+   *   the JSON is invalid, or it carries no boolean reply verdict.
+   */
+  private parseAssessment(content: string): ReplyAssessment | null {
+    const match = content.match(/\{[\s\S]*\}/);
+    if (!match) {
+      return null;
+    }
+
+    let parsed: Record<string, unknown>;
+    try {
+      parsed = JSON.parse(match[0]) as Record<string, unknown>;
+    } catch {
+      return null;
+    }
+
+    // An object without a boolean verdict (e.g. `{}` or `"true"` as a string) is
+    // unusable output, not a "do not reply" decision. Report it as unparseable
+    // instead of silently suppressing the reply.
+    const verdicts = [parsed.should_reply, parsed.shouldReply];
+    if (!verdicts.some((value) => typeof value === "boolean")) {
+      return null;
+    }
+
+    return {
+      shouldReply: verdicts.includes(true),
+      likelyNeedsLookup: parsed.likely_needs_lookup === true || parsed.likelyNeedsLookup === true,
+      reason: typeof parsed.reason === "string" ? parsed.reason : "parsed",
+    };
+  }
+
+  /**
+   * Rejects obviously unanswerable input without calling the model.
+   *
+   * @param userText - Text to inspect; surrounding whitespace is ignored.
+   * @returns A "do not reply" assessment for blank text (`empty`), text made
+   *   up solely of a filler word (`filler`), or text of at most two characters
+   *   with no question mark (`too_short`); `null` when the heuristics cannot
+   *   decide.
+   */
+  private assessReplyNeedHeuristically(userText: string): ReplyAssessment | null {
+    // Every heuristic hit has the same shape and differs only in its reason.
+    const skip = (reason: string): ReplyAssessment => ({
+      shouldReply: false,
+      likelyNeedsLookup: false,
+      reason,
+    });
+
+    const trimmed = userText.trim();
+    if (trimmed.length === 0) {
+      return skip("empty");
+    }
+
+    const fillerPattern = /^(아+|어+|음+|으+|흠+|엉+|어어+|음음+|하+|호+|와+|오+|응+|네+|예+|끝\.?)$/u;
+    if (fillerPattern.test(trimmed)) {
+      return skip("filler");
+    }
+
+    // Very short text is kept only if it contains an ASCII or full-width question mark.
+    const hasQuestionMark = /[?？]/.test(trimmed);
+    if (trimmed.length <= 2 && !hasQuestionMark) {
+      return skip("too_short");
+    }
+
+    return null;
+  }
+
+  /**
+   * Keyword check for text that probably needs fresh or external information.
+   *
+   * @param text - Text to scan.
+   * @returns `true` when any of the lookup-related keywords occurs in `text`.
+   */
+  private mightNeedLookup(text: string): boolean {
+    const lookupKeywords = /(최신|오늘|최근|뉴스|검색|찾아|알아봐|확인|업데이트|가격|날씨|현재|실시간)/u;
+    return lookupKeywords.test(text);
+  }
 }