Integrate LLM into STT flow with reply gating

2026-05-03 01:00:44 +09:00
parent b28f163217
commit c53dcc853d
3 changed files with 171 additions and 9 deletions
--- a/src/index.ts
+++ b/src/index.ts
@@ -14,6 +14,7 @@ async function runSttTest(): Promise<void> {
  const config = loadConfig();
  const logger = new Logger(config.DEBUG ? config.LOG_LEVEL : "error");
  const stt = new FasterWhisperSttService(config, logger);
+  const llm = new OllamaLlmService(config, logger);
  let capture = null as ReturnType<typeof spawnLoopbackCapture> | null;
  let shuttingDown: Promise<void> | null = null;
  let receivedChunks = 0;
@@ -70,6 +71,8 @@ async function runSttTest(): Promise<void> {

  await stt.warmup();
  logger.info("STT warmup finished");
+  await llm.warmup();
+  logger.info("LLM warmup finished");

  const transcriptionQueue: Array<{ pcm16: Buffer; queuedAt: number; index: number }> = [];
  let transcribing = false;
@@ -102,11 +105,52 @@ async function runSttTest(): Promise<void> {
            console.log(`\n[text] ${text}\n`);
          }
        } else {
-          console.log(text);
+          console.log(`사용자> ${text}`);
+        }
+
+        const assessmentStartedAt = Date.now();
+        const assessment = await llm.assessReplyNeed(text);
+        logger.info("Reply assessment", {
+          index: next.index,
+          should_reply: assessment.shouldReply,
+          likely_needs_lookup: assessment.likelyNeedsLookup,
+          reason: assessment.reason,
+          assessment_ms: Date.now() - assessmentStartedAt,
+        });
+
+        if (!assessment.shouldReply) {
+          if (config.DEBUG) {
+            console.log(`[skip] ${assessment.reason}\n`);
+          }
+          return;
+        }
+
+        const llmStartedAt = Date.now();
+        const reply = await llm.generateReply(text, {
+          onProgress: (message) => {
+            if (config.DEBUG) {
+              console.log(`[assistant] ${message}`);
+              return;
+            }
+            console.log(`답변> ${message}`);
+          },
+        });
+        logger.info("LLM latency", {
+          index: next.index,
+          llm_ms: Date.now() - llmStartedAt,
+        });
+        logger.info("LLM reply", { index: next.index, text: reply });
+
+        if (config.DEBUG) {
+          if (config.DEBUG_TRANSCRIPTS) {
+            console.log(`[assistant] ${reply}\n`);
+          }
+        } else {
+          console.log(`답변> ${reply}`);
        }
      }
    } catch (error) {
-      logger.error("STT failed", error);
+      logger.error("STT/LLM failed", error);
    } finally {
      transcribing = false;
      void runNext();
@@ -254,7 +298,11 @@ async function runLlmCli(): Promise<void> {

    try {
      const startedAt = Date.now();
-      const reply = await llm.generateReply(text);
+      const reply = await llm.generateReply(text, {
+        onProgress: (message) => {
+          console.log(`assistant> ${message}`);
+        },
+      });
      logger.info("LLM latency", {
        llm_ms: Date.now() - startedAt,
      });