Integrate LLM into STT flow with reply gating
This commit is contained in:
54
src/index.ts
54
src/index.ts
@@ -14,6 +14,7 @@ async function runSttTest(): Promise<void> {
|
||||
const config = loadConfig();
|
||||
const logger = new Logger(config.DEBUG ? config.LOG_LEVEL : "error");
|
||||
const stt = new FasterWhisperSttService(config, logger);
|
||||
const llm = new OllamaLlmService(config, logger);
|
||||
let capture = null as ReturnType<typeof spawnLoopbackCapture> | null;
|
||||
let shuttingDown: Promise<void> | null = null;
|
||||
let receivedChunks = 0;
|
||||
@@ -70,6 +71,8 @@ async function runSttTest(): Promise<void> {
|
||||
|
||||
await stt.warmup();
|
||||
logger.info("STT warmup finished");
|
||||
await llm.warmup();
|
||||
logger.info("LLM warmup finished");
|
||||
|
||||
const transcriptionQueue: Array<{ pcm16: Buffer; queuedAt: number; index: number }> = [];
|
||||
let transcribing = false;
|
||||
@@ -102,11 +105,52 @@ async function runSttTest(): Promise<void> {
|
||||
console.log(`\n[text] ${text}\n`);
|
||||
}
|
||||
} else {
|
||||
console.log(text);
|
||||
console.log(`사용자> ${text}`);
|
||||
}
|
||||
|
||||
const assessmentStartedAt = Date.now();
|
||||
const assessment = await llm.assessReplyNeed(text);
|
||||
logger.info("Reply assessment", {
|
||||
index: next.index,
|
||||
should_reply: assessment.shouldReply,
|
||||
likely_needs_lookup: assessment.likelyNeedsLookup,
|
||||
reason: assessment.reason,
|
||||
assessment_ms: Date.now() - assessmentStartedAt,
|
||||
});
|
||||
|
||||
if (!assessment.shouldReply) {
|
||||
if (config.DEBUG) {
|
||||
console.log(`[skip] ${assessment.reason}\n`);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const llmStartedAt = Date.now();
|
||||
const reply = await llm.generateReply(text, {
|
||||
onProgress: (message) => {
|
||||
if (config.DEBUG) {
|
||||
console.log(`[assistant] ${message}`);
|
||||
return;
|
||||
}
|
||||
console.log(`답변> ${message}`);
|
||||
},
|
||||
});
|
||||
logger.info("LLM latency", {
|
||||
index: next.index,
|
||||
llm_ms: Date.now() - llmStartedAt,
|
||||
});
|
||||
logger.info("LLM reply", { index: next.index, text: reply });
|
||||
|
||||
if (config.DEBUG) {
|
||||
if (config.DEBUG_TRANSCRIPTS) {
|
||||
console.log(`[assistant] ${reply}\n`);
|
||||
}
|
||||
} else {
|
||||
console.log(`답변> ${reply}`);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error("STT failed", error);
|
||||
logger.error("STT/LLM failed", error);
|
||||
} finally {
|
||||
transcribing = false;
|
||||
void runNext();
|
||||
@@ -254,7 +298,11 @@ async function runLlmCli(): Promise<void> {
|
||||
|
||||
try {
|
||||
const startedAt = Date.now();
|
||||
const reply = await llm.generateReply(text);
|
||||
const reply = await llm.generateReply(text, {
|
||||
onProgress: (message) => {
|
||||
console.log(`assistant> ${message}`);
|
||||
},
|
||||
});
|
||||
logger.info("LLM latency", {
|
||||
llm_ms: Date.now() - startedAt,
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user