Add loopback STT debug logging

This commit is contained in:
2026-05-02 21:05:28 +09:00
parent f0f62c2307
commit c4baca1739
2 changed files with 65 additions and 0 deletions

View File

@@ -1,5 +1,9 @@
/**
 * Callbacks through which RealtimeSegmenter reports audio levels and
 * speech-segment lifecycle events. Only onSegment is required; the other
 * hooks are optional and are invoked through optional chaining.
 */
interface RealtimeSegmenterOptions {
/** Receives the PCM buffer of a finished speech segment that passed the minimum-length check. */
onSegment: (pcm16: Buffer) => void;
/** Receives the current peak value before the speech-state handling runs (presumably once per processed frame; the loop is not visible here). */
onLevel?: (peak: number) => void;
/** Called with the current peak right after the speech buffer has been seeded from the pre-roll buffer. */
onSpeechStart?: (peak: number) => void;
/** Called instead of onSegment when a finished segment is shorter than the minimum; the argument is the segment's byte length divided by two. */
onSpeechDiscarded?: (samples: number) => void;
/** Called immediately before onSegment for an accepted segment; the argument is the segment's byte length divided by two. */
onSpeechReady?: (samples: number) => void;
}
export class RealtimeSegmenter {
@@ -44,6 +48,8 @@ export class RealtimeSegmenter {
}
}
this.options.onLevel?.(peak);
if (!this.speechActive) {
appendWithCap(this.preRoll, frame, this.preRollSamples);
if (peak >= this.speechStartThreshold) {
@@ -60,6 +66,7 @@ export class RealtimeSegmenter {
this.silenceFrames = 0;
this.speech.splice(0, this.speech.length, ...this.preRoll);
this.preRoll.splice(0, this.preRoll.length);
this.options.onSpeechStart?.(peak);
}
this.speech.push(...frame);
@@ -81,9 +88,11 @@ export class RealtimeSegmenter {
this.speechCandidateFrames = 0;
if (speechPcm.length < this.minSpeechSamples * 2) {
this.options.onSpeechDiscarded?.(speechPcm.length / 2);
return;
}
this.options.onSpeechReady?.(speechPcm.length / 2);
this.options.onSegment(speechPcm);
}
}

View File

@@ -14,6 +14,13 @@ async function runLoopback(): Promise<void> {
const stt = new FasterWhisperSttService(config, logger);
let capture = null as ReturnType<typeof spawnLoopbackCapture> | null;
let shuttingDown: Promise<void> | null = null;
let receivedChunks = 0;
let receivedBytes = 0;
let maxPeak = 0;
let lastChunkAt = 0;
let lastLevelLogAt = 0;
let sawSpeechStart = false;
let emittedSegmentCount = 0;
const shutdown = async (exitCode: number, reason: string, error?: unknown): Promise<void> => {
if (shuttingDown) {
@@ -60,6 +67,7 @@ async function runLoopback(): Promise<void> {
});
await stt.warmup();
logger.info("STT warmup finished");
const transcriptionQueue: Buffer[] = [];
let transcribing = false;
@@ -93,14 +101,50 @@ async function runLoopback(): Promise<void> {
};
// Build the segmenter with diagnostic hooks: every hook logs what happened and
// updates the shared debug counters; onSegment additionally queues the audio
// for transcription and kicks the queue runner.
const segmenter = new RealtimeSegmenter({
  onLevel: (level) => {
    // Remember the loudest peak observed since the previous heartbeat.
    maxPeak = level > maxPeak ? level : maxPeak;

    const timestamp = Date.now();
    if (timestamp - lastLevelLogAt < 3000) {
      // Heartbeats are throttled to one per three seconds.
      return;
    }

    lastLevelLogAt = timestamp;
    logger.info("Audio input heartbeat", {
      chunks: receivedChunks,
      bytes: receivedBytes,
      peak: maxPeak,
      speech_started: sawSpeechStart,
      emitted_segments: emittedSegmentCount,
    });
    // Start a fresh peak window for the next heartbeat.
    maxPeak = 0;
  },
  onSpeechStart: (level) => {
    // Sticky flag: once set it stays true and is reported by every heartbeat.
    sawSpeechStart = true;
    logger.info("Speech start detected", { peak: level });
  },
  onSpeechDiscarded: (sampleCount) => {
    logger.info("Discarded short speech segment", { samples: sampleCount });
  },
  onSpeechReady: (sampleCount) => {
    // The running count doubles as the 1-based index of the segment.
    const index = ++emittedSegmentCount;
    logger.info("Speech segment ready", { index, samples: sampleCount });
  },
  onSegment: (pcm16) => {
    // Array.prototype.push returns the new length, i.e. the queue depth.
    const queue = transcriptionQueue.push(pcm16);
    logger.info("Queued segment for STT", { queue, bytes: pcm16.length });
    // Fire-and-forget: the returned promise is deliberately not awaited.
    void runNext();
  },
});
// Obtain the capture handle and forward every stdout chunk to the segmenter,
// updating the debug counters before the chunk is processed.
capture = spawnLoopbackCapture(config, logger);
capture.stdout.on("data", (pcmChunk: Buffer) => {
  // Timestamp of the latest chunk; read by the stall watchdog.
  lastChunkAt = Date.now();
  receivedBytes += pcmChunk.length;
  receivedChunks++;
  segmenter.pushChunk(pcmChunk);
});
capture.stderr.on("data", (chunk: Buffer) => {
@@ -123,6 +167,18 @@ async function runLoopback(): Promise<void> {
console.log(`source: ${config.AUDIO_SOURCE ?? "unset"}`);
console.log(`model: ${config.WHISPER_MODEL}`);
console.log(`language: ${config.WHISPER_LANGUAGE}`);
// Stall watchdog: every five seconds, warn if capture has never produced data
// or has gone quiet. The timer is unref'ed so it does not keep the process alive.
setInterval(() => {
  if (shuttingDown) {
    // Neither warning is emitted once shutdown has begun.
    return;
  }

  if (lastChunkAt === 0) {
    // Zero means no chunk has been received since startup.
    logger.warn("아직 캡처 PCM 데이터가 들어오지 않았습니다. AUDIO_SOURCE 가 잘못됐거나 loopback 입력이 아닌 장치일 수 있습니다.");
    return;
  }

  const silentForMs = Date.now() - lastChunkAt;
  if (lastChunkAt > 0 && silentForMs >= 5000) {
    logger.warn("최근 5초 동안 새 PCM chunk 가 들어오지 않았습니다.");
  }
}, 5000).unref();
}
async function main(): Promise<void> {