Tune realtime STT defaults
This commit is contained in:
31
src/index.ts
31
src/index.ts
@@ -69,8 +69,9 @@ async function runLoopback(): Promise<void> {
|
||||
await stt.warmup();
|
||||
logger.info("STT warmup finished");
|
||||
|
||||
const transcriptionQueue: Buffer[] = [];
|
||||
const transcriptionQueue: Array<{ pcm16: Buffer; queuedAt: number; index: number }> = [];
|
||||
let transcribing = false;
|
||||
let nextSegmentIndex = 1;
|
||||
|
||||
const runNext = async (): Promise<void> => {
|
||||
if (transcribing) {
|
||||
@@ -83,11 +84,17 @@ async function runLoopback(): Promise<void> {
|
||||
|
||||
transcribing = true;
|
||||
try {
|
||||
const text = await stt.transcribePcm16(next);
|
||||
const startedAt = Date.now();
|
||||
const text = await stt.transcribePcm16(next.pcm16);
|
||||
logger.info("STT latency", {
|
||||
index: next.index,
|
||||
wait_ms: startedAt - next.queuedAt,
|
||||
transcribe_ms: Date.now() - startedAt,
|
||||
});
|
||||
if (!text) {
|
||||
logger.info("빈 전사 결과");
|
||||
} else {
|
||||
logger.info("Transcript", text);
|
||||
logger.info("Transcript", { index: next.index, text });
|
||||
if (config.DEBUG_TRANSCRIPTS) {
|
||||
console.log(`\n[text] ${text}\n`);
|
||||
}
|
||||
@@ -101,6 +108,13 @@ async function runLoopback(): Promise<void> {
|
||||
};
|
||||
|
||||
const segmenter = new RealtimeSegmenter({
|
||||
preRollSamples: config.SEGMENT_PREROLL_SAMPLES,
|
||||
speechStartThreshold: config.SEGMENT_START_THRESHOLD,
|
||||
speechContinueThreshold: config.SEGMENT_CONTINUE_THRESHOLD,
|
||||
speechStartFrames: config.SEGMENT_START_FRAMES,
|
||||
speechEndFrames: config.SEGMENT_END_FRAMES,
|
||||
minSpeechSamples: config.SEGMENT_MIN_SPEECH_SAMPLES,
|
||||
maxSpeechSamples: config.SEGMENT_MAX_SPEECH_SAMPLES,
|
||||
onLevel: (peak) => {
|
||||
if (peak > maxPeak) {
|
||||
maxPeak = peak;
|
||||
@@ -128,11 +142,17 @@ async function runLoopback(): Promise<void> {
|
||||
},
|
||||
onSpeechReady: (samples) => {
|
||||
emittedSegmentCount += 1;
|
||||
logger.info("Speech segment ready", { index: emittedSegmentCount, samples });
|
||||
logger.info("Speech segment ready", { index: emittedSegmentCount, samples, ms: Math.round((samples / 16000) * 1000) });
|
||||
},
|
||||
onSegment: (pcm16) => {
|
||||
transcriptionQueue.push(pcm16);
|
||||
const index = nextSegmentIndex++;
|
||||
transcriptionQueue.push({
|
||||
pcm16,
|
||||
queuedAt: Date.now(),
|
||||
index,
|
||||
});
|
||||
logger.info("Queued segment for STT", {
|
||||
index,
|
||||
queue: transcriptionQueue.length,
|
||||
bytes: pcm16.length,
|
||||
});
|
||||
@@ -167,6 +187,7 @@ async function runLoopback(): Promise<void> {
|
||||
console.log(`source: ${config.AUDIO_SOURCE ?? "unset"}`);
|
||||
console.log(`model: ${config.WHISPER_MODEL}`);
|
||||
console.log(`language: ${config.WHISPER_LANGUAGE}`);
|
||||
console.log(`beam: ${config.WHISPER_BEAM_SIZE}`);
|
||||
|
||||
setInterval(() => {
|
||||
const now = Date.now();
|
||||
|
||||
Reference in New Issue
Block a user