Tune realtime STT defaults

This commit is contained in:
2026-05-02 21:24:41 +09:00
parent 11cfd7cc04
commit 962ff7037b
5 changed files with 79 additions and 17 deletions

View File

@@ -4,6 +4,13 @@ interface RealtimeSegmenterOptions {
onSpeechStart?: (peak: number) => void;
onSpeechDiscarded?: (samples: number) => void;
onSpeechReady?: (samples: number) => void;
preRollSamples?: number;
speechStartThreshold?: number;
speechContinueThreshold?: number;
speechStartFrames?: number;
speechEndFrames?: number;
minSpeechSamples?: number;
maxSpeechSamples?: number;
}
export class RealtimeSegmenter {
@@ -12,18 +19,27 @@ export class RealtimeSegmenter {
private readonly speech: number[] = [];
private readonly frameSamples = 320;
private readonly preRollSamples = 3200;
private readonly speechStartThreshold = 900;
private readonly speechContinueThreshold = 450;
private readonly speechStartFrames = 2;
private readonly speechEndFrames = 18;
private readonly minSpeechSamples = 6400;
private readonly preRollSamples: number;
private readonly speechStartThreshold: number;
private readonly speechContinueThreshold: number;
private readonly speechStartFrames: number;
private readonly speechEndFrames: number;
private readonly minSpeechSamples: number;
private readonly maxSpeechSamples: number;
private speechActive = false;
private speechCandidateFrames = 0;
private silenceFrames = 0;
constructor(private readonly options: RealtimeSegmenterOptions) {}
/**
 * Creates a segmenter and resolves every tunable from `options`.
 *
 * Each value falls back to a built-in default when the caller leaves it
 * unset (`undefined` or `null`):
 * preRollSamples 3200, speechStartThreshold 900, speechContinueThreshold 450,
 * speechStartFrames 2, speechEndFrames 24, minSpeechSamples 7200,
 * maxSpeechSamples 160000.
 *
 * @param options Callbacks and optional tuning overrides for segmentation.
 */
constructor(private readonly options: RealtimeSegmenterOptions) {
  // Pull the overrides out once; the defaults are applied below with `??`
  // so an explicit 0 is respected rather than replaced.
  const {
    preRollSamples,
    speechStartThreshold,
    speechContinueThreshold,
    speechStartFrames,
    speechEndFrames,
    minSpeechSamples,
    maxSpeechSamples,
  } = options;

  this.preRollSamples = preRollSamples ?? 3200;
  this.speechStartThreshold = speechStartThreshold ?? 900;
  this.speechContinueThreshold = speechContinueThreshold ?? 450;
  this.speechStartFrames = speechStartFrames ?? 2;
  this.speechEndFrames = speechEndFrames ?? 24;
  this.minSpeechSamples = minSpeechSamples ?? 7200;
  this.maxSpeechSamples = maxSpeechSamples ?? 160000;
}
pushChunk(chunk: Buffer): void {
for (let offset = 0; offset + 1 < chunk.length; offset += 2) {
@@ -78,7 +94,9 @@ export class RealtimeSegmenter {
}
if (this.silenceFrames < this.speechEndFrames) {
return;
if (this.speech.length < this.maxSpeechSamples) {
return;
}
}
const speechPcm = int16ArrayToBuffer(Int16Array.from(this.speech));