Tune realtime STT defaults
This commit is contained in:
@@ -4,6 +4,13 @@ interface RealtimeSegmenterOptions {
|
||||
onSpeechStart?: (peak: number) => void;
|
||||
onSpeechDiscarded?: (samples: number) => void;
|
||||
onSpeechReady?: (samples: number) => void;
|
||||
preRollSamples?: number;
|
||||
speechStartThreshold?: number;
|
||||
speechContinueThreshold?: number;
|
||||
speechStartFrames?: number;
|
||||
speechEndFrames?: number;
|
||||
minSpeechSamples?: number;
|
||||
maxSpeechSamples?: number;
|
||||
}
|
||||
|
||||
export class RealtimeSegmenter {
|
||||
@@ -12,18 +19,27 @@ export class RealtimeSegmenter {
|
||||
private readonly speech: number[] = [];
|
||||
|
||||
private readonly frameSamples = 320;
|
||||
private readonly preRollSamples = 3200;
|
||||
private readonly speechStartThreshold = 900;
|
||||
private readonly speechContinueThreshold = 450;
|
||||
private readonly speechStartFrames = 2;
|
||||
private readonly speechEndFrames = 18;
|
||||
private readonly minSpeechSamples = 6400;
|
||||
private readonly preRollSamples: number;
|
||||
private readonly speechStartThreshold: number;
|
||||
private readonly speechContinueThreshold: number;
|
||||
private readonly speechStartFrames: number;
|
||||
private readonly speechEndFrames: number;
|
||||
private readonly minSpeechSamples: number;
|
||||
private readonly maxSpeechSamples: number;
|
||||
|
||||
private speechActive = false;
|
||||
private speechCandidateFrames = 0;
|
||||
private silenceFrames = 0;
|
||||
|
||||
constructor(private readonly options: RealtimeSegmenterOptions) {}
|
||||
constructor(private readonly options: RealtimeSegmenterOptions) {
|
||||
this.preRollSamples = options.preRollSamples ?? 3200;
|
||||
this.speechStartThreshold = options.speechStartThreshold ?? 900;
|
||||
this.speechContinueThreshold = options.speechContinueThreshold ?? 450;
|
||||
this.speechStartFrames = options.speechStartFrames ?? 2;
|
||||
this.speechEndFrames = options.speechEndFrames ?? 24;
|
||||
this.minSpeechSamples = options.minSpeechSamples ?? 7200;
|
||||
this.maxSpeechSamples = options.maxSpeechSamples ?? 160000;
|
||||
}
|
||||
|
||||
pushChunk(chunk: Buffer): void {
|
||||
for (let offset = 0; offset + 1 < chunk.length; offset += 2) {
|
||||
@@ -78,7 +94,9 @@ export class RealtimeSegmenter {
|
||||
}
|
||||
|
||||
if (this.silenceFrames < this.speechEndFrames) {
|
||||
return;
|
||||
if (this.speech.length < this.maxSpeechSamples) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const speechPcm = int16ArrayToBuffer(Int16Array.from(this.speech));
|
||||
|
||||
Reference in New Issue
Block a user