// 711 lines · 20 KiB · TypeScript
import { spawn, type ChildProcess, type ChildProcessByStdio } from "node:child_process";
|
|
import { once } from "node:events";
|
|
import { promises as fs } from "node:fs";
|
|
import os from "node:os";
|
|
import path from "node:path";
|
|
import type { Readable, Writable } from "node:stream";
|
|
|
|
import { RealTimeVAD } from "avr-vad";
|
|
|
|
import type { AssistantRuntimeConfig } from "../config.js";
|
|
import { Logger } from "../logger.js";
|
|
import { requireFfmpegPath } from "./ffmpeg-path.js";
|
|
import { takeFrame, int16ArrayToFloat32, float32ToPcm16Buffer } from "./pcm.js";
|
|
import { ConversationMemory, type UserUtterance } from "../services/conversation.js";
|
|
import type { LlmService } from "../services/llm.js";
|
|
import type { SttService } from "../services/stt.js";
|
|
import type { PreparedSpeechAudio, TtsService } from "../services/tts.js";
|
|
|
|
/**
 * Dependencies handed to a {@link LocalVoiceSession} at construction time.
 */
interface LocalVoiceSessionOptions {
  /** Runtime configuration (audio source/sink names, speaker name, memory size, debug flags). */
  config: AssistantRuntimeConfig;
  /** Logger used for every diagnostic message emitted by the session. */
  logger: Logger;
  /** Speech-to-text backend used to transcribe captured PCM audio. */
  stt: SttService;
  /** Text-to-speech backend used to prepare reply audio for playback. */
  tts: TtsService;
  /** Language model backend used to generate the assistant's reply. */
  llm: LlmService;
}
|
|
|
|
/**
 * One queued piece of text waiting to be synthesized and played.
 */
interface SpeechJob {
  /** Text to synthesize. */
  text: string;
  /** `"assistant"` for generated replies, `"manual"` for text queued through `speakText`. */
  source: "assistant" | "manual";
}
|
|
|
|
/**
 * Voice session that captures audio from a local input device, detects speech,
 * runs each utterance through STT -> LLM -> TTS and plays the reply locally.
 *
 * - On non-Windows platforms audio is captured with `pw-record`, segmented by
 *   `RealTimeVAD`, and played with `pw-play`.
 * - On Windows audio is captured with ffmpeg (DirectShow), segmented by a
 *   simple peak-amplitude detector, and played through PowerShell's
 *   `System.Media.SoundPlayer`.
 *
 * Detected speech interrupts ("barges in" on) any playback in progress.
 */
export class LocalVoiceSession {
  private readonly memory: ConversationMemory;
  /** Speech jobs waiting to be synthesized and played, in FIFO order. */
  private readonly queue: SpeechJob[] = [];
  /** Decoded 16-bit samples not yet consumed by the speech detector. */
  private readonly pendingSamples: number[] = [];
  /** Peak amplitude at or above which a chunk counts as "non-silent" for the input watchdog. */
  private readonly silenceThreshold = 900;
  /** Windows detector frame size: 320 samples = 20 ms at the 16 kHz capture rate. */
  private readonly windowsFrameSamples = 320;
  /** Audio kept from before speech onset: 3,200 samples = 200 ms at 16 kHz. */
  private readonly windowsPreRollSamples = 3_200;
  /** Frame peak required to count towards starting a speech segment. */
  private readonly windowsSpeechStartThreshold = 520;
  /** Frame peak required to keep an active speech segment alive. */
  private readonly windowsSpeechContinueThreshold = 260;
  /** Consecutive loud frames required to start a segment. */
  private readonly windowsSpeechStartFrames = 2;
  /** Consecutive quiet frames that end a segment (18 frames = 360 ms). */
  private readonly windowsSpeechEndFrames = 18;
  /** Segments shorter than this (7,200 samples = 450 ms) are discarded. */
  private readonly windowsMinSpeechSamples = 7_200;

  private vad: RealTimeVAD | null = null;
  private recorder: ChildProcessByStdio<null, Readable, Readable> | null = null;
  private currentPlayer: ChildProcess | null = null;
  private currentAbortController: AbortController | null = null;
  private currentPlayback: PreparedSpeechAudio | null = null;
  /** Promise chain that serializes VAD frame processing. */
  private processing = Promise.resolve();
  private draining = false;
  private destroyed = false;
  private inputWatchdog: NodeJS.Timeout | null = null;
  private recorderStartedAt = 0;
  private lastPcmChunkAt = 0;
  private lastNonSilentAudioAt = 0;
  private warnedNoPcm = false;
  private warnedSilent = false;
  /** Trailing byte of the previous chunk when it ended in the middle of a 16-bit sample. */
  private pcmCarryByte: number | null = null;
  private windowsSpeechBuffer: number[] = [];
  private windowsPreRollBuffer: number[] = [];
  private windowsSpeechActive = false;
  private windowsSpeechCandidateFrames = 0;
  private windowsSilenceFrames = 0;

  constructor(private readonly options: LocalVoiceSessionOptions) {
    this.memory = new ConversationMemory(options.config.MAX_CONVERSATION_TURNS);
  }

  /**
   * Creates the speech detector (VAD on non-Windows), spawns the recorder
   * process, wires its output into the detector and starts the input watchdog.
   *
   * @throws Error on Windows when `LOCAL_AUDIO_SOURCE` is not configured.
   */
  async start(): Promise<void> {
    if (process.platform !== "win32") {
      this.vad = await RealTimeVAD.new({
        model: "v5",
        sampleRate: 16000,
        frameSamples: 1536,
        positiveSpeechThreshold: 0.55,
        negativeSpeechThreshold: 0.35,
        redemptionFrames: 8,
        preSpeechPadFrames: 2,
        minSpeechFrames: 3,
        onFrameProcessed: () => undefined,
        onVADMisfire: () => undefined,
        onSpeechStart: () => {
          this.interruptPlayback("local-barge-in");
        },
        onSpeechRealStart: () => undefined,
        onSpeechEnd: (audio: Float32Array) => {
          this.dispatchSpeech(audio);
        },
      });
    } else {
      this.options.logger.info("Windows local mode uses amplitude-based speech detection");
    }

    // The recorder is ffmpeg on Windows and pw-record elsewhere; label logs accordingly.
    const recorderLabel = process.platform === "win32" ? "ffmpeg" : "pw-record";
    const recorder = this.spawnRecorder();
    this.recorder = recorder;
    this.recorderStartedAt = Date.now();
    this.lastPcmChunkAt = 0;
    this.lastNonSilentAudioAt = 0;
    this.warnedNoPcm = false;
    this.warnedSilent = false;
    this.pcmCarryByte = null;

    recorder.stdout.on("data", (chunk: Buffer) => {
      this.pushPcm16Chunk(chunk);
    });
    recorder.stderr.on("data", (chunk: Buffer) => {
      const text = chunk.toString().trim();
      if (text.length > 0) {
        this.options.logger.debug(`[${recorderLabel}]`, text);
      }
    });
    // Without this listener a failed spawn (e.g. binary not found) surfaces as an
    // unhandled 'error' event and terminates the whole process.
    recorder.on("error", (error) => {
      if (!this.destroyed) {
        this.options.logger.warn(`${recorderLabel} failed`, error);
      }
    });
    recorder.on("exit", (code, signal) => {
      if (!this.destroyed) {
        this.options.logger.warn(`${recorderLabel} exited unexpectedly`, { code, signal });
      }
    });

    this.inputWatchdog = setInterval(() => {
      this.reportInputHealth();
    }, 3_000);
  }

  /**
   * Stops playback, the watchdog, the recorder process and the VAD.
   * After this call incoming audio is ignored and the queue no longer drains.
   */
  async destroy(): Promise<void> {
    this.destroyed = true;
    this.interruptPlayback("local-shutdown");

    if (this.inputWatchdog) {
      clearInterval(this.inputWatchdog);
      this.inputWatchdog = null;
    }

    const recorder = this.recorder;
    // Only wait for 'exit' when the process actually started and is still running:
    // `killed` merely records that kill() was called, so a recorder that already
    // exited (or never spawned) would otherwise leave this await pending forever.
    const recorderRunning =
      recorder !== null &&
      recorder.pid !== undefined &&
      recorder.exitCode === null &&
      recorder.signalCode === null;
    if (recorder && recorderRunning && recorder.kill("SIGTERM")) {
      await once(recorder, "exit").catch(() => null);
    }
    this.recorder = null;

    if (this.vad) {
      await this.vad.destroy().catch((error) => {
        this.options.logger.warn("Local VAD destroy failed", error);
      });
      this.vad = null;
    }
  }

  /** Clears the conversation memory and stops any queued or active playback. */
  clearConversation(): void {
    this.memory.clear();
    this.interruptPlayback("local-reset");
  }

  /**
   * Queues `text` for playback and drains the queue.
   * If another drain is already running, this returns without waiting for it.
   */
  async speakText(text: string): Promise<void> {
    this.queue.push({ text, source: "manual" });
    await this.drainQueue();
  }

  /** Returns a multi-line, human-readable summary of the session state. */
  statusSummary(): string {
    const lines = [
      "모드: local",
      `플랫폼: ${process.platform}`,
      `입력 source: ${this.options.config.LOCAL_AUDIO_SOURCE ?? "default"}`,
      `출력 sink: ${this.describeSink()}`,
      `대기열: ${this.queue.length}`,
      `최근 대화 턴: ${this.memory.recentTurns().length}`,
    ];
    return lines.join("\n");
  }

  /**
   * Spawns the platform recorder, which writes raw 16 kHz mono signed 16-bit PCM
   * to its stdout. Delegates to ffmpeg on Windows, uses `pw-record` elsewhere.
   */
  private spawnRecorder(): ChildProcessByStdio<null, Readable, Readable> {
    if (process.platform === "win32") {
      return this.spawnWindowsRecorder();
    }

    const source = this.options.config.LOCAL_AUDIO_SOURCE;
    const args = ["--rate", "16000", "--channels", "1", "--format", "s16", "--raw"];
    if (source) {
      args.push("--target", source);
    }
    args.push("-");

    this.options.logger.info("Starting local recorder", {
      source: source ?? "default",
    });

    return spawn("pw-record", args, {
      stdio: ["ignore", "pipe", "pipe"],
    });
  }

  /**
   * Spawns ffmpeg capturing the configured DirectShow audio device as raw
   * 16 kHz mono s16le PCM on stdout.
   *
   * @throws Error when `LOCAL_AUDIO_SOURCE` is not configured.
   */
  private spawnWindowsRecorder(): ChildProcessByStdio<null, Readable, Readable> {
    const ffmpegPath = this.getFfmpegPath();
    const sourceName = this.options.config.LOCAL_AUDIO_SOURCE;
    if (!sourceName) {
      throw new Error("Windows 로컬 모드는 LOCAL_AUDIO_SOURCE 설정이 필요합니다. `bun run audio:devices` 로 이름을 확인해 주세요.");
    }

    const args = [
      "-hide_banner",
      "-loglevel",
      "warning",
      "-f",
      "dshow",
      "-i",
      `audio=${sourceName}`,
      "-ac",
      "1",
      "-ar",
      "16000",
      "-f",
      "s16le",
      "pipe:1",
    ];

    this.options.logger.info("Starting local recorder", {
      source: sourceName,
      backend: "ffmpeg-dshow",
    });

    return spawn(ffmpegPath, args, {
      stdio: ["ignore", "pipe", "pipe"],
    });
  }

  /**
   * Decodes a chunk of little-endian 16-bit PCM from the recorder, updates the
   * watchdog timestamps and feeds complete frames to the speech detector.
   */
  private pushPcm16Chunk(chunk: Buffer): void {
    if (this.destroyed) {
      return;
    }

    this.lastPcmChunkAt = Date.now();

    // Pipe reads are not guaranteed to end on a sample boundary. Prepend the byte
    // left over from the previous chunk so samples stay aligned instead of every
    // following sample being decoded from the wrong byte pair.
    let data = chunk;
    if (this.pcmCarryByte !== null) {
      data = Buffer.concat([Buffer.from([this.pcmCarryByte]), chunk]);
      this.pcmCarryByte = null;
    }
    const usableBytes = data.length - (data.length % 2);
    if (usableBytes < data.length) {
      this.pcmCarryByte = data.readUInt8(usableBytes);
    }

    let peak = 0;
    for (let offset = 0; offset < usableBytes; offset += 2) {
      const sample = data.readInt16LE(offset);
      peak = Math.max(peak, Math.abs(sample));
      this.pendingSamples.push(sample);
    }

    if (peak >= this.silenceThreshold) {
      this.lastNonSilentAudioAt = Date.now();
    }

    if (process.platform === "win32") {
      this.processWindowsSpeechFrames();
      return;
    }

    if (!this.vad) {
      return;
    }

    for (
      let frame = takeFrame(this.pendingSamples, 1536);
      frame;
      frame = takeFrame(this.pendingSamples, 1536)
    ) {
      const floatFrame = int16ArrayToFloat32(frame);
      // Chain onto the previous frame so the VAD sees frames strictly in order.
      this.processing = this.processing
        .then(() => this.vad?.processAudio(floatFrame))
        .catch((error) => {
          this.options.logger.warn("Local VAD processing failed", error);
        });
    }
  }

  /**
   * Amplitude-based speech segmentation used on Windows.
   *
   * While idle, frames go into a capped pre-roll buffer; once enough consecutive
   * frames exceed the start threshold a segment begins (seeded with the pre-roll)
   * and playback is interrupted. The segment ends after enough consecutive frames
   * fall below the continue threshold and is then handed to speech handling,
   * unless it is shorter than the minimum length.
   */
  private processWindowsSpeechFrames(): void {
    for (
      let frame = takeFrame(this.pendingSamples, this.windowsFrameSamples);
      frame;
      frame = takeFrame(this.pendingSamples, this.windowsFrameSamples)
    ) {
      let peak = 0;
      for (const sample of frame) {
        peak = Math.max(peak, Math.abs(sample));
      }

      if (!this.windowsSpeechActive) {
        this.appendWithCap(this.windowsPreRollBuffer, frame, this.windowsPreRollSamples);
        this.windowsSpeechCandidateFrames =
          peak >= this.windowsSpeechStartThreshold ? this.windowsSpeechCandidateFrames + 1 : 0;

        if (this.windowsSpeechCandidateFrames >= this.windowsSpeechStartFrames) {
          this.windowsSpeechActive = true;
          this.windowsSilenceFrames = 0;
          // The pre-roll already ends with the current frame, so it becomes the
          // segment as-is; appending the frame again would duplicate 20 ms of audio.
          this.windowsSpeechBuffer = this.windowsPreRollBuffer;
          this.windowsPreRollBuffer = [];
          this.interruptPlayback("local-barge-in");
          this.options.logger.debug("Windows speech start detected", { peak });
        }
        continue;
      }

      this.windowsSpeechBuffer.push(...frame);
      this.windowsSilenceFrames =
        peak >= this.windowsSpeechContinueThreshold ? 0 : this.windowsSilenceFrames + 1;

      if (this.windowsSilenceFrames < this.windowsSpeechEndFrames) {
        continue;
      }

      const speech = Int16Array.from(this.windowsSpeechBuffer);
      this.windowsSpeechActive = false;
      this.windowsSpeechBuffer = [];
      this.windowsSilenceFrames = 0;
      this.windowsSpeechCandidateFrames = 0;

      if (speech.length < this.windowsMinSpeechSamples) {
        this.options.logger.debug("Ignored short Windows speech segment", { samples: speech.length });
        continue;
      }

      this.options.logger.debug("Windows speech end detected", { samples: speech.length });
      this.dispatchSpeech(int16ArrayToFloat32(speech));
    }
  }

  /**
   * Starts speech handling without awaiting it, logging any unexpected failure
   * so it cannot surface as an unhandled promise rejection.
   */
  private dispatchSpeech(audio: Float32Array): void {
    this.handleSpeechEnd(audio).catch((error: unknown) => {
      this.options.logger.warn("Local speech handling failed", error);
    });
  }

  /**
   * Transcribes one speech segment, asks the LLM for a reply, records both turns
   * in memory and queues the reply for playback.
   *
   * @param audio Segment samples; segments under 4,000 samples are ignored.
   */
  private async handleSpeechEnd(audio: Float32Array): Promise<void> {
    if (audio.length < 16000 * 0.25) {
      this.options.logger.debug("Ignored short local speech segment", { samples: audio.length });
      return;
    }

    let transcript: string | null = null;
    try {
      transcript = await this.options.stt.transcribePcm16(float32ToPcm16Buffer(audio));
    } catch (error) {
      this.options.logger.warn("Local STT failed", error);
      return;
    }

    const text = transcript?.trim() ?? "";
    if (text.length === 0) {
      this.options.logger.info("Local STT returned empty transcript");
      return;
    }

    const utterance: UserUtterance = {
      speakerId: "local-user",
      speakerName: this.options.config.LOCAL_SPEAKER_NAME,
      text,
    };
    this.options.logger.info("Local transcript", utterance.text);
    if (this.options.config.DEBUG_TEXT_EVENTS) {
      console.log(`\n[you] ${utterance.text}`);
    }

    let reply: string;
    try {
      reply = await this.options.llm.generateReply(this.memory, utterance);
    } catch (error) {
      this.options.logger.warn("Local LLM failed", error);
      // Spoken fallback so the user hears that the request failed.
      reply = "지금은 답변 생성에 실패했습니다. 잠시 후 다시 말씀해 주세요.";
    }

    this.memory.addUserTurn(utterance);
    this.memory.addAssistantTurn(reply);
    this.options.logger.info("Local reply", reply);
    if (this.options.config.DEBUG_TEXT_EVENTS) {
      console.log(`[bot] ${reply}\n`);
    }

    this.queue.push({ text: reply, source: "assistant" });
    await this.drainQueue();
  }

  /**
   * Drops all queued jobs, aborts the job in progress, disposes its prepared
   * audio and kills the active player process.
   *
   * @param reason Short tag logged when there was something to interrupt.
   */
  private interruptPlayback(reason: string): void {
    if (this.queue.length > 0 || this.currentPlayer) {
      this.options.logger.info("Interrupting local playback", reason);
    }

    this.queue.length = 0;
    this.currentAbortController?.abort();
    this.currentAbortController = null;
    this.currentPlayback?.dispose();
    this.currentPlayback = null;

    const player = this.currentPlayer;
    if (player && !player.killed) {
      player.kill("SIGKILL");
    }
    this.currentPlayer = null;
  }

  /**
   * Plays queued jobs one at a time until the queue is empty or the session is
   * destroyed. Re-entrant calls return immediately while a drain is running.
   */
  private async drainQueue(): Promise<void> {
    if (this.draining || this.destroyed) {
      return;
    }

    this.draining = true;
    try {
      while (!this.destroyed) {
        const job = this.queue.shift();
        if (!job) {
          break;
        }
        await this.playJob(job);
      }
    } finally {
      this.draining = false;
    }
  }

  /** Synthesizes and plays a single job; failures are logged, never thrown. */
  private async playJob(job: SpeechJob): Promise<void> {
    const abortController = new AbortController();
    this.currentAbortController = abortController;

    try {
      let playback: PreparedSpeechAudio;
      try {
        playback = await this.options.tts.preparePlayback(job.text, abortController.signal);
      } catch (error) {
        if (!abortController.signal.aborted) {
          this.options.logger.warn("Local TTS synthesis failed", error);
        }
        return;
      }

      // The job may have been interrupted while synthesis was resolving; the abort
      // event has already fired by now, so nothing downstream would stop playback.
      if (abortController.signal.aborted) {
        playback.dispose();
        return;
      }

      this.currentPlayback = playback;
      try {
        await this.playToSink(playback, abortController.signal);
      } catch (error) {
        if (!abortController.signal.aborted) {
          this.options.logger.warn("Local playback failed", error);
        }
      } finally {
        // interruptPlayback() nulls currentPlayback after disposing it, so this
        // never disposes the same audio twice.
        this.currentPlayback?.dispose();
        this.currentPlayback = null;
      }
    } finally {
      if (this.currentAbortController === abortController) {
        this.currentAbortController = null;
      }
    }
  }

  /**
   * Plays prepared audio on the local output and resolves when the player exits.
   * On non-Windows platforms the stream is piped into `pw-play` as raw
   * 48 kHz stereo s16 PCM.
   *
   * @throws Error when the player exits with a non-zero code without being aborted,
   *   or when the source stream fails.
   */
  private async playToSink(playback: PreparedSpeechAudio, signal: AbortSignal): Promise<void> {
    // An 'abort' listener added after the signal fired is never invoked.
    if (signal.aborted) {
      return;
    }

    if (process.platform === "win32") {
      await this.playToWindowsDefaultSink(playback, signal);
      return;
    }

    const args = ["--rate", "48000", "--channels", "2", "--format", "s16", "--raw"];
    if (this.options.config.LOCAL_AUDIO_SINK) {
      args.push("--target", this.options.config.LOCAL_AUDIO_SINK);
    }
    args.push("-");

    const player = spawn("pw-play", args, {
      stdio: ["pipe", "ignore", "pipe"],
    });
    this.currentPlayer = player;

    player.stderr.on("data", (chunk: Buffer) => {
      const text = chunk.toString().trim();
      if (text.length > 0) {
        this.options.logger.debug("[pw-play]", text);
      }
    });
    // Writing to a player that has already died raises EPIPE on stdin; without a
    // listener that becomes an unhandled 'error' event.
    player.stdin.on("error", (error: Error) => {
      this.options.logger.debug("[pw-play] stdin", error.message);
    });

    // pipe() does not forward source errors and leaves stdin open, so a failing
    // stream would leave pw-play waiting forever. Stop the player instead.
    const failure: { error: Error | null } = { error: null };
    playback.stream.once("error", (error: Error) => {
      failure.error = error;
      if (!player.killed) {
        player.kill("SIGKILL");
      }
    });

    const onAbort = (): void => {
      playback.stream.destroy();
      if (!player.killed) {
        player.kill("SIGKILL");
      }
    };
    signal.addEventListener("abort", onAbort, { once: true });

    playback.stream.pipe(player.stdin);

    try {
      const [code, playSignal] = (await once(player, "exit")) as [number | null, NodeJS.Signals | null];
      if (signal.aborted) {
        return;
      }
      if (failure.error) {
        throw failure.error;
      }
      if (code !== 0) {
        throw new Error(`pw-play exited with code=${code ?? "null"} signal=${playSignal ?? "null"}`);
      }
    } finally {
      signal.removeEventListener("abort", onAbort);
      if (this.currentPlayer === player) {
        this.currentPlayer = null;
      }
    }
  }

  /**
   * Windows playback. Plays the prepared source file directly when one exists;
   * otherwise buffers the stream, wraps it in a WAV header, writes it to a
   * temporary file and plays that file. The temporary file is always removed.
   */
  private async playToWindowsDefaultSink(playback: PreparedSpeechAudio, signal: AbortSignal): Promise<void> {
    if (signal.aborted) {
      return;
    }

    if (playback.sourceFilePath) {
      await this.playWindowsWaveFile(playback.sourceFilePath, signal);
      return;
    }

    const chunks: Buffer[] = [];
    await new Promise<void>((resolve, reject) => {
      playback.stream.on("data", (chunk: Buffer) => {
        chunks.push(Buffer.from(chunk));
      });
      playback.stream.once("end", resolve);
      playback.stream.once("error", reject);
      signal.addEventListener(
        "abort",
        () => {
          playback.stream.destroy();
          reject(new Error("playback aborted"));
        },
        { once: true },
      );
    }).catch((error) => {
      if (signal.aborted) {
        return;
      }
      throw error;
    });

    if (signal.aborted) {
      return;
    }

    // NOTE(review): assumes the stream carries 48 kHz stereo 16-bit PCM, the same
    // format pw-play is given on other platforms; confirm against the TTS service.
    const wav = createWaveFileBuffer(Buffer.concat(chunks), 48000, 2, 16);
    // pid + time + random suffix keeps concurrent sessions from sharing a file.
    const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`;
    const tempPath = path.join(os.tmpdir(), `realtime-voice-bot-${uniqueSuffix}.wav`);

    try {
      await fs.writeFile(tempPath, wav);
      await this.playWindowsWaveFile(tempPath, signal);
    } finally {
      // Clean up even when playback throws, so failed runs do not leak temp files.
      await fs.unlink(tempPath).catch(() => null);
    }
  }

  /**
   * Plays a WAV file synchronously through PowerShell's `System.Media.SoundPlayer`
   * and resolves when the PowerShell process exits.
   *
   * @throws Error when PowerShell exits with a non-zero code without being aborted.
   */
  private async playWindowsWaveFile(filePath: string, signal: AbortSignal): Promise<void> {
    if (signal.aborted) {
      return;
    }

    // Single quotes are doubled so the path is safe inside a PowerShell single-quoted string.
    const psScript = [
      "Add-Type -AssemblyName System;",
      `$player = New-Object System.Media.SoundPlayer('${filePath.replace(/'/g, "''")}');`,
      "$player.PlaySync();",
    ].join(" ");

    const player = spawn("powershell", ["-NoProfile", "-Command", psScript], {
      stdio: ["ignore", "ignore", "pipe"],
    });
    this.currentPlayer = player;

    player.stderr.on("data", (chunk: Buffer) => {
      const text = chunk.toString().trim();
      if (text.length > 0) {
        this.options.logger.debug("[powershell-player]", text);
      }
    });

    const onAbort = (): void => {
      if (!player.killed) {
        player.kill("SIGKILL");
      }
    };
    signal.addEventListener("abort", onAbort, { once: true });

    try {
      const [code, playSignal] = (await once(player, "exit")) as [number | null, NodeJS.Signals | null];
      if (signal.aborted) {
        return;
      }
      if (code !== 0) {
        throw new Error(`powershell playback exited with code=${code ?? "null"} signal=${playSignal ?? "null"}`);
      }
    } finally {
      signal.removeEventListener("abort", onAbort);
      if (this.currentPlayer === player) {
        this.currentPlayer = null;
      }
    }
  }

  /** Resolves the ffmpeg executable path via `requireFfmpegPath`. */
  private getFfmpegPath(): string {
    return requireFfmpegPath();
  }

  /**
   * Watchdog tick: warns once when no PCM has arrived, and once when PCM arrives
   * but never reaches the silence threshold, 6 seconds or more after the recorder started.
   */
  private reportInputHealth(): void {
    if (this.destroyed) {
      return;
    }

    const graceElapsed = Date.now() - this.recorderStartedAt >= 6_000;
    const sourceLabel = this.options.config.LOCAL_AUDIO_SOURCE ?? "default";

    if (!this.warnedNoPcm && this.lastPcmChunkAt === 0 && graceElapsed) {
      this.warnedNoPcm = true;
      this.options.logger.warn(
        [
          "입력 장치에서 PCM 데이터가 들어오지 않습니다.",
          `현재 source: ${sourceLabel}`,
          "Windows에서는 마이크 입력이 아니라 SPDIF/ADAT 같은 디지털 입력을 고르면 반응이 없습니다.",
          "`bun run devices`로 실제 마이크 이름을 다시 고르세요.",
        ].join("\n"),
      );
      return;
    }

    if (!this.warnedSilent && this.lastPcmChunkAt > 0 && this.lastNonSilentAudioAt === 0 && graceElapsed) {
      this.warnedSilent = true;
      this.options.logger.warn(
        [
          "입력 장치에서는 데이터가 오지만 말소리 수준으로 올라오지 않습니다.",
          `현재 source: ${sourceLabel}`,
          "잘못된 입력 채널이거나, 마이크가 그 장치로 라우팅되지 않은 상태일 가능성이 큽니다.",
          "RME Babyface Pro라면 SPDIF/ADAT 대신 아날로그 마이크 입력 채널 이름을 선택해야 합니다.",
        ].join("\n"),
      );
    }
  }

  /** Returns the configured sink name, or a platform-specific default label. */
  private describeSink(): string {
    const fallback = process.platform === "win32" ? "system-default" : "default";
    return this.options.config.LOCAL_AUDIO_SINK ?? fallback;
  }

  /**
   * Appends `samples` to `target` in place, then drops the oldest entries so that
   * `target` holds at most `cap` elements.
   */
  private appendWithCap(target: number[], samples: Int16Array, cap: number): void {
    target.push(...samples);
    const overflow = target.length - cap;
    if (overflow > 0) {
      target.splice(0, overflow);
    }
  }
}
|
|
|
|
/**
 * Wraps raw PCM samples in a canonical 44-byte RIFF/WAVE header (format tag 1, PCM).
 *
 * @param pcm Raw little-endian PCM sample data.
 * @param sampleRate Sample rate in Hz.
 * @param channels Number of interleaved channels.
 * @param bitsPerSample Bit depth of one sample of one channel.
 * @returns A buffer holding the header, the PCM data and, when the data length is
 *   odd, one trailing zero pad byte.
 */
function createWaveFileBuffer(
  pcm: Buffer,
  sampleRate: number,
  channels: number,
  bitsPerSample: number,
): Buffer {
  const headerSize = 44;
  // Samples occupy whole bytes; rounding up keeps the header fields integral for
  // bit depths that are not a multiple of 8.
  const bytesPerSample = Math.ceil(bitsPerSample / 8);
  const blockAlign = channels * bytesPerSample;
  const byteRate = sampleRate * blockAlign;
  // RIFF chunks are word-aligned: odd-sized data is followed by one pad byte that
  // counts towards the RIFF size but not towards the data chunk size.
  const padLength = pcm.length % 2;

  const header = Buffer.alloc(headerSize);
  header.write("RIFF", 0, 4, "ascii");
  header.writeUInt32LE(headerSize - 8 + pcm.length + padLength, 4);
  header.write("WAVE", 8, 4, "ascii");
  header.write("fmt ", 12, 4, "ascii");
  header.writeUInt32LE(16, 16); // size of the fmt chunk body
  header.writeUInt16LE(1, 20); // format tag 1 = uncompressed PCM
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(bitsPerSample, 34);
  header.write("data", 36, 4, "ascii");
  header.writeUInt32LE(pcm.length, 40);

  return padLength === 0
    ? Buffer.concat([header, pcm])
    : Buffer.concat([header, pcm, Buffer.alloc(padLength)]);
}
|