fix: support blank env values and windows local audio
This commit is contained in:
@@ -1,7 +1,11 @@
|
|||||||
import { spawn, type ChildProcessByStdio } from "node:child_process";
|
import { spawn, type ChildProcess, type ChildProcessByStdio } from "node:child_process";
|
||||||
import { once } from "node:events";
|
import { once } from "node:events";
|
||||||
|
import { promises as fs } from "node:fs";
|
||||||
|
import os from "node:os";
|
||||||
|
import path from "node:path";
|
||||||
import type { Readable, Writable } from "node:stream";
|
import type { Readable, Writable } from "node:stream";
|
||||||
|
|
||||||
|
import ffmpegStatic from "ffmpeg-static";
|
||||||
import { RealTimeVAD } from "avr-vad";
|
import { RealTimeVAD } from "avr-vad";
|
||||||
|
|
||||||
import type { AssistantRuntimeConfig } from "../config.js";
|
import type { AssistantRuntimeConfig } from "../config.js";
|
||||||
@@ -32,7 +36,7 @@ export class LocalVoiceSession {
|
|||||||
|
|
||||||
private vad: RealTimeVAD | null = null;
|
private vad: RealTimeVAD | null = null;
|
||||||
private recorder: ChildProcessByStdio<null, Readable, Readable> | null = null;
|
private recorder: ChildProcessByStdio<null, Readable, Readable> | null = null;
|
||||||
private currentPlayer: ChildProcessByStdio<Writable, null, Readable> | null = null;
|
private currentPlayer: ChildProcess | null = null;
|
||||||
private currentAbortController: AbortController | null = null;
|
private currentAbortController: AbortController | null = null;
|
||||||
private currentPlayback: PreparedSpeechAudio | null = null;
|
private currentPlayback: PreparedSpeechAudio | null = null;
|
||||||
private processing = Promise.resolve();
|
private processing = Promise.resolve();
|
||||||
@@ -114,14 +118,19 @@ export class LocalVoiceSession {
|
|||||||
statusSummary(): string {
|
statusSummary(): string {
|
||||||
return [
|
return [
|
||||||
"모드: local",
|
"모드: local",
|
||||||
|
`플랫폼: ${process.platform}`,
|
||||||
`입력 source: ${this.options.config.LOCAL_AUDIO_SOURCE ?? "default"}`,
|
`입력 source: ${this.options.config.LOCAL_AUDIO_SOURCE ?? "default"}`,
|
||||||
`출력 sink: ${this.options.config.LOCAL_AUDIO_SINK ?? "default"}`,
|
`출력 sink: ${this.describeSink()}`,
|
||||||
`대기열: ${this.queue.length}`,
|
`대기열: ${this.queue.length}`,
|
||||||
`최근 대화 턴: ${this.memory.recentTurns().length}`,
|
`최근 대화 턴: ${this.memory.recentTurns().length}`,
|
||||||
].join("\n");
|
].join("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
private spawnRecorder(): ChildProcessByStdio<null, Readable, Readable> {
|
private spawnRecorder(): ChildProcessByStdio<null, Readable, Readable> {
|
||||||
|
if (process.platform === "win32") {
|
||||||
|
return this.spawnWindowsRecorder();
|
||||||
|
}
|
||||||
|
|
||||||
const args = [
|
const args = [
|
||||||
"--rate",
|
"--rate",
|
||||||
"16000",
|
"16000",
|
||||||
@@ -147,6 +156,40 @@ export class LocalVoiceSession {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Starts the Windows microphone capture process.
 *
 * Runs the bundled ffmpeg with a DirectShow (`dshow`) audio input named by
 * `LOCAL_AUDIO_SOURCE` and asks it to emit mono, 16 kHz, signed 16-bit
 * little-endian samples on stdout.
 *
 * @returns The spawned ffmpeg process; stdin is ignored, stdout and stderr are piped.
 * @throws Error when the ffmpeg path cannot be resolved or `LOCAL_AUDIO_SOURCE` is not set.
 */
private spawnWindowsRecorder(): ChildProcessByStdio<null, Readable, Readable> {
  // Resolve the binary first so a missing ffmpeg is reported before a missing source.
  const ffmpegBinary = this.getFfmpegPath();
  const { config, logger } = this.options;
  const deviceName = config.LOCAL_AUDIO_SOURCE;

  if (!deviceName) {
    throw new Error("Windows 로컬 모드는 LOCAL_AUDIO_SOURCE 설정이 필요합니다. `bun run audio:devices` 로 이름을 확인해 주세요.");
  }

  const quietFlags = ["-hide_banner", "-loglevel", "warning"];
  const captureInput = ["-f", "dshow", "-i", `audio=${deviceName}`];
  // Mono, 16 kHz, raw s16le written to stdout.
  const pcmOutput = ["-ac", "1", "-ar", "16000", "-f", "s16le", "pipe:1"];
  const ffmpegArgs = [...quietFlags, ...captureInput, ...pcmOutput];

  logger.info("Starting local recorder", {
    source: deviceName,
    backend: "ffmpeg-dshow",
  });

  return spawn(ffmpegBinary, ffmpegArgs, {
    stdio: ["ignore", "pipe", "pipe"],
  });
}
|
||||||
|
|
||||||
private pushPcm16Chunk(chunk: Buffer): void {
|
private pushPcm16Chunk(chunk: Buffer): void {
|
||||||
if (this.destroyed || !this.vad) {
|
if (this.destroyed || !this.vad) {
|
||||||
return;
|
return;
|
||||||
@@ -284,6 +327,11 @@ export class LocalVoiceSession {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private async playToSink(playback: PreparedSpeechAudio, signal: AbortSignal): Promise<void> {
|
private async playToSink(playback: PreparedSpeechAudio, signal: AbortSignal): Promise<void> {
|
||||||
|
if (process.platform === "win32") {
|
||||||
|
await this.playToWindowsDefaultSink(playback, signal);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const args = [
|
const args = [
|
||||||
"--rate",
|
"--rate",
|
||||||
"48000",
|
"48000",
|
||||||
@@ -336,4 +384,120 @@ export class LocalVoiceSession {
|
|||||||
throw new Error(`pw-play exited with code=${code ?? "null"} signal=${playSignal ?? "null"}`);
|
throw new Error(`pw-play exited with code=${code ?? "null"} signal=${playSignal ?? "null"}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Plays prepared speech audio on Windows through the system default output device.
 *
 * The whole `playback.stream` is buffered in memory, wrapped in a WAV header that
 * declares 48 kHz / 2-channel / 16-bit PCM, written to a temporary file, and played
 * synchronously by a PowerShell `System.Media.SoundPlayer`. An abort at any stage
 * stops the work and resolves without throwing.
 *
 * The temporary file, the abort listeners, and `currentPlayer` are always released,
 * including when PowerShell fails to start or exits with an error.
 *
 * @param playback Prepared audio; its `stream` is read to the end and treated as raw PCM.
 * @param signal Abort signal; when it fires the stream is destroyed or the player is killed.
 * @throws The stream's error when it fails without an abort, the spawn error when
 *   PowerShell cannot be started, or an `Error` when PowerShell exits with a non-zero code.
 */
private async playToWindowsDefaultSink(playback: PreparedSpeechAudio, signal: AbortSignal): Promise<void> {
  // An already-aborted signal never emits "abort" again, so handle it up front
  // instead of buffering audio that will be thrown away.
  if (signal.aborted) {
    playback.stream.destroy();
    return;
  }

  const chunks: Buffer[] = [];

  try {
    await new Promise<void>((resolve, reject) => {
      const onAbort = (): void => {
        playback.stream.destroy();
        reject(new Error("playback aborted"));
      };
      // Detach once the stream settles so the listener does not outlive this call.
      const detach = (): void => signal.removeEventListener("abort", onAbort);

      playback.stream.on("data", (chunk: Buffer) => {
        chunks.push(Buffer.from(chunk));
      });
      playback.stream.once("end", () => {
        detach();
        resolve();
      });
      playback.stream.once("error", (error: unknown) => {
        detach();
        reject(error);
      });
      signal.addEventListener("abort", onAbort, { once: true });
    });
  } catch (error: unknown) {
    // An abort is an expected way to stop; anything else is a real stream failure.
    if (signal.aborted) {
      return;
    }
    throw error;
  }

  if (signal.aborted) {
    return;
  }

  const wav = createWaveFileBuffer(Buffer.concat(chunks), 48000, 2, 16);
  // pid + timestamp + random suffix: a timestamp alone can repeat within one millisecond
  // or across processes sharing the same temp directory.
  const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`;
  const tempPath = path.join(os.tmpdir(), `realtime-voice-bot-${uniqueSuffix}.wav`);

  try {
    await fs.writeFile(tempPath, wav);

    // The abort may have arrived while the file was being written.
    if (signal.aborted) {
      return;
    }

    const psScript = [
      "Add-Type -AssemblyName System;",
      // Single quotes are doubled because the path sits inside a PowerShell single-quoted string.
      `$player = New-Object System.Media.SoundPlayer('${tempPath.replace(/'/g, "''")}');`,
      "$player.PlaySync();",
    ].join(" ");

    const player = spawn("powershell", ["-NoProfile", "-Command", psScript], {
      stdio: ["ignore", "ignore", "pipe"],
    });
    this.currentPlayer = player;

    player.stderr.on("data", (chunk: Buffer) => {
      const text = chunk.toString().trim();
      if (text.length > 0) {
        this.options.logger.debug("[powershell-player]", text);
      }
    });

    const killPlayer = (): void => {
      if (!player.killed) {
        player.kill("SIGKILL");
      }
    };
    signal.addEventListener("abort", killPlayer, { once: true });

    try {
      // `once` rejects if the child emits "error" (for example when powershell is missing),
      // so the cleanup below must not depend on reaching the next statement.
      const [code, playSignal] = (await once(player, "exit")) as [number | null, NodeJS.Signals | null];

      if (signal.aborted) {
        return;
      }

      if (code !== 0) {
        throw new Error(`powershell playback exited with code=${code ?? "null"} signal=${playSignal ?? "null"}`);
      }
    } finally {
      signal.removeEventListener("abort", killPlayer);
      this.currentPlayer = null;
    }
  } finally {
    // Best-effort cleanup: the file may not exist if the write itself failed.
    await fs.unlink(tempPath).catch(() => null);
  }
}
|
||||||
|
|
||||||
|
/**
 * Returns the ffmpeg executable path exported by the `ffmpeg-static` package.
 *
 * @returns The non-empty path string.
 * @throws Error when the package exported no usable path (null or empty).
 */
private getFfmpegPath(): string {
  // The package's default export is treated here as `string | null`.
  const bundledBinary = ffmpegStatic as unknown as string | null;

  if (bundledBinary) {
    return bundledBinary;
  }

  throw new Error("ffmpeg-static 경로를 찾지 못했습니다.");
}
|
||||||
|
|
||||||
|
/**
 * Builds the output-sink label shown in the status summary.
 *
 * @returns The configured `LOCAL_AUDIO_SINK` when set; otherwise `"system-default"`
 *   on Windows and `"default"` on every other platform.
 */
private describeSink(): string {
  // NOTE(review): the Windows playback path visible in this file never reads
  // LOCAL_AUDIO_SINK — confirm that reporting a configured value here is intended.
  const fallbackLabel = process.platform === "win32" ? "system-default" : "default";

  return this.options.config.LOCAL_AUDIO_SINK ?? fallbackLabel;
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Wraps raw PCM samples in a 44-byte RIFF/WAVE header.
 *
 * @param pcm Raw sample bytes; copied unchanged after the header.
 * @param sampleRate Samples per second written to the `fmt ` chunk.
 * @param channels Channel count written to the `fmt ` chunk.
 * @param bitsPerSample Bit depth of one sample; also used to derive byte rate and block align.
 * @returns A new buffer holding the header followed by `pcm`.
 */
function createWaveFileBuffer(
  pcm: Buffer,
  sampleRate: number,
  channels: number,
  bitsPerSample: number,
): Buffer {
  const bytesPerSample = bitsPerSample / 8;
  const bytesPerSecond = sampleRate * channels * bytesPerSample;
  const bytesPerFrame = channels * bytesPerSample;

  const riffHeader = Buffer.alloc(44);
  // Fields are contiguous, so a moving cursor replaces hand-written offsets.
  let cursor = 0;
  const putTag = (tag: string): void => {
    riffHeader.write(tag, cursor, 4, "ascii");
    cursor += 4;
  };
  const putU16 = (value: number): void => {
    cursor = riffHeader.writeUInt16LE(value, cursor);
  };
  const putU32 = (value: number): void => {
    cursor = riffHeader.writeUInt32LE(value, cursor);
  };

  putTag("RIFF");
  putU32(36 + pcm.length); // size of everything after this field
  putTag("WAVE");

  putTag("fmt ");
  putU32(16); // size of the fmt chunk body
  putU16(1); // audio format tag 1
  putU16(channels);
  putU32(sampleRate);
  putU32(bytesPerSecond);
  putU16(bytesPerFrame);
  putU16(bitsPerSample);

  putTag("data");
  putU32(pcm.length);

  return Buffer.concat([riffHeader, pcm]);
}
|
||||||
|
|||||||
@@ -3,20 +3,28 @@ import { z } from "zod";
|
|||||||
|
|
||||||
loadDotenv();
|
loadDotenv();
|
||||||
|
|
||||||
|
// Schema for optional string settings: string input is trimmed first, and a value that is
// blank after trimming is turned into `undefined` so it validates as "not set" instead of
// failing the `min(1)` check. Non-string input is passed through untouched.
const emptyToUndefined = z.preprocess((value) => {
  if (typeof value === "string") {
    const normalized = value.trim();
    return normalized.length > 0 ? normalized : undefined;
  }

  return value;
}, z.string().min(1).optional());
|
||||||
|
|
||||||
const envSchema = z.object({
|
const envSchema = z.object({
|
||||||
DISCORD_BOT_TOKEN: z.string().min(1).optional(),
|
DISCORD_BOT_TOKEN: emptyToUndefined,
|
||||||
DISCORD_APPLICATION_ID: z.string().min(1).optional(),
|
DISCORD_APPLICATION_ID: emptyToUndefined,
|
||||||
DISCORD_COMMAND_GUILD_ID: z.string().min(1).optional(),
|
DISCORD_COMMAND_GUILD_ID: emptyToUndefined,
|
||||||
OPENAI_API_KEY: z.string().min(1).optional(),
|
OPENAI_API_KEY: emptyToUndefined,
|
||||||
OPENAI_MODEL: z.string().min(1).default("gpt-5.4-mini"),
|
OPENAI_MODEL: z.string().min(1).default("gpt-5.4-mini"),
|
||||||
ELEVENLABS_API_KEY: z.string().min(1).optional(),
|
ELEVENLABS_API_KEY: emptyToUndefined,
|
||||||
ELEVENLABS_VOICE_ID: z.string().min(1).optional(),
|
ELEVENLABS_VOICE_ID: emptyToUndefined,
|
||||||
ELEVENLABS_STT_MODEL: z.string().min(1).default("scribe_v2_realtime"),
|
ELEVENLABS_STT_MODEL: z.string().min(1).default("scribe_v2_realtime"),
|
||||||
ELEVENLABS_TTS_MODEL: z.string().min(1).default("eleven_flash_v2_5"),
|
ELEVENLABS_TTS_MODEL: z.string().min(1).default("eleven_flash_v2_5"),
|
||||||
BOT_DEFAULT_LANGUAGE: z.string().min(2).default("ko"),
|
BOT_DEFAULT_LANGUAGE: z.string().min(2).default("ko"),
|
||||||
MAX_CONVERSATION_TURNS: z.coerce.number().int().min(4).max(30).default(12),
|
MAX_CONVERSATION_TURNS: z.coerce.number().int().min(4).max(30).default(12),
|
||||||
LOCAL_AUDIO_SOURCE: z.string().min(1).optional(),
|
LOCAL_AUDIO_SOURCE: emptyToUndefined,
|
||||||
LOCAL_AUDIO_SINK: z.string().min(1).optional(),
|
LOCAL_AUDIO_SINK: emptyToUndefined,
|
||||||
LOCAL_SPEAKER_NAME: z.string().min(1).default("local-user"),
|
LOCAL_SPEAKER_NAME: z.string().min(1).default("local-user"),
|
||||||
DEBUG_TEXT_EVENTS: z
|
DEBUG_TEXT_EVENTS: z
|
||||||
.string()
|
.string()
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
import { spawn } from "node:child_process";
|
import { spawn } from "node:child_process";
|
||||||
import process from "node:process";
|
import process from "node:process";
|
||||||
|
|
||||||
|
import ffmpegStatic from "ffmpeg-static";
|
||||||
|
|
||||||
import type { AssistantRuntimeConfig } from "./config.js";
|
import type { AssistantRuntimeConfig } from "./config.js";
|
||||||
import { Logger } from "./logger.js";
|
import { Logger } from "./logger.js";
|
||||||
import { LocalVoiceSession } from "./audio/local-voice-session.js";
|
import { LocalVoiceSession } from "./audio/local-voice-session.js";
|
||||||
@@ -8,7 +10,38 @@ import { ElevenLabsSttService } from "./services/elevenlabs-stt.js";
|
|||||||
import { ElevenLabsTtsService } from "./services/elevenlabs-tts.js";
|
import { ElevenLabsTtsService } from "./services/elevenlabs-tts.js";
|
||||||
import { OpenAiLlmService } from "./services/openai-llm.js";
|
import { OpenAiLlmService } from "./services/openai-llm.js";
|
||||||
|
|
||||||
|
/**
 * Returns the ffmpeg executable path exported by the `ffmpeg-static` package.
 *
 * @returns The non-empty path string.
 * @throws Error when the package exported no usable path (null or empty).
 */
function resolveFfmpegPath(): string {
  // The package's default export is treated here as `string | null`.
  const bundledBinary = ffmpegStatic as unknown as string | null;

  if (bundledBinary) {
    return bundledBinary;
  }

  throw new Error("ffmpeg-static 경로를 찾지 못했습니다.");
}
|
||||||
|
|
||||||
export async function printLocalAudioDevices(): Promise<void> {
|
export async function printLocalAudioDevices(): Promise<void> {
|
||||||
|
if (process.platform === "win32") {
|
||||||
|
const ffmpegPath = resolveFfmpegPath();
|
||||||
|
|
||||||
|
console.log("\n=== ffmpeg dshow audio devices ===");
|
||||||
|
await new Promise<void>((resolve) => {
|
||||||
|
const child = spawn(
|
||||||
|
ffmpegPath,
|
||||||
|
["-hide_banner", "-list_devices", "true", "-f", "dshow", "-i", "dummy"],
|
||||||
|
{
|
||||||
|
stdio: ["ignore", "ignore", "inherit"],
|
||||||
|
},
|
||||||
|
);
|
||||||
|
child.on("exit", () => resolve());
|
||||||
|
child.on("error", (error) => {
|
||||||
|
throw error;
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log("\n위 목록의 오디오 장치 이름을 `LOCAL_AUDIO_SOURCE` 에 그대로 넣으면 됩니다.");
|
||||||
|
console.log("Windows 로컬 모드는 현재 출력 장치 직접 선택 대신 시스템 기본 출력 장치를 사용합니다.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const runs = [
|
const runs = [
|
||||||
{
|
{
|
||||||
label: "wpctl status",
|
label: "wpctl status",
|
||||||
@@ -52,6 +85,9 @@ export async function runLocalAssistant(config: AssistantRuntimeConfig, logger:
|
|||||||
|
|
||||||
console.log(session.statusSummary());
|
console.log(session.statusSummary());
|
||||||
console.log("로컬 음성 테스트를 시작합니다. Ctrl+C 로 종료합니다.");
|
console.log("로컬 음성 테스트를 시작합니다. Ctrl+C 로 종료합니다.");
|
||||||
|
if (process.platform === "win32") {
|
||||||
|
console.log("Windows 로컬 모드는 현재 시스템 기본 출력 장치로 재생됩니다.");
|
||||||
|
}
|
||||||
if (config.DEBUG_TEXT_EVENTS) {
|
if (config.DEBUG_TEXT_EVENTS) {
|
||||||
console.log("텍스트 로그 출력이 켜져 있습니다.");
|
console.log("텍스트 로그 출력이 켜져 있습니다.");
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user