Add Windows TTS wave dump mode

This commit is contained in:
2026-05-01 03:34:43 +09:00
parent ac88b8c50a
commit 0a88e8dab1
5 changed files with 90 additions and 48 deletions

View File

@@ -71,6 +71,12 @@ TTS만 단독으로 확인:
bun run tts:test -- "안녕하세요. 출력 장치 테스트입니다."
```
TTS WAV 파일만 생성해서 확인:
```bash
bun run tts:dump -- "안녕하세요. WAV 파일 테스트입니다."
```
Discord 모드:
```bash

View File

@@ -9,6 +9,7 @@
"start:discord": "bun src/index.ts discord",
"start:local": "bun src/index.ts local",
"tts:test": "bun src/index.ts local-say",
"tts:dump": "bun src/index.ts local-say-dump",
"setup:local-ai": "bun src/setup-local-ai.ts",
"devices": "bun src/index.ts local-devices",
"audio:devices": "bun src/index.ts local-devices",

View File

@@ -3,7 +3,7 @@ import process from "node:process";
import { loadConfig, requireAssistantRuntimeConfig, requireDiscordRuntimeConfig } from "./config.js";
import { runDiscordBot } from "./discord-main.js";
import { Logger } from "./logger.js";
import { printLocalAudioDevices, runLocalAssistant, runLocalTtsSmokeTest } from "./local-main.js";
import { dumpLocalTtsWave, printLocalAudioDevices, runLocalAssistant, runLocalTtsSmokeTest } from "./local-main.js";
const mode = process.argv[2] ?? "discord";
const config = loadConfig();
@@ -25,8 +25,13 @@ async function main(): Promise<void> {
await runLocalTtsSmokeTest(requireAssistantRuntimeConfig(config), logger, text);
return;
}
case "local-say-dump": {
const text = process.argv.slice(3).join(" ").trim() || "안녕하세요. TTS WAV 파일 테스트입니다.";
await dumpLocalTtsWave(requireAssistantRuntimeConfig(config), logger, text);
return;
}
default:
throw new Error(`알 수 없는 실행 모드입니다: ${mode}. 사용 가능: discord, local, local-devices, local-say`);
throw new Error(`알 수 없는 실행 모드입니다: ${mode}. 사용 가능: discord, local, local-devices, local-say, local-say-dump`);
}
}

View File

@@ -1,4 +1,6 @@
import { spawn } from "node:child_process";
import { mkdir } from "node:fs/promises";
import path from "node:path";
import process from "node:process";
import type { AssistantRuntimeConfig } from "./config.js";
@@ -10,7 +12,7 @@ import { LocalFasterWhisperSttService } from "./services/local-stt.js";
import { LocalKokoroTtsService } from "./services/local-tts.js";
import { OllamaLlmService } from "./services/ollama-llm.js";
import type { SttService } from "./services/stt.js";
import { WindowsSystemTtsService } from "./services/windows-system-tts.js";
import { synthesizeWindowsSpeechToWaveFile, WindowsSystemTtsService } from "./services/windows-system-tts.js";
export async function printLocalAudioDevices(): Promise<void> {
if (process.platform === "win32") {
@@ -166,3 +168,22 @@ export async function runLocalTtsSmokeTest(
await Promise.allSettled([session.destroy(), tts.destroy?.()]);
}
}
/**
 * Synthesizes `text` to a WAV file on disk without playing it, so that speech
 * synthesis can be checked separately from the realtime playback path.
 *
 * @param config - Runtime configuration; only `LOCAL_TTS_SPEED` is read here.
 * @param _logger - Accepted for signature parity with the other local entry points; not used.
 * @param text - The sentence to synthesize.
 * @param outputPath - Destination file. Blank or omitted falls back to `tts-test.wav`;
 *   the value is resolved with `path.resolve`, and missing parent directories are created.
 * @throws Error when the current platform is not `win32`.
 */
export async function dumpLocalTtsWave(
  config: AssistantRuntimeConfig,
  _logger: Logger,
  text: string,
  outputPath?: string,
): Promise<void> {
  const isWindows = process.platform === "win32";
  if (!isWindows) {
    throw new Error("현재 TTS WAV 덤프 모드는 Windows에서만 구현되어 있습니다.");
  }

  // An empty or whitespace-only path counts as "not provided".
  const requestedPath = outputPath?.trim();
  const wavePath = path.resolve(requestedPath ? requestedPath : "tts-test.wav");
  const parentDirectory = path.dirname(wavePath);

  await mkdir(parentDirectory, { recursive: true });
  await synthesizeWindowsSpeechToWaveFile(text, config.LOCAL_TTS_SPEED, wavePath);

  const reportLines = [
    "TTS WAV 파일 생성 완료",
    `출력 파일: ${wavePath}`,
    "이 파일이 재생되면 TTS 합성은 정상이고, 실시간 재생 경로만 따로 보면 됩니다.",
  ];
  for (const line of reportLines) {
    console.log(line);
  }
}

View File

@@ -18,6 +18,59 @@ function toSpeechRate(speed: number): number {
return Math.max(-10, Math.min(10, mapped));
}
/**
 * Renders `text` to a WAV file by driving the .NET `System.Speech` synthesizer
 * through a spawned `powershell` process.
 *
 * The script selects the first installed voice whose culture name matches `ko*`
 * when one exists (best effort: a failing selection keeps the default voice),
 * applies the rate derived from `speed` via `toSpeechRate`, and writes the
 * audio to `outputPath`.
 *
 * @param text - The sentence to synthesize.
 * @param speed - Speed multiplier, converted to a synthesizer rate by `toSpeechRate`.
 * @param outputPath - Destination WAV file path handed to `SetOutputToWaveFile`.
 * @param signal - Optional abort signal; aborting kills the PowerShell process.
 * @throws Error with message `tts aborted` when `signal` is (or becomes) aborted.
 * @throws Error carrying PowerShell's stderr (or the exit code) when synthesis fails.
 */
export async function synthesizeWindowsSpeechToWaveFile(
  text: string,
  speed: number,
  outputPath: string,
  signal?: AbortSignal,
): Promise<void> {
  // An already-aborted signal never fires "abort" again, so bail out before
  // spawning a process that nothing would be able to cancel.
  if (signal?.aborted) {
    throw new Error("tts aborted");
  }

  const rate = toSpeechRate(speed);
  const script = [
    // Without this, a throwing .NET call only terminates its own statement: the
    // script would continue to Dispose() and exit with code 0, hiding the failure.
    "$ErrorActionPreference = 'Stop';",
    "Add-Type -AssemblyName System.Speech;",
    "$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer;",
    "try {",
    "$koVoice = $synth.GetInstalledVoices() | Where-Object { $_.VoiceInfo.Culture.Name -like 'ko*' } | Select-Object -First 1;",
    // Voice selection stays best effort: fall back to the default voice on failure.
    "if ($koVoice) { try { $synth.SelectVoice($koVoice.VoiceInfo.Name) } catch { } }",
    `$synth.Rate = ${rate};`,
    `$synth.SetOutputToWaveFile('${escapePowerShellSingleQuoted(outputPath)}');`,
    `$synth.Speak('${escapePowerShellSingleQuoted(text)}');`,
    // Always release the synthesizer (and its file handle), even when Speak throws.
    "} finally { $synth.Dispose() }",
  ].join(" ");

  await new Promise<void>((resolve, reject) => {
    const child = spawn("powershell", ["-NoProfile", "-Command", script], {
      stdio: ["ignore", "ignore", "pipe"],
    });

    // Keep raw chunks and decode once, so a multi-byte character split across
    // two chunks is not corrupted.
    const stderrChunks: Buffer[] = [];
    child.stderr.on("data", (chunk: Buffer) => {
      stderrChunks.push(chunk);
    });

    const onAbort = (): void => {
      if (!child.killed) {
        child.kill("SIGKILL");
      }
    };
    signal?.addEventListener("abort", onAbort, { once: true });

    let settled = false;
    const settle = (error?: Error): void => {
      if (settled) {
        return;
      }
      settled = true;
      // Detach from the signal so long-lived signals do not accumulate listeners.
      signal?.removeEventListener("abort", onAbort);
      if (error) {
        reject(error);
      } else {
        resolve();
      }
    };

    child.on("error", (error) => {
      settle(error);
    });

    // "close" fires after the stdio streams have ended, so stderr is complete here;
    // "exit" may fire while stderr data is still pending.
    child.on("close", (code) => {
      if (signal?.aborted) {
        settle(new Error("tts aborted"));
        return;
      }
      if (code === 0) {
        settle();
        return;
      }
      const stderr = Buffer.concat(stderrChunks).toString().trim();
      settle(new Error(stderr || `powershell tts exited with code ${code ?? "null"}`));
    });
  });
}
export class WindowsSystemTtsService implements TtsService {
constructor(private readonly speed: number) {
const resolvedFfmpegPath = resolveFfmpegPath();
@@ -32,51 +85,7 @@ export class WindowsSystemTtsService implements TtsService {
async preparePlayback(text: string, signal?: AbortSignal): Promise<PreparedSpeechAudio> {
const tempPath = path.join(os.tmpdir(), `realtime-voice-bot-tts-${Date.now()}.wav`);
const rate = toSpeechRate(this.speed);
const script = [
"Add-Type -AssemblyName System.Speech;",
"$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer;",
"$koVoice = $synth.GetInstalledVoices() | Where-Object { $_.VoiceInfo.Culture.Name -like 'ko*' } | Select-Object -First 1;",
"if ($koVoice) { $synth.SelectVoice($koVoice.VoiceInfo.Name) }",
`$synth.Rate = ${rate};`,
`$synth.SetOutputToWaveFile('${escapePowerShellSingleQuoted(tempPath)}');`,
`$synth.Speak('${escapePowerShellSingleQuoted(text)}');`,
"$synth.Dispose();",
].join(" ");
await new Promise<void>((resolve, reject) => {
const child = spawn("powershell", ["-NoProfile", "-Command", script], {
stdio: ["ignore", "ignore", "pipe"],
});
let stderr = "";
child.stderr.on("data", (chunk: Buffer) => {
stderr += chunk.toString();
});
signal?.addEventListener(
"abort",
() => {
if (!child.killed) {
child.kill("SIGKILL");
}
},
{ once: true },
);
child.on("exit", (code) => {
if (signal?.aborted) {
reject(new Error("tts aborted"));
return;
}
if (code === 0) {
resolve();
return;
}
reject(new Error(stderr.trim() || `powershell tts exited with code ${code ?? "null"}`));
});
child.on("error", reject);
}).catch(async (error) => {
await synthesizeWindowsSpeechToWaveFile(text, this.speed, tempPath, signal).catch(async (error) => {
await unlink(tempPath).catch(() => null);
throw error;
});