Add Windows TTS wave dump mode

2026-05-01 03:34:43 +09:00
parent ac88b8c50a
commit 0a88e8dab1
5 changed files with 90 additions and 48 deletions
--- a/README.md
+++ b/README.md
@@ -71,6 +71,12 @@ TTS만 단독으로 확인:
 bun run tts:test -- "안녕하세요. 출력 장치 테스트입니다."
 ```

+TTS WAV 파일만 생성해서 확인:
+
+```bash
+bun run tts:dump -- "안녕하세요. WAV 파일 테스트입니다."
+```
+
 Discord 모드:

 ```bash
--- a/package.json
+++ b/package.json
@@ -9,6 +9,7 @@
    "start:discord": "bun src/index.ts discord",
    "start:local": "bun src/index.ts local",
    "tts:test": "bun src/index.ts local-say",
+    "tts:dump": "bun src/index.ts local-say-dump",
    "setup:local-ai": "bun src/setup-local-ai.ts",
    "devices": "bun src/index.ts local-devices",
    "audio:devices": "bun src/index.ts local-devices",
--- a/src/index.ts
+++ b/src/index.ts
@@ -3,7 +3,7 @@ import process from "node:process";
 import { loadConfig, requireAssistantRuntimeConfig, requireDiscordRuntimeConfig } from "./config.js";
 import { runDiscordBot } from "./discord-main.js";
 import { Logger } from "./logger.js";
-import { printLocalAudioDevices, runLocalAssistant, runLocalTtsSmokeTest } from "./local-main.js";
+import { dumpLocalTtsWave, printLocalAudioDevices, runLocalAssistant, runLocalTtsSmokeTest } from "./local-main.js";

 const mode = process.argv[2] ?? "discord";
 const config = loadConfig();
@@ -25,8 +25,13 @@ async function main(): Promise<void> {
      await runLocalTtsSmokeTest(requireAssistantRuntimeConfig(config), logger, text);
      return;
    }
+    case "local-say-dump": {
+      const text = process.argv.slice(3).join(" ").trim() || "안녕하세요. TTS WAV 파일 테스트입니다.";
+      await dumpLocalTtsWave(requireAssistantRuntimeConfig(config), logger, text);
+      return;
+    }
    default:
-      throw new Error(`알 수 없는 실행 모드입니다: ${mode}. 사용 가능: discord, local, local-devices, local-say`);
+      throw new Error(`알 수 없는 실행 모드입니다: ${mode}. 사용 가능: discord, local, local-devices, local-say, local-say-dump`);
  }
 }

--- a/src/local-main.ts
+++ b/src/local-main.ts
@@ -1,4 +1,6 @@
 import { spawn } from "node:child_process";
+import { mkdir } from "node:fs/promises";
+import path from "node:path";
 import process from "node:process";

 import type { AssistantRuntimeConfig } from "./config.js";
@@ -10,7 +12,7 @@ import { LocalFasterWhisperSttService } from "./services/local-stt.js";
 import { LocalKokoroTtsService } from "./services/local-tts.js";
 import { OllamaLlmService } from "./services/ollama-llm.js";
 import type { SttService } from "./services/stt.js";
-import { WindowsSystemTtsService } from "./services/windows-system-tts.js";
+import { synthesizeWindowsSpeechToWaveFile, WindowsSystemTtsService } from "./services/windows-system-tts.js";

 export async function printLocalAudioDevices(): Promise<void> {
  if (process.platform === "win32") {
@@ -166,3 +168,22 @@ export async function runLocalTtsSmokeTest(
    await Promise.allSettled([session.destroy(), tts.destroy?.()]);
  }
 }
+
+/** Windows-only: creates the target directory, synthesizes `text` into a WAV file there, and prints the resolved path. */
+export async function dumpLocalTtsWave(
+  config: AssistantRuntimeConfig,
+  _logger: Logger,
+  text: string,
+  outputPath?: string,
+): Promise<void> {
+  const isWindows = process.platform === "win32";
+  if (!isWindows) {
+    throw new Error("현재 TTS WAV 덤프 모드는 Windows에서만 구현되어 있습니다.");
+  }
+  // A missing or blank outputPath falls back to "tts-test.wav"; path.resolve makes the result absolute.
+  const wavFile = path.resolve(outputPath?.trim() || "tts-test.wav");
+  await mkdir(path.dirname(wavFile), { recursive: true });
+  await synthesizeWindowsSpeechToWaveFile(text, config.LOCAL_TTS_SPEED, wavFile);
+  const report = ["TTS WAV 파일 생성 완료", `출력 파일: ${wavFile}`, "이 파일이 재생되면 TTS 합성은 정상이고, 실시간 재생 경로만 따로 보면 됩니다."];
+  report.forEach((line) => console.log(line));
+}
--- a/src/services/windows-system-tts.ts
+++ b/src/services/windows-system-tts.ts
@@ -18,28 +18,20 @@ function toSpeechRate(speed: number): number {
  return Math.max(-10, Math.min(10, mapped));
 }

-export class WindowsSystemTtsService implements TtsService {
-  constructor(private readonly speed: number) {
-    const resolvedFfmpegPath = resolveFfmpegPath();
-    if (resolvedFfmpegPath && !process.env.FFMPEG_PATH) {
-      process.env.FFMPEG_PATH = resolvedFfmpegPath;
-    }
-  }
-
-  async warmup(): Promise<void> {
-    return;
-  }
-
-  async preparePlayback(text: string, signal?: AbortSignal): Promise<PreparedSpeechAudio> {
-    const tempPath = path.join(os.tmpdir(), `realtime-voice-bot-tts-${Date.now()}.wav`);
-    const rate = toSpeechRate(this.speed);
+export async function synthesizeWindowsSpeechToWaveFile(
+  text: string,
+  speed: number,
+  outputPath: string,
+  signal?: AbortSignal,
+): Promise<void> {
+  const rate = toSpeechRate(speed);
  const script = [
    "Add-Type -AssemblyName System.Speech;",
    "$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer;",
    "$koVoice = $synth.GetInstalledVoices() | Where-Object { $_.VoiceInfo.Culture.Name -like 'ko*' } | Select-Object -First 1;",
    "if ($koVoice) { $synth.SelectVoice($koVoice.VoiceInfo.Name) }",
    `$synth.Rate = ${rate};`,
-      `$synth.SetOutputToWaveFile('${escapePowerShellSingleQuoted(tempPath)}');`,
+    `$synth.SetOutputToWaveFile('${escapePowerShellSingleQuoted(outputPath)}');`,
    `$synth.Speak('${escapePowerShellSingleQuoted(text)}');`,
    "$synth.Dispose();",
  ].join(" ");
@@ -76,7 +68,24 @@ export class WindowsSystemTtsService implements TtsService {
      reject(new Error(stderr.trim() || `powershell tts exited with code ${code ?? "null"}`));
    });
    child.on("error", reject);
-    }).catch(async (error) => {
+  });
+}
+
+export class WindowsSystemTtsService implements TtsService {
+  constructor(private readonly speed: number) {
+    const resolvedFfmpegPath = resolveFfmpegPath();
+    if (resolvedFfmpegPath && !process.env.FFMPEG_PATH) {
+      process.env.FFMPEG_PATH = resolvedFfmpegPath;
+    }
+  }
+
+  async warmup(): Promise<void> {
+    return;
+  }
+
+  async preparePlayback(text: string, signal?: AbortSignal): Promise<PreparedSpeechAudio> {
+    const tempPath = path.join(os.tmpdir(), `realtime-voice-bot-tts-${Date.now()}.wav`);
+    await synthesizeWindowsSpeechToWaveFile(text, this.speed, tempPath, signal).catch(async (error) => {
      await unlink(tempPath).catch(() => null);
      throw error;
    });