diff --git a/README.md b/README.md index e0e83aa..a33440f 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,12 @@ TTS만 단독으로 확인: bun run tts:test -- "안녕하세요. 출력 장치 테스트입니다." ``` +TTS WAV 파일만 생성해서 확인: + +```bash +bun run tts:dump -- "안녕하세요. WAV 파일 테스트입니다." +``` + Discord 모드: ```bash diff --git a/package.json b/package.json index 20e47b5..a23a844 100644 --- a/package.json +++ b/package.json @@ -9,6 +9,7 @@ "start:discord": "bun src/index.ts discord", "start:local": "bun src/index.ts local", "tts:test": "bun src/index.ts local-say", + "tts:dump": "bun src/index.ts local-say-dump", "setup:local-ai": "bun src/setup-local-ai.ts", "devices": "bun src/index.ts local-devices", "audio:devices": "bun src/index.ts local-devices", diff --git a/src/index.ts b/src/index.ts index 49a3f59..5dd789f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -3,7 +3,7 @@ import process from "node:process"; import { loadConfig, requireAssistantRuntimeConfig, requireDiscordRuntimeConfig } from "./config.js"; import { runDiscordBot } from "./discord-main.js"; import { Logger } from "./logger.js"; -import { printLocalAudioDevices, runLocalAssistant, runLocalTtsSmokeTest } from "./local-main.js"; +import { dumpLocalTtsWave, printLocalAudioDevices, runLocalAssistant, runLocalTtsSmokeTest } from "./local-main.js"; const mode = process.argv[2] ?? "discord"; const config = loadConfig(); @@ -25,8 +25,13 @@ async function main(): Promise { await runLocalTtsSmokeTest(requireAssistantRuntimeConfig(config), logger, text); return; } + case "local-say-dump": { + const text = process.argv.slice(3).join(" ").trim() || "안녕하세요. TTS WAV 파일 테스트입니다."; + await dumpLocalTtsWave(requireAssistantRuntimeConfig(config), logger, text); + return; + } default: - throw new Error(`알 수 없는 실행 모드입니다: ${mode}. 사용 가능: discord, local, local-devices, local-say`); + throw new Error(`알 수 없는 실행 모드입니다: ${mode}. 사용 가능: discord, local, local-devices, local-say, local-say-dump`); } } diff --git a/src/local-main.ts b/src/local-main.ts index 6fc3e91..dd92b8a 100644 --- a/src/local-main.ts +++ b/src/local-main.ts @@ -1,4 +1,6 @@ import { spawn } from "node:child_process"; +import { mkdir } from "node:fs/promises"; +import path from "node:path"; import process from "node:process"; import type { AssistantRuntimeConfig } from "./config.js"; @@ -10,7 +12,7 @@ import { LocalFasterWhisperSttService } from "./services/local-stt.js"; import { LocalKokoroTtsService } from "./services/local-tts.js"; import { OllamaLlmService } from "./services/ollama-llm.js"; import type { SttService } from "./services/stt.js"; -import { WindowsSystemTtsService } from "./services/windows-system-tts.js"; +import { synthesizeWindowsSpeechToWaveFile, WindowsSystemTtsService } from "./services/windows-system-tts.js"; export async function printLocalAudioDevices(): Promise { if (process.platform === "win32") { @@ -166,3 +168,22 @@ export async function runLocalTtsSmokeTest( await Promise.allSettled([session.destroy(), tts.destroy?.()]); } } + +export async function dumpLocalTtsWave( + config: AssistantRuntimeConfig, + _logger: Logger, + text: string, + outputPath?: string, +): Promise { + if (process.platform !== "win32") { + throw new Error("현재 TTS WAV 덤프 모드는 Windows에서만 구현되어 있습니다."); + } + + const resolvedPath = path.resolve(outputPath?.trim() || "tts-test.wav"); + await mkdir(path.dirname(resolvedPath), { recursive: true }); + await synthesizeWindowsSpeechToWaveFile(text, config.LOCAL_TTS_SPEED, resolvedPath); + + console.log("TTS WAV 파일 생성 완료"); + console.log(`출력 파일: ${resolvedPath}`); + console.log("이 파일이 재생되면 TTS 합성은 정상이고, 실시간 재생 경로만 따로 보면 됩니다."); +} diff --git a/src/services/windows-system-tts.ts b/src/services/windows-system-tts.ts index 22a1317..b3b26e7 100644 --- a/src/services/windows-system-tts.ts +++ b/src/services/windows-system-tts.ts @@ -18,6 +18,59 @@ function toSpeechRate(speed: number): number { return Math.max(-10, Math.min(10, mapped)); } +export async function synthesizeWindowsSpeechToWaveFile( + text: string, + speed: number, + outputPath: string, + signal?: AbortSignal, +): Promise { + const rate = toSpeechRate(speed); + const script = [ + "Add-Type -AssemblyName System.Speech;", + "$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer;", + "$koVoice = $synth.GetInstalledVoices() | Where-Object { $_.VoiceInfo.Culture.Name -like 'ko*' } | Select-Object -First 1;", + "if ($koVoice) { $synth.SelectVoice($koVoice.VoiceInfo.Name) }", + `$synth.Rate = ${rate};`, + `$synth.SetOutputToWaveFile('${escapePowerShellSingleQuoted(outputPath)}');`, + `$synth.Speak('${escapePowerShellSingleQuoted(text)}');`, + "$synth.Dispose();", + ].join(" "); + + await new Promise((resolve, reject) => { + const child = spawn("powershell", ["-NoProfile", "-Command", script], { + stdio: ["ignore", "ignore", "pipe"], + }); + + let stderr = ""; + child.stderr.on("data", (chunk: Buffer) => { + stderr += chunk.toString(); + }); + + signal?.addEventListener( + "abort", + () => { + if (!child.killed) { + child.kill("SIGKILL"); + } + }, + { once: true }, + ); + + child.on("exit", (code) => { + if (signal?.aborted) { + reject(new Error("tts aborted")); + return; + } + if (code === 0) { + resolve(); + return; + } + reject(new Error(stderr.trim() || `powershell tts exited with code ${code ?? "null"}`)); + }); + child.on("error", reject); + }); +} + export class WindowsSystemTtsService implements TtsService { constructor(private readonly speed: number) { const resolvedFfmpegPath = resolveFfmpegPath(); @@ -32,51 +85,7 @@ export class WindowsSystemTtsService implements TtsService { async preparePlayback(text: string, signal?: AbortSignal): Promise { const tempPath = path.join(os.tmpdir(), `realtime-voice-bot-tts-${Date.now()}.wav`); - const rate = toSpeechRate(this.speed); - const script = [ - "Add-Type -AssemblyName System.Speech;", - "$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer;", - "$koVoice = $synth.GetInstalledVoices() | Where-Object { $_.VoiceInfo.Culture.Name -like 'ko*' } | Select-Object -First 1;", - "if ($koVoice) { $synth.SelectVoice($koVoice.VoiceInfo.Name) }", - `$synth.Rate = ${rate};`, - `$synth.SetOutputToWaveFile('${escapePowerShellSingleQuoted(tempPath)}');`, - `$synth.Speak('${escapePowerShellSingleQuoted(text)}');`, - "$synth.Dispose();", - ].join(" "); - - await new Promise((resolve, reject) => { - const child = spawn("powershell", ["-NoProfile", "-Command", script], { - stdio: ["ignore", "ignore", "pipe"], - }); - - let stderr = ""; - child.stderr.on("data", (chunk: Buffer) => { - stderr += chunk.toString(); - }); - - signal?.addEventListener( - "abort", - () => { - if (!child.killed) { - child.kill("SIGKILL"); - } - }, - { once: true }, - ); - - child.on("exit", (code) => { - if (signal?.aborted) { - reject(new Error("tts aborted")); - return; - } - if (code === 0) { - resolve(); - return; - } - reject(new Error(stderr.trim() || `powershell tts exited with code ${code ?? "null"}`)); - }); - child.on("error", reject); - }).catch(async (error) => { + await synthesizeWindowsSpeechToWaveFile(text, this.speed, tempPath, signal).catch(async (error) => { await unlink(tempPath).catch(() => null); throw error; });