Add Windows TTS wave dump mode
This commit is contained in:
@@ -71,6 +71,12 @@ TTS만 단독으로 확인:
|
||||
bun run tts:test -- "안녕하세요. 출력 장치 테스트입니다."
|
||||
```
|
||||
|
||||
TTS WAV 파일만 생성해서 확인:
|
||||
|
||||
```bash
|
||||
bun run tts:dump -- "안녕하세요. WAV 파일 테스트입니다."
|
||||
```
|
||||
|
||||
Discord 모드:
|
||||
|
||||
```bash
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
"start:discord": "bun src/index.ts discord",
|
||||
"start:local": "bun src/index.ts local",
|
||||
"tts:test": "bun src/index.ts local-say",
|
||||
"tts:dump": "bun src/index.ts local-say-dump",
|
||||
"setup:local-ai": "bun src/setup-local-ai.ts",
|
||||
"devices": "bun src/index.ts local-devices",
|
||||
"audio:devices": "bun src/index.ts local-devices",
|
||||
|
||||
@@ -3,7 +3,7 @@ import process from "node:process";
|
||||
import { loadConfig, requireAssistantRuntimeConfig, requireDiscordRuntimeConfig } from "./config.js";
|
||||
import { runDiscordBot } from "./discord-main.js";
|
||||
import { Logger } from "./logger.js";
|
||||
import { printLocalAudioDevices, runLocalAssistant, runLocalTtsSmokeTest } from "./local-main.js";
|
||||
import { dumpLocalTtsWave, printLocalAudioDevices, runLocalAssistant, runLocalTtsSmokeTest } from "./local-main.js";
|
||||
|
||||
const mode = process.argv[2] ?? "discord";
|
||||
const config = loadConfig();
|
||||
@@ -25,8 +25,13 @@ async function main(): Promise<void> {
|
||||
await runLocalTtsSmokeTest(requireAssistantRuntimeConfig(config), logger, text);
|
||||
return;
|
||||
}
|
||||
case "local-say-dump": {
|
||||
const text = process.argv.slice(3).join(" ").trim() || "안녕하세요. TTS WAV 파일 테스트입니다.";
|
||||
await dumpLocalTtsWave(requireAssistantRuntimeConfig(config), logger, text);
|
||||
return;
|
||||
}
|
||||
default:
|
||||
throw new Error(`알 수 없는 실행 모드입니다: ${mode}. 사용 가능: discord, local, local-devices, local-say`);
|
||||
throw new Error(`알 수 없는 실행 모드입니다: ${mode}. 사용 가능: discord, local, local-devices, local-say, local-say-dump`);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import { mkdir } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import process from "node:process";
|
||||
|
||||
import type { AssistantRuntimeConfig } from "./config.js";
|
||||
@@ -10,7 +12,7 @@ import { LocalFasterWhisperSttService } from "./services/local-stt.js";
|
||||
import { LocalKokoroTtsService } from "./services/local-tts.js";
|
||||
import { OllamaLlmService } from "./services/ollama-llm.js";
|
||||
import type { SttService } from "./services/stt.js";
|
||||
import { WindowsSystemTtsService } from "./services/windows-system-tts.js";
|
||||
import { synthesizeWindowsSpeechToWaveFile, WindowsSystemTtsService } from "./services/windows-system-tts.js";
|
||||
|
||||
export async function printLocalAudioDevices(): Promise<void> {
|
||||
if (process.platform === "win32") {
|
||||
@@ -166,3 +168,22 @@ export async function runLocalTtsSmokeTest(
|
||||
await Promise.allSettled([session.destroy(), tts.destroy?.()]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Synthesizes `text` to a WAV file on disk instead of playing it back, then
 * prints the resulting path to stdout.
 *
 * @param config - Runtime configuration; only `LOCAL_TTS_SPEED` is read here and
 *   passed through to the synthesizer.
 * @param _logger - Not used by this function.
 * @param text - The text to synthesize.
 * @param outputPath - Destination file. When omitted or blank after trimming,
 *   `tts-test.wav` is used. The value is passed through `path.resolve`.
 * @throws Error when `process.platform` is not `"win32"`.
 */
export async function dumpLocalTtsWave(
  config: AssistantRuntimeConfig,
  _logger: Logger,
  text: string,
  outputPath?: string,
): Promise<void> {
  const isWindows = process.platform === "win32";
  if (!isWindows) {
    throw new Error("현재 TTS WAV 덤프 모드는 Windows에서만 구현되어 있습니다.");
  }

  // A blank or missing path falls back to the default file name.
  const requestedPath = outputPath?.trim();
  const wavPath = path.resolve(requestedPath ? requestedPath : "tts-test.wav");

  // Create the parent directory first so the synthesizer has somewhere to write.
  const parentDir = path.dirname(wavPath);
  await mkdir(parentDir, { recursive: true });

  await synthesizeWindowsSpeechToWaveFile(text, config.LOCAL_TTS_SPEED, wavPath);

  const report = [
    "TTS WAV 파일 생성 완료",
    `출력 파일: ${wavPath}`,
    "이 파일이 재생되면 TTS 합성은 정상이고, 실시간 재생 경로만 따로 보면 됩니다.",
  ];
  for (const line of report) {
    console.log(line);
  }
}
|
||||
|
||||
@@ -18,28 +18,20 @@ function toSpeechRate(speed: number): number {
|
||||
return Math.max(-10, Math.min(10, mapped));
|
||||
}
|
||||
|
||||
export class WindowsSystemTtsService implements TtsService {
|
||||
constructor(private readonly speed: number) {
|
||||
const resolvedFfmpegPath = resolveFfmpegPath();
|
||||
if (resolvedFfmpegPath && !process.env.FFMPEG_PATH) {
|
||||
process.env.FFMPEG_PATH = resolvedFfmpegPath;
|
||||
}
|
||||
}
|
||||
|
||||
async warmup(): Promise<void> {
|
||||
return;
|
||||
}
|
||||
|
||||
async preparePlayback(text: string, signal?: AbortSignal): Promise<PreparedSpeechAudio> {
|
||||
const tempPath = path.join(os.tmpdir(), `realtime-voice-bot-tts-${Date.now()}.wav`);
|
||||
const rate = toSpeechRate(this.speed);
|
||||
export async function synthesizeWindowsSpeechToWaveFile(
|
||||
text: string,
|
||||
speed: number,
|
||||
outputPath: string,
|
||||
signal?: AbortSignal,
|
||||
): Promise<void> {
|
||||
const rate = toSpeechRate(speed);
|
||||
const script = [
|
||||
"Add-Type -AssemblyName System.Speech;",
|
||||
"$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer;",
|
||||
"$koVoice = $synth.GetInstalledVoices() | Where-Object { $_.VoiceInfo.Culture.Name -like 'ko*' } | Select-Object -First 1;",
|
||||
"if ($koVoice) { $synth.SelectVoice($koVoice.VoiceInfo.Name) }",
|
||||
`$synth.Rate = ${rate};`,
|
||||
`$synth.SetOutputToWaveFile('${escapePowerShellSingleQuoted(tempPath)}');`,
|
||||
`$synth.SetOutputToWaveFile('${escapePowerShellSingleQuoted(outputPath)}');`,
|
||||
`$synth.Speak('${escapePowerShellSingleQuoted(text)}');`,
|
||||
"$synth.Dispose();",
|
||||
].join(" ");
|
||||
@@ -76,7 +68,24 @@ export class WindowsSystemTtsService implements TtsService {
|
||||
reject(new Error(stderr.trim() || `powershell tts exited with code ${code ?? "null"}`));
|
||||
});
|
||||
child.on("error", reject);
|
||||
}).catch(async (error) => {
|
||||
});
|
||||
}
|
||||
|
||||
export class WindowsSystemTtsService implements TtsService {
|
||||
constructor(private readonly speed: number) {
|
||||
const resolvedFfmpegPath = resolveFfmpegPath();
|
||||
if (resolvedFfmpegPath && !process.env.FFMPEG_PATH) {
|
||||
process.env.FFMPEG_PATH = resolvedFfmpegPath;
|
||||
}
|
||||
}
|
||||
|
||||
async warmup(): Promise<void> {
|
||||
return;
|
||||
}
|
||||
|
||||
async preparePlayback(text: string, signal?: AbortSignal): Promise<PreparedSpeechAudio> {
|
||||
const tempPath = path.join(os.tmpdir(), `realtime-voice-bot-tts-${Date.now()}.wav`);
|
||||
await synthesizeWindowsSpeechToWaveFile(text, this.speed, tempPath, signal).catch(async (error) => {
|
||||
await unlink(tempPath).catch(() => null);
|
||||
throw error;
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user