Add Windows TTS wave dump mode
This commit is contained in:
@@ -71,6 +71,12 @@ TTS만 단독으로 확인:
|
|||||||
bun run tts:test -- "안녕하세요. 출력 장치 테스트입니다."
|
bun run tts:test -- "안녕하세요. 출력 장치 테스트입니다."
|
||||||
```
|
```
|
||||||
|
|
||||||
|
TTS WAV 파일만 생성해서 확인:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bun run tts:dump -- "안녕하세요. WAV 파일 테스트입니다."
|
||||||
|
```
|
||||||
|
|
||||||
Discord 모드:
|
Discord 모드:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
"start:discord": "bun src/index.ts discord",
|
"start:discord": "bun src/index.ts discord",
|
||||||
"start:local": "bun src/index.ts local",
|
"start:local": "bun src/index.ts local",
|
||||||
"tts:test": "bun src/index.ts local-say",
|
"tts:test": "bun src/index.ts local-say",
|
||||||
|
"tts:dump": "bun src/index.ts local-say-dump",
|
||||||
"setup:local-ai": "bun src/setup-local-ai.ts",
|
"setup:local-ai": "bun src/setup-local-ai.ts",
|
||||||
"devices": "bun src/index.ts local-devices",
|
"devices": "bun src/index.ts local-devices",
|
||||||
"audio:devices": "bun src/index.ts local-devices",
|
"audio:devices": "bun src/index.ts local-devices",
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import process from "node:process";
|
|||||||
import { loadConfig, requireAssistantRuntimeConfig, requireDiscordRuntimeConfig } from "./config.js";
|
import { loadConfig, requireAssistantRuntimeConfig, requireDiscordRuntimeConfig } from "./config.js";
|
||||||
import { runDiscordBot } from "./discord-main.js";
|
import { runDiscordBot } from "./discord-main.js";
|
||||||
import { Logger } from "./logger.js";
|
import { Logger } from "./logger.js";
|
||||||
import { printLocalAudioDevices, runLocalAssistant, runLocalTtsSmokeTest } from "./local-main.js";
|
import { dumpLocalTtsWave, printLocalAudioDevices, runLocalAssistant, runLocalTtsSmokeTest } from "./local-main.js";
|
||||||
|
|
||||||
const mode = process.argv[2] ?? "discord";
|
const mode = process.argv[2] ?? "discord";
|
||||||
const config = loadConfig();
|
const config = loadConfig();
|
||||||
@@ -25,8 +25,13 @@ async function main(): Promise<void> {
|
|||||||
await runLocalTtsSmokeTest(requireAssistantRuntimeConfig(config), logger, text);
|
await runLocalTtsSmokeTest(requireAssistantRuntimeConfig(config), logger, text);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
case "local-say-dump": {
|
||||||
|
const text = process.argv.slice(3).join(" ").trim() || "안녕하세요. TTS WAV 파일 테스트입니다.";
|
||||||
|
await dumpLocalTtsWave(requireAssistantRuntimeConfig(config), logger, text);
|
||||||
|
return;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
throw new Error(`알 수 없는 실행 모드입니다: ${mode}. 사용 가능: discord, local, local-devices, local-say`);
|
throw new Error(`알 수 없는 실행 모드입니다: ${mode}. 사용 가능: discord, local, local-devices, local-say, local-say-dump`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,6 @@
|
|||||||
import { spawn } from "node:child_process";
|
import { spawn } from "node:child_process";
|
||||||
|
import { mkdir } from "node:fs/promises";
|
||||||
|
import path from "node:path";
|
||||||
import process from "node:process";
|
import process from "node:process";
|
||||||
|
|
||||||
import type { AssistantRuntimeConfig } from "./config.js";
|
import type { AssistantRuntimeConfig } from "./config.js";
|
||||||
@@ -10,7 +12,7 @@ import { LocalFasterWhisperSttService } from "./services/local-stt.js";
|
|||||||
import { LocalKokoroTtsService } from "./services/local-tts.js";
|
import { LocalKokoroTtsService } from "./services/local-tts.js";
|
||||||
import { OllamaLlmService } from "./services/ollama-llm.js";
|
import { OllamaLlmService } from "./services/ollama-llm.js";
|
||||||
import type { SttService } from "./services/stt.js";
|
import type { SttService } from "./services/stt.js";
|
||||||
import { WindowsSystemTtsService } from "./services/windows-system-tts.js";
|
import { synthesizeWindowsSpeechToWaveFile, WindowsSystemTtsService } from "./services/windows-system-tts.js";
|
||||||
|
|
||||||
export async function printLocalAudioDevices(): Promise<void> {
|
export async function printLocalAudioDevices(): Promise<void> {
|
||||||
if (process.platform === "win32") {
|
if (process.platform === "win32") {
|
||||||
@@ -166,3 +168,22 @@ export async function runLocalTtsSmokeTest(
|
|||||||
await Promise.allSettled([session.destroy(), tts.destroy?.()]);
|
await Promise.allSettled([session.destroy(), tts.destroy?.()]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Synthesizes `text` to a WAV file on disk without playing it, so that speech
 * synthesis can be checked separately from the realtime playback path.
 *
 * @param config Runtime configuration; only `LOCAL_TTS_SPEED` is read here.
 * @param _logger Unused; results are printed directly with `console.log`.
 * @param text Sentence to synthesize.
 * @param outputPath Optional destination path. Blank or omitted values fall
 *   back to `tts-test.wav`, resolved against the current working directory.
 * @throws Error when the current platform is not Windows.
 */
export async function dumpLocalTtsWave(
  config: AssistantRuntimeConfig,
  _logger: Logger,
  text: string,
  outputPath?: string,
): Promise<void> {
  const isWindows = process.platform === "win32";
  if (!isWindows) {
    throw new Error("현재 TTS WAV 덤프 모드는 Windows에서만 구현되어 있습니다.");
  }

  // An empty or whitespace-only path counts as "not provided".
  const requestedPath = outputPath?.trim();
  const resolvedPath = path.resolve(requestedPath ? requestedPath : "tts-test.wav");

  // Create the destination folder up front so the synthesizer can open the file.
  const parentDirectory = path.dirname(resolvedPath);
  await mkdir(parentDirectory, { recursive: true });

  await synthesizeWindowsSpeechToWaveFile(text, config.LOCAL_TTS_SPEED, resolvedPath);

  const reportLines = [
    "TTS WAV 파일 생성 완료",
    `출력 파일: ${resolvedPath}`,
    "이 파일이 재생되면 TTS 합성은 정상이고, 실시간 재생 경로만 따로 보면 됩니다.",
  ];
  for (const line of reportLines) {
    console.log(line);
  }
}
|
||||||
|
|||||||
@@ -18,6 +18,59 @@ function toSpeechRate(speed: number): number {
|
|||||||
return Math.max(-10, Math.min(10, mapped));
|
return Math.max(-10, Math.min(10, mapped));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Renders `text` to a WAV file by running a PowerShell script that drives
 * `System.Speech.Synthesis.SpeechSynthesizer`.
 *
 * The script selects the first installed voice whose culture name matches
 * `ko*` (when one exists), applies the speech rate derived from `speed`, and
 * writes the audio to `outputPath`.
 *
 * @param text Sentence to speak; escaped for a single-quoted PowerShell string.
 * @param speed Playback speed, converted to a synthesizer rate by `toSpeechRate`.
 * @param outputPath Destination WAV path; escaped the same way as `text`.
 * @param signal Optional abort signal. Aborting kills the PowerShell process.
 * @throws Error with message `"tts aborted"` when `signal` is aborted, either
 *   before the call or while PowerShell is running.
 * @throws Error carrying PowerShell's stderr (or the exit code) on a non-zero exit.
 * @throws The spawn error when the `powershell` executable cannot be started.
 */
export async function synthesizeWindowsSpeechToWaveFile(
  text: string,
  speed: number,
  outputPath: string,
  signal?: AbortSignal,
): Promise<void> {
  // An already-aborted signal never emits "abort" again, so without this guard
  // PowerShell would be spawned and run to completion before being rejected.
  if (signal?.aborted) {
    throw new Error("tts aborted");
  }

  const rate = toSpeechRate(speed);
  // NOTE(review): the script ends with Dispose(), so a failure in an earlier
  // statement may still produce exit code 0 — confirm against PowerShell's
  // -Command exit-code rules before relying on the exit code alone.
  const script = [
    "Add-Type -AssemblyName System.Speech;",
    "$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer;",
    "$koVoice = $synth.GetInstalledVoices() | Where-Object { $_.VoiceInfo.Culture.Name -like 'ko*' } | Select-Object -First 1;",
    "if ($koVoice) { $synth.SelectVoice($koVoice.VoiceInfo.Name) }",
    `$synth.Rate = ${rate};`,
    `$synth.SetOutputToWaveFile('${escapePowerShellSingleQuoted(outputPath)}');`,
    `$synth.Speak('${escapePowerShellSingleQuoted(text)}');`,
    "$synth.Dispose();",
  ].join(" ");

  await new Promise<void>((resolve, reject) => {
    const child = spawn("powershell", ["-NoProfile", "-Command", script], {
      stdio: ["ignore", "ignore", "pipe"],
    });

    // stdout is ignored; stderr is collected to build the failure message.
    let stderr = "";
    child.stderr.on("data", (chunk: Buffer) => {
      stderr += chunk.toString();
    });

    const onAbort = (): void => {
      if (!child.killed) {
        child.kill("SIGKILL");
      }
    };
    signal?.addEventListener("abort", onAbort, { once: true });

    // Both "exit" and "error" can fire for one child; settle exactly once and
    // detach the abort listener so a long-lived signal does not retain it.
    let settled = false;
    const settle = (failure?: Error): void => {
      if (settled) {
        return;
      }
      settled = true;
      signal?.removeEventListener("abort", onAbort);
      if (failure) {
        reject(failure);
      } else {
        resolve();
      }
    };

    child.on("exit", (code) => {
      if (signal?.aborted) {
        settle(new Error("tts aborted"));
        return;
      }
      if (code === 0) {
        settle();
        return;
      }
      settle(new Error(stderr.trim() || `powershell tts exited with code ${code ?? "null"}`));
    });
    child.on("error", (error) => {
      settle(error);
    });
  });
}
|
||||||
|
|
||||||
export class WindowsSystemTtsService implements TtsService {
|
export class WindowsSystemTtsService implements TtsService {
|
||||||
constructor(private readonly speed: number) {
|
constructor(private readonly speed: number) {
|
||||||
const resolvedFfmpegPath = resolveFfmpegPath();
|
const resolvedFfmpegPath = resolveFfmpegPath();
|
||||||
@@ -32,51 +85,7 @@ export class WindowsSystemTtsService implements TtsService {
|
|||||||
|
|
||||||
async preparePlayback(text: string, signal?: AbortSignal): Promise<PreparedSpeechAudio> {
|
async preparePlayback(text: string, signal?: AbortSignal): Promise<PreparedSpeechAudio> {
|
||||||
const tempPath = path.join(os.tmpdir(), `realtime-voice-bot-tts-${Date.now()}.wav`);
|
const tempPath = path.join(os.tmpdir(), `realtime-voice-bot-tts-${Date.now()}.wav`);
|
||||||
const rate = toSpeechRate(this.speed);
|
await synthesizeWindowsSpeechToWaveFile(text, this.speed, tempPath, signal).catch(async (error) => {
|
||||||
const script = [
|
|
||||||
"Add-Type -AssemblyName System.Speech;",
|
|
||||||
"$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer;",
|
|
||||||
"$koVoice = $synth.GetInstalledVoices() | Where-Object { $_.VoiceInfo.Culture.Name -like 'ko*' } | Select-Object -First 1;",
|
|
||||||
"if ($koVoice) { $synth.SelectVoice($koVoice.VoiceInfo.Name) }",
|
|
||||||
`$synth.Rate = ${rate};`,
|
|
||||||
`$synth.SetOutputToWaveFile('${escapePowerShellSingleQuoted(tempPath)}');`,
|
|
||||||
`$synth.Speak('${escapePowerShellSingleQuoted(text)}');`,
|
|
||||||
"$synth.Dispose();",
|
|
||||||
].join(" ");
|
|
||||||
|
|
||||||
await new Promise<void>((resolve, reject) => {
|
|
||||||
const child = spawn("powershell", ["-NoProfile", "-Command", script], {
|
|
||||||
stdio: ["ignore", "ignore", "pipe"],
|
|
||||||
});
|
|
||||||
|
|
||||||
let stderr = "";
|
|
||||||
child.stderr.on("data", (chunk: Buffer) => {
|
|
||||||
stderr += chunk.toString();
|
|
||||||
});
|
|
||||||
|
|
||||||
signal?.addEventListener(
|
|
||||||
"abort",
|
|
||||||
() => {
|
|
||||||
if (!child.killed) {
|
|
||||||
child.kill("SIGKILL");
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{ once: true },
|
|
||||||
);
|
|
||||||
|
|
||||||
child.on("exit", (code) => {
|
|
||||||
if (signal?.aborted) {
|
|
||||||
reject(new Error("tts aborted"));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (code === 0) {
|
|
||||||
resolve();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
reject(new Error(stderr.trim() || `powershell tts exited with code ${code ?? "null"}`));
|
|
||||||
});
|
|
||||||
child.on("error", reject);
|
|
||||||
}).catch(async (error) => {
|
|
||||||
await unlink(tempPath).catch(() => null);
|
await unlink(tempPath).catch(() => null);
|
||||||
throw error;
|
throw error;
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user