Use Windows system TTS for local mode

This commit is contained in:
2026-04-30 04:01:24 +09:00
parent dc39998241
commit 7ba392c0e7
7 changed files with 158 additions and 8 deletions

View File

@@ -117,6 +117,7 @@ export class PythonJsonWorker {
HF_HOME: cachePath,
TRANSFORMERS_CACHE: cachePath,
PYTHONIOENCODING: "utf-8",
HF_HUB_DISABLE_SYMLINKS_WARNING: "1",
BOT_DEFAULT_LANGUAGE: this.config.BOT_DEFAULT_LANGUAGE,
...this.workerEnv,
},
@@ -132,6 +133,9 @@ export class PythonJsonWorker {
let payload: WorkerResponse;
try {
if (!line.startsWith("{")) {
return;
}
payload = JSON.parse(line) as WorkerResponse;
} catch (error) {
this.logger.warn(`${this.label} stdout parse failed`, error);

View File

@@ -0,0 +1,126 @@
import { spawn } from "node:child_process";
import { createReadStream } from "node:fs";
import { unlink } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import prism from "prism-media";
import { resolveFfmpegPath } from "../audio/ffmpeg-path.js";
import type { PreparedSpeechAudio, TtsService } from "./tts.js";
/**
 * Escapes text so it can be embedded inside a PowerShell single-quoted string literal.
 *
 * PowerShell terminates a single-quoted literal not only on the ASCII apostrophe but
 * also on the typographic quote characters U+2018, U+2019, U+201A and U+201B. Each of
 * them is therefore doubled, which is the PowerShell escape sequence for a literal quote.
 * Escaping only `'` would let text containing e.g. `’` break out of the literal and be
 * interpreted as script.
 *
 * @param text Raw text to embed between single quotes.
 * @returns The text with every single-quote-like character doubled.
 */
function escapePowerShellSingleQuoted(text: string): string {
  // "$&" is the matched character, so "$&$&" doubles whichever quote variant was found.
  return text.replace(/['\u2018\u2019\u201A\u201B]/g, "$&$&");
}
/**
 * Converts a playback speed multiplier into an integer synthesizer rate.
 *
 * A speed of 1 maps to rate 0; every 0.125 of speed is one rate step. The result is
 * clamped to the inclusive range [-10, 10].
 *
 * @param speed Speed multiplier, where 1 means the default speaking rate.
 * @returns An integer rate in [-10, 10]; 0 when `speed` is NaN.
 */
function toSpeechRate(speed: number): number {
  // NaN would survive Math.round/min/max and be interpolated into the script as "NaN".
  if (Number.isNaN(speed)) {
    return 0;
  }
  const mapped = Math.round((speed - 1) * 8);
  return Math.max(-10, Math.min(10, mapped));
}
/**
 * TTS service that synthesizes speech with the Windows `System.Speech` synthesizer
 * (driven through a PowerShell child process) and decodes the resulting WAV file to
 * 48 kHz, 2-channel, signed 16-bit little-endian PCM through ffmpeg.
 */
export class WindowsSystemTtsService implements TtsService {
  /** Per-process counter that keeps temp file names unique within the same millisecond. */
  private static tempFileSequence = 0;

  /**
   * @param speed Speed multiplier passed to `toSpeechRate` for every utterance.
   */
  constructor(private readonly speed: number) {
    // Publish the resolved ffmpeg location through FFMPEG_PATH unless one is already set.
    const resolvedFfmpegPath = resolveFfmpegPath();
    if (resolvedFfmpegPath && !process.env.FFMPEG_PATH) {
      process.env.FFMPEG_PATH = resolvedFfmpegPath;
    }
  }

  /** No-op: this service has nothing to preload. */
  async warmup(): Promise<void> {
    return;
  }

  /**
   * Synthesizes `text` into a temporary WAV file and returns an ffmpeg stream decoding it.
   *
   * @param text Text to speak.
   * @param signal Optional abort signal. Aborting kills the PowerShell process while it
   *   is synthesizing, or tears down the returned stream and temp file afterwards.
   * @returns The ffmpeg output stream plus a `dispose` callback that releases the
   *   stream and deletes the temp file. `dispose` is safe to call more than once.
   * @throws Error("tts aborted") when the signal is (or becomes) aborted; an Error
   *   carrying PowerShell's stderr or exit code when synthesis fails; any error thrown
   *   while spawning PowerShell or constructing the ffmpeg stream.
   */
  async preparePlayback(text: string, signal?: AbortSignal): Promise<PreparedSpeechAudio> {
    // An already-aborted signal never re-fires "abort", so bail out before spawning anything.
    if (signal?.aborted) {
      throw new Error("tts aborted");
    }

    // Date.now() alone collides for calls within the same millisecond; add pid + counter.
    const sequence = WindowsSystemTtsService.tempFileSequence++;
    const tempPath = path.join(
      os.tmpdir(),
      `realtime-voice-bot-tts-${Date.now()}-${process.pid}-${sequence}.wav`,
    );
    const rate = toSpeechRate(this.speed);
    const script = [
      "Add-Type -AssemblyName System.Speech;",
      "$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer;",
      // Prefer the first installed Korean voice; otherwise keep the synthesizer default.
      "$koVoice = $synth.GetInstalledVoices() | Where-Object { $_.VoiceInfo.Culture.Name -like 'ko*' } | Select-Object -First 1;",
      "if ($koVoice) { $synth.SelectVoice($koVoice.VoiceInfo.Name) }",
      `$synth.Rate = ${rate};`,
      `$synth.SetOutputToWaveFile('${escapePowerShellSingleQuoted(tempPath)}');`,
      `$synth.Speak('${escapePowerShellSingleQuoted(text)}');`,
      "$synth.Dispose();",
    ].join(" ");

    try {
      await new Promise<void>((resolve, reject) => {
        const child = spawn("powershell", ["-NoProfile", "-Command", script], {
          stdio: ["ignore", "ignore", "pipe"],
        });
        let stderr = "";
        child.stderr.on("data", (chunk: Buffer) => {
          stderr += chunk.toString();
        });

        const killOnAbort = (): void => {
          if (!child.killed) {
            child.kill("SIGKILL");
          }
        };
        // Detach once the child is done so listeners do not pile up on a long-lived signal.
        const detachAbort = (): void => {
          signal?.removeEventListener("abort", killOnAbort);
        };
        signal?.addEventListener("abort", killOnAbort, { once: true });

        // "close" (not "exit") guarantees stderr has been fully drained before it is reported.
        child.on("close", (code) => {
          detachAbort();
          if (signal?.aborted) {
            reject(new Error("tts aborted"));
            return;
          }
          if (code === 0) {
            resolve();
            return;
          }
          reject(new Error(stderr.trim() || `powershell tts exited with code ${code ?? "null"}`));
        });
        child.on("error", (error) => {
          detachAbort();
          reject(error);
        });
      });
    } catch (error) {
      // Synthesis failed or was aborted: remove whatever was written so far (best effort).
      await unlink(tempPath).catch(() => null);
      throw error;
    }

    // NOTE(review): this read stream is never piped into ffmpeg (ffmpeg opens tempPath
    // itself through "-i"); it is kept only so resource handling matches the previous
    // behavior. Confirm whether it can be dropped.
    const input = createReadStream(tempPath);
    try {
      const ffmpeg = new prism.FFmpeg({
        args: [
          "-analyzeduration",
          "0",
          "-loglevel",
          "0",
          "-i",
          tempPath,
          "-f",
          "s16le",
          "-ar",
          "48000",
          "-ac",
          "2",
          "pipe:1",
        ],
      });

      let released = false;
      // Single teardown path shared by abort and dispose; runs at most once.
      const release = (): void => {
        if (released) {
          return;
        }
        released = true;
        signal?.removeEventListener("abort", onAbort);
        input.destroy();
        ffmpeg.destroy();
        void unlink(tempPath).catch(() => null);
      };
      const onAbort = (): void => {
        release();
      };
      signal?.addEventListener("abort", onAbort, { once: true });

      return {
        stream: ffmpeg,
        dispose: () => {
          release();
        },
      };
    } catch (error) {
      // Constructing the ffmpeg stream failed: do not leak the read stream or temp file.
      input.destroy();
      await unlink(tempPath).catch(() => null);
      throw error;
    }
  }

  /** No-op: resources are owned per playback and released through `dispose`. */
  async destroy(): Promise<void> {
    return;
  }
}