Use Windows system TTS for local mode
This commit is contained in:
@@ -117,6 +117,7 @@ export class PythonJsonWorker {
|
||||
HF_HOME: cachePath,
|
||||
TRANSFORMERS_CACHE: cachePath,
|
||||
PYTHONIOENCODING: "utf-8",
|
||||
HF_HUB_DISABLE_SYMLINKS_WARNING: "1",
|
||||
BOT_DEFAULT_LANGUAGE: this.config.BOT_DEFAULT_LANGUAGE,
|
||||
...this.workerEnv,
|
||||
},
|
||||
@@ -132,6 +133,9 @@ export class PythonJsonWorker {
|
||||
|
||||
let payload: WorkerResponse;
|
||||
try {
|
||||
if (!line.startsWith("{")) {
|
||||
return;
|
||||
}
|
||||
payload = JSON.parse(line) as WorkerResponse;
|
||||
} catch (error) {
|
||||
this.logger.warn(`${this.label} stdout parse failed`, error);
|
||||
|
||||
126
src/services/windows-system-tts.ts
Normal file
126
src/services/windows-system-tts.ts
Normal file
@@ -0,0 +1,126 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import { createReadStream } from "node:fs";
|
||||
import { unlink } from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
|
||||
import prism from "prism-media";
|
||||
|
||||
import { resolveFfmpegPath } from "../audio/ffmpeg-path.js";
|
||||
import type { PreparedSpeechAudio, TtsService } from "./tts.js";
|
||||
|
||||
/**
 * Escapes text for embedding between single quotes in a PowerShell script.
 *
 * PowerShell accepts the typographic quotes U+2018, U+2019, U+201A and U+201B
 * as single-quote delimiters in addition to the ASCII apostrophe, so every one
 * of them is doubled. Doubling only `'` would let text containing a "smart"
 * apostrophe terminate the literal early and run the remainder as script.
 *
 * @param text Raw text to embed.
 * @returns The text with each single-quote character doubled; the caller adds
 *   the surrounding quotes.
 */
function escapePowerShellSingleQuoted(text: string): string {
  return text.replace(/['\u2018\u2019\u201A\u201B]/g, "$&$&");
}
|
||||
|
||||
/**
 * Converts a playback speed multiplier into the integer rate assigned to the
 * synthesizer's `Rate` property.
 *
 * A speed of 1 yields 0; every 0.125 above or below 1 moves the rate by one
 * step, and the result is limited to the inclusive range -10..10.
 *
 * @param speed Speed multiplier, where 1 is the neutral speed.
 * @returns The rounded rate, clamped to -10..10.
 */
function toSpeechRate(speed: number): number {
  const upperBound = 10;
  const lowerBound = -10;
  const stepsFromNeutral = Math.round((speed - 1) * 8);

  if (stepsFromNeutral > upperBound) {
    return upperBound;
  }
  if (stepsFromNeutral < lowerBound) {
    return lowerBound;
  }
  return stepsFromNeutral;
}
|
||||
|
||||
/**
 * TTS backend that renders speech with the Windows `System.Speech` synthesizer
 * (driven through a `powershell` child process) and exposes the result as an
 * FFmpeg stream producing raw 48 kHz, 2-channel, signed 16-bit PCM.
 *
 * Each `preparePlayback` call writes a temporary WAV file in the OS temp
 * directory; the file is removed when the returned `dispose` runs, when the
 * abort signal fires, or when synthesis fails.
 */
export class WindowsSystemTtsService implements TtsService {
  /**
   * @param speed Speed multiplier, converted to a synthesizer rate with
   *   `toSpeechRate` on every request.
   */
  constructor(private readonly speed: number) {
    // Publish the resolved FFmpeg binary through FFMPEG_PATH without overriding
    // a value that is already set (presumably read by prism-media; confirm).
    const resolvedFfmpegPath = resolveFfmpegPath();
    if (resolvedFfmpegPath && !process.env.FFMPEG_PATH) {
      process.env.FFMPEG_PATH = resolvedFfmpegPath;
    }
  }

  /** No-op: this backend has nothing to preload. */
  async warmup(): Promise<void> {
    return;
  }

  /**
   * Synthesizes `text` to a temporary WAV file and returns an FFmpeg stream
   * that transcodes it to raw PCM.
   *
   * @param text Text to speak.
   * @param signal Optional abort signal. Aborting kills the PowerShell process
   *   while synthesis is running, or tears down the stream and temp file once
   *   playback has been prepared.
   * @returns The PCM stream plus an idempotent `dispose` that releases the
   *   stream and deletes the temp file.
   * @throws Error `"tts aborted"` when the signal is (or becomes) aborted, or
   *   an error carrying PowerShell's stderr / exit code when synthesis fails.
   */
  async preparePlayback(text: string, signal?: AbortSignal): Promise<PreparedSpeechAudio> {
    // An already-aborted signal never dispatches "abort" again, so bail out
    // here instead of running a full synthesis that would be discarded.
    if (signal?.aborted) {
      throw new Error("tts aborted");
    }

    // Date.now() alone collides when two requests start in the same
    // millisecond; add the pid and a random suffix to keep the paths distinct.
    const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
    const tempPath = path.join(os.tmpdir(), `realtime-voice-bot-tts-${uniqueSuffix}.wav`);

    try {
      await this.synthesizeToFile(text, tempPath, signal);
      // Covers an abort that lands after the child closed but before we resume.
      if (signal?.aborted) {
        throw new Error("tts aborted");
      }
    } catch (error) {
      await unlink(tempPath).catch(() => null);
      throw error;
    }

    // NOTE(review): this read stream is never piped anywhere; FFmpeg opens
    // `tempPath` itself via `-i`. It is kept to preserve the existing resource
    // handling, but looks like a candidate for removal.
    const input = createReadStream(tempPath);
    // Nothing consumes `input`, so an "error" event without a listener would
    // surface as an uncaught exception; failures reading the file are reported
    // by FFmpeg instead.
    input.on("error", () => undefined);

    const ffmpeg = new prism.FFmpeg({
      args: [
        "-analyzeduration",
        "0",
        "-loglevel",
        "0",
        "-i",
        tempPath,
        "-f",
        "s16le",
        "-ar",
        "48000",
        "-ac",
        "2",
        "pipe:1",
      ],
    });

    let released = false;
    // Shared by the abort listener and `dispose`; safe to call more than once.
    const release = (): void => {
      if (released) {
        return;
      }
      released = true;
      // Detach so a long-lived signal does not retain this playback's closure.
      signal?.removeEventListener("abort", release);
      input.destroy();
      ffmpeg.destroy();
      void unlink(tempPath).catch(() => null);
    };

    signal?.addEventListener("abort", release, { once: true });

    return {
      stream: ffmpeg,
      dispose: release,
    };
  }

  /** No-op: there are no long-lived resources to release. */
  async destroy(): Promise<void> {
    return;
  }

  /**
   * Runs PowerShell to speak `text` into the WAV file at `outputPath`.
   *
   * The script selects the first installed voice whose culture name matches
   * `ko*` when one exists; otherwise the synthesizer's current voice is used.
   * Resolves when the process closes with exit code 0 and rejects otherwise.
   */
  private synthesizeToFile(text: string, outputPath: string, signal?: AbortSignal): Promise<void> {
    const rate = toSpeechRate(this.speed);
    const script = [
      "Add-Type -AssemblyName System.Speech;",
      "$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer;",
      "$koVoice = $synth.GetInstalledVoices() | Where-Object { $_.VoiceInfo.Culture.Name -like 'ko*' } | Select-Object -First 1;",
      "if ($koVoice) { $synth.SelectVoice($koVoice.VoiceInfo.Name) }",
      `$synth.Rate = ${rate};`,
      `$synth.SetOutputToWaveFile('${escapePowerShellSingleQuoted(outputPath)}');`,
      `$synth.Speak('${escapePowerShellSingleQuoted(text)}');`,
      "$synth.Dispose();",
    ].join(" ");

    return new Promise<void>((resolve, reject) => {
      const child = spawn("powershell", ["-NoProfile", "-Command", script], {
        stdio: ["ignore", "ignore", "pipe"],
      });

      let stderr = "";
      child.stderr.on("data", (chunk: Buffer) => {
        stderr += chunk.toString();
      });

      const killOnAbort = (): void => {
        if (!child.killed) {
          child.kill("SIGKILL");
        }
      };
      signal?.addEventListener("abort", killOnAbort, { once: true });

      let settled = false;
      // "error" and "close" can both fire for one process; settle only once
      // and always detach the abort listener so it cannot pile up on a signal
      // that is reused across requests.
      const settle = (failure?: Error): void => {
        if (settled) {
          return;
        }
        settled = true;
        signal?.removeEventListener("abort", killOnAbort);
        if (failure) {
          reject(failure);
        } else {
          resolve();
        }
      };

      child.on("error", (error) => {
        settle(error);
      });

      // "close" fires after the stdio streams have ended, so `stderr` is
      // complete here; "exit" can fire while stderr data is still pending.
      child.on("close", (code) => {
        if (signal?.aborted) {
          settle(new Error("tts aborted"));
          return;
        }
        if (code === 0) {
          settle();
          return;
        }
        settle(new Error(stderr.trim() || `powershell tts exited with code ${code ?? "null"}`));
      });
    });
  }
}
|
||||
Reference in New Issue
Block a user