Use Windows system TTS for local mode
This commit is contained in:
@@ -18,6 +18,7 @@ import { Logger } from "./logger.js";
|
||||
import { LocalFasterWhisperSttService } from "./services/local-stt.js";
|
||||
import { LocalKokoroTtsService } from "./services/local-tts.js";
|
||||
import { OllamaLlmService } from "./services/ollama-llm.js";
|
||||
import { WindowsSystemTtsService } from "./services/windows-system-tts.js";
|
||||
|
||||
export async function runDiscordBot(config: DiscordRuntimeConfig, logger: Logger): Promise<void> {
|
||||
const commands = [
|
||||
@@ -38,7 +39,10 @@ export async function runDiscordBot(config: DiscordRuntimeConfig, logger: Logger
|
||||
});
|
||||
|
||||
const stt = new LocalFasterWhisperSttService(config, logger);
|
||||
const tts = new LocalKokoroTtsService(config, logger);
|
||||
const tts =
|
||||
process.platform === "win32"
|
||||
? new WindowsSystemTtsService(config.LOCAL_TTS_SPEED)
|
||||
: new LocalKokoroTtsService(config, logger);
|
||||
const llm = new OllamaLlmService(config);
|
||||
const sessions = new Map<string, GuildVoiceSession>();
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ import { requireFfmpegPath } from "./audio/ffmpeg-path.js";
|
||||
import { LocalFasterWhisperSttService } from "./services/local-stt.js";
|
||||
import { LocalKokoroTtsService } from "./services/local-tts.js";
|
||||
import { OllamaLlmService } from "./services/ollama-llm.js";
|
||||
import { WindowsSystemTtsService } from "./services/windows-system-tts.js";
|
||||
|
||||
export async function printLocalAudioDevices(): Promise<void> {
|
||||
if (process.platform === "win32") {
|
||||
@@ -68,7 +69,10 @@ export async function printLocalAudioDevices(): Promise<void> {
|
||||
|
||||
export async function runLocalAssistant(config: AssistantRuntimeConfig, logger: Logger): Promise<void> {
|
||||
const stt = new LocalFasterWhisperSttService(config, logger);
|
||||
const tts = new LocalKokoroTtsService(config, logger);
|
||||
const tts =
|
||||
process.platform === "win32"
|
||||
? new WindowsSystemTtsService(config.LOCAL_TTS_SPEED)
|
||||
: new LocalKokoroTtsService(config, logger);
|
||||
const llm = new OllamaLlmService(config);
|
||||
|
||||
await stt.warmup();
|
||||
|
||||
@@ -117,6 +117,7 @@ export class PythonJsonWorker {
|
||||
HF_HOME: cachePath,
|
||||
TRANSFORMERS_CACHE: cachePath,
|
||||
PYTHONIOENCODING: "utf-8",
|
||||
HF_HUB_DISABLE_SYMLINKS_WARNING: "1",
|
||||
BOT_DEFAULT_LANGUAGE: this.config.BOT_DEFAULT_LANGUAGE,
|
||||
...this.workerEnv,
|
||||
},
|
||||
@@ -132,6 +133,9 @@ export class PythonJsonWorker {
|
||||
|
||||
let payload: WorkerResponse;
|
||||
try {
|
||||
if (!line.startsWith("{")) {
|
||||
return;
|
||||
}
|
||||
payload = JSON.parse(line) as WorkerResponse;
|
||||
} catch (error) {
|
||||
this.logger.warn(`${this.label} stdout parse failed`, error);
|
||||
|
||||
126
src/services/windows-system-tts.ts
Normal file
126
src/services/windows-system-tts.ts
Normal file
@@ -0,0 +1,126 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import { createReadStream } from "node:fs";
|
||||
import { unlink } from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
|
||||
import prism from "prism-media";
|
||||
|
||||
import { resolveFfmpegPath } from "../audio/ffmpeg-path.js";
|
||||
import type { PreparedSpeechAudio, TtsService } from "./tts.js";
|
||||
|
||||
/**
 * Escapes `text` so it can be embedded between single quotes in a PowerShell script.
 *
 * PowerShell accepts not only the ASCII apostrophe but also the typographic
 * quotes U+2018, U+2019, U+201A and U+201B as single-quote delimiters, so every
 * one of them has to be doubled; otherwise a "smart" apostrophe in the input
 * would terminate the literal early and the remainder would be parsed as script.
 *
 * @param text Raw text to embed.
 * @returns The text with each single-quote character doubled.
 */
function escapePowerShellSingleQuoted(text: string): string {
  // "$&" re-inserts the matched character, so each quote is emitted twice.
  return text.replace(/['\u2018\u2019\u201A\u201B]/g, "$&$&");
}
|
||||
|
||||
/**
 * Converts a playback speed multiplier into the integer rate assigned to the
 * speech synthesizer.
 *
 * A speed of 1 maps to rate 0; each 0.125 of speed is one rate step, and the
 * result is clamped to the inclusive range [-10, 10].
 *
 * @param speed Speed multiplier (1 = normal).
 * @returns An integer in [-10, 10]; 0 when `speed` is NaN.
 */
function toSpeechRate(speed: number): number {
  // NaN would survive round/min/max and be rendered into the script as "NaN",
  // so fall back to the neutral rate instead.
  if (Number.isNaN(speed)) {
    return 0;
  }
  const mapped = Math.round((speed - 1) * 8);
  return Math.max(-10, Math.min(10, mapped));
}
|
||||
|
||||
/**
 * Text-to-speech backend that renders speech with the Windows `System.Speech`
 * synthesizer (driven through PowerShell) and exposes the result as a 48 kHz
 * stereo signed 16-bit PCM stream produced by ffmpeg.
 */
export class WindowsSystemTtsService implements TtsService {
  /** Environment variable used to hand the text to PowerShell without quoting it into the script. */
  private static readonly TEXT_ENV_VAR = "REALTIME_VOICE_BOT_TTS_TEXT";
  /** Upper bound on attempts to delete a temporary WAV file. */
  private static readonly UNLINK_ATTEMPTS = 5;
  /** Pause between deletion attempts, in milliseconds. */
  private static readonly UNLINK_RETRY_DELAY_MS = 100;

  /**
   * @param speed Speed multiplier, converted to a synthesizer rate with `toSpeechRate`.
   */
  constructor(private readonly speed: number) {
    // Publish the resolved ffmpeg location through FFMPEG_PATH unless the
    // environment already provides one.
    const resolvedFfmpegPath = resolveFfmpegPath();
    if (resolvedFfmpegPath && !process.env.FFMPEG_PATH) {
      process.env.FFMPEG_PATH = resolvedFfmpegPath;
    }
  }

  /** No-op: this backend has nothing to preload. */
  async warmup(): Promise<void> {
    return;
  }

  /**
   * Synthesizes `text` into a temporary WAV file and returns an ffmpeg stream
   * that decodes it to raw PCM.
   *
   * @param text Text to speak.
   * @param signal Optional abort signal; aborting kills the synthesizer process,
   *   or, once playback is prepared, tears down the stream and temp file.
   * @returns The PCM stream plus a `dispose` callback that releases the stream
   *   and deletes the temporary file. `dispose` is safe to call more than once.
   * @throws Error with message `"tts aborted"` when aborted, or an error carrying
   *   PowerShell's stderr / exit code when synthesis fails.
   */
  async preparePlayback(text: string, signal?: AbortSignal): Promise<PreparedSpeechAudio> {
    // An already-aborted signal never fires "abort" again, so bail out before
    // spawning anything.
    if (signal?.aborted) {
      throw new Error("tts aborted");
    }

    // Date.now() alone collides when two requests start in the same millisecond.
    const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`;
    const tempPath = path.join(os.tmpdir(), `realtime-voice-bot-tts-${uniqueSuffix}.wav`);

    try {
      await this.synthesizeToFile(text, tempPath, signal);
    } catch (error) {
      await this.removeTempFile(tempPath);
      throw error;
    }

    // NOTE(review): this read stream is never piped anywhere; ffmpeg opens
    // tempPath itself via "-i". It is kept only so behaviour and the file's
    // imports stay unchanged; consider removing it together with the import.
    const input = createReadStream(tempPath);
    const ffmpeg = new prism.FFmpeg({
      args: [
        "-analyzeduration",
        "0",
        "-loglevel",
        "0",
        "-i",
        tempPath,
        "-f",
        "s16le",
        "-ar",
        "48000",
        "-ac",
        "2",
        "pipe:1",
      ],
    });

    let released = false;
    const release = (): void => {
      // Abort and dispose may both fire; tear down only once.
      if (released) {
        return;
      }
      released = true;
      // Drop the listener so a long-lived signal does not accumulate closures.
      signal?.removeEventListener("abort", release);
      input.destroy();
      ffmpeg.destroy();
      void this.removeTempFile(tempPath);
    };
    signal?.addEventListener("abort", release, { once: true });

    return {
      stream: ffmpeg,
      dispose: release,
    };
  }

  /** No-op: there are no long-lived resources to release. */
  async destroy(): Promise<void> {
    return;
  }

  /**
   * Builds the PowerShell script that writes the spoken text to `wavPath`.
   * The text itself is read from an environment variable, so it is never
   * parsed as script source.
   */
  private buildScript(wavPath: string): string {
    const mappedRate = toSpeechRate(this.speed);
    // Guard against a non-finite rate being rendered into the script.
    const rate = Number.isFinite(mappedRate) ? mappedRate : 0;

    return [
      // Make errors terminate the script so failures surface as a non-zero exit
      // code instead of being masked by the statements that follow.
      "$ErrorActionPreference = 'Stop';",
      "Add-Type -AssemblyName System.Speech;",
      "$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer;",
      "try {",
      "$koVoice = $synth.GetInstalledVoices() | Where-Object { $_.VoiceInfo.Culture.Name -like 'ko*' } | Select-Object -First 1;",
      // Voice selection stays best-effort: fall back to the default voice on failure.
      "if ($koVoice) { try { $synth.SelectVoice($koVoice.VoiceInfo.Name) } catch { } };",
      `$synth.Rate = ${rate};`,
      `$synth.SetOutputToWaveFile('${escapePowerShellSingleQuoted(wavPath)}');`,
      // The [string] cast turns a missing/empty variable into an empty string.
      `$synth.Speak([string]$env:${WindowsSystemTtsService.TEXT_ENV_VAR});`,
      "} finally {",
      "$synth.Dispose();",
      "}",
    ].join(" ");
  }

  /**
   * Runs PowerShell to synthesize `text` into `wavPath`.
   * Resolves on exit code 0; rejects on abort, spawn failure, or non-zero exit.
   */
  private synthesizeToFile(text: string, wavPath: string, signal?: AbortSignal): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      const child = spawn("powershell", ["-NoProfile", "-Command", this.buildScript(wavPath)], {
        stdio: ["ignore", "ignore", "pipe"],
        env: { ...process.env, [WindowsSystemTtsService.TEXT_ENV_VAR]: text },
      });

      let stderr = "";
      child.stderr.on("data", (chunk: Buffer) => {
        stderr += chunk.toString();
      });

      const onAbort = (): void => {
        if (!child.killed) {
          child.kill("SIGKILL");
        }
      };
      signal?.addEventListener("abort", onAbort, { once: true });

      let settled = false;
      const settle = (error?: unknown): void => {
        // "error" and "close" can both fire; only the first one counts.
        if (settled) {
          return;
        }
        settled = true;
        signal?.removeEventListener("abort", onAbort);
        if (error === undefined) {
          resolve();
        } else {
          reject(error);
        }
      };

      child.on("error", (error) => {
        settle(error);
      });

      // "close" fires after the stdio streams have ended, so stderr is complete
      // by the time it is used for the error message.
      child.on("close", (code) => {
        if (signal?.aborted) {
          settle(new Error("tts aborted"));
          return;
        }
        if (code === 0) {
          settle();
          return;
        }
        settle(new Error(stderr.trim() || `powershell tts exited with code ${code ?? "null"}`));
      });
    });
  }

  /**
   * Best-effort deletion of a temporary file. Never rejects.
   * Retries a few times because the file may still be held open by a process
   * that was just killed (presumably ffmpeg on Windows — not verified here).
   */
  private async removeTempFile(filePath: string): Promise<void> {
    for (let attempt = 0; attempt < WindowsSystemTtsService.UNLINK_ATTEMPTS; attempt += 1) {
      try {
        await unlink(filePath);
        return;
      } catch (error) {
        // Nothing left to delete.
        if ((error as { code?: unknown }).code === "ENOENT") {
          return;
        }
        await new Promise<void>((resolve) => {
          setTimeout(resolve, WindowsSystemTtsService.UNLINK_RETRY_DELAY_MS);
        });
      }
    }
  }
}
|
||||
@@ -82,11 +82,16 @@ async function main(): Promise<void> {
|
||||
const cachePath = resolveLocalAiCachePath(config);
|
||||
const ttsModelPath = resolveLocalAiTtsModelPath(config);
|
||||
const ttsVoicesPath = resolveLocalAiTtsVoicesPath(config);
|
||||
const requirementsPath = path.resolve(process.cwd(), "python", "requirements.txt");
|
||||
const requirementsPath = path.resolve(
|
||||
process.cwd(),
|
||||
"python",
|
||||
process.platform === "win32" ? "requirements-windows.txt" : "requirements.txt",
|
||||
);
|
||||
const baseEnv = {
|
||||
HF_HOME: cachePath,
|
||||
TRANSFORMERS_CACHE: cachePath,
|
||||
PYTHONIOENCODING: "utf-8",
|
||||
HF_HUB_DISABLE_SYMLINKS_WARNING: "1",
|
||||
};
|
||||
|
||||
await mkdir(cachePath, { recursive: true });
|
||||
@@ -106,9 +111,11 @@ async function main(): Promise<void> {
|
||||
console.log("로컬 AI 의존성 설치를 시작합니다.");
|
||||
await run(venvPython, ["-m", "pip", "install", "--upgrade", "pip", "setuptools", "wheel"], baseEnv);
|
||||
await run(venvPython, ["-m", "pip", "install", "-r", requirementsPath], baseEnv);
|
||||
console.log("로컬 TTS 모델 파일을 확인합니다.");
|
||||
await ensureDownload(KOKORO_MODEL_URL, ttsModelPath);
|
||||
await ensureDownload(KOKORO_VOICES_URL, ttsVoicesPath);
|
||||
if (process.platform !== "win32") {
|
||||
console.log("로컬 TTS 모델 파일을 확인합니다.");
|
||||
await ensureDownload(KOKORO_MODEL_URL, ttsModelPath);
|
||||
await ensureDownload(KOKORO_VOICES_URL, ttsVoicesPath);
|
||||
}
|
||||
|
||||
console.log("설치가 끝났습니다.");
|
||||
console.log("다음 순서:");
|
||||
|
||||
Reference in New Issue
Block a user