// Files
// realtime_voice_bot/src/services/melo-tts.ts
//
// 124 lines
// 3.6 KiB
// TypeScript

import { spawn } from "node:child_process";
import { randomUUID } from "node:crypto";
import { mkdir, rm } from "node:fs/promises";
import path from "node:path";
import type { AppConfig } from "../config.js";
import { resolveDockerCommand } from "../docker-runtime.js";
import type { Logger } from "../logger.js";
import { playWavFile } from "./audio-playback.js";
/**
 * Spawns `command` with `args` and resolves once the child exits with status 0.
 *
 * The child's stdin is never connected, its stdout follows the `stdio`
 * argument, and its stderr is always inherited from this process.
 *
 * @param command Executable name or path to launch.
 * @param args Arguments handed to the child process.
 * @param stdio Destination of the child's stdout; defaults to `"ignore"`.
 * @throws Error when the process cannot be started, exits with a non-zero
 *   status, or is terminated by a signal.
 */
async function run(command: string, args: string[], stdio: "ignore" | "inherit" = "ignore"): Promise<void> {
  await new Promise<void>((resolve, reject) => {
    const child = spawn(command, args, {
      stdio: ["ignore", stdio, "inherit"],
      windowsHide: true,
      // On Windows a non-absolute command is launched through a shell.
      // NOTE(review): Node's documentation warns that input containing shell
      // metacharacters must not be passed unsanitized when `shell` is enabled —
      // confirm that callers never forward untrusted text on this path.
      shell: process.platform === "win32" && !path.isAbsolute(command),
    });

    child.on("error", (error) => {
      // Translate "executable not found" for the bare `docker` command into an
      // actionable, user-facing message; every other spawn error passes through.
      if ((error as NodeJS.ErrnoException).code === "ENOENT" && command === "docker") {
        reject(new Error("Docker를 찾지 못했습니다. Docker Desktop을 설치하고 실행한 뒤 다시 시도하세요."));
        return;
      }
      reject(error);
    });

    child.on("exit", (code, signal) => {
      if (code === 0) {
        resolve();
        return;
      }
      const invocation = `${command} ${args.join(" ")}`;
      // `code` is null when the child was ended by a signal; name the signal
      // instead of reporting the uninformative "exited with code null".
      if (code === null && signal !== null) {
        reject(new Error(`${invocation} was terminated by signal ${signal}`));
        return;
      }
      reject(new Error(`${invocation} exited with code ${code ?? "null"}`));
    });
  });
}
/**
 * Text-to-speech service that synthesizes WAV files by running the configured
 * MeloTTS Docker image and plays the result through `playWavFile`.
 */
export class MeloTtsService {
  /**
   * Set once `warmup()` has completed successfully, so that synthesis does not
   * re-run the Docker CLI and image probes before every utterance. It stays
   * `false` after a failed warmup, which makes the next synthesis retry it.
   */
  private prerequisitesVerified = false;

  constructor(
    private readonly config: AppConfig,
    private readonly logger: Logger,
  ) {}

  /**
   * Creates the cache and output directories and verifies that the Docker
   * command runs and that the configured TTS image can be inspected.
   *
   * Always performs the full check when called directly.
   *
   * @throws Error when a directory cannot be created or a Docker probe fails.
   */
  async warmup(): Promise<void> {
    await mkdir(path.resolve(process.cwd(), this.config.TTS_CACHE_DIR), { recursive: true });
    await mkdir(path.resolve(process.cwd(), this.config.TTS_OUTPUT_DIR), { recursive: true });
    const docker = await resolveDockerCommand(this.config);
    await run(docker, ["--version"]);
    await run(docker, ["image", "inspect", this.config.TTS_IMAGE]);
    this.prerequisitesVerified = true;
  }

  /**
   * Synthesizes `text` into a uniquely named WAV file under `TTS_OUTPUT_DIR`,
   * plays it, and removes the file afterwards whether or not playback succeeded.
   * Text that is empty after trimming is a no-op.
   *
   * @param text Text to speak.
   */
  async speak(text: string): Promise<void> {
    const trimmed = text.trim();
    if (!trimmed) {
      return;
    }
    const fileName = `tts-${Date.now()}-${randomUUID()}.wav`;
    const targetPath = path.resolve(process.cwd(), this.config.TTS_OUTPUT_DIR, fileName);
    try {
      await this.synthesizeToFile(trimmed, targetPath);
      await playWavFile(targetPath);
    } finally {
      // Best-effort cleanup: a failed delete must not mask the real outcome.
      await rm(targetPath, { force: true }).catch(() => undefined);
    }
  }

  /**
   * Runs the TTS image once to write speech for `text` to `targetPath`.
   *
   * The directory containing `targetPath` is mounted at `/work/output` and the
   * cache directory at `/cache`. `--gpus all` is passed to Docker whenever
   * `TTS_DEVICE` is anything other than `"cpu"`.
   *
   * The Docker CLI and image probes run only until the first successful
   * warmup; if the image disappears afterwards, that is handled by
   * `docker run` itself rather than by the pre-check.
   *
   * @param text Text to synthesize, passed verbatim as the `--text` value.
   * @param targetPath Host path of the WAV file to produce.
   * @throws Error when warmup fails or the container exits unsuccessfully.
   */
  async synthesizeToFile(text: string, targetPath: string): Promise<void> {
    if (!this.prerequisitesVerified) {
      await this.warmup();
    }
    const outputDir = path.dirname(targetPath);
    const cacheDir = path.resolve(process.cwd(), this.config.TTS_CACHE_DIR);
    const fileName = path.basename(targetPath);
    // Both bind-mount sources are (re)created on every call, so skipping the
    // warmup probes never leaves a missing host directory behind.
    await mkdir(cacheDir, { recursive: true });
    await mkdir(outputDir, { recursive: true });

    const args = [
      "run",
      "--rm",
      "-v",
      `${outputDir}:/work/output`,
      "-v",
      `${cacheDir}:/cache`,
      "-e",
      "HF_HOME=/cache/huggingface",
      "-e",
      "HF_HUB_CACHE=/cache/huggingface/hub",
      "-e",
      "TRANSFORMERS_CACHE=/cache/transformers",
    ];
    if (this.config.TTS_DEVICE !== "cpu") {
      args.push("--gpus", "all");
    }
    // Everything after the image name is forwarded to the container's entry point.
    // NOTE(review): how the entry point parses a `--text` value that begins
    // with "-" is not visible from this file — confirm it is accepted.
    args.push(
      this.config.TTS_IMAGE,
      "--text",
      text,
      "--output",
      `/work/output/${fileName}`,
      "--language",
      this.config.TTS_LANGUAGE,
      "--speaker",
      this.config.TTS_SPEAKER,
      "--speed",
      String(this.config.TTS_SPEED),
      "--device",
      this.config.TTS_DEVICE,
    );

    this.logger.info("Starting MeloTTS synthesis", {
      image: this.config.TTS_IMAGE,
      language: this.config.TTS_LANGUAGE,
      speaker: this.config.TTS_SPEAKER,
      speed: this.config.TTS_SPEED,
      device: this.config.TTS_DEVICE,
    });

    const docker = await resolveDockerCommand(this.config);
    // The container's stdout is inherited so its output is visible to the operator.
    await run(docker, args, "inherit");
  }
}