Add local MeloTTS support
This commit is contained in:
113
src/services/melo-tts.ts
Normal file
113
src/services/melo-tts.ts
Normal file
@@ -0,0 +1,113 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { mkdir, rm } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
|
||||
import type { AppConfig } from "../config.js";
|
||||
import type { Logger } from "../logger.js";
|
||||
import { playWavFile } from "./audio-playback.js";
|
||||
|
||||
/**
 * Runs an external command and resolves once it exits with code 0.
 *
 * The child's stdin is always ignored and its stderr is always forwarded to
 * the parent process; only the handling of stdout is configurable.
 *
 * @param command - Executable to launch.
 * @param args - Arguments handed to the executable as-is.
 * @param stdio - How the child's stdout is handled; defaults to discarding it.
 * @returns A promise that resolves when the process exits successfully.
 * @throws Error when the process cannot be spawned, exits with a non-zero
 *   code, or is terminated by a signal.
 */
async function run(command: string, args: string[], stdio: "ignore" | "inherit" = "ignore"): Promise<void> {
  await new Promise<void>((resolve, reject) => {
    const child = spawn(command, args, {
      stdio: ["ignore", stdio, "inherit"],
      windowsHide: true,
    });

    // Spawn failures (e.g. executable not found) surface through "error".
    child.on("error", reject);

    child.on("exit", (code, signal) => {
      if (code === 0) {
        resolve();
        return;
      }

      const invocation = `${command} ${args.join(" ")}`;
      // A process killed by a signal reports a null exit code; name the
      // signal instead of printing an uninformative "code null".
      const reason =
        code === null && signal !== null
          ? `was terminated by signal ${signal}`
          : `exited with code ${code ?? "null"}`;

      reject(new Error(`${invocation} ${reason}`));
    });
  });
}
|
||||
|
||||
/**
 * Text-to-speech service that synthesizes speech by running the configured
 * MeloTTS Docker image and plays the resulting WAV file via `playWavFile`.
 */
export class MeloTtsService {
  /**
   * Most recent Docker availability probe. It is kept after a success so that
   * repeated synthesis calls do not spawn extra `docker` processes, and it is
   * cleared when the probe fails so the next call checks again.
   */
  private dockerProbe: Promise<void> | undefined;

  constructor(
    private readonly config: AppConfig,
    private readonly logger: Logger,
  ) {}

  /**
   * Creates the cache and output directories and verifies that the `docker`
   * CLI runs and that the configured image can be inspected.
   *
   * Every explicit call performs a fresh Docker check.
   *
   * @throws Error when a directory cannot be created or a Docker check fails.
   */
  async warmup(): Promise<void> {
    await this.ensureDirectories();
    await this.startDockerProbe();
  }

  /**
   * Synthesizes `text` into a temporary WAV file, plays it, and removes the
   * file afterwards. Whitespace-only input is ignored.
   *
   * @param text - Text to speak; surrounding whitespace is trimmed first.
   * @throws Error when synthesis or playback fails.
   */
  async speak(text: string): Promise<void> {
    const trimmed = text.trim();
    if (!trimmed) {
      return;
    }

    const fileName = `tts-${Date.now()}-${randomUUID()}.wav`;
    const targetPath = path.resolve(process.cwd(), this.config.TTS_OUTPUT_DIR, fileName);

    try {
      await this.synthesizeToFile(trimmed, targetPath);
      await playWavFile(targetPath);
    } finally {
      // Best-effort cleanup: a failed delete must not mask the real outcome.
      await rm(targetPath, { force: true }).catch(() => undefined);
    }
  }

  /**
   * Runs the MeloTTS container to render `text` into `targetPath`.
   *
   * The directory containing `targetPath` is mounted at `/work/output` and
   * the configured cache directory at `/cache` inside the container.
   *
   * @param text - Text passed to the container's `--text` option.
   * @param targetPath - Destination WAV path on the host; a relative path is
   *   resolved against the current working directory.
   * @throws Error when Docker is unavailable or the container exits unsuccessfully.
   */
  async synthesizeToFile(text: string, targetPath: string): Promise<void> {
    await this.ensureDirectories();
    await this.ensureDocker();

    // The host side of a `-v` mount must be absolute, so resolve the target
    // before splitting it; this is a no-op for already-absolute paths.
    const absoluteTarget = path.resolve(targetPath);
    const outputDir = path.dirname(absoluteTarget);
    const cacheDir = path.resolve(process.cwd(), this.config.TTS_CACHE_DIR);
    const fileName = path.basename(absoluteTarget);

    await mkdir(outputDir, { recursive: true });

    const args = [
      "run",
      "--rm",
      "-v",
      `${outputDir}:/work/output`,
      "-v",
      `${cacheDir}:/cache`,
      "-e",
      "HF_HOME=/cache/huggingface",
      "-e",
      "HF_HUB_CACHE=/cache/huggingface/hub",
      "-e",
      "TRANSFORMERS_CACHE=/cache/transformers",
    ];

    // Docker options must precede the image name, so GPU access is requested here.
    if (this.config.TTS_DEVICE !== "cpu") {
      args.push("--gpus", "all");
    }

    // NOTE(review): `text` beginning with "-" might be parsed as an option by
    // the container's entrypoint — confirm against the image's CLI.
    args.push(
      this.config.TTS_IMAGE,
      "--text",
      text,
      "--output",
      `/work/output/${fileName}`,
      "--language",
      this.config.TTS_LANGUAGE,
      "--speaker",
      this.config.TTS_SPEAKER,
      "--speed",
      String(this.config.TTS_SPEED),
      "--device",
      this.config.TTS_DEVICE,
    );

    this.logger.info("Starting MeloTTS synthesis", {
      image: this.config.TTS_IMAGE,
      language: this.config.TTS_LANGUAGE,
      speaker: this.config.TTS_SPEAKER,
      speed: this.config.TTS_SPEED,
      device: this.config.TTS_DEVICE,
    });

    await run("docker", args, "inherit");
  }

  /** Creates the configured cache and output directories if they are missing. */
  private async ensureDirectories(): Promise<void> {
    await mkdir(path.resolve(process.cwd(), this.config.TTS_CACHE_DIR), { recursive: true });
    await mkdir(path.resolve(process.cwd(), this.config.TTS_OUTPUT_DIR), { recursive: true });
  }

  /** Reuses the remembered Docker probe, starting one if none is available. */
  private ensureDocker(): Promise<void> {
    return this.dockerProbe ?? this.startDockerProbe();
  }

  /** Starts a fresh Docker CLI and image check and remembers it for later synthesis calls. */
  private startDockerProbe(): Promise<void> {
    const probe = (async (): Promise<void> => {
      await run("docker", ["--version"]);
      await run("docker", ["image", "inspect", this.config.TTS_IMAGE]);
    })();

    this.dockerProbe = probe;
    // Forget a failed probe so the next call retries, unless a newer probe
    // has already replaced it. The caller still receives the rejection
    // through the returned promise.
    probe.catch(() => {
      if (this.dockerProbe === probe) {
        this.dockerProbe = undefined;
      }
    });

    return probe;
  }
}
|
||||
Reference in New Issue
Block a user