Replace ElevenLabs with local STT and TTS
This commit is contained in:
94
src/services/local-tts.ts
Normal file
94
src/services/local-tts.ts
Normal file
@@ -0,0 +1,94 @@
|
||||
import { Readable } from "node:stream";
|
||||
|
||||
import prism from "prism-media";
|
||||
|
||||
import type { AssistantRuntimeConfig } from "../config.js";
|
||||
import type { Logger } from "../logger.js";
|
||||
import { resolveFfmpegPath } from "../audio/ffmpeg-path.js";
|
||||
import { PythonJsonWorker } from "./python-json-worker.js";
|
||||
import type { PreparedSpeechAudio, TtsService } from "./tts.js";
|
||||
|
||||
/**
 * Shape of the worker's reply to a "synthesize" request.
 */
interface SynthesizeResult {
  /**
   * Base64-encoded audio payload (WAV, judging by the field name).
   * Optional: the consumer treats a missing or empty value as an error.
   */
  wav_base64?: string;
}
|
||||
|
||||
/**
 * TTS service that delegates synthesis to a Python worker script
 * ("local_tts_worker.py") and transcodes the returned audio with ffmpeg
 * into raw PCM suitable for streaming playback.
 */
export class LocalMeloTtsService implements TtsService {
  private readonly worker: PythonJsonWorker;

  /**
   * Creates the service and its backing Python worker.
   *
   * Side effect: when an ffmpeg path can be resolved and `FFMPEG_PATH` is not
   * already set in the environment, it is exported to `process.env` (an
   * existing value is never overwritten).
   *
   * @param config Runtime configuration; the `LOCAL_TTS_*` values are passed
   *   to the worker as string environment entries.
   * @param logger Logger handed to the worker.
   */
  constructor(config: AssistantRuntimeConfig, logger: Logger) {
    const resolvedFfmpegPath = resolveFfmpegPath();
    if (resolvedFfmpegPath && !process.env.FFMPEG_PATH) {
      process.env.FFMPEG_PATH = resolvedFfmpegPath;
    }

    this.worker = new PythonJsonWorker(config, logger, "local_tts_worker.py", "local-tts", {
      LOCAL_TTS_LANGUAGE: config.LOCAL_TTS_LANGUAGE,
      LOCAL_TTS_SPEAKER: config.LOCAL_TTS_SPEAKER,
      LOCAL_TTS_DEVICE: config.LOCAL_TTS_DEVICE,
      LOCAL_TTS_SPEED: String(config.LOCAL_TTS_SPEED),
    });
  }

  /**
   * Sends a "ping" request to the worker and resolves once it answers.
   */
  async warmup(): Promise<void> {
    await this.worker.request("ping", {});
  }

  /**
   * Synthesizes `text` through the worker and returns a PCM stream for it.
   *
   * @param text Text to synthesize.
   * @param signal Optional abort signal. It is forwarded to the worker
   *   request, and aborting it later tears down the returned stream.
   * @returns The ffmpeg output stream plus a `dispose` callback that destroys
   *   both the input and the ffmpeg stream. `dispose` is safe to call more
   *   than once.
   * @throws Error when the worker returns no audio, or audio that decodes to
   *   zero bytes.
   * @throws Error named "AbortError" when `signal` is already aborted by the
   *   time the worker has answered.
   */
  async preparePlayback(text: string, signal?: AbortSignal): Promise<PreparedSpeechAudio> {
    const result = await this.worker.request<SynthesizeResult>("synthesize", { text }, signal);

    // Decode up front so that a payload which is non-empty as a string but
    // decodes to nothing is rejected like a missing payload.
    const wavBytes = result.wav_base64 ? Buffer.from(result.wav_base64, "base64") : undefined;
    if (!wavBytes || wavBytes.length === 0) {
      throw new Error("로컬 TTS가 빈 오디오를 반환했습니다.");
    }

    // An "abort" listener attached to an already-aborted signal never fires,
    // so bail out before spawning ffmpeg rather than starting a process that
    // nothing would tear down.
    // NOTE(review): assumes callers passing a signal already handle an abort
    // rejection from this method — confirm against the call sites.
    if (signal?.aborted) {
      const abortError = new Error("The operation was aborted");
      abortError.name = "AbortError";
      throw abortError;
    }

    const input = Readable.from([wavBytes]);
    const ffmpeg = new prism.FFmpeg({
      args: [
        "-analyzeduration",
        "0",
        // Silence ffmpeg's own logging.
        "-loglevel",
        "0",
        // Read the encoded audio from stdin...
        "-i",
        "pipe:0",
        // ...and write raw signed 16-bit little-endian PCM, 48 kHz, 2 channels, to stdout.
        "-f",
        "s16le",
        "-ar",
        "48000",
        "-ac",
        "2",
        "pipe:1",
      ],
    });

    let disposed = false;
    const dispose = (): void => {
      if (disposed) {
        return;
      }
      disposed = true;
      // Detach from the signal so a long-lived signal does not keep the
      // streams reachable after playback has finished.
      signal?.removeEventListener("abort", dispose);
      input.destroy();
      ffmpeg.destroy();
    };

    signal?.addEventListener("abort", dispose, { once: true });

    input.pipe(ffmpeg);

    return {
      stream: ffmpeg,
      dispose,
    };
  }

  /**
   * Shuts down the backing worker.
   */
  async destroy(): Promise<void> {
    await this.worker.destroy();
  }
}
|
||||
Reference in New Issue
Block a user