feat: scaffold realtime Korean voice assistant bot
This commit is contained in:
83
src/services/elevenlabs-tts.ts
Normal file
83
src/services/elevenlabs-tts.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
import { Readable } from "node:stream";
|
||||
|
||||
import ffmpegStatic from "ffmpeg-static";
|
||||
import prism from "prism-media";
|
||||
import { StreamType, createAudioResource, type AudioResource } from "@discordjs/voice";
|
||||
|
||||
import type { AppConfig } from "../config.js";
|
||||
|
||||
/**
 * A synthesized speech clip that is ready to be played, bundled with the
 * hook that releases it.
 */
export interface PreparedSpeechPlayback {
  /** Audio resource wrapping the synthesized speech. */
  resource: AudioResource;

  /**
   * Releases whatever backs `resource`. Callers are expected to invoke this
   * once the clip is finished or abandoned.
   */
  dispose: () => void;
}
|
||||
|
||||
/**
 * Text-to-speech service backed by the ElevenLabs streaming endpoint.
 *
 * The MP3 stream returned by the API is piped through ffmpeg and exposed as a
 * raw-PCM `AudioResource`.
 */
export class ElevenLabsTtsService {
  /**
   * @param config Application configuration; this class reads the ElevenLabs
   *   voice id, API key, TTS model and the bot's default language from it.
   */
  constructor(private readonly config: AppConfig) {
    // Publish the bundled ffmpeg binary through FFMPEG_PATH unless the
    // environment already provides one. The runtime `typeof` guard replaces a
    // blind double assertion so a non-string default export (null, or an
    // interop wrapper object) is never written into the environment.
    // NOTE(review): presumably prism-media consults FFMPEG_PATH when locating
    // the executable — confirm for the installed version.
    const bundledFfmpeg: unknown = ffmpegStatic;
    if (typeof bundledFfmpeg === "string" && bundledFfmpeg && !process.env.FFMPEG_PATH) {
      process.env.FFMPEG_PATH = bundledFfmpeg;
    }
  }

  /**
   * Requests streamed speech for `text` and wraps it in a playable resource.
   *
   * @param text Text to synthesize.
   * @param signal Optional abort signal forwarded to the HTTP request.
   * @returns The audio resource plus a `dispose` hook that destroys the HTTP
   *   body stream and the ffmpeg transcoder.
   * @throws Error when the response status is not OK or the response has no
   *   body. Rejections from `fetch` itself (including aborts) propagate as-is.
   */
  async preparePlayback(text: string, signal?: AbortSignal): Promise<PreparedSpeechPlayback> {
    const url = new URL(`https://api.elevenlabs.io/v1/text-to-speech/${this.config.ELEVENLABS_VOICE_ID}/stream`);
    url.searchParams.set("output_format", "mp3_44100_128");
    url.searchParams.set("enable_logging", "false");

    const response = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "xi-api-key": this.config.ELEVENLABS_API_KEY,
      },
      body: JSON.stringify({
        text,
        model_id: this.config.ELEVENLABS_TTS_MODEL,
        language_code: this.config.BOT_DEFAULT_LANGUAGE,
        voice_settings: {
          stability: 0.35,
          similarity_boost: 0.75,
          speed: 1.05,
        },
      }),
      signal,
    });

    if (!response.ok || !response.body) {
      // Release the unread error body so the connection is not held open
      // until garbage collection; a failure to cancel must not mask the
      // status error thrown below.
      await response.body?.cancel().catch(() => undefined);
      throw new Error(`ElevenLabs TTS request failed with status ${response.status}`);
    }

    // The cast bridges the fetch body's stream type and the stream type
    // expected by `Readable.fromWeb`.
    const input = Readable.fromWeb(response.body as never);

    // Transcode the MP3 input from stdin to 48 kHz, 2-channel signed 16-bit
    // little-endian PCM on stdout.
    const ffmpeg = new prism.FFmpeg({
      args: [
        "-analyzeduration",
        "0",
        "-loglevel",
        "0",
        "-i",
        "pipe:0",
        "-f",
        "s16le",
        "-ar",
        "48000",
        "-ac",
        "2",
        "pipe:1",
      ],
    });

    // `.pipe()` does not forward source errors. Without a listener, a
    // mid-stream abort or network failure would surface as an unhandled
    // 'error' event on `input` and leave ffmpeg waiting on stdin forever.
    // Destroying ffmpeg with the same error tears the transcoder down instead.
    input.on("error", (error: Error) => {
      ffmpeg.destroy(error);
    });

    input.pipe(ffmpeg);

    const resource = createAudioResource(ffmpeg, {
      inputType: StreamType.Raw,
    });

    return {
      resource,
      dispose: () => {
        input.destroy();
        ffmpeg.destroy();
      },
    };
  }
}
|
||||
Reference in New Issue
Block a user