feat: scaffold realtime Korean voice assistant bot

2026-04-30 02:29:18 +09:00
commit 9dee708b64
15 changed files with 1574 additions and 0 deletions
--- a/src/services/elevenlabs-tts.ts
+++ b/src/services/elevenlabs-tts.ts
@@ -0,0 +1,83 @@
+import { Readable } from "node:stream";
+
+import ffmpegStatic from "ffmpeg-static";
+import prism from "prism-media";
+import { StreamType, createAudioResource, type AudioResource } from "@discordjs/voice";
+
+import type { AppConfig } from "../config.js";
+
+/**
+ * A synthesized speech clip that is ready to be played, bundled with the
+ * teardown hook for the streams that feed it.
+ */
+export interface PreparedSpeechPlayback {
+  /** Audio resource produced by `createAudioResource` for this clip. */
+  resource: AudioResource;
+  /**
+   * Releases the streams backing `resource`. As produced by
+   * `ElevenLabsTtsService.preparePlayback`, this destroys both the HTTP
+   * response stream and the FFmpeg stream.
+   */
+  dispose: () => void;
+}
+
+/**
+ * Synthesizes speech with the ElevenLabs streaming text-to-speech endpoint and
+ * exposes it as a `@discordjs/voice` audio resource by transcoding the MP3
+ * stream to raw PCM through FFmpeg.
+ */
+export class ElevenLabsTtsService {
+  /**
+   * @param config Application configuration. This class reads
+   *   `ELEVENLABS_VOICE_ID`, `ELEVENLABS_API_KEY`, `ELEVENLABS_TTS_MODEL` and
+   *   `BOT_DEFAULT_LANGUAGE` from it.
+   */
+  constructor(private readonly config: AppConfig) {
+    // Publish the bundled ffmpeg binary through FFMPEG_PATH, but never override
+    // a path the operator set explicitly.
+    // NOTE(review): presumably consumed by the FFmpeg wrapper used below — confirm.
+    const resolvedFfmpegPath = ffmpegStatic as unknown as string | null;
+    if (resolvedFfmpegPath && !process.env.FFMPEG_PATH) {
+      process.env.FFMPEG_PATH = resolvedFfmpegPath;
+    }
+  }
+
+  /**
+   * Requests streamed speech for `text` and wires it into a playable resource.
+   *
+   * @param text Text to synthesize.
+   * @param signal Optional abort signal passed to `fetch`. Aborting after this
+   *   method has resolved tears the FFmpeg stream down without raising an
+   *   error on the playback stream.
+   * @returns The audio resource plus a `dispose` callback that destroys the
+   *   download and FFmpeg streams.
+   * @throws Error when the HTTP request is not successful or has no body, and
+   *   rethrows any failure while creating the FFmpeg stream or audio resource
+   *   (after releasing the streams that were already opened).
+   */
+  async preparePlayback(text: string, signal?: AbortSignal): Promise<PreparedSpeechPlayback> {
+    const url = new URL(`https://api.elevenlabs.io/v1/text-to-speech/${this.config.ELEVENLABS_VOICE_ID}/stream`);
+    url.searchParams.set("output_format", "mp3_44100_128");
+    url.searchParams.set("enable_logging", "false");
+
+    const response = await fetch(url, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        "xi-api-key": this.config.ELEVENLABS_API_KEY,
+      },
+      body: JSON.stringify({
+        text,
+        model_id: this.config.ELEVENLABS_TTS_MODEL,
+        language_code: this.config.BOT_DEFAULT_LANGUAGE,
+        voice_settings: {
+          stability: 0.35,
+          similarity_boost: 0.75,
+          speed: 1.05,
+        },
+      }),
+      signal,
+    });
+
+    if (!response.ok) {
+      // The error payload is not used; cancel it so the connection is released
+      // instead of lingering until garbage collection.
+      await response.body?.cancel().catch(() => undefined);
+      throw new Error(`ElevenLabs TTS request failed with status ${response.status}`);
+    }
+    if (!response.body) {
+      throw new Error(`ElevenLabs TTS response had no body (status ${response.status})`);
+    }
+
+    // The fetch body is a web stream; the cast bridges the DOM and Node
+    // ReadableStream typings, which are not assignable to each other.
+    const input = Readable.fromWeb(response.body as never);
+
+    // Grows as more streams are opened so a failure part-way through setup
+    // releases exactly what has been created so far.
+    let dispose = (): void => {
+      input.destroy();
+    };
+
+    try {
+      const ffmpeg = this.createPcmTranscoder();
+      dispose = (): void => {
+        input.destroy();
+        ffmpeg.destroy();
+      };
+
+      // pipe() does not forward source errors, and an 'error' event without a
+      // listener is thrown as an uncaught exception. Route download failures
+      // into the FFmpeg stream so they surface on the playback stream; a
+      // caller-requested abort is an intentional stop, so it closes silently.
+      input.on("error", (error: Error) => {
+        ffmpeg.destroy(signal?.aborted ? undefined : error);
+      });
+      input.pipe(ffmpeg);
+
+      const resource = createAudioResource(ffmpeg, {
+        inputType: StreamType.Raw,
+      });
+
+      return { resource, dispose };
+    } catch (error: unknown) {
+      dispose();
+      throw error;
+    }
+  }
+
+  /**
+   * Creates the FFmpeg stream that reads encoded audio on stdin and writes
+   * signed 16-bit little-endian PCM at 48 kHz, 2 channels, on stdout.
+   */
+  private createPcmTranscoder() {
+    return new prism.FFmpeg({
+      args: [
+        "-analyzeduration",
+        "0",
+        "-loglevel",
+        "0",
+        "-i",
+        "pipe:0",
+        "-f",
+        "s16le",
+        "-ar",
+        "48000",
+        "-ac",
+        "2",
+        "pipe:1",
+      ],
+    });
+  }
+}