Add Discord-native hybrid front-end for Jarvis (bot + bridge)
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact. - bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join + voice receive/playback, pluggable VNC screen broadcast (selfbot live / noVNC / screenshot) - bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API - .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md Language decision: hybrid (Python brain + Node/bun Discord layer) because Discord blocks bot video; native screen broadcast only works via a Node selfbot library.
This commit is contained in:
52
bot/src/bridge.ts
Normal file
52
bot/src/bridge.ts
Normal file
@@ -0,0 +1,52 @@
|
||||
/**
|
||||
* HTTP client for the Python brain bridge (bridge/server.py).
|
||||
* All AI work (STT, reply engine, TTS) lives behind these calls.
|
||||
*/
|
||||
import { config } from "./config.ts";
|
||||
|
||||
/** JSON body returned by the bridge's `/converse` endpoint for one voice turn. */
export interface ConverseResult {
  /** Text recognised from the uploaded audio. */
  transcript: string;
  /** The brain's textual answer. */
  reply: string;
  /** Language reported alongside the transcript, when the bridge provides one. */
  language?: string | null;
  /** Base64-encoded 16-bit PCM WAV of the spoken reply, or null if TTS is off. */
  audio_b64?: string | null;
  /** Error text reported by the bridge, if any. */
  error?: string | null;
}
|
||||
|
||||
/** JSON body returned by the bridge's `/text` endpoint for a text-only turn. */
export interface TextResult {
  /** The brain's textual answer. */
  reply: string;
  /** Base64-encoded audio of the reply, when the bridge includes one. */
  audio_b64?: string | null;
  /** Error text reported by the bridge, if any. */
  error?: string | null;
}
|
||||
|
||||
/**
 * Runs one full voice turn through the bridge: WAV in, transcript + reply
 * (+ optional reply audio) out.
 *
 * @param wav - WAV bytes of the captured utterance, sent as the request body.
 * @returns The parsed JSON body of the bridge response.
 * @throws Error when the bridge answers with a non-2xx status; the message
 *   carries the status code and response text.
 */
export async function converse(wav: Buffer): Promise<ConverseResult> {
  const endpoint = `${config.bridgeUrl}/converse`;
  const response = await fetch(endpoint, {
    method: "POST",
    headers: { "content-type": "audio/wav" },
    body: wav,
  });
  if (response.ok) {
    return (await response.json()) as ConverseResult;
  }
  const detail = await response.text();
  throw new Error(`bridge /converse ${response.status}: ${detail}`);
}
|
||||
|
||||
/**
 * Sends a text-only question to the bridge (used by `/자비스 ask`).
 *
 * @param text - The question, posted as `{ text }` JSON.
 * @returns The parsed JSON body of the bridge response.
 * @throws Error when the bridge answers with a non-2xx status; the message
 *   carries the status code and response text.
 */
export async function ask(text: string): Promise<TextResult> {
  const endpoint = `${config.bridgeUrl}/text`;
  const payload = JSON.stringify({ text });
  const response = await fetch(endpoint, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: payload,
  });
  if (response.ok) {
    return (await response.json()) as TextResult;
  }
  const detail = await response.text();
  throw new Error(`bridge /text ${response.status}: ${detail}`);
}
|
||||
|
||||
/**
 * Fetches the bridge's `/health` document.
 *
 * @returns The parsed JSON body. Its shape is defined by the bridge, so it is
 *   left untyped here; callers in this repo read `brain_ready` / `brain_error`.
 * @throws Error when the bridge answers with a non-2xx status (same message
 *   format as `converse` and `ask`), or when the body is not valid JSON.
 */
export async function health(): Promise<any> {
  const res = await fetch(`${config.bridgeUrl}/health`);
  // Without this check an HTTP error with a JSON body would be returned as if
  // it were a health payload.
  if (!res.ok) throw new Error(`bridge /health ${res.status}: ${await res.text()}`);
  return res.json();
}
|
||||
|
||||
/**
 * Decodes the base64 audio field of a bridge response.
 *
 * @param audio_b64 - Base64 text, or null/undefined/empty when no audio was sent.
 * @returns The decoded bytes, or null when there is nothing to decode.
 */
export function decodeWav(audio_b64?: string | null): Buffer | null {
  return audio_b64 ? Buffer.from(audio_b64, "base64") : null;
}
|
||||
55
bot/src/config.ts
Normal file
55
bot/src/config.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
/**
|
||||
* Centralised, typed configuration loaded from environment (.env at repo root).
|
||||
* Nothing else in the bot reads process.env directly.
|
||||
*/
|
||||
import "dotenv/config";
|
||||
|
||||
/**
 * Reads a mandatory environment variable.
 *
 * @param name - Environment variable name.
 * @returns The variable's value.
 * @throws Error when the variable is unset or empty.
 */
function req(name: string): string {
  const value = process.env[name];
  if (value) return value;
  throw new Error(`Missing required env var: ${name} (see .env.example)`);
}
|
||||
|
||||
/**
 * Reads an optional environment variable.
 *
 * @param name - Environment variable name.
 * @param fallback - Value used when the variable is not set at all.
 * @returns The variable's value (an explicitly empty value is returned as-is),
 *   or `fallback` when it is unset.
 */
function opt(name: string, fallback = ""): string {
  const value = process.env[name];
  return value === undefined ? fallback : value;
}
|
||||
|
||||
/** Screen-broadcast strategies selectable through STREAM_BACKEND. */
export type StreamBackend = "selfbot" | "novnc" | "screenshot" | "none";

/** Every accepted STREAM_BACKEND value; used to validate the raw env string. */
const STREAM_BACKENDS: readonly StreamBackend[] = ["selfbot", "novnc", "screenshot", "none"];

/** Narrows an arbitrary string to a known backend name. */
function isStreamBackend(value: string): value is StreamBackend {
  return (STREAM_BACKENDS as readonly string[]).includes(value);
}

/**
 * Reads a backend name from the environment. An unrecognised value is reported
 * and mapped to "none", which is what the streamer factory effectively did with
 * it anyway, so the exported type is now truthful.
 */
function streamBackendOpt(name: string, fallback: StreamBackend): StreamBackend {
  const raw = opt(name, fallback);
  if (isStreamBackend(raw)) return raw;
  console.warn(
    `[config] ${name}="${raw}" is not one of ${STREAM_BACKENDS.join(", ")}; screen sharing is disabled.`,
  );
  return "none";
}

/**
 * Reads a strictly positive integer from the environment. A blank value uses
 * `fallback` silently; anything that does not parse to a positive integer is
 * reported and replaced by `fallback`, so NaN never reaches timers or encoders.
 */
function positiveIntOpt(name: string, fallback: number): number {
  const raw = opt(name, String(fallback)).trim();
  if (raw === "") return fallback;
  const parsed = Number.parseInt(raw, 10);
  if (Number.isFinite(parsed) && parsed > 0) return parsed;
  console.warn(`[config] ${name}="${raw}" is not a positive integer; using ${fallback}.`);
  return fallback;
}

/** The bot's complete runtime configuration, resolved once at import time. */
export const config = {
  // --- Normal Discord bot (voice I/O, slash commands) ---
  botToken: req("DISCORD_BOT_TOKEN"),
  appId: req("DISCORD_APP_ID"),
  guildId: req("DISCORD_GUILD_ID"),

  // --- Python brain bridge ---
  bridgeUrl: opt("BRIDGE_URL", "http://127.0.0.1:8765"),

  // --- VNC screen broadcast ---
  // selfbot    = real live "Go Live" stream via a user (burner) account token
  // novnc      = post a noVNC web link the channel can open in a browser
  // screenshot = periodically upload VNC screenshots
  // none       = disable screen sharing
  streamBackend: streamBackendOpt("STREAM_BACKEND", "selfbot"),

  // x11grab source for the VNC display (TigerVNC runs the desktop on :1)
  vncDisplay: opt("VNC_DISPLAY", ":1"),
  vncResolution: opt("VNC_RESOLUTION", "1920x1080"),
  vncFramerate: positiveIntOpt("VNC_FRAMERATE", 30),
  vncBitrateKbps: positiveIntOpt("VNC_BITRATE_KBPS", 4000),

  // selfbot backend (ToS-risk; use a throwaway account token, never your main)
  selfbotToken: opt("DISCORD_SELFBOT_TOKEN"),

  // novnc backend
  novncUrl: opt("NOVNC_URL", ""),

  // screenshot backend: seconds between uploaded frames
  screenshotIntervalSec: positiveIntOpt("SCREENSHOT_INTERVAL_SEC", 5),

  // --- Voice behaviour ---
  // Silence (ms) after which a speaker's utterance is considered finished and
  // is forwarded to the brain.
  silenceMs: positiveIntOpt("VOICE_SILENCE_MS", 800),
};

/** Static type of `config`, for modules that receive it as a parameter. */
export type AppConfig = typeof config;
|
||||
148
bot/src/index.ts
Normal file
148
bot/src/index.ts
Normal file
@@ -0,0 +1,148 @@
|
||||
/**
|
||||
* Jarvis bot entry point.
|
||||
*
|
||||
* A normal Discord bot that:
|
||||
* - exposes /자비스 (join / leave / ask / stream / stop / status)
|
||||
* - replies to every slash command EPHEMERALLY (only the invoker sees it)
|
||||
* - joins the caller's voice channel for live voice conversation (brain in bridge/)
|
||||
* - broadcasts the VNC screen via a pluggable backend (selfbot / novnc / screenshot)
|
||||
*/
|
||||
import {
|
||||
Client,
|
||||
GatewayIntentBits,
|
||||
MessageFlags,
|
||||
type ChatInputCommandInteraction,
|
||||
type GuildMember,
|
||||
type TextBasedChannel,
|
||||
} from "discord.js";
|
||||
import { AttachmentBuilder } from "discord.js";
|
||||
import { config } from "./config.ts";
|
||||
import { ask, health } from "./bridge.ts";
|
||||
import { joinChannel, leaveGuild, getSession } from "./voice.ts";
|
||||
import { createStreamer, type ScreenStreamer, type StreamContext } from "./stream/index.ts";
|
||||
|
||||
/**
 * The bot's gateway client. It subscribes to guild events and to voice-state
 * events only.
 */
const client = new Client({
  intents: [
    GatewayIntentBits.Guilds,
    GatewayIntentBits.GuildVoiceStates,
  ],
});

/** Screen streamers created so far, keyed by guild id. */
const streamers: Map<string, ScreenStreamer> = new Map();
|
||||
|
||||
/**
 * Returns the guild's screen streamer, creating and caching one on first use.
 *
 * @param guildId - Guild whose streamer is wanted.
 * @returns The cached streamer, or a newly created one for the configured backend.
 */
async function getStreamer(guildId: string): Promise<ScreenStreamer> {
  const cached = streamers.get(guildId);
  if (cached) return cached;

  const created = await createStreamer(config);
  streamers.set(guildId, created);
  return created;
}
|
||||
|
||||
/** Reply options that make a response visible to the invoking user only. */
const eph = { flags: MessageFlags.Ephemeral } as const;

// Log once when the gateway session is up, including the active stream backend.
client.once("clientReady", () => {
  const tag = client.user?.tag;
  console.log(`✓ 로그인: ${tag} | stream backend: ${config.streamBackend}`);
});
|
||||
|
||||
/**
 * Slash-command router for /자비스.
 *
 * Dispatches each subcommand to its handler and turns any failure into an
 * ephemeral error message. Nothing is allowed to escape this listener: a
 * rejected promise here would surface as an unhandled rejection.
 */
client.on("interactionCreate", async (interaction) => {
  if (!interaction.isChatInputCommand()) return;
  if (interaction.commandName !== "자비스") return;
  const i = interaction as ChatInputCommandInteraction;
  // Non-throwing lookup: a missing subcommand falls through to `default` below
  // instead of throwing outside the try block.
  const sub = i.options.getSubcommand(false) ?? "";

  try {
    switch (sub) {
      case "join":
        await handleJoin(i);
        break;
      case "leave":
        await handleLeave(i);
        break;
      case "ask":
        await handleAsk(i);
        break;
      case "stream":
        await handleStream(i);
        break;
      case "stop":
        await handleStop(i);
        break;
      case "status":
        await handleStatus(i);
        break;
      default:
        // Always answer, otherwise Discord shows "application did not respond".
        await i.reply({ content: `알 수 없는 하위 명령입니다: ${sub}`, ...eph });
    }
  } catch (err) {
    console.error(`[/자비스 ${sub}]`, err);
    const detail = err instanceof Error ? err.message : String(err);
    // Same length budget as handleAsk, to stay under Discord's message limit.
    const msg = `오류: ${detail}`.slice(0, 1900);
    try {
      if (i.deferred || i.replied) await i.editReply(msg);
      else await i.reply({ content: msg, ...eph });
    } catch (replyErr) {
      // The interaction may have expired; log instead of rethrowing.
      console.error(`[/자비스 ${sub}] failed to report error:`, replyErr);
    }
  }
});
|
||||
|
||||
/**
 * `/자비스 join`: joins the invoker's current voice channel and starts listening.
 * Replies immediately with a hint when the invoker is not in a voice channel.
 */
async function handleJoin(i: ChatInputCommandInteraction) {
  const invoker = i.member as GuildMember;
  const channel = invoker?.voice?.channel;
  if (!channel) {
    return i.reply({ content: "먼저 음성 채널에 들어간 뒤 다시 호출해주세요.", ...eph });
  }

  // Joining can take a while, so acknowledge first and edit the reply afterwards.
  await i.deferReply(eph);
  const session = await joinChannel(channel);
  // Each completed turn is written to the console.
  session.onTurn = (turn) => {
    console.log(`🗣️ ${turn.transcript}\n🤖 ${turn.reply}`);
  };
  await i.editReply(`🎙️ '${channel.name}' 채널에 접속했습니다. 말씀하세요.`);
}
|
||||
|
||||
/** `/자비스 leave`: ends the guild's voice session, if one exists, and reports the outcome. */
async function handleLeave(i: ChatInputCommandInteraction) {
  const hadSession = leaveGuild(i.guildId!);
  let content: string;
  if (hadSession) {
    content = "음성 채널에서 나갔습니다.";
  } else {
    content = "접속 중인 세션이 없습니다.";
  }
  await i.reply({ content, ...eph });
}
|
||||
|
||||
/**
 * `/자비스 ask`: forwards the question text to the brain and shows the answer.
 * Falls back to the bridge's error text, then to a placeholder, when the reply
 * is empty. The output is cut to 1900 characters.
 */
async function handleAsk(i: ChatInputCommandInteraction) {
  const question = i.options.getString("질문", true);
  await i.deferReply(eph);

  const { reply, error } = await ask(question);
  const text = reply || error || "(응답 없음)";
  await i.editReply(text.slice(0, 1900));
}
|
||||
|
||||
/**
 * `/자비스 stream`: starts the configured screen-broadcast backend for this guild.
 * The selfbot backend additionally requires the invoker to be in a voice channel.
 */
async function handleStream(i: ChatInputCommandInteraction) {
  const invoker = i.member as GuildMember;
  await i.deferReply(eph);
  const streamer = await getStreamer(i.guildId!);

  // Lets a backend upload an image into the channel the command was used in.
  const postImage = async (png: Buffer, name: string): Promise<void> => {
    const target = i.channel as TextBasedChannel | null;
    if (!target || !("send" in target)) return;
    await (target as any).send({ files: [new AttachmentBuilder(png, { name })] });
  };

  const ctx: StreamContext = {
    guildId: i.guildId!,
    voiceChannelId: invoker?.voice?.channelId ?? "",
    postImage,
  };

  const needsVoiceChannel = config.streamBackend === "selfbot";
  if (needsVoiceChannel && !ctx.voiceChannelId) {
    return i.editReply("셀프봇 송출은 음성 채널 안에서 호출해야 합니다. 음성 채널에 들어간 뒤 다시 시도하세요.");
  }

  const status = await streamer.start(ctx);
  await i.editReply(status);
}
|
||||
|
||||
/** `/자비스 stop`: stops this guild's streamer, or reports that none was ever created. */
async function handleStop(i: ChatInputCommandInteraction) {
  const current = streamers.get(i.guildId!);
  if (current === undefined) {
    return i.reply({ content: "송출 중이 아닙니다.", ...eph });
  }

  await current.stop();
  await i.reply({ content: "송출을 중단했습니다.", ...eph });
}
|
||||
|
||||
/**
 * `/자비스 status`: reports bridge health, whether a voice session exists, and
 * the stream backend together with its activity state.
 */
async function handleStatus(i: ChatInputCommandInteraction) {
  await i.deferReply(eph);

  // Any failure while probing the bridge is reported as "unreachable".
  const probeBrain = async (): Promise<string> => {
    try {
      const h = await health();
      if (h.brain_ready) return "ready";
      const reason = h.brain_error ? " (" + h.brain_error + ")" : "";
      return `not-ready${reason}`;
    } catch {
      return "unreachable";
    }
  };
  const brain = await probeBrain();

  const guildId = i.guildId!;
  const sessionLabel = getSession(guildId) ? "접속 중" : "없음";
  const streamLabel = streamers.get(guildId)?.isActive() ? "활성" : "대기";

  const lines = [
    `브릿지 두뇌: ${brain}`,
    `음성 세션: ${sessionLabel}`,
    `송출 백엔드: ${config.streamBackend} (${streamLabel})`,
  ];
  await i.editReply(lines.join("\n"));
}
|
||||
|
||||
// Open the gateway connection; the promise is intentionally not awaited here.
void client.login(config.botToken);
|
||||
42
bot/src/register-commands.ts
Normal file
42
bot/src/register-commands.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
/**
|
||||
* Registers the /자비스 slash command (guild-scoped for instant availability).
|
||||
* Run once after changing the command shape: bun run register
|
||||
*/
|
||||
import { REST, Routes, SlashCommandBuilder } from "discord.js";
|
||||
import { config } from "./config.ts";
|
||||
|
||||
/**
 * Definition of the /자비스 command and its six subcommands
 * (join / leave / ask / stream / stop / status).
 */
export const jarvisCommand = new SlashCommandBuilder()
  .setName("자비스")
  .setDescription("자비스 음성 비서를 제어합니다")
  .addSubcommand((join) => {
    return join
      .setName("join")
      .setDescription("당신이 있는 음성 채널에 접속해 듣기 시작합니다");
  })
  .addSubcommand((leave) => {
    return leave.setName("leave").setDescription("음성 채널에서 나갑니다");
  })
  .addSubcommand((ask) => {
    return ask
      .setName("ask")
      .setDescription("텍스트로 자비스에게 질문합니다")
      .addStringOption((question) => {
        // The question text is mandatory.
        return question.setName("질문").setDescription("질문 내용").setRequired(true);
      });
  })
  .addSubcommand((stream) => {
    return stream.setName("stream").setDescription("VNC 화면을 디스코드에 송출합니다");
  })
  .addSubcommand((stop) => {
    return stop.setName("stop").setDescription("VNC 화면 송출을 중단합니다");
  })
  .addSubcommand((status) => {
    return status.setName("status").setDescription("브릿지/세션 상태를 봅니다");
  });
|
||||
|
||||
/**
 * Uploads the /자비스 definition as the command list of the configured guild
 * and logs a confirmation.
 */
export async function registerCommands() {
  const rest = new REST({ version: "10" });
  rest.setToken(config.botToken);

  const route = Routes.applicationGuildCommands(config.appId, config.guildId);
  const body = [jarvisCommand.toJSON()];
  await rest.put(route, { body });

  console.log("✓ /자비스 명령어 등록 완료 (guild:", config.guildId, ")");
}
|
||||
|
||||
// When this file is the entry point, register the command and exit non-zero on failure.
if (import.meta.main) {
  void (async () => {
    try {
      await registerCommands();
    } catch (e) {
      console.error("명령어 등록 실패:", e);
      process.exit(1);
    }
  })();
}
|
||||
51
bot/src/stream/index.ts
Normal file
51
bot/src/stream/index.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
/**
|
||||
* Pluggable VNC screen-broadcast backends.
|
||||
*
|
||||
* Per the chosen design (option 1): the streaming method is swappable via
|
||||
* STREAM_BACKEND in .env. The default is the real live "Go Live" stream via a
|
||||
* selfbot account (only way to get a native Discord video broadcast), with safe
|
||||
* fallbacks (noVNC link / periodic screenshots) available without code changes.
|
||||
*/
|
||||
import type { AppConfig } from "../config.ts";
|
||||
|
||||
/** Per-invocation information handed to a streamer when a broadcast starts. */
export interface StreamContext {
  /** Guild the command was invoked in. */
  guildId: string;
  /** Voice channel the invoker is in; the caller passes "" when there is none. */
  voiceChannelId: string;
  /** Posts an image to the invoking text channel (used by the screenshot backend). */
  postImage?: (png: Buffer, name: string) => Promise<void>;
}
|
||||
|
||||
/** Contract every screen-broadcast backend implements. */
export interface ScreenStreamer {
  /** Which backend this instance is. */
  readonly kind: AppConfig["streamBackend"];

  /**
   * Starts broadcasting.
   * @returns A short user-facing status or link message.
   */
  start(ctx: StreamContext): Promise<string>;

  /** Stops broadcasting. */
  stop(): Promise<void>;

  /** Whether a broadcast is currently running. */
  isActive(): boolean;
}
|
||||
|
||||
/**
 * Builds the streamer selected by `config.streamBackend`.
 *
 * Backend modules are imported lazily, so only the selected one is loaded.
 * "none" and any unrecognised value produce an inert streamer whose `start`
 * only explains that screen sharing is disabled.
 */
export async function createStreamer(config: AppConfig): Promise<ScreenStreamer> {
  const backend = config.streamBackend;

  if (backend === "selfbot") {
    const mod = await import("./selfbot.ts");
    return new mod.SelfbotStreamer(config);
  }
  if (backend === "novnc") {
    const mod = await import("./novnc.ts");
    return new mod.NoVncStreamer(config);
  }
  if (backend === "screenshot") {
    const mod = await import("./screenshot.ts");
    return new mod.ScreenshotStreamer(config);
  }

  const disabled: ScreenStreamer = {
    kind: "none",
    start: async () => "화면 송출이 비활성화되어 있습니다 (STREAM_BACKEND=none).",
    stop: async () => {},
    isActive: () => false,
  };
  return disabled;
}
|
||||
34
bot/src/stream/novnc.ts
Normal file
34
bot/src/stream/novnc.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
/**
|
||||
* noVNC link backend (safe, real-time, no ban risk).
|
||||
*
|
||||
* Does not broadcast natively into Discord. Instead it shares a noVNC web URL
|
||||
* that anyone can open in a browser to watch (and optionally control) the VNC
|
||||
* desktop live. Set NOVNC_URL in .env (e.g. http://192.168.10.9:6080/vnc.html).
|
||||
*
|
||||
* Stand up noVNC once on the host with websockify, e.g.:
|
||||
* websockify --web=/usr/share/novnc 6080 localhost:5901
|
||||
*/
|
||||
import type { AppConfig } from "../config.ts";
|
||||
import type { ScreenStreamer, StreamContext } from "./index.ts";
|
||||
|
||||
/**
 * Backend that shares the configured noVNC URL instead of streaming anything
 * itself. "Active" only records that the link has been handed out.
 */
export class NoVncStreamer implements ScreenStreamer {
  readonly kind = "novnc" as const;
  private readonly config: AppConfig;
  private active = false;

  constructor(config: AppConfig) {
    this.config = config;
  }

  /** Whether the link has been shared and not yet stopped. */
  isActive(): boolean {
    return this.active;
  }

  /**
   * @returns The viewing link, or a configuration hint when NOVNC_URL is empty
   *   (in which case the streamer stays inactive).
   */
  async start(_ctx: StreamContext): Promise<string> {
    const url = this.config.novncUrl;
    if (url) {
      this.active = true;
      return `🖥️ VNC 화면 실시간 보기 (브라우저): ${url}`;
    }
    return "NOVNC_URL이 설정되지 않았습니다 (.env). 예: http://192.168.10.9:6080/vnc.html";
  }

  /** Marks the streamer inactive. */
  async stop(): Promise<void> {
    this.active = false;
  }
}
|
||||
62
bot/src/stream/screenshot.ts
Normal file
62
bot/src/stream/screenshot.ts
Normal file
@@ -0,0 +1,62 @@
|
||||
/**
|
||||
* Screenshot backend (safe, no ban risk, not real-time).
|
||||
*
|
||||
* Periodically grabs a frame from the VNC X display with ffmpeg's x11grab and
|
||||
* posts it to the invoking text channel. Low FPS, but works with a normal bot
|
||||
* account and never touches Discord's selfbot surface.
|
||||
*/
|
||||
import { spawn } from "node:child_process";
|
||||
import type { AppConfig } from "../config.ts";
|
||||
import type { ScreenStreamer, StreamContext } from "./index.ts";
|
||||
|
||||
/**
 * Captures a single PNG frame of an X display via `ffmpeg -f x11grab`.
 *
 * @param display - X display passed to ffmpeg as the input (e.g. ":1").
 * @param size - Capture size in ffmpeg's WxH form (e.g. "1920x1080").
 * @returns The PNG bytes written to ffmpeg's stdout.
 * @throws Error when ffmpeg cannot be spawned, exits non-zero (the message
 *   includes whatever ffmpeg printed on stderr), or produces no output.
 */
function grabFrame(display: string, size: string): Promise<Buffer> {
  return new Promise((resolve, reject) => {
    const ff = spawn("ffmpeg", [
      "-loglevel", "error",
      "-f", "x11grab",
      "-video_size", size,
      "-i", display,
      "-frames:v", "1",
      "-f", "image2pipe",
      "-vcodec", "png",
      "pipe:1",
    ]);
    const chunks: Buffer[] = [];
    const errChunks: Buffer[] = [];
    ff.stdout.on("data", (c: Buffer) => chunks.push(c));
    // Read stderr so the failure reason can be reported and the pipe never fills up.
    ff.stderr.on("data", (c: Buffer) => errChunks.push(c));
    ff.on("error", reject);
    ff.on("close", (code) => {
      const frame = Buffer.concat(chunks);
      if (code === 0 && frame.length > 0) {
        resolve(frame);
        return;
      }
      const detail = Buffer.concat(errChunks).toString("utf8").trim();
      const reason = code === 0 ? "ffmpeg produced no image data" : `ffmpeg exited ${code}`;
      reject(new Error(detail ? `${reason}: ${detail}` : reason));
    });
  });
}
|
||||
|
||||
/**
 * Backend that grabs a frame of the VNC display on a timer and posts it
 * through `ctx.postImage`.
 */
export class ScreenshotStreamer implements ScreenStreamer {
  readonly kind = "screenshot" as const;
  private timer: ReturnType<typeof setInterval> | null = null;
  constructor(private config: AppConfig) {}

  /** Whether the capture timer is running. */
  isActive() {
    return this.timer !== null;
  }

  /**
   * Starts periodic capture, posting one frame immediately.
   *
   * @returns A user-facing status message. A hint is returned instead when no
   *   `postImage` callback was supplied or when capture is already running.
   */
  async start(ctx: StreamContext): Promise<string> {
    const postImage = ctx.postImage;
    if (!postImage) return "스크린샷을 올릴 텍스트 채널 컨텍스트가 없습니다.";
    if (this.timer) return "이미 스크린샷 송출 중입니다.";

    // A NaN or non-positive interval would make setInterval fire almost
    // continuously; fall back to the documented default of 5 seconds.
    const configured = this.config.screenshotIntervalSec;
    const seconds = Number.isFinite(configured) && configured > 0 ? configured : 5;

    let busy = false;
    let timer: ReturnType<typeof setInterval> | null = null;
    const tick = async () => {
      // Skip this round if the previous grab/upload is still running.
      if (busy) return;
      busy = true;
      try {
        const png = await grabFrame(this.config.vncDisplay, this.config.vncResolution);
        // Do not post if stop() (or a restart) happened while grabbing.
        if (this.timer === timer) await postImage(png, "vnc.png");
      } catch (e) {
        console.error("[screenshot] grab failed:", e);
      } finally {
        busy = false;
      }
    };

    timer = setInterval(() => void tick(), seconds * 1000);
    this.timer = timer;
    void tick();
    return `📸 ${seconds}초마다 VNC 스크린샷을 이 채널에 올립니다.`;
  }

  /** Stops the capture timer; a frame being grabbed right now is discarded. */
  async stop(): Promise<void> {
    if (this.timer) clearInterval(this.timer);
    this.timer = null;
  }
}
|
||||
116
bot/src/stream/selfbot.ts
Normal file
116
bot/src/stream/selfbot.ts
Normal file
@@ -0,0 +1,116 @@
|
||||
/**
|
||||
* Selfbot live-stream backend (default).
|
||||
*
|
||||
* Streams the VNC X display (:1) into the voice channel as a real Discord
|
||||
* "Go Live" broadcast. Discord blocks video from *bot* accounts, so this path
|
||||
* requires a USER account token (a "selfbot"), which violates Discord ToS and
|
||||
* can get the account banned. Use a throwaway/burner account, never your main.
|
||||
*
|
||||
* Dependencies are optional (native): install with
|
||||
* bun add discord.js-selfbot-v13 @dank074/discord-video-stream
|
||||
* They are dynamically imported so the core bot installs/runs without them.
|
||||
*
|
||||
* Library API targets @dank074/discord-video-stream v6 (Streamer / prepareStream
|
||||
* / playStream). If a different major is installed, the import guard below will
|
||||
* point you at the docs rather than crash cryptically.
|
||||
*/
|
||||
import type { AppConfig } from "../config.ts";
|
||||
import type { ScreenStreamer, StreamContext } from "./index.ts";
|
||||
|
||||
/**
 * Backend that logs in with a user-account token and broadcasts the VNC
 * display into the voice channel as a "Go Live" stream.
 *
 * Lifecycle: `start()` creates a user client + streamer and begins playback in
 * the background; `stop()` (or the stream ending or failing) tears both down,
 * so no logged-in client is left behind.
 */
export class SelfbotStreamer implements ScreenStreamer {
  readonly kind = "selfbot" as const;
  private config: AppConfig;
  private streamer: any = null;
  private controller: AbortController | null = null;
  private active = false;

  constructor(config: AppConfig) {
    this.config = config;
  }

  /** Whether a broadcast is currently playing. */
  isActive() {
    return this.active;
  }

  /**
   * Dynamically imports the two optional dependencies.
   *
   * @throws Error with install instructions when either import fails, or when
   *   the video library lacks Streamer / prepareStream / playStream.
   */
  private async loadLib() {
    let selfbot: any, videoStream: any;
    try {
      selfbot = await import("discord.js-selfbot-v13");
      // Optional native dep; resolved at runtime only. Version/name can vary by
      // upstream release, so we don't hard-bind its types at compile time.
      // @ts-ignore - optional dependency, may be absent until `bun add`ed
      videoStream = await import("@dank074/discord-video-stream");
    } catch (e) {
      throw new Error(
        "셀프봇 송출 의존성이 없습니다. 설치: bun add discord.js-selfbot-v13 @dank074/discord-video-stream\n" +
          `원본 오류: ${(e as Error).message}`,
      );
    }
    if (!videoStream.Streamer || !videoStream.prepareStream || !videoStream.playStream) {
      throw new Error(
        "@dank074/discord-video-stream v6 API(Streamer/prepareStream/playStream)를 찾지 못했습니다. " +
          "package.json 버전을 ^4.2.1(=v6 npm 태그)로 맞추거나 docs를 확인하세요.",
      );
    }
    return { selfbot, videoStream };
  }

  /**
   * Logs in, joins the voice channel and starts the broadcast.
   *
   * @returns A user-facing status message. A hint is returned instead when a
   *   broadcast is already running or starting, or when no token is configured.
   * @throws Whatever login / joinVoice / prepareStream throws; the client
   *   created for this attempt is torn down before the error propagates.
   */
  async start(ctx: StreamContext): Promise<string> {
    // `this.streamer` is set for the whole start..stop window, so it also
    // blocks a second start() issued while the first is still logging in.
    if (this.active || this.streamer) return "이미 송출 중입니다.";
    if (!this.config.selfbotToken) {
      return "DISCORD_SELFBOT_TOKEN이 설정되지 않았습니다 (.env). 버너 계정 토큰을 넣어주세요.";
    }
    const { selfbot, videoStream } = await this.loadLib();
    const { Streamer, prepareStream, playStream, Utils } = videoStream;

    const streamer = new Streamer(new selfbot.Client());
    const controller = new AbortController();
    this.streamer = streamer;
    this.controller = controller;

    try {
      await streamer.client.login(this.config.selfbotToken);
      await streamer.joinVoice(ctx.guildId, ctx.voiceChannelId);

      // Grab the VNC X display with ffmpeg's x11grab and let the library
      // encode/transport it.
      // NOTE(review): the input string and the inputFormat/inputSize options
      // are passed through unchanged; confirm the installed library version
      // actually honours them.
      const input = `x11grab:${this.config.vncDisplay}`;
      const { command, output } = prepareStream(
        input,
        {
          width: parseInt(this.config.vncResolution.split("x")[0] ?? "1920", 10),
          height: parseInt(this.config.vncResolution.split("x")[1] ?? "1080", 10),
          frameRate: this.config.vncFramerate,
          bitrateVideo: this.config.vncBitrateKbps,
          videoCodec: Utils?.normalizeVideoCodec ? Utils.normalizeVideoCodec("H264") : "H264",
          customHeaders: undefined,
          inputFormat: "x11grab",
          inputSize: this.config.vncResolution,
        },
        controller.signal,
      );

      command.on("error", (err: Error) => {
        // Errors caused by our own abort are expected; only log real failures.
        if (!controller.signal.aborted) console.error("[selfbot] ffmpeg error:", err);
      });

      this.active = true;
      // Fire-and-forget; settles when the stream ends.
      playStream(output, streamer, { type: "go-live" })
        .catch((err: Error) => {
          if (!controller.signal.aborted) console.error("[selfbot] playStream:", err);
        })
        .finally(() => {
          // If the stream ended on its own, release the client too. Skip when
          // stop() already ran or a newer stream has replaced this one.
          if (this.streamer === streamer) void this.stop();
        });
    } catch (err) {
      // Do not leave a logged-in client behind after a failed start.
      await this.stop();
      throw err;
    }

    return "🔴 셀프봇으로 VNC 화면을 음성채널에 실시간 송출 중입니다 (Go Live).";
  }

  /** Aborts the encoder, leaves voice and destroys the user client. Safe to call repeatedly. */
  async stop(): Promise<void> {
    const streamer = this.streamer;
    const controller = this.controller;
    this.streamer = null;
    this.controller = null;
    this.active = false;

    controller?.abort();
    try {
      streamer?.leaveVoice?.();
      streamer?.client?.destroy?.();
    } catch {
      /* best-effort teardown */
    }
  }
}
|
||||
169
bot/src/voice.ts
Normal file
169
bot/src/voice.ts
Normal file
@@ -0,0 +1,169 @@
|
||||
/**
|
||||
* Discord voice I/O.
|
||||
*
|
||||
* - Joins the caller's voice channel.
|
||||
* - Receives each speaker's Opus stream, decodes to PCM, and on end-of-speech
|
||||
* forwards the utterance (as a WAV) to the brain bridge.
|
||||
* - Plays the brain's spoken reply back into the channel.
|
||||
*
|
||||
* No AI logic here — capture in, audio out. The brain lives in bridge/.
|
||||
*/
|
||||
import { Readable } from "node:stream";
|
||||
import {
|
||||
joinVoiceChannel,
|
||||
createAudioPlayer,
|
||||
createAudioResource,
|
||||
EndBehaviorType,
|
||||
StreamType,
|
||||
VoiceConnection,
|
||||
VoiceConnectionStatus,
|
||||
entersState,
|
||||
type AudioPlayer,
|
||||
} from "@discordjs/voice";
|
||||
import prism from "prism-media";
|
||||
import type { VoiceBasedChannel } from "discord.js";
|
||||
import { converse, decodeWav } from "./bridge.ts";
|
||||
import { config } from "./config.ts";
|
||||
|
||||
/** Sample rate (Hz) used when decoding received voice audio. */
const DISCORD_RATE = 48_000;

/** Channel count used when decoding received voice audio (stereo). */
const DISCORD_CHANNELS = 2;
|
||||
|
||||
/**
 * Wraps raw little-endian PCM16 mono samples in a minimal 44-byte WAV header.
 *
 * @param pcm - Raw sample bytes; copied unchanged after the header.
 * @param sampleRate - Sample rate in Hz written into the header.
 * @returns A new buffer: header followed by `pcm`.
 */
function pcm16MonoToWav(pcm: Buffer, sampleRate: number): Buffer {
  const headerBytes = 44;
  const channelCount = 1;
  const bytesPerSample = 2;
  const payloadBytes = pcm.length;

  const head = Buffer.alloc(headerBytes);
  // RIFF container
  head.write("RIFF", 0);
  head.writeUInt32LE(headerBytes - 8 + payloadBytes, 4); // size of everything after this field
  head.write("WAVE", 8);
  // "fmt " chunk
  head.write("fmt ", 12);
  head.writeUInt32LE(16, 16); // fmt chunk size
  head.writeUInt16LE(1, 20); // format tag: PCM
  head.writeUInt16LE(channelCount, 22);
  head.writeUInt32LE(sampleRate, 24);
  head.writeUInt32LE(sampleRate * bytesPerSample, 28); // byte rate
  head.writeUInt16LE(bytesPerSample, 32); // block align
  head.writeUInt16LE(bytesPerSample * 8, 34); // bits per sample
  // "data" chunk
  head.write("data", 36);
  head.writeUInt32LE(payloadBytes, 40);

  return Buffer.concat([head, pcm]);
}
|
||||
|
||||
/**
 * Downmixes interleaved stereo PCM16 (little-endian) to mono PCM16 by
 * averaging the left and right sample of every frame.
 *
 * @param stereo - Interleaved L/R samples, 4 bytes per frame. Trailing bytes
 *   that do not form a complete frame are ignored.
 * @returns A new buffer with one 16-bit sample per complete input frame.
 */
function stereoToMono(stereo: Buffer): Buffer {
  // Whole frames only: a fractional count would read past the end and throw.
  const frames = Math.floor(stereo.length / 4); // 2 ch * 2 bytes
  const mono = Buffer.alloc(frames * 2);
  for (let i = 0; i < frames; i++) {
    const l = stereo.readInt16LE(i * 4);
    const r = stereo.readInt16LE(i * 4 + 2);
    mono.writeInt16LE((l + r) >> 1, i * 2);
  }
  return mono;
}
|
||||
|
||||
/**
 * One voice connection to a guild: listens to speakers, sends each finished
 * utterance to the brain bridge, and plays the spoken reply back.
 */
export class VoiceSession {
  readonly guildId: string;
  private connection: VoiceConnection;
  private player: AudioPlayer;
  /** Users whose utterance is currently being captured or processed. */
  private listening = new Set<string>();
  /** Optional callback to surface transcripts/replies to a text channel. */
  onTurn?: (info: { user: string; transcript: string; reply: string }) => void;

  /** Joins `channel` un-deafened, wires up playback, and starts listening for speakers. */
  constructor(channel: VoiceBasedChannel) {
    this.guildId = channel.guild.id;
    this.connection = joinVoiceChannel({
      channelId: channel.id,
      guildId: channel.guild.id,
      adapterCreator: channel.guild.voiceAdapterCreator,
      selfDeaf: false, // we need to hear users
      selfMute: false,
    });
    this.player = createAudioPlayer();
    // An 'error' event without a listener is thrown by the emitter and would
    // take the whole process down on one bad audio resource.
    this.player.on("error", (err) => console.error("[voice] playback failed:", err));
    this.connection.subscribe(this.player);
    this.attachReceiver();
  }

  /**
   * Waits until the voice connection is ready.
   * @throws When the connection does not become ready within 20 seconds.
   */
  async ready(): Promise<void> {
    await entersState(this.connection, VoiceConnectionStatus.Ready, 20_000);
  }

  /** Starts one capture per speaker; a speaker already being handled is ignored. */
  private attachReceiver() {
    const receiver = this.connection.receiver;
    receiver.speaking.on("start", (userId: string) => {
      if (this.listening.has(userId)) return;
      this.listening.add(userId);
      this.captureUtterance(userId).finally(() => this.listening.delete(userId));
    });
  }

  /**
   * Records one utterance of `userId` until silence, sends it to the brain,
   * reports the turn through `onTurn`, and plays the reply audio if there is any.
   * Never rejects: failures are logged and the utterance is dropped.
   */
  private async captureUtterance(userId: string): Promise<void> {
    const opusStream = this.connection.receiver.subscribe(userId, {
      end: { behavior: EndBehaviorType.AfterSilence, duration: config.silenceMs },
    });
    const decoder = new prism.opus.Decoder({
      frameSize: 960,
      channels: DISCORD_CHANNELS,
      rate: DISCORD_RATE,
    });
    const chunks: Buffer[] = [];
    const pcmStream = opusStream.pipe(decoder);
    pcmStream.on("data", (c: Buffer) => chunks.push(c));

    // Resolve on every way the pipeline can finish. Waiting for "end" alone
    // would hang forever after an error or a torn-down connection, leaving the
    // user permanently stuck in `listening`.
    const completed = await new Promise<boolean>((resolve) => {
      pcmStream.once("end", () => resolve(true));
      pcmStream.once("error", (err: Error) => {
        console.error("[voice] opus decode failed:", err);
        resolve(false);
      });
      opusStream.once("error", (err: Error) => {
        console.error("[voice] receive stream failed:", err);
        resolve(false);
      });
      opusStream.once("close", () => {
        // A close without a preceding "end" means the stream was destroyed
        // (e.g. the session was left); pipe() will never end the decoder.
        if (!opusStream.readableEnded) resolve(false);
      });
    });
    if (!completed) {
      opusStream.destroy();
      decoder.destroy();
      return;
    }

    if (!chunks.length) return;
    const mono = stereoToMono(Buffer.concat(chunks));
    // Ignore blips shorter than ~300ms (likely noise / key clicks).
    if (mono.length < DISCORD_RATE * 0.3 * 2) return;
    const wav = pcm16MonoToWav(mono, DISCORD_RATE);

    try {
      const result = await converse(wav);
      if (result.transcript) {
        this.onTurn?.({ user: userId, transcript: result.transcript, reply: result.reply });
      }
      const audio = decodeWav(result.audio_b64);
      if (audio) this.play(audio);
    } catch (err) {
      console.error("[voice] converse failed:", err);
    }
  }

  /** Play a WAV buffer into the channel. */
  play(wav: Buffer) {
    const resource = createAudioResource(Readable.from(wav), {
      inputType: StreamType.Arbitrary,
    });
    this.player.play(resource);
  }

  /** Tears down the voice connection; safe to call on an already-destroyed session. */
  destroy() {
    try {
      this.connection.destroy();
    } catch {
      /* already gone */
    }
  }
}
|
||||
|
||||
/** One session per guild. */
const sessions = new Map<string, VoiceSession>();

/**
 * Joins `channel`, replacing any existing session of the same guild.
 *
 * @returns The new session once its voice connection is ready.
 * @throws When the connection does not become ready in time. The half-open
 *   session is destroyed and unregistered first, so no stale entry remains.
 */
export async function joinChannel(channel: VoiceBasedChannel): Promise<VoiceSession> {
  const guildId = channel.guild.id;
  sessions.get(guildId)?.destroy();

  const session = new VoiceSession(channel);
  sessions.set(guildId, session);
  try {
    await session.ready();
  } catch (err) {
    session.destroy();
    // Only unregister our own entry; a newer join may have replaced it meanwhile.
    if (sessions.get(guildId) === session) sessions.delete(guildId);
    throw err;
  }
  return session;
}
|
||||
|
||||
/**
 * Ends the guild's voice session.
 *
 * @returns true when a session existed and was destroyed, false otherwise.
 */
export function leaveGuild(guildId: string): boolean {
  const existing = sessions.get(guildId);
  if (existing) {
    existing.destroy();
    sessions.delete(guildId);
    return true;
  }
  return false;
}
|
||||
|
||||
/** Looks up the guild's current voice session, if any. */
export function getSession(guildId: string): VoiceSession | undefined {
  const found = sessions.get(guildId);
  return found;
}
|
||||
Reference in New Issue
Block a user