Files
javis_bot/bot/src/stream/selfbot.ts
javis-bot 208fbbc851 feat(selfbot): broadcast desktop audio + smart subtitles in the browse scenario
Two broadcast-experience improvements:

- Audio: the Go-Live stream was video-only. Capture the desktop sound (the
  default PipeWire/Pulse sink monitor, @DEFAULT_MONITOR@) as a second ffmpeg
  input and mux AAC into the mpegts; the library re-encodes it to Opus for
  Discord. Controlled by STREAM_AUDIO / STREAM_AUDIO_SOURCE (default on). ffmpeg
  inherits XDG_RUNTIME_DIR to reach the pulse socket. Verified: the streamer now
  reports "Found audio stream" and the monitor carries Chrome audio (~-11 dB).
- Subtitles: in the browse scenario, default captions OFF, but auto-enable a
  Korean track when the video offers one (getOption captions tracklist ->
  setOption / unloadModule).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-10 15:50:32 +09:00

312 lines
13 KiB
TypeScript

/**
* Selfbot live-stream backend (default).
*
* Streams the VNC X display (:1) into the voice channel as a real Discord
* "Go Live" broadcast. Discord blocks video from *bot* accounts, so this path
* requires a USER account token (a "selfbot"), which violates Discord ToS and
* can get the account banned. Use a throwaway/burner account, never your main.
*
* Dependencies are optional (native) and dynamically imported so the core bot
* installs/runs without them:
* bun add discord.js-selfbot-v13 @dank074/discord-video-stream
* bun pm trust @dank074/node-av node-datachannel # build native deps
*
* API targets @dank074/discord-video-stream v6 (verified against its d.ts):
* new Streamer(client) -> joinVoice(guildId, channelId)
* prepareStream(input, opts, signal) -> { command, output }
* playStream(output, streamer, { type: "go-live" }, signal)
*/
import { spawn, type ChildProcess } from "node:child_process";
import type { AppConfig } from "../config.ts";
import type { ScreenStreamer, StreamContext } from "./index.ts";
import { VncKeepalive, resolveVncPassword, vncPortForDisplay } from "./vnc-keepalive.ts";
/**
 * Screen streamer that broadcasts the VNC X display into a Discord voice
 * channel as a "Go Live" stream, using a user-account ("selfbot") client.
 *
 * Pipeline: system `ffmpeg` (x11grab, optionally plus a pulse audio input)
 * encodes a Discord-ready mpegts stream to stdout, which is handed to the
 * video-stream library's `prepareStream` / `playStream`.
 *
 * Concurrency model: `start()` is in flight for several seconds and `stop()`
 * may interleave with it. Every start attempt owns an `AbortController`; the
 * instance field `controller` identifies the *current* attempt, and an attempt
 * only clears shared instance state while it is still the current one.
 */
export class SelfbotStreamer implements ScreenStreamer {
  readonly kind = "selfbot" as const;

  /** Library `Streamer` wrapping the logged-in selfbot client (untyped: optional dependency). */
  private streamer: any = null;
  /** System ffmpeg child process producing the mpegts capture. */
  private capture: ChildProcess | null = null;
  /** VNC client kept attached for the life of the stream. */
  private keepalive: VncKeepalive | null = null;
  /** Abort controller of the current start attempt; null when idle or stopped. */
  private controller: AbortController | null = null;
  /** True from the moment start() passes its guards until teardown. */
  private active = false;

  constructor(private config: AppConfig) {}

  /** Whether a stream is starting or running. */
  isActive(): boolean {
    return this.active;
  }

  /**
   * Wait a randomised, human-plausible amount of time. Resolves immediately if
   * the stream is aborted (stop()) mid-wait, so teardown never hangs on a pause.
   *
   * @param minMs  lower bound of the pause, in milliseconds
   * @param maxMs  upper bound; values below `minMs` collapse the range to `minMs`
   * @param signal optional abort signal that cuts the pause short
   */
  private humanPause(minMs: number, maxMs: number, signal?: AbortSignal): Promise<void> {
    const ms = Math.floor(minMs + Math.random() * Math.max(0, maxMs - minMs));
    return new Promise((resolve) => {
      if (signal?.aborted) return resolve();
      const onAbort = () => {
        clearTimeout(timer);
        resolve();
      };
      const timer = setTimeout(() => {
        // Normal expiry: drop the abort listener so it does not pile up on the signal.
        signal?.removeEventListener("abort", onAbort);
        resolve();
      }, ms);
      signal?.addEventListener("abort", onAbort, { once: true });
    });
  }

  /**
   * Dynamically import the optional selfbot + video-stream libraries.
   *
   * @returns the two imported modules
   * @throws Error when either package is missing, or when the video-stream
   *         module does not expose Streamer / prepareStream / playStream
   */
  private async loadLib() {
    let selfbot: any, vs: any;
    try {
      selfbot = await import("discord.js-selfbot-v13");
      // Optional native dep; resolved at runtime only.
      // @ts-ignore - optional dependency, may be absent until `bun add`ed
      vs = await import("@dank074/discord-video-stream");
    } catch (e) {
      // A rejected import is normally an Error, but narrow instead of casting
      // so a non-Error rejection still yields a readable message.
      const detail = e instanceof Error ? e.message : String(e);
      throw new Error(
        "셀프봇 송출 의존성이 없습니다. 설치: bun add discord.js-selfbot-v13 @dank074/discord-video-stream\n" +
          `원본 오류: ${detail}`,
      );
    }
    if (!vs.Streamer || !vs.prepareStream || !vs.playStream) {
      throw new Error(
        "@dank074/discord-video-stream v6 API(Streamer/prepareStream/playStream)를 찾지 못했습니다. " +
          "package.json 버전을 ^6.0.0으로 맞추세요.",
      );
    }
    return { selfbot, vs };
  }

  /**
   * Build the argv for the SYSTEM ffmpeg that captures the X display (and,
   * when enabled, desktop audio) and writes mpegts to stdout.
   *
   * The video is encoded once here into its final form: target bitrate
   * (-b:v / -maxrate), no B-frames, a keyframe every `vncFramerate` frames
   * (one second), and yuv420p. The library is then asked only to remux it.
   */
  private captureArgs(): string[] {
    const { config } = this;
    const fps = String(config.vncFramerate);
    const kbps = config.vncBitrateKbps;
    // The library advertises a hardcoded max_bitrate of 10 Mbps to Discord;
    // cap -maxrate there so encoder bursts stay within the negotiated ceiling.
    const LIB_MAX_BITRATE_KBPS = 10000;
    const maxKbps = Math.min(Math.round(kbps * 1.5), LIB_MAX_BITRATE_KBPS);

    // GPU encode (h264_nvenc) when streamHw is on, software x264 otherwise.
    const videoCodec = config.streamHw
      ? ["-c:v", "h264_nvenc", "-preset", "p4", "-tune", "ll", "-forced-idr", "1"]
      : ["-c:v", "libx264", "-preset", "ultrafast", "-tune", "zerolatency"];

    // Desktop audio is a second (pulse) input muxed as AAC. Explicit -map is
    // required once there are two inputs.
    const audioOn = config.streamAudio;
    const audioInput = audioOn
      ? ["-thread_queue_size", "1024", "-f", "pulse", "-i", config.streamAudioSource]
      : [];
    const audioMap = audioOn ? ["-map", "0:v:0", "-map", "1:a:0"] : [];
    const audioCodec = audioOn ? ["-c:a", "aac", "-b:a", "160k", "-ar", "48000", "-ac", "2"] : [];

    return [
      "-loglevel", "error",
      "-thread_queue_size", "1024",
      "-f", "x11grab",
      "-framerate", fps,
      "-video_size", config.vncResolution,
      "-i", config.vncDisplay,
      ...audioInput,
      ...audioMap,
      ...videoCodec,
      "-b:v", `${kbps}k`, "-maxrate", `${maxKbps}k`, "-bufsize", `${kbps}k`,
      "-bf", "0",
      "-pix_fmt", "yuv420p",
      "-g", fps,
      ...audioCodec,
      "-f", "mpegts", "pipe:1",
    ];
  }

  /**
   * Best-effort teardown of one attempt's resources. Each step is isolated so
   * a failure in one (e.g. leaveVoice() throwing) cannot skip the others and
   * leave the capture process or the logged-in client behind.
   */
  private teardown(
    capture: ChildProcess | null,
    keepalive: VncKeepalive | null,
    streamer: any,
  ): void {
    try {
      capture?.kill("SIGKILL");
    } catch {
      /* ignore */
    }
    try {
      keepalive?.stop();
    } catch {
      /* ignore */
    }
    try {
      streamer?.leaveVoice?.();
    } catch {
      /* ignore */
    }
    try {
      streamer?.client?.destroy?.();
    } catch {
      /* ignore */
    }
  }

  /**
   * Clear instance state and release the `active` lock, but only if
   * `controller` still identifies the current attempt. If a concurrent
   * stop()/start() already replaced it, the newer owner keeps its state.
   */
  private releaseIfCurrent(
    controller: AbortController,
    capture: ChildProcess | null,
    keepalive: VncKeepalive | null,
    streamer: any,
  ): void {
    if (this.controller !== controller) return;
    if (this.capture === capture) this.capture = null;
    if (this.keepalive === keepalive) this.keepalive = null;
    if (this.streamer === streamer) this.streamer = null;
    this.controller = null;
    this.active = false;
  }

  /**
   * Log in, join the caller's voice channel and start the Go-Live broadcast.
   *
   * @param ctx invocation context; `guildId` and `voiceChannelId` are read
   * @returns a user-facing status message (also used for "already streaming",
   *          missing token, missing voice channel, and "stopped while starting")
   * @throws whatever made startup fail, after resources have been torn down,
   *         unless the failure was caused by stop() aborting the attempt
   */
  async start(ctx: StreamContext): Promise<string> {
    if (this.active) return "이미 송출 중입니다.";
    if (!this.config.selfbotToken) {
      return "DISCORD_SELFBOT_TOKEN이 설정되지 않았습니다 (.env). 버너 계정 토큰을 넣어주세요.";
    }
    if (!ctx.voiceChannelId) {
      return "셀프봇 송출은 음성 채널 안에서 호출해야 합니다.";
    }

    // Lock the starting state BEFORE any await so a second start() is rejected
    // by the guard above while this one is still pausing. Resources are kept
    // in LOCAL refs because an interleaved stop() nulls the instance fields.
    this.active = true;
    const controller = (this.controller = new AbortController());
    const signal = controller.signal;
    let streamer: any = null;
    let capture: ChildProcess | null = null;
    let keepalive: VncKeepalive | null = null;

    // Tear this attempt down after it has gone live, if it is still current.
    const shutdownIfCurrent = () => {
      if (this.controller !== controller) return;
      this.teardown(capture, keepalive, streamer);
      this.releaseIfCurrent(controller, capture, keepalive, streamer);
    };

    try {
      const { selfbot, vs } = await this.loadLib();
      const { Streamer, prepareStream, playStream } = vs;
      signal.throwIfAborted();

      streamer = this.streamer = new Streamer(new selfbot.Client());
      await streamer.client.login(this.config.selfbotToken);
      signal.throwIfAborted();

      // Randomised pauses before joining voice and before going live, so the
      // cadence is not fingerprintable. Each pause returns early on abort and
      // the following throwIfAborted() unwinds into the catch below.
      await this.humanPause(2500, 4500, signal);
      signal.throwIfAborted();
      await streamer.joinVoice(ctx.guildId, ctx.voiceChannelId);
      await this.humanPause(6000, 10000, signal);
      signal.throwIfAborted();

      const [w, h] = this.config.vncResolution.split("x").map((n) => parseInt(n, 10));

      // Capture with the system ffmpeg and pipe its stdout into the library.
      capture = this.capture = spawn("ffmpeg", this.captureArgs());
      // A ChildProcess emits 'error' when it cannot be spawned (e.g. ffmpeg is
      // not installed). Without a listener that event is thrown as an uncaught
      // exception, so handle it: log, abort the attempt and tear down.
      capture.on("error", (err: Error) => {
        if (!signal.aborted) console.error("[selfbot x11grab] spawn failed:", err);
        controller.abort();
        shutdownIfCurrent();
      });
      capture.stderr?.on("data", (d) => {
        if (!signal.aborted) console.error("[selfbot x11grab]", d.toString().trim());
      });

      // Keep a VNC client attached for the life of the stream so the captured
      // framebuffer keeps updating at the stream framerate. Fail-open: a
      // missing password just skips it.
      const vncPw = resolveVncPassword();
      if (vncPw) {
        keepalive = this.keepalive = new VncKeepalive({
          host: "127.0.0.1",
          port: vncPortForDisplay(this.config.vncDisplay),
          password: vncPw,
          fps: this.config.vncFramerate,
        });
        keepalive.start();
      }

      const { command, output } = prepareStream(
        capture.stdout,
        {
          // The capture is already encoded for Discord, so ask the library to
          // remux only. width/height/frameRate are passed for signalling and
          // fall back to 1920x1080 when the resolution string does not parse.
          width: w || 1920,
          height: h || 1080,
          frameRate: this.config.vncFramerate,
          videoCodec: "H264",
          noTranscoding: true,
        },
        signal,
      );
      command.on("error", (err: Error) => {
        if (!signal.aborted) console.error("[selfbot] ffmpeg error:", err);
      });
      signal.throwIfAborted();

      // Fire-and-forget: the promise settles when the broadcast ends.
      playStream(output, streamer, { type: "go-live" }, signal)
        .catch((err: Error) => {
          if (!signal.aborted) console.error("[selfbot] playStream:", err);
        })
        .finally(() => {
          // The stream ended on its own rather than via stop(). If we are
          // still the current attempt, kill the capture and leave voice so
          // the encoder does not keep feeding a pipe nobody reads. If
          // stop()/start() already replaced the controller, it owns teardown.
          shutdownIfCurrent();
        });

      return "🔴 셀프봇으로 VNC 화면을 음성채널에 실시간 송출 중입니다 (Go Live).";
    } catch (e) {
      // Startup was aborted (stop() during a pause) or failed. Always tear
      // down OUR resources via the local refs, but only release the lock if
      // we are still the current attempt: clearing `active` under a newer
      // start() would unlock it mid-startup.
      this.teardown(capture, keepalive, streamer);
      this.releaseIfCurrent(controller, capture, keepalive, streamer);
      if (signal.aborted) return "송출을 시작하는 중에 중지했습니다.";
      throw e;
    }
  }

  /**
   * Stop the broadcast (or abort an in-flight start()) and release everything.
   * Safe to call when idle. Runs synchronously up to its return.
   */
  async stop(): Promise<void> {
    // Abort first so in-flight pauses and the library pipeline unwind, and
    // drop the controller so the aborted attempt no longer counts as current.
    this.controller?.abort();
    this.controller = null;
    this.teardown(this.capture, this.keepalive, this.streamer);
    this.capture = null;
    this.keepalive = null;
    this.streamer = null;
    this.active = false;
  }
}