fix: make humanised selfbot startup abort- and concurrency-safe

The human-pause delays leave start() in-flight for several seconds, which
exposed two races:
- stop() during a pause only ended the pause; start() then continued and called
  joinVoice on the streamer that stop() had already nulled (null deref).
- `active` was set only just before go-live, so a second /stream during the
  delay passed the guard and both calls raced on the same overwritten streamer.

Now start() locks `active` before any await, keeps controller/streamer/capture
as local refs, and calls signal.throwIfAborted() at each startup checkpoint
(after the library load, the login, each pause, and stream preparation) so an
interleaved stop() unwinds into a catch that tears down via the local refs,
clears the controller/streamer/capture fields only if they still point at this
attempt, and releases `active`. isActive() now reflects "starting" during the
delay too.

Verified live: concurrent start is rejected ("이미 송출 중입니다"), a stop()
mid-startup makes start() return a cancel message with isActive=false and no
uncaught error, and the happy path still goes live and tears down cleanly.
tsc --noEmit passes.
This commit is contained in:
javis-bot
2026-06-10 11:42:57 +09:00
parent b6cf05f6cf
commit 2c7f0a95b5

View File

@@ -37,10 +37,9 @@ export class SelfbotStreamer implements ScreenStreamer {
* Wait a randomised, human-plausible amount of time. Resolves immediately if
* the stream is aborted (stop()) mid-wait, so teardown never hangs on a pause.
*/
private humanPause(minMs: number, maxMs: number): Promise<void> {
private humanPause(minMs: number, maxMs: number, signal?: AbortSignal): Promise<void> {
const ms = Math.floor(minMs + Math.random() * Math.max(0, maxMs - minMs));
return new Promise((resolve) => {
const signal = this.controller?.signal;
if (signal?.aborted) return resolve();
const onAbort = () => {
clearTimeout(timer);
@@ -84,85 +83,127 @@ export class SelfbotStreamer implements ScreenStreamer {
if (!ctx.voiceChannelId) {
return "셀프봇 송출은 음성 채널 안에서 호출해야 합니다.";
}
const { selfbot, vs } = await this.loadLib();
const { Streamer, prepareStream, playStream } = vs;
this.controller = new AbortController();
this.streamer = new Streamer(new selfbot.Client());
await this.streamer.client.login(this.config.selfbotToken);
// Act like a person, not a bot: take a breath after coming online before
// navigating into the voice channel, then settle in for a few seconds
// before hitting "Go Live". Randomised so the cadence isn't fingerprintable.
await this.humanPause(900, 2200);
await this.streamer.joinVoice(ctx.guildId, ctx.voiceChannelId);
await this.humanPause(2500, 5000);
const [w, h] = this.config.vncResolution.split("x").map((n) => parseInt(n, 10));
// Capture the VNC X display with the SYSTEM ffmpeg (which reliably has
// x11grab), then pipe that stream into the library. Relying on the lib's
// bundled libav for the x11grab input device is not portable; piping the
// system ffmpeg is. (Verified live against a real voice channel.)
//
// The SYSTEM ffmpeg produces the final, Discord-ready H264 in one pass:
// target bitrate (-b:v/-maxrate), no B-frames (WebRTC requires this), a
// 1s keyframe interval, and yuv420p. The library then only REMUXES it
// (noTranscoding below) so there is no second decode/scale/encode. With
// streamHw on (default) this single encode runs on the GPU (h264_nvenc,
// RTX 5050); otherwise it falls back to software x264.
const hw = this.config.streamHw;
const kbps = this.config.vncBitrateKbps;
const maxKbps = Math.round(kbps * 1.5);
const captureCodecArgs = hw
? ["-c:v", "h264_nvenc", "-preset", "p4", "-tune", "ll", "-forced-idr", "1"]
: ["-c:v", "libx264", "-preset", "ultrafast", "-tune", "zerolatency"];
const capture = spawn("ffmpeg", [
"-loglevel", "error",
"-f", "x11grab",
"-framerate", String(this.config.vncFramerate),
"-video_size", this.config.vncResolution,
"-i", this.config.vncDisplay,
...captureCodecArgs,
"-b:v", `${kbps}k`, "-maxrate", `${maxKbps}k`, "-bufsize", `${kbps}k`,
"-bf", "0",
"-pix_fmt", "yuv420p",
"-g", String(this.config.vncFramerate),
"-f", "mpegts", "pipe:1",
]);
this.capture = capture;
capture.stderr?.on("data", (d) => {
if (!this.controller?.signal.aborted) console.error("[selfbot x11grab]", d.toString().trim());
});
const { command, output } = prepareStream(
capture.stdout,
{
// The capture above is already a Discord-ready H264 elementary stream,
// so the library only remuxes it (no second encode). width/height/
// frameRate are passed for signalling; encoding options are ignored
// on the copy path.
width: w || 1920,
height: h || 1080,
frameRate: this.config.vncFramerate,
videoCodec: "H264",
noTranscoding: true,
},
this.controller.signal,
);
command.on("error", (err: Error) => {
if (!this.controller?.signal.aborted) console.error("[selfbot] ffmpeg error:", err);
});
// Lock the starting state BEFORE any await: the human-pause delays below
// mean start() is in-flight for several seconds, so a second /stream call
// must be rejected by the `this.active` guard above, and the status must
// read "starting" rather than idle during the wait. Keep controller /
// streamer / capture as LOCAL refs so an interleaved stop() (which nulls the
// instance fields) can't turn our own continuation into a null dereference.
this.active = true;
playStream(output, this.streamer, { type: "go-live" }, this.controller.signal)
.catch((err: Error) => console.error("[selfbot] playStream:", err))
.finally(() => {
this.active = false;
const controller = (this.controller = new AbortController());
const signal = controller.signal;
let streamer: any = null;
let capture: ChildProcess | null = null;
try {
const { selfbot, vs } = await this.loadLib();
const { Streamer, prepareStream, playStream } = vs;
signal.throwIfAborted();
streamer = this.streamer = new Streamer(new selfbot.Client());
await streamer.client.login(this.config.selfbotToken);
signal.throwIfAborted();
// Act like a person, not a bot: take a breath after coming online before
// navigating into the voice channel, then settle in for a few seconds
// before hitting "Go Live". Randomised so the cadence isn't
// fingerprintable. throwIfAborted() after each pause unwinds into the
// catch below if stop() lands mid-wait, so we never join/go-live on a
// torn-down streamer.
await this.humanPause(900, 2200, signal);
signal.throwIfAborted();
await streamer.joinVoice(ctx.guildId, ctx.voiceChannelId);
await this.humanPause(2500, 5000, signal);
signal.throwIfAborted();
const [w, h] = this.config.vncResolution.split("x").map((n) => parseInt(n, 10));
// Capture the VNC X display with the SYSTEM ffmpeg (which reliably has
// x11grab), then pipe that stream into the library. Relying on the lib's
// bundled libav for the x11grab input device is not portable; piping the
// system ffmpeg is. (Verified live against a real voice channel.)
//
// The SYSTEM ffmpeg produces the final, Discord-ready H264 in one pass:
// target bitrate (-b:v/-maxrate), no B-frames (WebRTC requires this), a
// 1s keyframe interval, and yuv420p. The library then only REMUXES it
// (noTranscoding below) so there is no second decode/scale/encode. With
// streamHw on (default) this single encode runs on the GPU (h264_nvenc,
// RTX 5050); otherwise it falls back to software x264.
const hw = this.config.streamHw;
const kbps = this.config.vncBitrateKbps;
const maxKbps = Math.round(kbps * 1.5);
const captureCodecArgs = hw
? ["-c:v", "h264_nvenc", "-preset", "p4", "-tune", "ll", "-forced-idr", "1"]
: ["-c:v", "libx264", "-preset", "ultrafast", "-tune", "zerolatency"];
capture = this.capture = spawn("ffmpeg", [
"-loglevel", "error",
"-f", "x11grab",
"-framerate", String(this.config.vncFramerate),
"-video_size", this.config.vncResolution,
"-i", this.config.vncDisplay,
...captureCodecArgs,
"-b:v", `${kbps}k`, "-maxrate", `${maxKbps}k`, "-bufsize", `${kbps}k`,
"-bf", "0",
"-pix_fmt", "yuv420p",
"-g", String(this.config.vncFramerate),
"-f", "mpegts", "pipe:1",
]);
capture.stderr?.on("data", (d) => {
if (!signal.aborted) console.error("[selfbot x11grab]", d.toString().trim());
});
return "🔴 셀프봇으로 VNC 화면을 음성채널에 실시간 송출 중입니다 (Go Live).";
const { command, output } = prepareStream(
capture.stdout,
{
// The capture above is already a Discord-ready H264 elementary stream,
// so the library only remuxes it (no second encode). width/height/
// frameRate are passed for signalling; encoding options are ignored
// on the copy path.
width: w || 1920,
height: h || 1080,
frameRate: this.config.vncFramerate,
videoCodec: "H264",
noTranscoding: true,
},
signal,
);
command.on("error", (err: Error) => {
if (!signal.aborted) console.error("[selfbot] ffmpeg error:", err);
});
signal.throwIfAborted();
playStream(output, streamer, { type: "go-live" }, signal)
.catch((err: Error) => {
if (!signal.aborted) console.error("[selfbot] playStream:", err);
})
.finally(() => {
// The stream ended on its own (not via stop()); release the lock.
if (this.controller === controller) this.active = false;
});
return "🔴 셀프봇으로 VNC 화면을 음성채널에 실시간 송출 중입니다 (Go Live).";
} catch (e) {
// Startup was aborted (stop() during a pause) or failed. Tear down using
// our LOCAL refs, then clear instance state only if it still points at us
// (a concurrent stop()/start() may already have replaced it).
try {
capture?.kill("SIGKILL");
} catch {
/* ignore */
}
try {
streamer?.leaveVoice?.();
streamer?.client?.destroy?.();
} catch {
/* ignore */
}
if (this.capture === capture) this.capture = null;
if (this.streamer === streamer) this.streamer = null;
if (this.controller === controller) this.controller = null;
this.active = false;
if (signal.aborted) return "송출을 시작하는 중에 중지했습니다.";
throw e;
}
}
async stop(): Promise<void> {