fix: cap selfbot stream -maxrate at lib's 10 Mbps ceiling; add stream-test tooling

- selfbot.ts: the @dank074 lib advertises a hardcoded max_bitrate of 10 Mbps to
  Discord (BaseMediaConnection: `max_bitrate: 10000 * 1000`). Our encoder used
  -maxrate = 1.5x target (12 Mbps at 8 Mbps target), so high-motion bursts
  exceeded the negotiated ceiling and WebRTC dropped packets (viewer stutter).
  Cap -maxrate at 10 Mbps.
- Add bot/scripts/stream-test/: env-driven stream-hold.ts (persistent Go-Live
  holder), human.mjs (real xdotool mouse/keyboard + char-by-char typing), and
  scenario.mjs (YouTube/Naver browse). Channel/guild/video are env-parametrised.
- .env.example: document DISCORD_VOICE_CHANNEL_ID for the stream-test scripts.
This commit is contained in:
javis-bot
2026-06-10 12:50:24 +09:00
parent 7a148f8caa
commit 1e30a49562
6 changed files with 274 additions and 1 deletions

View File

@@ -11,6 +11,8 @@ DISCORD_BOT_TOKEN=
DISCORD_APP_ID=
# The (single) server this bot serves. Guild-scoped commands appear instantly.
DISCORD_GUILD_ID=
# Voice channel used by the stream-test scripts (bot/scripts/stream-test).
DISCORD_VOICE_CHANNEL_ID=
# ---------------------------------------------------------------------------
# Brain bridge (Python service in bridge/) — STT + reply engine + TTS

View File

@@ -0,0 +1,39 @@
# stream-test
Operational scripts for manually verifying the selfbot Go-Live broadcast with a
real browsing session captured from the X display.
## Files
- `stream-hold.ts` - joins the voice channel and keeps the Go-Live stream up
until stopped (SIGTERM/SIGINT) or until `HOLD_MS` elapses (default 2 hours).
All parameters are read from `.env` (`DISCORD_SELFBOT_TOKEN`,
`DISCORD_GUILD_ID`, `DISCORD_VOICE_CHANNEL_ID`, `VNC_RESOLUTION`,
`VNC_FRAMERATE`, `VNC_BITRATE_KBPS`, `STREAM_HW`, `VNC_DISPLAY`, `HOLD_MS`).
- `human.mjs` - human-like interaction helpers. Real mouse/keyboard via
`xdotool` (so the cursor is visible in the stream); Playwright locates
elements. Behind-the-scenes control (fullscreen, play, quality, autoplay,
navigation) uses the CDP/DOM API.
- `scenario.mjs` - the browse scenario (YouTube -> IU live -> 1080p ->
fullscreen -> Naver -> 나무위키), driven with the human helpers. Connects to a
Chrome already running with `--remote-debugging-port` (`CDP_PORT`, default
9222) on the streamed display. The video and search terms can be overridden
with `TEST_VIDEO_ID`, `TEST_YT_QUERY`, and `TEST_NAVER_QUERY`.
## Run
```
# keep the broadcast up (separate process / service)
bun bot/scripts/stream-test/stream-hold.ts
# Chrome on the streamed display with remote debugging, then:
node bot/scripts/stream-test/scenario.mjs
```
## A/B framerate/resolution
Lower settings to compare what Discord actually delivers to viewers, e.g.:
```
VNC_RESOLUTION=1280x720 VNC_FRAMERATE=30 bun bot/scripts/stream-test/stream-hold.ts
```
## Notes
- Selfbot streaming violates Discord ToS; use a burner account.
- Requires `xdotool`, an X display, and a system `ffmpeg` with `x11grab`/nvenc.
- Prereqs (`playwright`, system Chrome) are not bot dependencies; install
separately where you run the scenario.

View File

@@ -0,0 +1,103 @@
// Human-like interaction helpers: drive the REAL X mouse/keyboard via xdotool
// so the cursor visibly moves and is captured by the screen stream, using
// Playwright only to LOCATE elements and read state. This is the default
// interaction mode for the browse scenarios.
//
// Note: only the user-visible browsing actions are real input (cursor move,
// click, scroll, char-by-char typing). Behind-the-scenes control (window
// fullscreen, play, quality, autoplay toggle, page navigation, and click
// fallbacks) intentionally uses the CDP/DOM API for reliability.
import { execFile } from 'node:child_process';
// X display that every xdotool invocation targets (VNC_DISPLAY, else ':1').
const DISPLAY = process.env.VNC_DISPLAY || ':1';

/** Resolve (with no value) once `ms` milliseconds have passed. */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

/** Random float computed as a + Math.random() * (b - a). */
function rand(a, b) {
  return a + Math.random() * (b - a);
}

/**
 * Run `xdotool` with the given argument array, with DISPLAY set in its
 * environment. Resolves with stdout ('' when there is none); rejects with the
 * error reported by execFile.
 */
function xdo(args) {
  return new Promise((resolve, reject) => {
    const options = { env: { ...process.env, DISPLAY } };
    execFile('xdotool', args, options, (err, stdout) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(stdout || '');
    });
  });
}

// Position the cursor was last sent to. Starts at (960, 540) -- presumably the
// centre of a 1920x1080 display; the real pointer position is never queried.
let cur = { x: 960, y: 540 };

/** Quadratic ease-in-out curve: maps 0 -> 0, 0.5 -> 0.5, 1 -> 1. */
function easeInOut(t) {
  if (t < 0.5) {
    return 2 * t * t;
  }
  return 1 - Math.pow(-2 * t + 2, 2) / 2;
}
async function contentOrigin(page) {
const g = await page.evaluate(() => ({
sx: window.screenX, sy: window.screenY,
ow: window.outerWidth, oh: window.outerHeight,
iw: window.innerWidth, ih: window.innerHeight,
}));
const bx = Math.max(0, Math.round((g.ow - g.iw) / 2));
const topInset = Math.max(0, g.oh - g.ih - bx);
return { ox: g.sx + bx, oy: g.sy + topInset };
}
/**
 * Smoothly move the real cursor to a screen point with eased, slightly
 * jittered steps.
 *
 * The path starts from the last position this module sent the cursor to
 * (`cur`), is split into 12-48 steps depending on distance, and is issued as
 * one chained xdotool command (`mousemove X Y sleep T ...`). The final step
 * carries no jitter so the cursor lands exactly on the target.
 *
 * @param toX target screen X in pixels
 * @param toY target screen Y in pixels
 */
export async function humanMove(toX, toY) {
  // xdotool treats an argument that starts with "-" as an option, so a
  // coordinate like "-1" (jitter next to the screen edge, or a target on a
  // partially off-screen window) would make the whole chained command fail.
  // Clamp every emitted coordinate to the screen's top/left edge instead.
  const onScreen = (v) => Math.max(0, Math.round(v));
  const destX = onScreen(toX);
  const destY = onScreen(toY);
  const dx = destX - cur.x;
  const dy = destY - cur.y;
  const steps = Math.max(12, Math.min(48, Math.round(Math.hypot(dx, dy) / 22)));
  const cmd = [];
  for (let i = 1; i <= steps; i++) {
    const t = easeInOut(i / steps);
    const isLast = i === steps;
    const jx = isLast ? 0 : rand(-1.5, 1.5);
    const jy = isLast ? 0 : rand(-1.5, 1.5);
    cmd.push(
      'mousemove',
      String(onScreen(cur.x + dx * t + jx)),
      String(onScreen(cur.y + dy * t + jy)),
      'sleep',
      rand(0.006, 0.018).toFixed(3),
    );
  }
  await xdo(cmd);
  // Remember where the cursor actually ended up (the clamped target).
  cur = { x: destX, y: destY };
  await sleep(rand(40, 130));
}
/**
 * Glide the real cursor to screen point (sx, sy) and left-click there, with a
 * short random hesitation before the click and a settle pause after it.
 */
export async function humanClickXY(sx, sy) {
  const LEFT_BUTTON = '1';
  await humanMove(sx, sy);
  // Hesitate briefly before pressing.
  const hesitation = rand(60, 170);
  await sleep(hesitation);
  await xdo(['click', LEFT_BUTTON]);
  // Let the click register before the caller continues.
  const settle = rand(130, 300);
  await sleep(settle);
}
/**
 * Click an element with the real mouse: scroll it into view, translate its
 * page-relative bounding box to screen coordinates, pick a random point in the
 * central 35%-65% band of the box, and click there via xdotool.
 *
 * When the element has no bounding box, falls back to a best-effort Playwright
 * click (5 s timeout, failures ignored).
 */
export async function humanClick(page, locator) {
  // Scrolling is best effort; a failure here is ignored.
  await locator.scrollIntoViewIfNeeded().catch(() => {});
  await sleep(rand(150, 380));

  const rect = await locator.boundingBox();
  if (!rect) {
    try {
      await locator.click({ timeout: 5000 });
    } catch {
      // Fallback click is best effort as well.
    }
    return;
  }

  const origin = await contentOrigin(page);
  const fracX = rand(0.35, 0.65);
  const targetX = Math.round(origin.ox + rect.x + rect.width * fracX);
  const fracY = rand(0.35, 0.65);
  const targetY = Math.round(origin.oy + rect.y + rect.height * fracY);
  await humanClickXY(targetX, targetY);
}
/**
 * Type `text` through xdotool one character at a time, at an irregular pace:
 * 70-200 ms between characters, plus an extra 150-400 ms pause after roughly
 * 12% of them.
 */
export async function humanType(text) {
  // Give focus time to settle so the first character is not dropped.
  await sleep(rand(220, 420));
  const chars = Array.from(text);
  for (let i = 0; i < chars.length; i += 1) {
    await xdo(['type', '--clearmodifiers', '--', chars[i]]);
    await sleep(rand(70, 200));
    const hesitate = Math.random() < 0.12;
    if (hesitate) {
      await sleep(rand(150, 400));
    }
  }
}
/**
 * Press a single key (xdotool key name, e.g. 'Return') with modifiers
 * cleared, then pause 120-280 ms.
 */
export async function pressKey(key) {
  const args = ['key', '--clearmodifiers', key];
  await xdo(args);
  const pause = rand(120, 280);
  await sleep(pause);
}
/**
 * Scroll with the real mouse wheel, one notch at a time.
 *
 * @param page        page used to translate element boxes to screen points
 * @param dir         > 0 scrolls down (button 5); anything else scrolls up (button 4)
 * @param notches     number of wheel clicks to send
 * @param overLocator optional element to hover (at its centre) before scrolling
 */
export async function humanScroll(page, dir, notches, overLocator) {
  // Hover target is optional, and a failed box lookup simply skips the hover.
  const hoverBox = overLocator ? await overLocator.boundingBox().catch(() => null) : null;
  if (hoverBox) {
    const { ox, oy } = await contentOrigin(page);
    const centreX = Math.round(ox + hoverBox.x + hoverBox.width / 2);
    const centreY = Math.round(oy + hoverBox.y + hoverBox.height / 2);
    await humanMove(centreX, centreY);
  }

  const wheelButton = dir > 0 ? '5' : '4';
  let sent = 0;
  while (sent < notches) {
    await xdo(['click', wheelButton]);
    await sleep(rand(40, 115));
    sent += 1;
    // After every sixth notch, linger as if reading.
    if (sent % 6 === 0) {
      await sleep(rand(250, 600));
    }
  }
  await sleep(rand(250, 600));
}
export { sleep, rand };

View File

@@ -0,0 +1,77 @@
// Browse scenario driven with human-like real mouse/keyboard (see human.mjs).
// Connects to a Chrome already running with --remote-debugging-port (default
// 9222) on the streamed X display, and performs:
// YouTube search -> open IU live concert -> 1080p -> watch 20s -> fullscreen
// 20s -> Naver search 아이유 -> scroll -> 나무위키 -> scroll.
//
// Real input (visible on the stream): search/Naver typing, clicking the video,
// the fullscreen button, scrolling, entering 나무위키.
// API-driven (behind the scenes): window fullscreen, play, quality, autoplay
// toggle, page navigation, and click fallbacks.
import { chromium } from 'playwright';
import { humanClick, humanType, pressKey, humanScroll, sleep } from './human.mjs';
// ---- Configuration (each value can be overridden from the environment) ----
const CDP = process.env.CDP_PORT || '9222';
const VID = process.env.TEST_VIDEO_ID || 'X_am71G6Vy4'; // IU HEREH WORLD TOUR (live, 1080p+)
const SEARCH = process.env.TEST_YT_QUERY || '내손을잡아';
const NAVER_Q = process.env.TEST_NAVER_QUERY || '아이유';

const b = await chromium.connectOverCDP(`http://localhost:${CDP}`);
try {
  // Drive the first tab of the first context of the already-running Chrome.
  const ctx = b.contexts()[0];
  const page = ctx?.pages()[0];
  if (!page) {
    // Fail with a clear message instead of a TypeError further down.
    throw new Error(
      `No open page found over CDP on port ${CDP}; start Chrome with --remote-debugging-port=${CDP} and at least one tab`,
    );
  }
  const s = await ctx.newCDPSession(page);
  page.setDefaultTimeout(25000);

  // ---- Behind-the-scenes (API-driven) helpers ----

  // Set the OS window state ('normal', 'fullscreen', ...) through CDP.
  const winState = async (st) => {
    const { windowId } = await s.send('Browser.getWindowForTarget');
    await s.send('Browser.setWindowBounds', { windowId, bounds: { windowState: st } });
  };

  // Leave DOM fullscreen if some element currently holds it.
  const exitDomFullscreen = () =>
    page.evaluate(() => {
      if (document.fullscreenElement) document.exitFullscreen?.();
    });

  // Start playback via the YouTube player API and the <video> element.
  const ensurePlaying = () =>
    page.evaluate(() => {
      const v = document.querySelector('video');
      const p = document.getElementById('movie_player');
      try {
        p && p.playVideo && p.playVideo();
      } catch {}
      if (v && v.paused) v.play().catch(() => {});
    });

  // Switch the autoplay-next toggle off when it is on.
  const autoplayOff = () =>
    page.evaluate(() => {
      const btn = document.querySelector('.ytp-autonav-toggle-button');
      if (btn && btn.getAttribute('aria-checked') === 'true') btn.click();
    });

  // ---- YouTube: search, open the video, watch windowed ----
  await exitDomFullscreen();
  await winState('normal');
  await page.goto('https://www.youtube.com', { waitUntil: 'domcontentloaded' });
  await sleep(2500);
  await humanClick(page, page.locator('input#search, input[name=search_query]').first());
  await humanType(SEARCH);
  await pressKey('Return');
  await sleep(3800);

  // Prefer the configured video in the results; otherwise take the first result.
  let link = page.locator(`a#video-title[href*="${VID}"], a[href*="${VID}"]`).first();
  if (!(await link.count().catch(() => 0))) {
    link = page.locator('ytd-video-renderer a#video-title, ytd-rich-item-renderer a#video-title').first();
  }
  await humanClick(page, link);
  await sleep(3000);
  // If the real click did not land on a watch page, navigate there directly.
  if (!/watch/.test(page.url())) {
    await page.goto('https://www.youtube.com/watch?v=' + VID, { waitUntil: 'domcontentloaded' });
  }
  await page.waitForSelector('#movie_player', { timeout: 25000 });
  await sleep(2500);

  // Click "skip ad" while such a button is present (at most 8 attempts).
  for (let i = 0; i < 8; i++) {
    const ad = page.locator('.ytp-ad-skip-button, .ytp-ad-skip-button-modern, .ytp-skip-ad-button');
    if (!(await ad.count().catch(() => 0))) break;
    await ad.first().click({ timeout: 1500 }).catch(() => {});
    await sleep(1200);
  }
  await ensurePlaying();
  await sleep(1200);

  // Request 1080p when offered, else the first listed quality level; do
  // nothing when the player reports no levels at all.
  await page.evaluate(() => {
    const p = document.getElementById('movie_player');
    try {
      const levels = p.getAvailableQualityLevels ? p.getAvailableQualityLevels() : [];
      const wanted = levels.includes('hd1080') ? 'hd1080' : levels[0];
      if (wanted && p.setPlaybackQualityRange) p.setPlaybackQualityRange(wanted, wanted);
    } catch {}
  });
  await autoplayOff();
  console.log('STEP watch-1080-windowed');
  await sleep(20000);

  // ---- YouTube: fullscreen ----
  await winState('fullscreen');
  await sleep(1200);
  await humanClick(page, page.locator('.ytp-fullscreen-button'));
  await sleep(800);
  await ensurePlaying();
  console.log('STEP fullscreen');
  await sleep(20000);
  await exitDomFullscreen();
  await winState('normal');
  await sleep(1500);

  // ---- Naver: search and scroll ----
  await page.goto('https://www.naver.com', { waitUntil: 'domcontentloaded' });
  await sleep(2500);
  await humanClick(page, page.locator('input#query').first());
  await humanType(NAVER_Q);
  await pressKey('Return');
  await sleep(2800);
  await humanScroll(page, +1, 18);
  console.log('STEP naver-scrolled');

  // ---- 나무위키: open the first namu.wiki result, if any, and scroll ----
  const namu = page.locator('a[href*="namu.wiki"]').first();
  if (await namu.count().catch(() => 0)) {
    await humanClick(page, namu);
    await sleep(3000);
    await humanScroll(page, +1, 14);
    await humanScroll(page, -1, 8);
    await humanScroll(page, +1, 10);
    console.log('STEP namu-scrolled');
  } else {
    console.log('STEP namu-not-found');
  }
  console.log('SCENARIO_DONE');
} finally {
  // Always release the CDP connection, including when a step above threw. A
  // failure to close is reported but must not mask the original error.
  await b.close().catch((e) => console.error('browser close failed:', e));
}

View File

@@ -0,0 +1,47 @@
// Persistent selfbot stream holder for manual/operational testing of the
// Go-Live broadcast. Joins the voice channel, goes live, and keeps the stream
// up until stopped (SIGTERM/SIGINT) or HOLD_MS elapses. All parameters come
// from the environment (.env).
//
// bun bot/scripts/stream-test/stream-hold.ts
//
// Requires in .env: DISCORD_SELFBOT_TOKEN, DISCORD_GUILD_ID,
// DISCORD_VOICE_CHANNEL_ID. Stream params: VNC_RESOLUTION, VNC_FRAMERATE,
// VNC_BITRATE_KBPS, STREAM_HW, VNC_DISPLAY (same vars the bot uses).
import "dotenv/config";
import { SelfbotStreamer } from "../../src/stream/selfbot.ts";
/** Largest delay setTimeout honours; larger (or invalid) delays fire after ~1 ms. */
const MAX_TIMEOUT_MS = 2147483647;

/**
 * Read a positive integer from the environment.
 *
 * An unset or blank variable yields `fallback` silently (an empty `VAR=` line
 * in .env would otherwise parse to NaN). A non-numeric or non-positive value
 * yields `fallback` with a warning; a value above `max` is clamped to `max`.
 */
function envInt(name: string, fallback: number, max: number = Number.MAX_SAFE_INTEGER): number {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === "") return fallback;
  const value = Number.parseInt(raw, 10);
  if (!Number.isFinite(value) || value <= 0) {
    console.error(`Ignoring invalid ${name}=${JSON.stringify(raw)}; using ${fallback}`);
    return fallback;
  }
  if (value > max) {
    console.error(`${name}=${value} exceeds ${max}; clamping`);
    return max;
  }
  return value;
}

// NOTE(review): cast kept from the original; the streamer's config type is
// declared elsewhere and this script supplies only the fields listed here --
// confirm that is the complete set SelfbotStreamer reads.
const config = {
  selfbotToken: process.env.DISCORD_SELFBOT_TOKEN ?? "",
  // `||` rather than `??`: an empty value is not a usable display/resolution,
  // and human.mjs resolves VNC_DISPLAY the same way.
  vncDisplay: process.env.VNC_DISPLAY || ":1",
  vncResolution: process.env.VNC_RESOLUTION || "1920x1080",
  vncFramerate: envInt("VNC_FRAMERATE", 60),
  vncBitrateKbps: envInt("VNC_BITRATE_KBPS", 8000),
  // Hardware encoding is on unless STREAM_HW is exactly "0".
  streamHw: (process.env.STREAM_HW ?? "1") !== "0",
} as any;
const guildId = process.env.DISCORD_GUILD_ID;
const voiceChannelId = process.env.DISCORD_VOICE_CHANNEL_ID;
if (!config.selfbotToken || !guildId || !voiceChannelId) {
  console.error("Missing DISCORD_SELFBOT_TOKEN / DISCORD_GUILD_ID / DISCORD_VOICE_CHANNEL_ID in .env");
  process.exit(1);
}

const s = new SelfbotStreamer(config);
// Maximum hold time (default 2 h), kept within what setTimeout can represent.
const maxMs = envInt("HOLD_MS", 7200000, MAX_TIMEOUT_MS);

let stopped = false;
/**
 * Stop the stream once and exit. Safe to call from several triggers (signals,
 * hold timer): only the first call does anything. Exits 0 after a clean stop
 * and 1 when stopping failed.
 */
const stop = async (): Promise<void> => {
  if (stopped) return;
  stopped = true;
  console.log("STREAM_STOPPING");
  let exitCode = 0;
  try {
    await s.stop();
    console.log("STREAM_STOPPED");
  } catch (err: unknown) {
    exitCode = 1;
    console.error("STREAM_STOP_FAILED", err);
  } finally {
    process.exit(exitCode);
  }
};
process.on("SIGTERM", () => void stop());
process.on("SIGINT", () => void stop());

const r = await s.start({ guildId, voiceChannelId } as any);
console.log(`STREAM_START: ${r} active:${s.isActive()} (${config.vncResolution}@${config.vncFramerate} ${config.vncBitrateKbps}k hw=${config.streamHw})`);
setTimeout(() => void stop(), maxMs);
// No-op interval so the event loop always has a pending timer.
setInterval(() => {}, 60000);

View File

@@ -131,7 +131,12 @@ export class SelfbotStreamer implements ScreenStreamer {
// RTX 5050); otherwise it falls back to software x264.
const hw = this.config.streamHw;
const kbps = this.config.vncBitrateKbps;
const maxKbps = Math.round(kbps * 1.5);
// The library advertises a hardcoded max_bitrate of 10 Mbps to Discord
// (BaseMediaConnection: `max_bitrate: 10000 * 1000`). If the encoder bursts
// above that negotiated ceiling, WebRTC congestion control drops packets
// and the viewer sees stutter. Cap -maxrate at 10 Mbps to stay within it.
const LIB_MAX_BITRATE_KBPS = 10000;
const maxKbps = Math.min(Math.round(kbps * 1.5), LIB_MAX_BITRATE_KBPS);
const captureCodecArgs = hw
? ["-c:v", "h264_nvenc", "-preset", "p4", "-tune", "ll", "-forced-idr", "1"]
: ["-c:v", "libx264", "-preset", "ultrafast", "-tune", "zerolatency"];