diff --git a/.env.example b/.env.example index 198dd04..2c84a53 100644 --- a/.env.example +++ b/.env.example @@ -11,6 +11,8 @@ DISCORD_BOT_TOKEN= DISCORD_APP_ID= # The (single) server this bot serves. Guild-scoped commands appear instantly. DISCORD_GUILD_ID= +# Voice channel used by the stream-test scripts (bot/scripts/stream-test). +DISCORD_VOICE_CHANNEL_ID= # --------------------------------------------------------------------------- # Brain bridge (Python service in bridge/) — STT + reply engine + TTS diff --git a/bot/scripts/stream-test/README.md b/bot/scripts/stream-test/README.md new file mode 100644 index 0000000..b721089 --- /dev/null +++ b/bot/scripts/stream-test/README.md @@ -0,0 +1,39 @@ +# stream-test + +Operational scripts for manually verifying the selfbot Go-Live broadcast with a +real browsing session captured from the X display. + +## Files +- `stream-hold.ts` - joins the voice channel and keeps the Go-Live stream up + until stopped. All params from `.env` (`DISCORD_SELFBOT_TOKEN`, + `DISCORD_GUILD_ID`, `DISCORD_VOICE_CHANNEL_ID`, `VNC_RESOLUTION`, + `VNC_FRAMERATE`, `VNC_BITRATE_KBPS`, `STREAM_HW`, `VNC_DISPLAY`). +- `human.mjs` - human-like interaction helpers. Real mouse/keyboard via + `xdotool` (so the cursor is visible in the stream); Playwright locates + elements. Behind-the-scenes control (fullscreen, play, quality, autoplay, + navigation) uses the CDP/DOM API. +- `scenario.mjs` - the browse scenario (YouTube -> IU live -> 1080p -> + fullscreen -> Naver -> 나무위키), driven with the human helpers. Connects to a + Chrome already running with `--remote-debugging-port` (`CDP_PORT`, default + 9222) on the streamed display. 
+ +## Run +``` +# keep the broadcast up (separate process / service) +bun bot/scripts/stream-test/stream-hold.ts + +# Chrome on the streamed display with remote debugging, then: +node bot/scripts/stream-test/scenario.mjs +``` + +## A/B framerate/resolution +Lower settings to compare what Discord actually delivers to viewers, e.g.: +``` +VNC_RESOLUTION=1280x720 VNC_FRAMERATE=30 bun bot/scripts/stream-test/stream-hold.ts +``` + +## Notes +- Selfbot streaming violates Discord ToS; use a burner account. +- Requires `xdotool`, an X display, and a system `ffmpeg` with `x11grab`/nvenc. +- Prereqs (`playwright`, system Chrome) are not bot dependencies; install + separately where you run the scenario. diff --git a/bot/scripts/stream-test/human.mjs b/bot/scripts/stream-test/human.mjs new file mode 100644 index 0000000..1ca3828 --- /dev/null +++ b/bot/scripts/stream-test/human.mjs @@ -0,0 +1,103 @@ +// Human-like interaction helpers: drive the REAL X mouse/keyboard via xdotool +// so the cursor visibly moves and is captured by the screen stream, using +// Playwright only to LOCATE elements and read state. This is the default +// interaction mode for the browse scenarios. +// +// Note: only the user-visible browsing actions are real input (cursor move, +// click, scroll, char-by-char typing). Behind-the-scenes control (window +// fullscreen, play, quality, autoplay toggle, page navigation, and click +// fallbacks) intentionally uses the CDP/DOM API for reliability. +import { execFile } from 'node:child_process'; + +const DISPLAY = process.env.VNC_DISPLAY || ':1'; +const sleep = (ms) => new Promise((r) => setTimeout(r, ms)); +const rand = (a, b) => a + Math.random() * (b - a); +const xdo = (args) => + new Promise((res, rej) => + execFile('xdotool', args, { env: { ...process.env, DISPLAY } }, (e, so) => (e ? rej(e) : res(so || ''))), + ); + +let cur = { x: 960, y: 540 }; +const easeInOut = (t) => (t < 0.5 ? 
2 * t * t : 1 - Math.pow(-2 * t + 2, 2) / 2); + +async function contentOrigin(page) { + const g = await page.evaluate(() => ({ + sx: window.screenX, sy: window.screenY, + ow: window.outerWidth, oh: window.outerHeight, + iw: window.innerWidth, ih: window.innerHeight, + })); + const bx = Math.max(0, Math.round((g.ow - g.iw) / 2)); + const topInset = Math.max(0, g.oh - g.ih - bx); + return { ox: g.sx + bx, oy: g.sy + topInset }; +} + +// Smoothly move the real cursor to a screen point with eased, slightly jittered steps. +export async function humanMove(toX, toY) { + const steps = Math.max(12, Math.min(48, Math.round(Math.hypot(toX - cur.x, toY - cur.y) / 22))); + const cmd = []; + for (let i = 1; i <= steps; i++) { + const t = easeInOut(i / steps); + const jx = i < steps ? rand(-1.5, 1.5) : 0; + const jy = i < steps ? rand(-1.5, 1.5) : 0; + cmd.push('mousemove', String(Math.round(cur.x + (toX - cur.x) * t + jx)), + String(Math.round(cur.y + (toY - cur.y) * t + jy)), + 'sleep', rand(0.006, 0.018).toFixed(3)); + } + await xdo(cmd); + cur = { x: toX, y: toY }; + await sleep(rand(40, 130)); +} + +export async function humanClickXY(sx, sy) { + await humanMove(sx, sy); + await sleep(rand(60, 170)); + await xdo(['click', '1']); + await sleep(rand(130, 300)); +} + +// Locate a Playwright element, move the real cursor into it (random offset), click. +export async function humanClick(page, locator) { + await locator.scrollIntoViewIfNeeded().catch(() => {}); + await sleep(rand(150, 380)); + const box = await locator.boundingBox(); + if (!box) { await locator.click({ timeout: 5000 }).catch(() => {}); return; } + const { ox, oy } = await contentOrigin(page); + const sx = Math.round(ox + box.x + box.width * rand(0.35, 0.65)); + const sy = Math.round(oy + box.y + box.height * rand(0.35, 0.65)); + await humanClickXY(sx, sy); +} + +// Type text one character at a time at a human, slightly irregular pace. 
+export async function humanType(text) { + await sleep(rand(220, 420)); // let focus settle so the 1st char isn't dropped + for (const ch of text) { + await xdo(['type', '--clearmodifiers', '--', ch]); + await sleep(rand(70, 200)); + if (Math.random() < 0.12) await sleep(rand(150, 400)); // occasional pause + } +} + +export async function pressKey(key) { + await xdo(['key', '--clearmodifiers', key]); + await sleep(rand(120, 280)); +} + +// Gradual wheel scroll (dir>0 = down). Optionally hover over an element first. +export async function humanScroll(page, dir, notches, overLocator) { + if (overLocator) { + const box = await overLocator.boundingBox().catch(() => null); + if (box) { + const { ox, oy } = await contentOrigin(page); + await humanMove(Math.round(ox + box.x + box.width / 2), Math.round(oy + box.y + box.height / 2)); + } + } + const button = dir > 0 ? '5' : '4'; + for (let i = 0; i < notches; i++) { + await xdo(['click', button]); + await sleep(rand(40, 115)); + if (i % 6 === 5) await sleep(rand(250, 600)); // pause like reading + } + await sleep(rand(250, 600)); +} + +export { sleep, rand }; diff --git a/bot/scripts/stream-test/scenario.mjs b/bot/scripts/stream-test/scenario.mjs new file mode 100644 index 0000000..85f5e99 --- /dev/null +++ b/bot/scripts/stream-test/scenario.mjs @@ -0,0 +1,77 @@ +// Browse scenario driven with human-like real mouse/keyboard (see human.mjs). +// Connects to a Chrome already running with --remote-debugging-port (default +// 9222) on the streamed X display, and performs: +// YouTube search -> open IU live concert -> 1080p -> watch 20s -> fullscreen +// 20s -> Naver search 아이유 -> scroll -> 나무위키 -> scroll. +// +// Real input (visible on the stream): search/Naver typing, clicking the video, +// the fullscreen button, scrolling, entering 나무위키. +// API-driven (behind the scenes): window fullscreen, play, quality, autoplay +// toggle, page navigation, and click fallbacks. 
+import { chromium } from 'playwright'; +import { humanClick, humanType, pressKey, humanScroll, sleep } from './human.mjs'; + +const CDP = process.env.CDP_PORT || '9222'; +const VID = process.env.TEST_VIDEO_ID || 'X_am71G6Vy4'; // IU HEREH WORLD TOUR (live, 1080p+) +const SEARCH = process.env.TEST_YT_QUERY || '내손을잡아'; +const NAVER_Q = process.env.TEST_NAVER_QUERY || '아이유'; + +const b = await chromium.connectOverCDP(`http://localhost:${CDP}`); +const ctx = b.contexts()[0]; +const page = ctx.pages()[0]; +const s = await ctx.newCDPSession(page); +page.setDefaultTimeout(25000); +const winState = async (st) => { const { windowId } = await s.send('Browser.getWindowForTarget'); await s.send('Browser.setWindowBounds', { windowId, bounds: { windowState: st } }); }; +const ensurePlaying = () => page.evaluate(() => { const v = document.querySelector('video'); const p = document.getElementById('movie_player'); try { p && p.playVideo && p.playVideo(); } catch {} if (v && v.paused) v.play().catch(() => {}); }); +const autoplayOff = () => page.evaluate(() => { const btn = document.querySelector('.ytp-autonav-toggle-button'); if (btn && btn.getAttribute('aria-checked') === 'true') btn.click(); }); + +await page.evaluate(() => { if (document.fullscreenElement) document.exitFullscreen?.(); }); +await winState('normal'); +await page.goto('https://www.youtube.com', { waitUntil: 'domcontentloaded' }); await sleep(2500); + +await humanClick(page, page.locator('input#search, input[name=search_query]').first()); +await humanType(SEARCH); +await pressKey('Return'); +await sleep(3800); + +let link = page.locator(`a#video-title[href*="${VID}"], a[href*="${VID}"]`).first(); +if (!(await link.count().catch(() => 0))) link = page.locator('ytd-video-renderer a#video-title, ytd-rich-item-renderer a#video-title').first(); +await humanClick(page, link); +await sleep(3000); +if (!/watch/.test(page.url())) await page.goto('https://www.youtube.com/watch?v=' + VID, { waitUntil: 'domcontentloaded' }); 
+await page.waitForSelector('#movie_player', { timeout: 25000 }); await sleep(2500); +for (let i = 0; i < 8; i++) { const ad = page.locator('.ytp-ad-skip-button, .ytp-ad-skip-button-modern, .ytp-skip-ad-button'); if (await ad.count().catch(() => 0)) { await ad.first().click({ timeout: 1500 }).catch(() => {}); await sleep(1200); } else break; } + +await ensurePlaying(); await sleep(1200); +await page.evaluate(() => { const p = document.getElementById('movie_player'); try { const L = p.getAvailableQualityLevels ? p.getAvailableQualityLevels() : []; const w = L.includes('hd1080') ? 'hd1080' : (L[0]); p.setPlaybackQualityRange && p.setPlaybackQualityRange(w, w); } catch {} }); +await autoplayOff(); +console.log('STEP watch-1080-windowed'); await sleep(20000); + +await winState('fullscreen'); await sleep(1200); +await humanClick(page, page.locator('.ytp-fullscreen-button')); +await sleep(800); await ensurePlaying(); +console.log('STEP fullscreen'); await sleep(20000); + +await page.evaluate(() => { if (document.fullscreenElement) document.exitFullscreen?.(); }); +await winState('normal'); await sleep(1500); + +await page.goto('https://www.naver.com', { waitUntil: 'domcontentloaded' }); await sleep(2500); +await humanClick(page, page.locator('input#query').first()); +await humanType(NAVER_Q); +await pressKey('Return'); +await sleep(2800); +await humanScroll(page, +1, 18); +console.log('STEP naver-scrolled'); + +const namu = page.locator('a[href*="namu.wiki"]').first(); +if (await namu.count().catch(() => 0)) { + await humanClick(page, namu); + await sleep(3000); + await humanScroll(page, +1, 14); + await humanScroll(page, -1, 8); + await humanScroll(page, +1, 10); + console.log('STEP namu-scrolled'); +} else console.log('STEP namu-not-found'); + +console.log('SCENARIO_DONE'); +await b.close(); diff --git a/bot/scripts/stream-test/stream-hold.ts b/bot/scripts/stream-test/stream-hold.ts new file mode 100644 index 0000000..cc2cc29 --- /dev/null +++ 
b/bot/scripts/stream-test/stream-hold.ts
@@ -0,0 +1,84 @@
+// Persistent selfbot stream holder for manual/operational testing of the
+// Go-Live broadcast. Joins the voice channel, goes live, and keeps the stream
+// up until stopped (SIGTERM/SIGINT) or HOLD_MS elapses. All parameters come
+// from the environment (.env).
+//
+//   bun bot/scripts/stream-test/stream-hold.ts
+//
+// Requires in .env: DISCORD_SELFBOT_TOKEN, DISCORD_GUILD_ID,
+// DISCORD_VOICE_CHANNEL_ID. Stream params: VNC_RESOLUTION, VNC_FRAMERATE,
+// VNC_BITRATE_KBPS, STREAM_HW, VNC_DISPLAY (same vars the bot uses).
+// Optional: HOLD_MS, the maximum hold time in milliseconds (default 7200000).
+import "dotenv/config";
+import { SelfbotStreamer } from "../../src/stream/selfbot.ts";
+
+// Largest delay a timer accepts; a longer (or NaN) delay is replaced by ~1 ms.
+const MAX_TIMER_MS = 2 ** 31 - 1;
+
+/**
+ * Reads a positive integer setting from the environment.
+ *
+ * @param name - environment variable to read
+ * @param fallback - value used when the variable is unset or blank
+ * @returns the parsed integer, or `fallback`
+ *
+ * Exits with status 1 when the variable is set but is not a positive integer:
+ * parseInt would yield NaN, which would otherwise reach the stream settings or
+ * make the hold timer fire immediately.
+ */
+function envInt(name: string, fallback: number): number {
+  const raw = process.env[name];
+  if (raw === undefined || raw.trim() === "") return fallback;
+  const value = parseInt(raw, 10);
+  if (!Number.isFinite(value) || value <= 0) {
+    console.error(`Invalid ${name}="${raw}": expected a positive integer`);
+    process.exit(1);
+  }
+  return value;
+}
+
+// NOTE(review): `as any` kept from the original; the streamer's config type is not visible here - confirm and narrow.
+const config = {
+  selfbotToken: process.env.DISCORD_SELFBOT_TOKEN ?? "",
+  vncDisplay: process.env.VNC_DISPLAY ?? ":1",
+  vncResolution: process.env.VNC_RESOLUTION ?? "1920x1080",
+  vncFramerate: envInt("VNC_FRAMERATE", 60),
+  vncBitrateKbps: envInt("VNC_BITRATE_KBPS", 8000),
+  streamHw: (process.env.STREAM_HW ?? "1") !== "0",
+} as any;
+
+const guildId = process.env.DISCORD_GUILD_ID;
+const voiceChannelId = process.env.DISCORD_VOICE_CHANNEL_ID;
+if (!config.selfbotToken || !guildId || !voiceChannelId) {
+  console.error("Missing DISCORD_SELFBOT_TOKEN / DISCORD_GUILD_ID / DISCORD_VOICE_CHANNEL_ID in .env");
+  process.exit(1);
+}
+
+const s = new SelfbotStreamer(config);
+// Clamp so an oversized HOLD_MS cannot overflow the timer and end the hold at once.
+const maxMs = Math.min(envInt("HOLD_MS", 7200000), MAX_TIMER_MS);
+let stopped = false;
+// Shutdown shared by the signal handlers and the hold timer; runs at most once.
+const stop = async () => {
+  if (stopped) return;
+  stopped = true;
+  console.log("STREAM_STOPPING");
+  let code = 0;
+  try {
+    await s.stop();
+    console.log("STREAM_STOPPED");
+  } catch (e: unknown) {
+    // Report and exit non-zero instead of leaving an unhandled rejection in a signal handler.
+    console.error("STREAM_STOP_FAILED", e);
+    code = 1;
+  }
+  process.exit(code);
+};
+process.on("SIGTERM", stop);
+process.on("SIGINT", stop);
+
+const r = await s.start({ guildId, voiceChannelId } as any);
+console.log(`STREAM_START: ${r} active:${s.isActive()} (${config.vncResolution}@${config.vncFramerate} ${config.vncBitrateKbps}k hw=${config.streamHw})`);
+setTimeout(stop, maxMs);
+// NOTE(review): no-op interval, presumably to keep the process alive while holding - confirm it is still needed.
+setInterval(() => {}, 60000);
diff --git a/bot/src/stream/selfbot.ts b/bot/src/stream/selfbot.ts
index 48b2cf2..c7b1813 100644
--- a/bot/src/stream/selfbot.ts
+++ b/bot/src/stream/selfbot.ts
@@ -131,7 +131,12 @@ export class SelfbotStreamer implements ScreenStreamer {
     // RTX 5050); otherwise it falls back to software x264.
     const hw = this.config.streamHw;
     const kbps = this.config.vncBitrateKbps;
-    const maxKbps = Math.round(kbps * 1.5);
+    // The library advertises a hardcoded max_bitrate of 10 Mbps to Discord
+    // (BaseMediaConnection: `max_bitrate: 10000 * 1000`). If the encoder bursts
+    // above that negotiated ceiling, WebRTC congestion control drops packets
+    // and the viewer sees stutter. Cap -maxrate at 10 Mbps to stay within it.
+    const LIB_MAX_BITRATE_KBPS = 10000;
+    const maxKbps = Math.min(Math.round(kbps * 1.5), LIB_MAX_BITRATE_KBPS);
     const captureCodecArgs = hw
       ? ["-c:v", "h264_nvenc", "-preset", "p4", "-tune", "ll", "-forced-idr", "1"]
       : ["-c:v", "libx264", "-preset", "ultrafast", "-tune", "zerolatency"];