From 208fbbc8510b4b75cc51a044bdc4ef3c459aa9cc Mon Sep 17 00:00:00 2001 From: javis-bot Date: Wed, 10 Jun 2026 15:50:32 +0900 Subject: [PATCH] feat(selfbot): broadcast desktop audio + smart subtitles in the browse scenario Two broadcast-experience improvements: - Audio: the Go-Live stream was video-only. Capture the desktop sound (the default PipeWire/Pulse sink monitor, @DEFAULT_MONITOR@) as a second ffmpeg input and mux AAC into the mpegts; the library re-encodes it to Opus for Discord. Controlled by STREAM_AUDIO / STREAM_AUDIO_SOURCE (default on). ffmpeg inherits XDG_RUNTIME_DIR to reach the pulse socket. Verified: the streamer now reports "Found audio stream" and the monitor carries Chrome audio (~-11 dB). - Subtitles: in the browse scenario, default captions OFF, but auto-enable a Korean track when the video offers one (getOption captions tracklist -> setOption / unloadModule). Co-Authored-By: Claude Opus 4.7 --- .env.example | 5 +++++ bot/scripts/stream-test/scenario.mjs | 18 ++++++++++++++++++ bot/scripts/stream-test/stream-hold.ts | 2 ++ bot/src/config.ts | 5 +++++ bot/src/stream/selfbot.ts | 14 ++++++++++++++ 5 files changed, 44 insertions(+) diff --git a/.env.example b/.env.example index a4ca8de..867e6f7 100644 --- a/.env.example +++ b/.env.example @@ -78,6 +78,11 @@ DISCORD_SELFBOT_TOKEN= # Hardware (NVENC) encode for the stream. 1 = use the GPU (recommended for # 1080p60), 0 = software x264. Requires an NVIDIA GPU + ffmpeg built with nvenc. STREAM_HW=1 +# Capture desktop audio into the broadcast so the stream has sound. 1 = on, +# 0 = mute. Pulls the PipeWire/Pulse monitor of the default sink; override the +# source with STREAM_AUDIO_SOURCE (e.g. a specific ".monitor"). +STREAM_AUDIO=1 +STREAM_AUDIO_SOURCE=@DEFAULT_MONITOR@ # --- novnc backend --- # e.g. http://192.168.10.9:6080/vnc.html (websockify --web=/usr/share/novnc 6080 localhost:5901) diff --git a/bot/scripts/stream-test/scenario.mjs b/bot/scripts/stream-test/scenario.mjs index 8d18733..8c8e5c8 100644 --- a/bot/scripts/stream-test/scenario.mjs +++ b/bot/scripts/stream-test/scenario.mjs @@ -43,6 +43,23 @@ async function browserFullscreen(on) { } catch { /* best-effort */ } } +// Subtitles: OFF by default, but turn ON Korean when a Korean track exists. +async function applyCaptions() { + await read(() => { try { document.getElementById('movie_player')?.loadModule?.('captions'); } catch {} }); + await sleep(800); + return read(() => { + const pl = document.getElementById('movie_player'); + if (!pl || !pl.getOption) return 'no-player'; + let tracks = []; + try { tracks = pl.getOption('captions', 'tracklist') || []; } catch {} + const ko = tracks.find((t) => /^ko/i.test(t.languageCode || '')); + if (ko) { try { pl.setOption('captions', 'track', { languageCode: ko.languageCode }); } catch {} return 'ko-on'; } + try { pl.setOption('captions', 'track', {}); } catch {} + try { pl.unloadModule('captions'); } catch {} + return 'off'; + }); +} + // 1) open YouTube by typing the URL in the address bar await navigateOmnibox('https://www.youtube.com'); await sleep(3000); @@ -84,6 +101,7 @@ async function setQuality1080() { return null; } console.log('QUALITY', await setQuality1080()); +console.log('CAPTIONS', await applyCaptions()); // 6) turn off autoplay with a real click if it is on const auto = page.locator('.ytp-autonav-toggle-button'); diff --git a/bot/scripts/stream-test/stream-hold.ts b/bot/scripts/stream-test/stream-hold.ts index cc2cc29..27298eb 100644 --- a/bot/scripts/stream-test/stream-hold.ts +++ b/bot/scripts/stream-test/stream-hold.ts @@ -18,6 +18,8 @@ const config = { vncFramerate: parseInt(process.env.VNC_FRAMERATE ?? "60", 10), vncBitrateKbps: parseInt(process.env.VNC_BITRATE_KBPS ?? "8000", 10), streamHw: (process.env.STREAM_HW ?? "1") !== "0", + streamAudio: (process.env.STREAM_AUDIO ?? "1") !== "0", + streamAudioSource: process.env.STREAM_AUDIO_SOURCE ?? "@DEFAULT_MONITOR@", } as any; const guildId = process.env.DISCORD_GUILD_ID; diff --git a/bot/src/config.ts b/bot/src/config.ts index 83e516d..7f2e861 100644 --- a/bot/src/config.ts +++ b/bot/src/config.ts @@ -42,6 +42,11 @@ export const config = { selfbotToken: opt("DISCORD_SELFBOT_TOKEN"), // Use NVENC hardware encode + hw-accelerated decode for the stream (RTX 5050). streamHw: opt("STREAM_HW", "1") !== "0", + // Capture desktop audio into the broadcast so the stream has sound. Pulls the + // PipeWire/Pulse monitor of the default sink (what the desktop plays). Set + // STREAM_AUDIO=0 to mute; STREAM_AUDIO_SOURCE overrides the capture source. + streamAudio: opt("STREAM_AUDIO", "1") !== "0", + streamAudioSource: opt("STREAM_AUDIO_SOURCE", "@DEFAULT_MONITOR@"), // novnc backend novncUrl: opt("NOVNC_URL", ""), diff --git a/bot/src/stream/selfbot.ts b/bot/src/stream/selfbot.ts index f878172..51822bb 100644 --- a/bot/src/stream/selfbot.ts +++ b/bot/src/stream/selfbot.ts @@ -143,17 +143,31 @@ export class SelfbotStreamer implements ScreenStreamer { const captureCodecArgs = hw ? ["-c:v", "h264_nvenc", "-preset", "p4", "-tune", "ll", "-forced-idr", "1"] : ["-c:v", "libx264", "-preset", "ultrafast", "-tune", "zerolatency"]; + // Optionally pull desktop audio (the default sink's PipeWire/Pulse monitor) + // so the broadcast has sound. We add it as a second input and mux AAC into + // the mpegts; the library re-encodes it to Opus for Discord. ffmpeg needs + // XDG_RUNTIME_DIR (inherited) to reach the pulse socket. -map is required + // once there are two inputs. + const audioOn = this.config.streamAudio; + const audioInput = audioOn ? ["-f", "pulse", "-i", this.config.streamAudioSource] : []; + const audioMap = audioOn ? ["-map", "0:v:0", "-map", "1:a:0"] : []; + const audioCodec = audioOn ? ["-c:a", "aac", "-b:a", "160k", "-ar", "48000", "-ac", "2"] : []; capture = this.capture = spawn("ffmpeg", [ "-loglevel", "error", + "-thread_queue_size", "1024", "-f", "x11grab", "-framerate", String(this.config.vncFramerate), "-video_size", this.config.vncResolution, "-i", this.config.vncDisplay, + ...(audioOn ? ["-thread_queue_size", "1024"] : []), + ...audioInput, + ...audioMap, ...captureCodecArgs, "-b:v", `${kbps}k`, "-maxrate", `${maxKbps}k`, "-bufsize", `${kbps}k`, "-bf", "0", "-pix_fmt", "yuv420p", "-g", String(this.config.vncFramerate), + ...audioCodec, "-f", "mpegts", "pipe:1", ]); capture.stderr?.on("data", (d) => {