feat(selfbot): broadcast desktop audio + smart subtitles in the browse scenario

Two broadcast-experience improvements:

- Audio: the Go-Live stream was video-only. Capture the desktop sound (the
  default PipeWire/Pulse sink monitor, @DEFAULT_MONITOR@) as a second ffmpeg
  input and mux AAC into the mpegts; the library re-encodes it to Opus for
  Discord. Controlled by STREAM_AUDIO / STREAM_AUDIO_SOURCE (default on). ffmpeg
  inherits XDG_RUNTIME_DIR to reach the pulse socket. Verified: the streamer now
  reports "Found audio stream" and the monitor carries Chrome audio (~-11 dB).
- Subtitles: in the browse scenario, default captions OFF, but auto-enable a
  Korean track when the video offers one (getOption captions tracklist ->
  setOption / unloadModule).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
javis-bot
2026-06-10 15:50:32 +09:00
parent c6a0ca4572
commit 208fbbc851
5 changed files with 44 additions and 0 deletions

View File

@@ -78,6 +78,11 @@ DISCORD_SELFBOT_TOKEN=
# Hardware (NVENC) encode for the stream. 1 = use the GPU (recommended for # Hardware (NVENC) encode for the stream. 1 = use the GPU (recommended for
# 1080p60), 0 = software x264. Requires an NVIDIA GPU + ffmpeg built with nvenc. # 1080p60), 0 = software x264. Requires an NVIDIA GPU + ffmpeg built with nvenc.
STREAM_HW=1 STREAM_HW=1
# Capture desktop audio into the broadcast so the stream has sound. 1 = on,
# 0 = mute. Pulls the PipeWire/Pulse monitor of the default sink; override the
# source with STREAM_AUDIO_SOURCE (e.g. a specific "<sink>.monitor").
STREAM_AUDIO=1
STREAM_AUDIO_SOURCE=@DEFAULT_MONITOR@
# --- novnc backend --- # --- novnc backend ---
# e.g. http://192.168.10.9:6080/vnc.html (websockify --web=/usr/share/novnc 6080 localhost:5901) # e.g. http://192.168.10.9:6080/vnc.html (websockify --web=/usr/share/novnc 6080 localhost:5901)

View File

@@ -43,6 +43,23 @@ async function browserFullscreen(on) {
} catch { /* best-effort */ } } catch { /* best-effort */ }
} }
// Subtitles: OFF by default, but turn ON Korean when a Korean track exists.
async function applyCaptions() {
await read(() => { try { document.getElementById('movie_player')?.loadModule?.('captions'); } catch {} });
await sleep(800);
return read(() => {
const pl = document.getElementById('movie_player');
if (!pl || !pl.getOption) return 'no-player';
let tracks = [];
try { tracks = pl.getOption('captions', 'tracklist') || []; } catch {}
const ko = tracks.find((t) => /^ko/i.test(t.languageCode || ''));
if (ko) { try { pl.setOption('captions', 'track', { languageCode: ko.languageCode }); } catch {} return 'ko-on'; }
try { pl.setOption('captions', 'track', {}); } catch {}
try { pl.unloadModule('captions'); } catch {}
return 'off';
});
}
// 1) open YouTube by typing the URL in the address bar // 1) open YouTube by typing the URL in the address bar
await navigateOmnibox('https://www.youtube.com'); await sleep(3000); await navigateOmnibox('https://www.youtube.com'); await sleep(3000);
@@ -84,6 +101,7 @@ async function setQuality1080() {
return null; return null;
} }
console.log('QUALITY', await setQuality1080()); console.log('QUALITY', await setQuality1080());
console.log('CAPTIONS', await applyCaptions());
// 6) turn off autoplay with a real click if it is on // 6) turn off autoplay with a real click if it is on
const auto = page.locator('.ytp-autonav-toggle-button'); const auto = page.locator('.ytp-autonav-toggle-button');

View File

@@ -18,6 +18,8 @@ const config = {
vncFramerate: parseInt(process.env.VNC_FRAMERATE ?? "60", 10), vncFramerate: parseInt(process.env.VNC_FRAMERATE ?? "60", 10),
vncBitrateKbps: parseInt(process.env.VNC_BITRATE_KBPS ?? "8000", 10), vncBitrateKbps: parseInt(process.env.VNC_BITRATE_KBPS ?? "8000", 10),
streamHw: (process.env.STREAM_HW ?? "1") !== "0", streamHw: (process.env.STREAM_HW ?? "1") !== "0",
streamAudio: (process.env.STREAM_AUDIO ?? "1") !== "0",
streamAudioSource: process.env.STREAM_AUDIO_SOURCE ?? "@DEFAULT_MONITOR@",
} as any; } as any;
const guildId = process.env.DISCORD_GUILD_ID; const guildId = process.env.DISCORD_GUILD_ID;

View File

@@ -42,6 +42,11 @@ export const config = {
selfbotToken: opt("DISCORD_SELFBOT_TOKEN"), selfbotToken: opt("DISCORD_SELFBOT_TOKEN"),
// Use NVENC hardware encode + hw-accelerated decode for the stream (RTX 5050). // Use NVENC hardware encode + hw-accelerated decode for the stream (RTX 5050).
streamHw: opt("STREAM_HW", "1") !== "0", streamHw: opt("STREAM_HW", "1") !== "0",
// Capture desktop audio into the broadcast so the stream has sound. Pulls the
// PipeWire/Pulse monitor of the default sink (what the desktop plays). Set
// STREAM_AUDIO=0 to mute; STREAM_AUDIO_SOURCE overrides the capture source.
streamAudio: opt("STREAM_AUDIO", "1") !== "0",
streamAudioSource: opt("STREAM_AUDIO_SOURCE", "@DEFAULT_MONITOR@"),
// novnc backend // novnc backend
novncUrl: opt("NOVNC_URL", ""), novncUrl: opt("NOVNC_URL", ""),

View File

@@ -143,17 +143,31 @@ export class SelfbotStreamer implements ScreenStreamer {
const captureCodecArgs = hw const captureCodecArgs = hw
? ["-c:v", "h264_nvenc", "-preset", "p4", "-tune", "ll", "-forced-idr", "1"] ? ["-c:v", "h264_nvenc", "-preset", "p4", "-tune", "ll", "-forced-idr", "1"]
: ["-c:v", "libx264", "-preset", "ultrafast", "-tune", "zerolatency"]; : ["-c:v", "libx264", "-preset", "ultrafast", "-tune", "zerolatency"];
// Optionally pull desktop audio (the default sink's PipeWire/Pulse monitor)
// so the broadcast has sound. We add it as a second input and mux AAC into
// the mpegts; the library re-encodes it to Opus for Discord. ffmpeg needs
// XDG_RUNTIME_DIR (inherited) to reach the pulse socket. -map is required
// once there are two inputs.
const audioOn = this.config.streamAudio;
const audioInput = audioOn ? ["-f", "pulse", "-i", this.config.streamAudioSource] : [];
const audioMap = audioOn ? ["-map", "0:v:0", "-map", "1:a:0"] : [];
const audioCodec = audioOn ? ["-c:a", "aac", "-b:a", "160k", "-ar", "48000", "-ac", "2"] : [];
capture = this.capture = spawn("ffmpeg", [ capture = this.capture = spawn("ffmpeg", [
"-loglevel", "error", "-loglevel", "error",
"-thread_queue_size", "1024",
"-f", "x11grab", "-f", "x11grab",
"-framerate", String(this.config.vncFramerate), "-framerate", String(this.config.vncFramerate),
"-video_size", this.config.vncResolution, "-video_size", this.config.vncResolution,
"-i", this.config.vncDisplay, "-i", this.config.vncDisplay,
...(audioOn ? ["-thread_queue_size", "1024"] : []),
...audioInput,
...audioMap,
...captureCodecArgs, ...captureCodecArgs,
"-b:v", `${kbps}k`, "-maxrate", `${maxKbps}k`, "-bufsize", `${kbps}k`, "-b:v", `${kbps}k`, "-maxrate", `${maxKbps}k`, "-bufsize", `${kbps}k`,
"-bf", "0", "-bf", "0",
"-pix_fmt", "yuv420p", "-pix_fmt", "yuv420p",
"-g", String(this.config.vncFramerate), "-g", String(this.config.vncFramerate),
...audioCodec,
"-f", "mpegts", "pipe:1", "-f", "mpegts", "pipe:1",
]); ]);
capture.stderr?.on("data", (d) => { capture.stderr?.on("data", (d) => {