From 2cdd159fc13bb518d720c583c9f917c20e8d9181 Mon Sep 17 00:00:00 2001 From: javis-bot Date: Wed, 10 Jun 2026 14:11:58 +0900 Subject: [PATCH] feat(stream-test): drive the whole browse scenario with real input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make every action real keyboard/mouse via xdotool, not just the visible browsing: address-bar navigation (Ctrl+L + char-by-char typing), the YouTube settings gear -> 화질 -> 1080p menu (real clicks, verified hd1080), the autoplay toggle, the play button, and fullscreen via the real 'f' key (F11 isn't honored by this WM; 'f' yields true 1080p fullscreen without pausing). CDP/DOM API is now used only to read state for verification. --- bot/scripts/stream-test/README.md | 9 ++- bot/scripts/stream-test/human.mjs | 22 +++++++ bot/scripts/stream-test/scenario.mjs | 97 ++++++++++++++++++---------- 3 files changed, 91 insertions(+), 37 deletions(-) diff --git a/bot/scripts/stream-test/README.md b/bot/scripts/stream-test/README.md index b721089..68561b6 100644 --- a/bot/scripts/stream-test/README.md +++ b/bot/scripts/stream-test/README.md @@ -9,9 +9,12 @@ real browsing session captured from the X display. `DISCORD_GUILD_ID`, `DISCORD_VOICE_CHANNEL_ID`, `VNC_RESOLUTION`, `VNC_FRAMERATE`, `VNC_BITRATE_KBPS`, `STREAM_HW`, `VNC_DISPLAY`). - `human.mjs` - human-like interaction helpers. Real mouse/keyboard via - `xdotool` (so the cursor is visible in the stream); Playwright locates - elements. Behind-the-scenes control (fullscreen, play, quality, autoplay, - navigation) uses the CDP/DOM API. + `xdotool` (so the cursor is visible in the stream); Playwright only locates + elements. Every action is real input: address-bar navigation (Ctrl+L + + typing), search typing, clicking the video / settings menu / autoplay toggle / + play button, fullscreen via the `f` key, scrolling, and entering links. The + CDP/DOM API is used only to read state for verification, and as a rare click + fallback when an element has no on-screen box. - `scenario.mjs` - the browse scenario (YouTube -> IU live -> 1080p -> fullscreen -> Naver -> 나무위키), driven with the human helpers. Connects to a Chrome already running with `--remote-debugging-port` (`CDP_PORT`, default diff --git a/bot/scripts/stream-test/human.mjs b/bot/scripts/stream-test/human.mjs index 1ca3828..5b17c69 100644 --- a/bot/scripts/stream-test/human.mjs +++ b/bot/scripts/stream-test/human.mjs @@ -100,4 +100,26 @@ export async function humanScroll(page, dir, notches, overLocator) { await sleep(rand(250, 600)); } +// Press a single key (real keyboard). +export async function humanKey(key) { await xdo(['key', '--clearmodifiers', key]); await sleep(rand(120, 300)); } + +// Navigate like a person: focus the address bar (Ctrl+L), type the URL one char +// at a time, press Enter. +export async function navigateOmnibox(text) { + await xdo(['key', '--clearmodifiers', 'ctrl+l']); await sleep(rand(300, 600)); + await humanType(text); await sleep(rand(150, 320)); + await xdo(['key', '--clearmodifiers', 'Return']); +} + +// Move the real cursor over an element (hover, no click) - e.g. to reveal a +// video player's controls or to focus it for a keyboard shortcut. +export async function humanHover(page, locator) { + const box = await locator.boundingBox().catch(() => null); + if (!box) return; + const g = await page.evaluate(() => ({ sx: window.screenX, sy: window.screenY, ow: window.outerWidth, oh: window.outerHeight, iw: window.innerWidth, ih: window.innerHeight })); + const bx = Math.max(0, Math.round((g.ow - g.iw) / 2)); + const oy = g.sy + Math.max(0, g.oh - g.ih - bx); + await humanMove(Math.round(g.sx + bx + box.x + box.width * 0.5), Math.round(oy + box.y + box.height * 0.4)); +} + export { sleep, rand }; diff --git a/bot/scripts/stream-test/scenario.mjs b/bot/scripts/stream-test/scenario.mjs index 85f5e99..b1d4c9a 100644 --- a/bot/scripts/stream-test/scenario.mjs +++ b/bot/scripts/stream-test/scenario.mjs @@ -1,68 +1,97 @@ -// Browse scenario driven with human-like real mouse/keyboard (see human.mjs). -// Connects to a Chrome already running with --remote-debugging-port (default -// 9222) on the streamed X display, and performs: -// YouTube search -> open IU live concert -> 1080p -> watch 20s -> fullscreen -// 20s -> Naver search 아이유 -> scroll -> 나무위키 -> scroll. +// Browse scenario driven ENTIRELY with real mouse/keyboard input via xdotool +// (see human.mjs). Connects to a Chrome already running with +// --remote-debugging-port (default 9222) on the streamed X display. // -// Real input (visible on the stream): search/Naver typing, clicking the video, -// the fullscreen button, scrolling, entering 나무위키. -// API-driven (behind the scenes): window fullscreen, play, quality, autoplay -// toggle, page navigation, and click fallbacks. +// All ACTIONS are real input: address-bar navigation (Ctrl+L + typing), +// search typing, clicking the video, the settings gear -> 화질 -> 1080p menu, +// the autoplay toggle, the play button, fullscreen via the 'f' key, scrolling, +// and entering 나무위키. The CDP/DOM API is used ONLY to read state for +// verification (paused/quality/fullscreen) and as a rare click fallback when an +// element has no on-screen box. import { chromium } from 'playwright'; -import { humanClick, humanType, pressKey, humanScroll, sleep } from './human.mjs'; +import { humanClick, humanType, humanKey, humanHover, navigateOmnibox, humanScroll, sleep } from './human.mjs'; const CDP = process.env.CDP_PORT || '9222'; -const VID = process.env.TEST_VIDEO_ID || 'X_am71G6Vy4'; // IU HEREH WORLD TOUR (live, 1080p+) +const VID = process.env.TEST_VIDEO_ID || 'X_am71G6Vy4'; const SEARCH = process.env.TEST_YT_QUERY || '내손을잡아'; const NAVER_Q = process.env.TEST_NAVER_QUERY || '아이유'; const b = await chromium.connectOverCDP(`http://localhost:${CDP}`); const ctx = b.contexts()[0]; const page = ctx.pages()[0]; -const s = await ctx.newCDPSession(page); page.setDefaultTimeout(25000); -const winState = async (st) => { const { windowId } = await s.send('Browser.getWindowForTarget'); await s.send('Browser.setWindowBounds', { windowId, bounds: { windowState: st } }); }; -const ensurePlaying = () => page.evaluate(() => { const v = document.querySelector('video'); const p = document.getElementById('movie_player'); try { p && p.playVideo && p.playVideo(); } catch {} if (v && v.paused) v.play().catch(() => {}); }); -const autoplayOff = () => page.evaluate(() => { const btn = document.querySelector('.ytp-autonav-toggle-button'); if (btn && btn.getAttribute('aria-checked') === 'true') btn.click(); }); +const read = (fn) => page.evaluate(fn); +const playerLoc = () => page.locator('#movie_player'); -await page.evaluate(() => { if (document.fullscreenElement) document.exitFullscreen?.(); }); -await winState('normal'); -await page.goto('https://www.youtube.com', { waitUntil: 'domcontentloaded' }); await sleep(2500); +// 1) open YouTube by typing the URL in the address bar +await navigateOmnibox('https://www.youtube.com'); await sleep(3000); +// 2) really type the search and submit await humanClick(page, page.locator('input#search, input[name=search_query]').first()); await humanType(SEARCH); -await pressKey('Return'); +await humanKey('Return'); await sleep(3800); +// 3) click the IU concert result with the real mouse let link = page.locator(`a#video-title[href*="${VID}"], a[href*="${VID}"]`).first(); if (!(await link.count().catch(() => 0))) link = page.locator('ytd-video-renderer a#video-title, ytd-rich-item-renderer a#video-title').first(); await humanClick(page, link); -await sleep(3000); -if (!/watch/.test(page.url())) await page.goto('https://www.youtube.com/watch?v=' + VID, { waitUntil: 'domcontentloaded' }); -await page.waitForSelector('#movie_player', { timeout: 25000 }); await sleep(2500); -for (let i = 0; i < 8; i++) { const ad = page.locator('.ytp-ad-skip-button, .ytp-ad-skip-button-modern, .ytp-skip-ad-button'); if (await ad.count().catch(() => 0)) { await ad.first().click({ timeout: 1500 }).catch(() => {}); await sleep(1200); } else break; } +await sleep(3500); +await page.waitForSelector('#movie_player', { timeout: 25000 }); await sleep(2000); +for (let i = 0; i < 8; i++) { const ad = page.locator('.ytp-ad-skip-button, .ytp-ad-skip-button-modern, .ytp-skip-ad-button'); if (await ad.count().catch(() => 0)) { await humanClick(page, ad.first()); await sleep(1200); } else break; } -await ensurePlaying(); await sleep(1200); -await page.evaluate(() => { const p = document.getElementById('movie_player'); try { const L = p.getAvailableQualityLevels ? p.getAvailableQualityLevels() : []; const w = L.includes('hd1080') ? 'hd1080' : (L[0]); p.setPlaybackQualityRange && p.setPlaybackQualityRange(w, w); } catch {} }); -await autoplayOff(); +// 4) if paused, press play with the real mouse +if (await read(() => document.querySelector('video')?.paused)) { + const big = page.locator('.ytp-large-play-button, .ytp-play-button').first(); + await humanClick(page, big); +} +await sleep(1500); + +// 5) set 1080p through the real settings menu (gear -> 화질 -> 1080p), verify +async function setQuality1080() { + for (let attempt = 0; attempt < 2; attempt++) { + await humanHover(page, playerLoc()); + await humanClick(page, page.locator('.ytp-settings-button')); await sleep(900); + let qrow = page.locator('.ytp-menuitem', { hasText: /화질|Quality/ }).first(); + if (!(await qrow.count().catch(() => 0))) qrow = page.locator('.ytp-panel-menu .ytp-menuitem').last(); + await humanClick(page, qrow); await sleep(900); + const item = page.locator('.ytp-menuitem', { hasText: /1080/ }).first(); + if (await item.count().catch(() => 0)) await humanClick(page, item); + await sleep(2000); + const q = await read(() => document.getElementById('movie_player')?.getPlaybackQuality?.()); + if (q && /1080/.test(q)) return q; + } + return null; +} +console.log('QUALITY', await setQuality1080()); + +// 6) turn off autoplay with a real click if it is on +const auto = page.locator('.ytp-autonav-toggle-button'); +if ((await auto.count().catch(() => 0)) && (await auto.getAttribute('aria-checked').catch(() => null)) === 'true') { + await humanHover(page, playerLoc()); + await humanClick(page, auto); +} console.log('STEP watch-1080-windowed'); await sleep(20000); -await winState('fullscreen'); await sleep(1200); -await humanClick(page, page.locator('.ytp-fullscreen-button')); -await sleep(800); await ensurePlaying(); -console.log('STEP fullscreen'); await sleep(20000); +// 7) fullscreen with the real 'f' key (hover the player to focus it), 20s +await humanHover(page, playerLoc()); +await humanKey('f'); await sleep(1500); +if (!(await read(() => !!document.fullscreenElement))) { await humanHover(page, playerLoc()); await humanKey('f'); await sleep(1200); } +console.log('STEP fullscreen', await read(() => ({ full: !!document.fullscreenElement, h: window.innerHeight }))); await sleep(20000); -await page.evaluate(() => { if (document.fullscreenElement) document.exitFullscreen?.(); }); -await winState('normal'); await sleep(1500); +// 8) exit fullscreen with real 'f' +await humanKey('f'); await sleep(1500); -await page.goto('https://www.naver.com', { waitUntil: 'domcontentloaded' }); await sleep(2500); +// 9) Naver via the address bar, then really type the query +await navigateOmnibox('https://www.naver.com'); await sleep(2800); await humanClick(page, page.locator('input#query').first()); await humanType(NAVER_Q); -await pressKey('Return'); +await humanKey('Return'); await sleep(2800); await humanScroll(page, +1, 18); console.log('STEP naver-scrolled'); +// 10) enter 나무위키 with a real click, then scroll const namu = page.locator('a[href*="namu.wiki"]').first(); if (await namu.count().catch(() => 0)) { await humanClick(page, namu);