// Human-like interaction helpers. Drive input with xdotool, using Playwright
// only to LOCATE elements and read state.
//
// What xdotool actually is: it injects input events into the X server (it is
// NOT a physical HID device). The browser and the captured screen receive them
// as genuine pointer/keyboard input, with a visibly moving cursor. Every ACTION
// here is such input: cursor move, click, char-by-char typing, key presses, and
// wheel scroll - including (in scenario.mjs) navigation, quality, fullscreen and
// the autoplay toggle. The CDP/DOM API is used only to READ state for
// verification, never to perform an action. Elements are brought into view with
// a real wheel scroll (not a DOM scrollIntoView); if an element has no on-screen
// box, the click fails rather than falling back to a synthetic click.
import { execFile } from 'node:child_process';

/** X display handed to xdotool; taken from VNC_DISPLAY, falling back to ':1'. */
const DISPLAY = process.env.VNC_DISPLAY || ':1';

/**
 * Resolve after a delay.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Uniform random number between `a` and `b` (scaled from Math.random()).
 * @param {number} a - Lower bound.
 * @param {number} b - Upper bound.
 * @returns {number}
 */
const rand = (a, b) => a + Math.random() * (b - a);

// Exported beside their definitions so the export can never name a binding
// that is missing from the module.
export { sleep, rand };

/**
 * Run `xdotool` with the given argument vector on DISPLAY.
 * @param {string[]} args - Arguments passed to xdotool as-is (no shell involved).
 * @returns {Promise<string>} xdotool's stdout ('' when it printed nothing).
 *   Rejects with the execFile error when the process fails.
 */
const xdo = (args) =>
  new Promise((resolve, reject) => {
    const env = { ...process.env, DISPLAY };
    execFile('xdotool', args, { env }, (err, stdout) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(stdout || '');
    });
  });

// Where this module believes the pointer is. It is only ever updated by our own
// moves, never read back from the X server.
// NOTE(review): the initial value looks like the centre of a 1920x1080 screen - confirm.
let cur = { x: 960, y: 540 };

/**
 * Quadratic ease-in-out: maps progress t in [0, 1] to eased progress in [0, 1].
 * @param {number} t
 * @returns {number}
 */
const easeInOut = (t) => (t < 0.5 ? 2 * t * t : 1 - Math.pow(-2 * t + 2, 2) / 2);

/**
 * Estimate the screen coordinates of the page viewport's top-left corner, so a
 * viewport-relative box can be turned into a screen position.
 *
 * Reads the window geometry through `page.evaluate`. The side border is taken
 * as half of the horizontal chrome (outerWidth - innerWidth); the top inset is
 * the vertical chrome minus one such border, i.e. the bottom border is assumed
 * to be as thick as a side border.
 *
 * @param {{ evaluate: Function }} page - Playwright page (only `evaluate` is used).
 * @returns {Promise<{ ox: number, oy: number }>} Screen x/y of the viewport origin.
 */
async function contentOrigin(page) {
  const g = await page.evaluate(() => ({
    sx: window.screenX,
    sy: window.screenY,
    ow: window.outerWidth,
    oh: window.outerHeight,
    iw: window.innerWidth,
    ih: window.innerHeight,
  }));
  const sideBorder = Math.max(0, Math.round((g.ow - g.iw) / 2));
  const topInset = Math.max(0, g.oh - g.ih - sideBorder);
  return { ox: g.sx + sideBorder, oy: g.sy + topInset };
}

// Smoothly move the real cursor to a screen point with eased, slightly jittered steps.
/**
 * Format a screen coordinate as an xdotool argument.
 *
 * Clamped at 0: a jittered point next to the top/left screen edge can round to
 * a negative number, and xdotool would parse an argument such as "-1" as an
 * option flag instead of a coordinate.
 *
 * @param {number} value - Screen coordinate, possibly fractional.
 * @returns {string} Non-negative integer as a string.
 */
const toScreenArg = (value) => String(Math.max(0, Math.round(value)));

/**
 * Move the pointer from the last known position to (toX, toY).
 *
 * The path has 12 to 48 steps (one per ~22 px of distance), eased with
 * `easeInOut`. Every step except the last is jittered by up to 1.5 px per axis
 * and followed by a 6-18 ms pause. The whole path is sent as one chained
 * xdotool command.
 *
 * @param {number} toX - Target screen x.
 * @param {number} toY - Target screen y.
 * @returns {Promise<void>}
 */
export async function humanMove(toX, toY) {
  const from = cur;
  const distance = Math.hypot(toX - from.x, toY - from.y);
  const steps = Math.max(12, Math.min(48, Math.round(distance / 22)));

  const cmd = [];
  for (let i = 1; i <= steps; i++) {
    const t = easeInOut(i / steps);
    // The final step lands exactly on the target, so it gets no jitter.
    const jx = i < steps ? rand(-1.5, 1.5) : 0;
    const jy = i < steps ? rand(-1.5, 1.5) : 0;
    cmd.push(
      'mousemove',
      toScreenArg(from.x + (toX - from.x) * t + jx),
      toScreenArg(from.y + (toY - from.y) * t + jy),
      'sleep',
      rand(0.006, 0.018).toFixed(3),
    );
  }

  await xdo(cmd);
  // Remember where the pointer really ended up (targets are clamped like the path).
  cur = { x: Math.max(0, toX), y: Math.max(0, toY) };
  await sleep(rand(40, 130));
}

/**
 * Move the pointer to a screen point and left-click there, with short random
 * pauses before and after the click.
 *
 * @param {number} sx - Screen x.
 * @param {number} sy - Screen y.
 * @returns {Promise<void>}
 */
export async function humanClickXY(sx, sy) {
  await humanMove(sx, sy);
  await sleep(rand(60, 170));
  await xdo(['click', '1']);
  await sleep(rand(130, 300));
}

// Bring an element into view using a REAL wheel scroll (not a DOM
// scrollIntoView). Returns its viewport box, or null if it can't be revealed.
//
// Up to 14 rounds of three wheel notches each. The element counts as revealed
// once it sits fully inside the viewport with a 70 px margin at the top and
// bottom. If that never happens (for example the element is taller than that
// zone), the last box is still accepted provided the 35%-65% vertical band that
// humanClick aims at is inside the viewport; otherwise null is returned so the
// caller does not click at an off-screen position.
async function bringIntoView(page, locator) {
  const ih = await page.evaluate(() => window.innerHeight);
  const readBox = () => locator.boundingBox().catch(() => null);

  for (let round = 0; round < 14; round++) {
    const box = await readBox();
    if (box && box.y >= 70 && box.y + box.height <= ih - 70) return box;

    // 4 = wheel up, 5 = wheel down. With no box at all, keep scrolling down.
    const button = box && box.y < 70 ? '4' : '5';
    for (let notch = 0; notch < 3; notch++) {
      await xdo(['click', button]);
    }
    await sleep(rand(120, 240));
  }

  const last = await readBox();
  if (!last) return null;
  const bandTop = last.y + last.height * 0.35;
  const bandBottom = last.y + last.height * 0.65;
  return bandTop >= 0 && bandBottom <= ih ? last : null;
}

// Locate a Playwright element, real-wheel it into view, move the real cursor
// into it (random offset), and click. No synthetic-click fallback: if the
// element has no on-screen box, this throws.
/**
 * @param {object} page - Playwright page, used only to read window geometry.
 * @param {object} locator - Playwright locator of the element to click.
 * @returns {Promise<void>}
 * @throws {Error} When the element has no on-screen box after scrolling.
 */
export async function humanClick(page, locator) {
  await sleep(rand(150, 380));

  const box = await bringIntoView(page, locator);
  if (!box) throw new Error('humanClick: element has no on-screen box; refusing synthetic click');

  // Aim somewhere in the middle 30% of the element rather than its exact centre.
  const { ox, oy } = await contentOrigin(page);
  const sx = Math.round(ox + box.x + box.width * rand(0.35, 0.65));
  const sy = Math.round(oy + box.y + box.height * rand(0.35, 0.65));
  await humanClickXY(sx, sy);
}

// Type text one character at a time at a human, slightly irregular pace.
/**
 * @param {string} text - Text to type; sent to xdotool one code point at a time.
 * @returns {Promise<void>}
 */
export async function humanType(text) {
  await sleep(rand(220, 420)); // let focus settle so the 1st char isn't dropped
  for (const ch of text) {
    // '--' ends option parsing so a character such as '-' is typed literally.
    await xdo(['type', '--clearmodifiers', '--', ch]);
    await sleep(rand(70, 200));
    if (Math.random() < 0.12) await sleep(rand(150, 400)); // occasional pause
  }
}

/**
 * Press a key (or key combination) through xdotool, then pause 120-280 ms.
 * @param {string} key - xdotool key name, passed through unchanged.
 * @returns {Promise<void>}
 */
export async function pressKey(key) {
  await xdo(['key', '--clearmodifiers', key]);
  await sleep(rand(120, 280));
}

// Gradual wheel scroll (dir>0 = down). Optionally hover over an element first.
/**
 * @param {object} page - Playwright page, used only to read window geometry.
 * @param {number} dir - Positive scrolls down; anything else scrolls up.
 * @param {number} notches - Number of wheel notches to send.
 * @param {object} [overLocator] - Element to park the cursor over first. If it
 *   has no bounding box the hover is skipped and the scroll still happens.
 * @returns {Promise<void>}
 */
export async function humanScroll(page, dir, notches, overLocator) {
  if (overLocator) {
    const box = await overLocator.boundingBox().catch(() => null);
    if (box) {
      const { ox, oy } = await contentOrigin(page);
      const centreX = Math.round(ox + box.x + box.width / 2);
      const centreY = Math.round(oy + box.y + box.height / 2);
      await humanMove(centreX, centreY);
    }
  }

  const button = dir > 0 ? '5' : '4'; // 4=up, 5=down
  for (let i = 0; i < notches; i++) {
    await xdo(['click', button]);
    await sleep(rand(40, 115));
    if (i % 6 === 5) await sleep(rand(250, 600)); // pause like reading
  }
  await sleep(rand(250, 600));
}

// Press a single key (real keyboard).
/**
 * Same xdotool call as pressKey, with a slightly longer pause range (120-300 ms).
 * @param {string} key - xdotool key name, passed through unchanged.
 * @returns {Promise<void>}
 */
export async function humanKey(key) {
  await xdo(['key', '--clearmodifiers', key]);
  await sleep(rand(120, 300));
}

// Navigate like a person: focus the address bar (Ctrl+L), type the URL one char
// at a time, press Enter.
/**
 * @param {string} text - URL (or search text) to type into the address bar.
 * @returns {Promise<void>} Resolves once Enter has been sent; it does not wait
 *   for the navigation itself.
 */
export async function navigateOmnibox(text) {
  await xdo(['key', '--clearmodifiers', 'ctrl+l']);
  await sleep(rand(300, 600));
  await humanType(text);
  await sleep(rand(150, 320));
  await xdo(['key', '--clearmodifiers', 'Return']);
}

// Move the real cursor over an element (hover, no click) - e.g. to reveal a
// video player's controls or to focus it for a keyboard shortcut.
/**
 * Does nothing when the element has no bounding box.
 *
 * @param {object} page - Playwright page, used only to read window geometry.
 * @param {object} locator - Playwright locator of the element to hover.
 * @returns {Promise<void>}
 */
export async function humanHover(page, locator) {
  const box = await locator.boundingBox().catch(() => null);
  if (!box) return;

  // Same viewport-to-screen conversion as humanClick and humanScroll.
  const { ox, oy } = await contentOrigin(page);
  // Horizontal centre, 40% of the way down the element.
  const targetX = Math.round(ox + box.x + box.width * 0.5);
  const targetY = Math.round(oy + box.y + box.height * 0.4);
  await humanMove(targetX, targetY);
}

// NOTE(review): in the collapsed original, a trailing `export { sleep, rand };`
// sat behind the `//` comment above and was never live. `sleep` and `rand` are
// not declared in this part of the file, so they are best exported next to
// their definitions rather than from here.