Files
javis_bot/bot/scripts/stream-test/human.mjs
javis-bot 8709f40fd6 fix(stream-test): refuse final box when element stays off-screen
bringIntoView returned the last boundingBox() unconditionally after the
scroll loop exhausted, so an element still outside the viewport would be
clicked anyway. Validate the final box against the actual viewport bounds
on both axes (innerWidth/innerHeight) and return null otherwise, so
humanClick fails instead of clicking an off-screen coordinate.
2026-06-10 14:18:43 +09:00

150 lines
6.6 KiB
JavaScript

// Human-like interaction helpers. Drive input with xdotool, using Playwright
// only to LOCATE elements and read state.
//
// What xdotool actually is: it injects input events into the X server (it is
// NOT a physical HID device). The browser and the captured screen receive them
// as genuine pointer/keyboard input, with a visibly moving cursor. Every ACTION
// here is such input: cursor move, click, char-by-char typing, key presses, and
// wheel scroll - including (in scenario.mjs) navigation, quality, fullscreen and
// the autoplay toggle. The CDP/DOM API is used only to READ state for
// verification, never to perform an action. Elements are brought into view with
// a real wheel scroll (not a DOM scrollIntoView); if an element has no on-screen
// box, the click fails rather than falling back to a synthetic click.
import { execFile } from 'node:child_process';
// X display that xdotool is pointed at; VNC_DISPLAY overrides the ':1' default.
const DISPLAY = process.env.VNC_DISPLAY || ':1';

// Resolve (with no value) after `ms` milliseconds.
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

// Random number between a and b, scaled from Math.random().
function rand(a, b) {
  const span = b - a;
  return a + Math.random() * span;
}

// Run `xdotool` with the given argument list on DISPLAY.
// Resolves with the command's stdout ('' when empty); rejects with the
// execFile error if the process cannot be started or fails.
const xdo = (args) => {
  const env = { ...process.env, DISPLAY };
  return new Promise((resolve, reject) => {
    execFile('xdotool', args, { env }, (err, stdout) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(stdout || '');
    });
  });
};

// Last cursor position this module moved to (screen pixels). The start value
// is a guess - presumably the centre of a 1920x1080 screen; it is not read
// back from the X server.
let cur = { x: 960, y: 540 };

// Quadratic ease-in/ease-out curve: maps t in [0, 1] to [0, 1], slow at both ends.
function easeInOut(t) {
  if (t < 0.5) return 2 * t * t;
  const rest = -2 * t + 2;
  return 1 - Math.pow(rest, 2) / 2;
}
// Estimate the screen position of the page viewport's top-left corner.
// Reads the window geometry from the page, takes half of the outer/inner width
// difference as the side border, and treats the vertical difference that is
// left after subtracting one such border as the inset above the content
// (presumably title bar + toolbars). Both values are floored at 0.
// Returns { ox, oy } in screen pixels.
async function contentOrigin(page) {
  const geom = await page.evaluate(() => {
    const { screenX, screenY, outerWidth, outerHeight, innerWidth, innerHeight } = window;
    return { screenX, screenY, outerWidth, outerHeight, innerWidth, innerHeight };
  });
  const sideBorder = Math.max(0, Math.round((geom.outerWidth - geom.innerWidth) / 2));
  const verticalChrome = geom.outerHeight - geom.innerHeight - sideBorder;
  const topInset = Math.max(0, verticalChrome);
  return {
    ox: geom.screenX + sideBorder,
    oy: geom.screenY + topInset,
  };
}
// Smoothly move the real cursor from the last known position (`cur`) to the
// screen point (toX, toY).
// The path is 12-48 eased steps (about one per 22 px of distance); every step
// except the last gets up to +/-1.5 px of jitter so the final step lands
// exactly on the target. All steps are sent as ONE chained xdotool command
// ("mousemove x y sleep s ..."), so only a single process is spawned.
// Coordinates are rounded and clamped to >= 0: jitter near the screen edge (or
// a negative target) would otherwise yield an argument such as "-1", which
// xdotool parses as an option and rejects, failing the whole move.
// Afterwards `cur` is set to the point actually sent, followed by a short pause.
export async function humanMove(toX, toY) {
  const toPixel = (v) => Math.max(0, Math.round(v));
  const fromX = cur.x;
  const fromY = cur.y;
  const dx = toX - fromX;
  const dy = toY - fromY;
  const steps = Math.max(12, Math.min(48, Math.round(Math.hypot(dx, dy) / 22)));
  const cmd = [];
  for (let i = 1; i <= steps; i++) {
    const t = easeInOut(i / steps);
    const isLast = i === steps;
    const jx = isLast ? 0 : rand(-1.5, 1.5);
    const jy = isLast ? 0 : rand(-1.5, 1.5);
    cmd.push(
      'mousemove',
      String(toPixel(fromX + dx * t + jx)),
      String(toPixel(fromY + dy * t + jy)),
      'sleep',
      rand(0.006, 0.018).toFixed(3),
    );
  }
  await xdo(cmd);
  // Remember where the pointer really ended up (rounded + clamped target).
  cur = { x: toPixel(toX), y: toPixel(toY) };
  await sleep(rand(40, 130));
}
// Move the real cursor to the screen point (sx, sy) and left-click there,
// with a short randomised pause before and after the button press.
export async function humanClickXY(sx, sy) {
  await humanMove(sx, sy);

  const preClickPause = rand(60, 170);
  await sleep(preClickPause);

  const leftButton = '1';
  await xdo(['click', leftButton]);

  const postClickPause = rand(130, 300);
  await sleep(postClickPause);
}
// Bring an element into view using a REAL wheel scroll (not a DOM
// scrollIntoView). Returns its viewport box, or null if it can't be revealed.
//
// page    - Playwright page, used only to read the viewport size.
// locator - Playwright locator, used only to read the element's bounding box.
//
// Up to 14 attempts: if the box lies inside the vertical band that keeps a
// 70 px margin from the top and bottom edge it is accepted; otherwise three
// wheel notches are sent (up when the box is above the band, down when it is
// below or has no box yet) followed by a short pause.
// A box is only ever returned when it is ALSO inside the viewport
// horizontally. A vertical wheel scroll cannot fix horizontal overflow, so in
// that case null is returned immediately and the caller fails instead of
// clicking an off-screen coordinate.
async function bringIntoView(page, locator) {
  const EDGE_MARGIN = 70; // px kept clear at the top/bottom of the viewport
  const MAX_ATTEMPTS = 14;
  const NOTCHES_PER_ATTEMPT = 3;
  const WHEEL_UP = '4';
  const WHEEL_DOWN = '5';

  const { iw, ih } = await page.evaluate(() => ({ iw: window.innerWidth, ih: window.innerHeight }));
  // A failing boundingBox() is treated like "no box yet" (best effort).
  const readBox = () => locator.boundingBox().catch(() => null);
  const fitsHorizontally = (box) => box.x >= 0 && box.x + box.width <= iw;

  for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
    const box = await readBox();
    const inBand = box && box.y >= EDGE_MARGIN && box.y + box.height <= ih - EDGE_MARGIN;
    if (inBand) {
      // Vertically revealed: accept only if it is on-screen on the x axis too.
      return fitsHorizontally(box) ? box : null;
    }
    const button = box && box.y < EDGE_MARGIN ? WHEEL_UP : WHEEL_DOWN;
    for (let notch = 0; notch < NOTCHES_PER_ATTEMPT; notch++) {
      await xdo(['click', button]);
    }
    await sleep(rand(120, 240));
  }

  // Loop exhausted: only accept the final box if it actually lies inside the
  // viewport on BOTH axes. Otherwise refuse, so the caller fails instead of
  // clicking a coordinate that is still off-screen.
  const box = await readBox();
  if (!box) return null;
  const onScreen = fitsHorizontally(box) && box.y >= 0 && box.y + box.height <= ih;
  return onScreen ? box : null;
}
// Click a Playwright-located element with real input: after a short pause,
// wheel-scroll it into view, translate a random point from the middle of its
// box (35-65% of width and height) into screen coordinates, then move the real
// cursor there and click.
// Throws if the element has no on-screen box - there is deliberately no
// synthetic-click fallback.
export async function humanClick(page, locator) {
  await sleep(rand(150, 380));

  const target = await bringIntoView(page, locator);
  if (!target) {
    throw new Error('humanClick: element has no on-screen box; refusing synthetic click');
  }

  const origin = await contentOrigin(page);
  const fracX = rand(0.35, 0.65);
  const clickX = Math.round(origin.ox + target.x + target.width * fracX);
  const fracY = rand(0.35, 0.65);
  const clickY = Math.round(origin.oy + target.y + target.height * fracY);

  await humanClickXY(clickX, clickY);
}
// Type `text` through xdotool one character at a time with an irregular,
// human-looking rhythm: an initial pause so focus can settle (otherwise the
// first character may be dropped), 70-200 ms after every character, and - with
// a 12% chance - an extra 150-400 ms hesitation.
export async function humanType(text) {
  await sleep(rand(220, 420));

  const chars = [...text]; // iterate by code point, not UTF-16 unit
  for (let idx = 0; idx < chars.length; idx++) {
    await xdo(['type', '--clearmodifiers', '--', chars[idx]]);
    await sleep(rand(70, 200));

    const hesitate = Math.random() < 0.12;
    if (hesitate) {
      await sleep(rand(150, 400));
    }
  }
}
// Send one key (xdotool key name, e.g. 'Return' or 'ctrl+l') with modifiers
// cleared, then pause 120-280 ms.
export async function pressKey(key) {
  const args = ['key', '--clearmodifiers', key];
  await xdo(args);

  const settleMs = rand(120, 280);
  await sleep(settleMs);
}
// Scroll with the real mouse wheel, one notch at a time.
// dir         - > 0 scrolls down (button 5); anything else scrolls up (button 4).
// notches     - number of wheel clicks to send.
// overLocator - optional element; when it has a bounding box the cursor is
//               first moved to the centre of that box so the wheel events land on it.
// Notches are 40-115 ms apart, with a longer "reading" pause after every sixth
// one and another pause at the end.
export async function humanScroll(page, dir, notches, overLocator) {
  const hoverBox = overLocator ? await overLocator.boundingBox().catch(() => null) : null;
  if (hoverBox) {
    const { ox, oy } = await contentOrigin(page);
    const centreX = Math.round(ox + hoverBox.x + hoverBox.width / 2);
    const centreY = Math.round(oy + hoverBox.y + hoverBox.height / 2);
    await humanMove(centreX, centreY);
  }

  const wheel = dir > 0 ? '5' : '4';
  let sent = 0;
  while (sent < notches) {
    await xdo(['click', wheel]);
    await sleep(rand(40, 115));
    sent += 1;
    if (sent % 6 === 0) {
      await sleep(rand(250, 600)); // pause like reading
    }
  }
  await sleep(rand(250, 600));
}
// Press one key on the real keyboard (xdotool key name, modifiers cleared),
// then wait 120-300 ms before returning.
export async function humanKey(key) {
  const keyCommand = ['key', '--clearmodifiers', key];
  await xdo(keyCommand);

  const pauseMs = rand(120, 300);
  await sleep(pauseMs);
}
// Navigate the way a person would: Ctrl+L to focus the address bar, type
// `text` character by character, then press Return. Short random pauses
// separate the three steps; there is no wait after the final Return.
export async function navigateOmnibox(text) {
  const press = (keyName) => xdo(['key', '--clearmodifiers', keyName]);

  await press('ctrl+l');
  await sleep(rand(300, 600));

  await humanType(text);
  await sleep(rand(150, 320));

  await press('Return');
}
// Move the real cursor over an element (hover, no click) - e.g. to reveal a
// video player's controls or to focus it for a keyboard shortcut.
// The cursor goes to the horizontal centre of the element's box, 40% of the
// way down. Does nothing if the element has no bounding box. The element is
// NOT scrolled into view first.
// The viewport-to-screen offset comes from the shared contentOrigin() helper,
// so hover and click always agree on where the page content starts.
export async function humanHover(page, locator) {
  const box = await locator.boundingBox().catch(() => null);
  if (!box) return;
  const { ox, oy } = await contentOrigin(page);
  const hoverX = Math.round(ox + box.x + box.width * 0.5);
  const hoverY = Math.round(oy + box.y + box.height * 0.4);
  await humanMove(hoverX, hoverY);
}
export { sleep, rand };