"""Recognize slabs from a screenshot of the in-game inventory. Approach: template matching against the cached CDN images. Given a screenshot and the inventory bounding box, we divide it into a grid and compare each cell against every slab template (resized to the cell). Mean absolute error in RGB picks the best match; cells above a threshold are treated as empty. This is a best-effort fallback. Accuracy depends heavily on the screenshot resolution and the slab images matching the in-game render style. The CDN images are the same pixel-art assets the game uses, so accuracy is usually fine when the screenshot is sharp. """ from __future__ import annotations import os from dataclasses import dataclass from typing import List, Optional, Tuple from PIL import Image from .renderer import fetch_slab_image from .slabs import GRID_COLS, SLABS, generate_grid_config @dataclass class Recognition: slot_id: str value: Optional[str] # None = empty score: float # lower = better match def _mae(a: Image.Image, b: Image.Image) -> float: """Mean absolute error in RGB. Both images must be the same size.""" if a.size != b.size: b = b.resize(a.size) a_rgb = a.convert("RGB") b_rgb = b.convert("RGB") pa = list(a_rgb.getdata()) pb = list(b_rgb.getdata()) n = len(pa) if n == 0: return 1e9 total = 0 for (ar, ag, ab), (br, bg, bb) in zip(pa, pb): total += abs(ar - br) + abs(ag - bg) + abs(ab - bb) return total / (n * 3) def _alpha_composite_on_dark(img: Image.Image) -> Image.Image: """Slab templates are RGBA on transparent. Composite onto dark BG for fairer compare.""" if img.mode != "RGBA": return img.convert("RGB") bg = Image.new("RGBA", img.size, (50, 30, 50, 255)) bg.alpha_composite(img) return bg.convert("RGB") def recognize( screenshot_path: str, bbox: Tuple[int, int, int, int], slot_num: int = 34, empty_threshold: float = 35.0, ) -> List[Recognition]: """Recognize slabs in the inventory area of a screenshot. Args: screenshot_path: Path to the game screenshot (PNG/JPG). bbox: (left, top, right, bottom) pixel coords of the inventory grid. Must enclose only the slot grid, not the surrounding UI. slot_num: Total slot count (18..60). Used to compute row layout. empty_threshold: MAE above this counts as empty. Returns: List of Recognition entries, one per slot in row-major order. """ img = Image.open(screenshot_path).convert("RGB") left, top, right, bottom = bbox img = img.crop((left, top, right, bottom)) grid = generate_grid_config(slot_num) if not grid: return [] rows = len(grid) cell_w = (right - left) // GRID_COLS cell_h = (bottom - top) // rows template_size = (min(cell_w, cell_h), min(cell_w, cell_h)) # Pre-load and downscale templates templates: List[Tuple[str, Image.Image]] = [] for slab in SLABS: t = fetch_slab_image(slab.image) if t is None: continue t = _alpha_composite_on_dark(t).resize(template_size) templates.append((slab.value, t)) results: List[Recognition] = [] for row in grid: y = row["rows"] for x in range(row["cols"]): cx0 = x * cell_w cy0 = y * cell_h cell = img.crop((cx0, cy0, cx0 + cell_w, cy0 + cell_h)).resize(template_size) best_value: Optional[str] = None best_score = 1e9 for v, t in templates: s = _mae(cell, t) if s < best_score: best_score = s best_value = v if best_score > empty_threshold: results.append(Recognition(f"{y}-{x}", None, best_score)) else: results.append(Recognition(f"{y}-{x}", best_value, best_score)) return results def recognized_values(recognitions: List[Recognition]) -> List[str]: """Helper: extract just the non-empty slab values.""" return [r.value for r in recognitions if r.value is not None]