v0.3.0: game-window picker + NCC recognition + artifacts + merged "?" cells

- window_capture.py: enumerate top-level windows (pygetwindow) and capture a specific one via PrintWindow with PW_RENDERFULLCONTENT (works on non-focused windows). Linux falls back to an mss region grab.
- recognizer.py: replace the MAE (mean absolute error) matcher with NCC (normalized cross-correlation) over numpy vectors. Each rotatable slab generates 4 templates (0/90/180/270). Adds 248 artifact templates and an empty-cell heuristic (low mean/std-dev). Cells below the confidence floor are tagged "unknown" — likely merged "?" boxes.
- gui.py: new ScreenshotFrame with a [게임 창 선택] button → window picker dialog → bbox crop → recognize → editable preview grid with a per-cell CellEditor that handles slab / artifact / merged(?) / empty. Merged cells let the user pick which two slabs got combined, plus a level.
- artifacts.py + bundled _artifacts.json (248 entries from WhiteDog1004/sephiria) for matching and rendering.
- renderer.py: factored the CDN fetch into _fetch_image; added fetch_artifact_image().
- requirements.txt: + numpy, pygetwindow (Win), pywin32 (Win).
- docker-build-cmd.sh: upgrade PyInstaller to 5.x inside the cdrx container so numpy DLL manifest reads work.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-14 09:36:49 +09:00
parent e388c965bc
commit 2e23ad5d2f
9 changed files with 4878 additions and 358 deletions
--- a/sephiria_inv/screenshot.py
+++ b/sephiria_inv/screenshot.py
@@ -1,122 +1,42 @@
-"""Recognize slabs from a screenshot of the in-game inventory.
+"""Backward-compatible thin wrapper over the new recognizer.

-Approach: template matching against the cached CDN images. Given a screenshot
-and the inventory bounding box, we divide it into a grid and compare each cell
-against every slab template (resized to the cell). Mean absolute error in RGB
-picks the best match; cells above a threshold are treated as empty.
+The old API exposed `Recognition` (slot_id, value, score) and `recognize()`
+returning slabs only. Existing CLI code (`__main__.py`) and tests use that
+surface, so we keep it working by delegating to recognizer.py.

-This is a best-effort fallback. Accuracy depends heavily on the screenshot
-resolution and the slab images matching the in-game render style. The CDN
-images are the same pixel-art assets the game uses, so accuracy is usually
-fine when the screenshot is sharp.
+New code should call `recognizer.recognize_image()` / `recognize_file()`
+directly for richer (kind, rotation, artifact) results.
 """

 from __future__ import annotations

-import os
 from dataclasses import dataclass
 from typing import List, Optional, Tuple

-from PIL import Image
-
-from .renderer import fetch_slab_image
-from .slabs import GRID_COLS, SLABS, generate_grid_config
+from .recognizer import recognize_file


@dataclass
 class Recognition:
    slot_id: str
-    value: Optional[str]  # None = empty
-    score: float  # lower = better match
-
-
-def _mae(a: Image.Image, b: Image.Image) -> float:
-    """Mean absolute error in RGB. Both images must be the same size."""
-    if a.size != b.size:
-        b = b.resize(a.size)
-    a_rgb = a.convert("RGB")
-    b_rgb = b.convert("RGB")
-    pa = list(a_rgb.getdata())
-    pb = list(b_rgb.getdata())
-    n = len(pa)
-    if n == 0:
-        return 1e9
-    total = 0
-    for (ar, ag, ab), (br, bg, bb) in zip(pa, pb):
-        total += abs(ar - br) + abs(ag - bg) + abs(ab - bb)
-    return total / (n * 3)
-
-
-def _alpha_composite_on_dark(img: Image.Image) -> Image.Image:
-    """Slab templates are RGBA on transparent. Composite onto dark BG for fairer compare."""
-    if img.mode != "RGBA":
-        return img.convert("RGB")
-    bg = Image.new("RGBA", img.size, (50, 30, 50, 255))
-    bg.alpha_composite(img)
-    return bg.convert("RGB")
+    value: Optional[str]  # slab value, or None if empty/unknown/artifact
+    score: float          # NCC score in [-1, 1]; higher = better


 def recognize(
    screenshot_path: str,
    bbox: Tuple[int, int, int, int],
    slot_num: int = 34,
-    empty_threshold: float = 35.0,
+    empty_threshold: float = 35.0,  # ignored; kept for arg-compat
 ) -> List[Recognition]:
-    """Recognize slabs in the inventory area of a screenshot.
-
-    Args:
-        screenshot_path: Path to the game screenshot (PNG/JPG).
-        bbox: (left, top, right, bottom) pixel coords of the inventory grid.
-            Must enclose only the slot grid, not the surrounding UI.
-        slot_num: Total slot count (18..60). Used to compute row layout.
-        empty_threshold: MAE above this counts as empty.
-
-    Returns:
-        List of Recognition entries, one per slot in row-major order.
-    """
-    img = Image.open(screenshot_path).convert("RGB")
-    left, top, right, bottom = bbox
-    img = img.crop((left, top, right, bottom))
-
-    grid = generate_grid_config(slot_num)
-    if not grid:
-        return []
-    rows = len(grid)
-    cell_w = (right - left) // GRID_COLS
-    cell_h = (bottom - top) // rows
-    template_size = (min(cell_w, cell_h), min(cell_w, cell_h))
-
-    # Pre-load and downscale templates
-    templates: List[Tuple[str, Image.Image]] = []
-    for slab in SLABS:
-        t = fetch_slab_image(slab.image)
-        if t is None:
-            continue
-        t = _alpha_composite_on_dark(t).resize(template_size)
-        templates.append((slab.value, t))
-
-    results: List[Recognition] = []
-    for row in grid:
-        y = row["rows"]
-        for x in range(row["cols"]):
-            cx0 = x * cell_w
-            cy0 = y * cell_h
-            cell = img.crop((cx0, cy0, cx0 + cell_w, cy0 + cell_h)).resize(template_size)
-            best_value: Optional[str] = None
-            best_score = 1e9
-            for v, t in templates:
-                s = _mae(cell, t)
-                if s < best_score:
-                    best_score = s
-                    best_value = v
-            if best_score > empty_threshold:
-                results.append(Recognition(f"{y}-{x}", None, best_score))
-            else:
-                results.append(Recognition(f"{y}-{x}", best_value, best_score))
-
-    return results
+    """Recognize slabs in the inventory area of a screenshot (slabs only)."""
+    cells = recognize_file(screenshot_path, bbox, slot_num=slot_num)
+    out: List[Recognition] = []
+    for c in cells:
+        v = c.value if c.kind == "slab" else None
+        out.append(Recognition(c.slot_id, v, c.score))
+    return out


 def recognized_values(recognitions: List[Recognition]) -> List[str]:
-    """Helper: extract just the non-empty slab values."""
    return [r.value for r in recognitions if r.value is not None]