v0.3.0: game-window picker + NCC recognition + artifacts + ?-merged

- window_capture.py: enumerate top-level windows (pygetwindow) and capture a specific one via PrintWindow PW_RENDERFULLCONTENT (works on non-focused windows). Linux falls back to mss region grab. - recognizer.py: replace MAE matcher with NCC over numpy vectors. Each rotatable slab generates 4 templates (0/90/180/270). Adds 248 artifact templates and an empty-cell heuristic (low mean/std-dev). Cells below confidence floor are tagged "unknown" — likely merged "?" boxes. - gui.py: new ScreenshotFrame with [게임 창 선택] button → window picker dialog → bbox crop → recognize → editable preview grid with per-cell CellEditor that handles slab / artifact / merged(?) / empty. Merged cells let user pick which two slabs got combined + a level. - artifacts.py + bundled _artifacts.json (248 entries from WhiteDog1004/sephiria) for matching and rendering. - renderer.py: factored CDN fetch into _fetch_image; added fetch_artifact_image(). - requirements.txt: + numpy, pygetwindow (Win), pywin32 (Win). - docker-build-cmd.sh: upgrade PyInstaller to 5.x inside cdrx container so numpy DLL manifest reads work. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-14 09:36:49 +09:00
parent e388c965bc
commit 2e23ad5d2f
9 changed files with 4878 additions and 358 deletions
--- a/sephiria_inv/recognizer.py
+++ b/sephiria_inv/recognizer.py
@@ -0,0 +1,206 @@
+"""Cell-level recognition over the inventory grid.
+
+Pipeline given a cropped inventory image:
+  1. Slice into 6-col rows per generate_grid_config().
+  2. Per cell, classify: empty / slab / artifact / unknown.
+     - "empty"   = low std-dev / dark uniform pixels
+     - "slab"    = best NCC match across all slabs × 4 rotations
+     - "artifact"= best NCC match across all artifacts (no rotation)
+     - "unknown" = nothing matched above the confidence floor →
+                   likely a merged "?" slab box, surfaced to the user.
+
+NCC (normalized cross-correlation) is used instead of MAE because it's
+invariant to brightness/contrast shifts — the in-game render has subtle
+shader effects (bloom, vignette) that MAE penalizes harshly.
+
+Templates are fetched via renderer.fetch_slab_image / fetch_artifact_image
+on first call and cached on disk.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Tuple
+
+import numpy as np
+from PIL import Image
+
+from .artifacts import ARTIFACTS
+from .renderer import fetch_slab_image, fetch_artifact_image
+from .slabs import GRID_COLS, SLABS, SLABS_BY_VALUE, generate_grid_config
+
+
+# ---------- types ----------
+
+@dataclass
+class CellResult:
+    """Classification outcome for one cell of the inventory grid."""
+
+    slot_id: str          # "<row>-<col>", built from the row/col fields below
+    row: int              # row coordinate of the cell within the grid
+    col: int              # column coordinate of the cell within its row
+    kind: str             # "empty" | "slab" | "artifact" | "unknown"
+    value: Optional[str]  # matched slab/artifact value; None for empty/unknown cells
+    rotation: int         # 0/1/2/3 quarter-turns for slabs; 0 otherwise
+    score: float          # NCC in [-1, 1] — higher is better (empty cells report 1.0)
+
+
+# ---------- template prep ----------
+
+# Side length (px) of the square that cells and templates are resized to before matching.
+_TEMPLATE_SIZE = 64  # work at 64x64 — small enough to be fast, big enough to discriminate
+
+
+def _on_dark(img: Image.Image) -> Image.Image:
+    """Return an RGB version of ``img`` with any RGBA transparency flattened.
+
+    RGBA input is alpha-composited over an opaque (38, 22, 42) backdrop —
+    the dark bag-slot color — before being converted to RGB. Images in any
+    other mode are converted to RGB directly.
+    """
+    if img.mode == "RGBA":
+        backdrop = Image.new("RGBA", img.size, (38, 22, 42, 255))
+        backdrop.alpha_composite(img)
+        return backdrop.convert("RGB")
+    return img.convert("RGB")
+
+
+def _to_feat(img: Image.Image) -> np.ndarray:
+    """Turn an image into a 1-D float32 feature vector for NCC matching.
+
+    The image is converted to grayscale, resized (bilinear) to
+    ``_TEMPLATE_SIZE`` x ``_TEMPLATE_SIZE``, flattened, mean-subtracted and
+    scaled to unit L2 norm. When the centered vector has (near-)zero norm —
+    i.e. the image is uniform — it is returned unscaled, as all zeros.
+    """
+    side = _TEMPLATE_SIZE
+    gray = img.convert("L").resize((side, side), Image.BILINEAR)
+    centered = np.asarray(gray, dtype=np.float32).reshape(-1)
+    centered = centered - centered.mean()
+    magnitude = np.linalg.norm(centered)
+    # No contrast to normalize: dividing would blow up, so keep the zero vector.
+    return centered if magnitude < 1e-6 else centered / magnitude
+
+
+@dataclass
+class _Template:
+    """One reference icon, pre-processed into a feature vector for matching."""
+
+    kind: str  # "slab" | "artifact"
+    value: str  # value of the slab/artifact this template was built from
+    rotation: int  # quarter-turn count for slabs; artifacts are stored with 0
+    feat: np.ndarray  # feature vector produced by _to_feat
+
+
+_TEMPLATE_CACHE: List[_Template] = []
+_CACHE_BUILT = False
+# True once the artifact templates have been appended to _TEMPLATE_CACHE.
+_CACHE_HAS_ARTIFACTS = False
+
+
+def _build_templates(*, include_artifacts: bool = True) -> List[_Template]:
+    """Build (and cache) the template list. Lazy because download is slow.
+
+    Slab templates are built on the first call; artifact templates are built
+    the first time they are requested. The cache therefore honours
+    ``include_artifacts`` on every call, not just the one that happened to
+    populate it.
+
+    Args:
+        include_artifacts: When true, the returned list also contains the
+            artifact templates (after all slab templates). When false, only
+            slab templates are returned, even if artifacts are already cached.
+
+    Returns:
+        The shared cache list when ``include_artifacts`` is true, otherwise a
+        new list holding only the non-artifact templates.
+    """
+    global _CACHE_BUILT, _CACHE_HAS_ARTIFACTS
+
+    # An empty cache is treated as "not built" so a run where every fetch
+    # failed is retried on the next call.
+    if not (_CACHE_BUILT and _TEMPLATE_CACHE):
+        slab_templates: List[_Template] = []
+        # Slabs: 4 rotations for rotatable, 1 otherwise
+        for s in SLABS:
+            img = fetch_slab_image(s.image)
+            if img is None:
+                continue
+            base = _on_dark(img)
+            rotations = (0, 1, 2, 3) if s.rotate else (0,)
+            for r in rotations:
+                # Negative angle = clockwise quarter-turns (PIL rotates counter-clockwise).
+                # NOTE(review): expand=False keeps the canvas size, which would clip a
+                # non-square icon — assumes icons are square; confirm.
+                rotated = base if r == 0 else base.rotate(-90 * r, expand=False)
+                slab_templates.append(_Template("slab", s.value, r, _to_feat(rotated)))
+        # Commit only after the whole pass succeeded, so an exception mid-build
+        # never leaves a half-filled cache behind.
+        _TEMPLATE_CACHE.clear()
+        _TEMPLATE_CACHE.extend(slab_templates)
+        _CACHE_BUILT = True
+        _CACHE_HAS_ARTIFACTS = False
+
+    if not include_artifacts:
+        return [t for t in _TEMPLATE_CACHE if t.kind != "artifact"]
+
+    if not _CACHE_HAS_ARTIFACTS:
+        artifact_templates: List[_Template] = []
+        for a in ARTIFACTS:
+            img = fetch_artifact_image(a.image)
+            if img is None:
+                continue
+            artifact_templates.append(
+                _Template("artifact", a.value, 0, _to_feat(_on_dark(img)))
+            )
+        _TEMPLATE_CACHE.extend(artifact_templates)
+        _CACHE_HAS_ARTIFACTS = True
+    return _TEMPLATE_CACHE
+
+
+def warm_templates(*, include_artifacts: bool = True) -> int:
+    """Build the template list up front and report how many templates it holds.
+
+    Intended to be called once from the GUI before recognition, so the slow
+    icon downloads do not happen in the middle of classifying cells.
+    """
+    templates = _build_templates(include_artifacts=include_artifacts)
+    return len(templates)
+
+
+# ---------- cell classification ----------
+
+def _is_empty(cell: Image.Image) -> bool:
+    """Heuristic empty-slot test: the grayscale cell is both dark and nearly uniform.
+
+    A cell counts as empty when its mean gray level is below 60 and its
+    standard deviation is below 14.
+    """
+    gray = np.asarray(cell.convert("L"), dtype=np.float32)
+    # Brightness first; the spread is only measured for cells that are dark enough.
+    if not gray.mean() < 60.0:
+        return False
+    return bool(gray.std() < 14.0)
+
+
+def _classify(
+    cell: Image.Image,
+    templates: List[_Template],
+    *,
+    min_score: float = 0.55,
+    matrix: Optional[np.ndarray] = None,
+) -> Tuple[str, Optional[str], int, float]:
+    """Classify one cell image and return ``(kind, value, rotation, score)``.
+
+    Args:
+        cell: Image of a single grid cell.
+        templates: Candidate templates to match against.
+        min_score: Confidence floor; a best NCC score below it yields "unknown".
+        matrix: Optional pre-stacked ``(N, D)`` array of ``templates``' feature
+            vectors, in the same order as ``templates``. Pass it when
+            classifying many cells against the same templates to avoid
+            re-stacking per cell; it is built here when omitted.
+
+    Returns:
+        ``("empty", None, 0, 1.0)`` when the empty-slot heuristic fires,
+        ``("unknown", None, 0, score)`` when there are no templates (score 0.0)
+        or nothing reaches ``min_score``, otherwise the best template's kind,
+        value and rotation together with its score.
+    """
+    if _is_empty(cell):
+        return "empty", None, 0, 1.0
+    if not templates:
+        return "unknown", None, 0, 0.0
+    if matrix is None:
+        matrix = np.stack([t.feat for t in templates], axis=0)  # (N, D)
+    # One matrix-vector product scores every template at once; it equals NCC
+    # because both sides are mean-subtracted and unit-normalized.
+    scores = matrix @ _to_feat(cell)
+    idx = int(np.argmax(scores))
+    best = float(scores[idx])
+    if best < min_score:
+        return "unknown", None, 0, best
+    t = templates[idx]
+    return t.kind, t.value, t.rotation, best
+
+
+# ---------- public API ----------
+
+def recognize_image(
+    img: Image.Image,
+    bbox: Tuple[int, int, int, int],
+    *,
+    slot_num: int = 34,
+    include_artifacts: bool = True,
+    min_score: float = 0.55,
+) -> List[CellResult]:
+    """Slice img[bbox] into a 6-col grid and classify each cell.
+
+    Args:
+        img: Source image (e.g. a screenshot).
+        bbox: ``(left, top, right, bottom)`` of the inventory area, in
+            source-image pixel coords.
+        slot_num: Slot count passed to ``generate_grid_config``.
+        include_artifacts: Whether artifact templates take part in matching.
+        min_score: Confidence floor forwarded to the classifier.
+
+    Returns:
+        One ``CellResult`` per cell, in grid order; an empty list when the
+        grid config is empty.
+
+    Raises:
+        ValueError: If ``bbox`` is too small (or inverted) to give every cell
+            at least one pixel in each dimension.
+    """
+    left, top, right, bottom = bbox
+    grid = generate_grid_config(slot_num)
+    if not grid:
+        return []
+    rows = len(grid)
+    cell_w = (right - left) // GRID_COLS
+    cell_h = (bottom - top) // rows
+    if cell_w < 1 or cell_h < 1:
+        # Zero-size cells cannot be resized or scored; fail with a clear message.
+        raise ValueError(
+            f"bbox {bbox!r} is too small for a {GRID_COLS}x{rows} cell grid"
+        )
+    crop = img.crop((left, top, right, bottom)).convert("RGB")
+    templates = _build_templates(include_artifacts=include_artifacts)
+    # Stack the template features once per image rather than once per cell.
+    matrix = np.stack([t.feat for t in templates], axis=0) if templates else None
+
+    out: List[CellResult] = []
+    for row in grid:
+        # Each grid entry is read as a mapping: "rows" is used as the row
+        # index, "cols" as the number of cells in that row.
+        y = row["rows"]
+        for x in range(row["cols"]):
+            cx0 = x * cell_w
+            cy0 = y * cell_h
+            cell = crop.crop((cx0, cy0, cx0 + cell_w, cy0 + cell_h))
+            kind, value, rot, score = _classify(
+                cell, templates, min_score=min_score, matrix=matrix
+            )
+            out.append(CellResult(f"{y}-{x}", y, x, kind, value, rot, score))
+    return out
+
+
+def recognize_file(
+    path: str,
+    bbox: Tuple[int, int, int, int],
+    *,
+    slot_num: int = 34,
+    include_artifacts: bool = True,
+    min_score: float = 0.55,
+) -> List[CellResult]:
+    """Open the image at ``path`` and run ``recognize_image`` on it.
+
+    All other arguments are forwarded unchanged. The image file is closed
+    before returning, including when recognition raises.
+    """
+    # Image.open is lazy and keeps the file handle; the context manager
+    # releases it deterministically instead of waiting for garbage collection.
+    with Image.open(path) as img:
+        return recognize_image(
+            img, bbox,
+            slot_num=slot_num,
+            include_artifacts=include_artifacts,
+            min_score=min_score,
+        )
+
+
+def slab_values_from(results: List[CellResult]) -> List[str]:
+    """Collect the values of slab cells, in input order.
+
+    Results of any other kind, and slab results whose value is falsy
+    (None or empty), are skipped.
+    """
+    values: List[str] = []
+    for res in results:
+        if res.kind != "slab":
+            continue
+        if not res.value:
+            continue
+        values.append(res.value)
+    return values