Files
sephiria_inv_program/sephiria_inv/recognizer.py
Claude 2e23ad5d2f v0.3.0: game-window picker + NCC recognition + artifacts + ?-merged
- window_capture.py: enumerate top-level windows (pygetwindow) and
  capture a specific one via PrintWindow PW_RENDERFULLCONTENT (works
  on non-focused windows). Linux falls back to mss region grab.
- recognizer.py: replace MAE matcher with NCC over numpy vectors.
  Each rotatable slab generates 4 templates (0/90/180/270). Adds 248
  artifact templates and an empty-cell heuristic (low mean/std-dev).
  Cells below confidence floor are tagged "unknown" — likely merged
  "?" boxes.
- gui.py: new ScreenshotFrame with [게임 창 선택] button → window
  picker dialog → bbox crop → recognize → editable preview grid with
  per-cell CellEditor that handles slab / artifact / merged(?) / empty.
  Merged cells let user pick which two slabs got combined + a level.
- artifacts.py + bundled _artifacts.json (248 entries from
  WhiteDog1004/sephiria) for matching and rendering.
- renderer.py: factored CDN fetch into _fetch_image; added
  fetch_artifact_image().
- requirements.txt: + numpy, pygetwindow (Win), pywin32 (Win).
- docker-build-cmd.sh: upgrade PyInstaller to 5.x inside cdrx
  container so numpy DLL manifest reads work.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-14 09:36:49 +09:00

207 lines
6.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Cell-level recognition over the inventory grid.

Pipeline given a cropped inventory image:
  1. Slice into 6-col rows per generate_grid_config().
  2. Per cell, classify: empty / slab / artifact / unknown.
     - "empty"    = low std-dev / dark uniform pixels
     - "slab"     = best NCC match across all slabs × 4 rotations
     - "artifact" = best NCC match across all artifacts (no rotation)
     - "unknown"  = nothing matched above the confidence floor →
                    likely a merged "?" slab box, surfaced to the user.

NCC (normalized cross-correlation) is used instead of MAE because it's
invariant to brightness/contrast shifts — the in-game render has subtle
shader effects (bloom, vignette) that MAE penalizes harshly.

Templates are fetched via renderer.fetch_slab_image / fetch_artifact_image
on first call and cached on disk.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
import numpy as np
from PIL import Image
from .artifacts import ARTIFACTS
from .renderer import fetch_slab_image, fetch_artifact_image
from .slabs import GRID_COLS, SLABS, SLABS_BY_VALUE, generate_grid_config
# ---------- types ----------
@dataclass
class CellResult:
    """Classification outcome for a single cell of the inventory grid."""

    # Slot identifier formatted as "<row>-<col>".
    slot_id: str
    # Row and column index of the cell inside the grid.
    row: int
    col: int
    # One of "empty", "slab", "artifact" or "unknown".
    kind: str
    # Matched slab/artifact value, or None when nothing was matched.
    value: Optional[str]
    # Rotation index 0/1/2/3 for slabs; 0 for every other kind.
    rotation: int
    # NCC score in [-1, 1] — higher is better.
    score: float
# ---------- template prep ----------
# Side length of the square thumbnail that templates and cells are resized to
# before matching: 64x64 is small enough to be fast, big enough to discriminate.
_TEMPLATE_SIZE = 64
def _on_dark(img: Image.Image) -> Image.Image:
    """Flatten *img* to RGB, filling transparency with a dark bag-slot color.

    RGBA input is alpha-composited over an opaque (38, 22, 42) backdrop
    before conversion; any other mode is converted to RGB directly.
    """
    if img.mode == "RGBA":
        backdrop = Image.new("RGBA", img.size, (38, 22, 42, 255))
        backdrop.alpha_composite(img)
        flattened = backdrop
    else:
        flattened = img
    return flattened.convert("RGB")
def _to_feat(img: Image.Image) -> np.ndarray:
    """Convert an image into a flat, zero-mean, unit-length float32 vector.

    The image is converted to grayscale and resized (bilinear) to
    _TEMPLATE_SIZE x _TEMPLATE_SIZE, flattened to 1-D, mean-subtracted and
    divided by its L2 norm. A (near-)uniform image has no contrast to
    normalize, so its all-zero centered vector is returned unchanged.
    """
    side = _TEMPLATE_SIZE
    gray = img.convert("L").resize((side, side), Image.BILINEAR)
    pixels = np.asarray(gray, dtype=np.float32).ravel()
    centered = pixels - pixels.mean()
    length = np.linalg.norm(centered)
    # Guard against dividing by ~0 for uniform cells.
    return centered if length < 1e-6 else centered / length
@dataclass
class _Template:
kind: str # "slab" | "artifact"
value: str
rotation: int # for slabs
feat: np.ndarray
_TEMPLATE_CACHE: List[_Template] = []
_CACHE_BUILT = False
# True once the artifact templates have been appended to _TEMPLATE_CACHE.
_CACHE_HAS_ARTIFACTS = False


def _build_templates(*, include_artifacts: bool = True) -> List[_Template]:
    """Build (and cache) the template list. Lazy because download is slow.

    Slab templates are built on the first call: one template per slab, or
    four (rotation indices 0-3) for rotatable slabs. Artifact templates are
    built the first time a caller asks for them and appended to the cache.

    The cache honors ``include_artifacts`` on every call, regardless of what
    an earlier call requested: a slab-only request never receives artifact
    templates, and an artifact-inclusive request after a slab-only one still
    gets the artifacts added.

    Args:
        include_artifacts: When True, return slab and artifact templates
            (the shared cache list itself). When False, return a new list
            holding only the slab templates.

    Returns:
        The list of templates matching the request. Icons whose fetch
        returned None are skipped.
    """
    global _CACHE_BUILT, _CACHE_HAS_ARTIFACTS

    # (Re)build the slab part when nothing usable is cached yet.
    if not (_CACHE_BUILT and _TEMPLATE_CACHE):
        slab_templates: List[_Template] = []
        for s in SLABS:
            img = fetch_slab_image(s.image)
            if img is None:
                continue
            base = _on_dark(img)
            # Slabs: 4 rotations for rotatable, 1 otherwise.
            rotations = (0, 1, 2, 3) if s.rotate else (0,)
            for r in rotations:
                rotated = base if r == 0 else base.rotate(-90 * r, expand=False)
                slab_templates.append(
                    _Template("slab", s.value, r, _to_feat(rotated))
                )
        _TEMPLATE_CACHE.clear()
        _TEMPLATE_CACHE.extend(slab_templates)
        _CACHE_BUILT = True
        _CACHE_HAS_ARTIFACTS = False

    if not include_artifacts:
        # The cache may already contain artifacts from an earlier call.
        return [t for t in _TEMPLATE_CACHE if t.kind == "slab"]

    if not _CACHE_HAS_ARTIFACTS:
        for a in ARTIFACTS:
            img = fetch_artifact_image(a.image)
            if img is None:
                continue
            _TEMPLATE_CACHE.append(
                _Template("artifact", a.value, 0, _to_feat(_on_dark(img)))
            )
        _CACHE_HAS_ARTIFACTS = True
    return _TEMPLATE_CACHE
def warm_templates(*, include_artifacts: bool = True) -> int:
    """Eagerly build the template list and return how many templates it holds.

    Building goes through _build_templates, which fetches the icons on first
    use. Intended to be called once from the GUI before recognition so the
    fetch cost is not paid in the middle of classifying cells.
    """
    templates = _build_templates(include_artifacts=include_artifacts)
    return len(templates)
# ---------- cell classification ----------
def _is_empty(cell: Image.Image) -> bool:
    """Return True when *cell* looks like an empty slot: dark and ~uniform.

    Judged on the grayscale pixels: the mean must be below 60 and the
    standard deviation below 14.
    """
    gray = np.asarray(cell.convert("L"), dtype=np.float32)
    # Too bright to be an empty slot — no need to look at the spread.
    if not gray.mean() < 60.0:
        return False
    return bool(gray.std() < 14.0)
def _classify(
    cell: Image.Image,
    templates: List[_Template],
    *,
    min_score: float = 0.55,
) -> Tuple[str, Optional[str], int, float]:
    """Classify one cell image and return ``(kind, value, rotation, score)``.

    Empty-looking cells short-circuit to ``("empty", None, 0, 1.0)``.
    Otherwise the cell's feature vector is scored against every template
    and the highest score wins; if that score is below ``min_score`` (or
    there are no templates at all) the cell is reported as "unknown".
    """
    if _is_empty(cell):
        return "empty", None, 0, 1.0

    cell_vec = _to_feat(cell)
    if not templates:
        return "unknown", None, 0, 0.0

    # One (N, D) matrix-vector product scores all templates at once; the
    # result is NCC because both sides are mean-subtracted and unit-norm.
    bank = np.stack([tpl.feat for tpl in templates], axis=0)
    ncc = bank @ cell_vec
    winner = int(np.argmax(ncc))
    top_score = float(ncc[winner])

    if top_score < min_score:
        return "unknown", None, 0, top_score
    match = templates[winner]
    return match.kind, match.value, match.rotation, top_score
# ---------- public API ----------
def recognize_image(
    img: Image.Image,
    bbox: Tuple[int, int, int, int],
    *,
    slot_num: int = 34,
    include_artifacts: bool = True,
    min_score: float = 0.55,
) -> List[CellResult]:
    """Classify every inventory cell found inside ``img[bbox]``.

    Args:
        img: Source image containing the inventory.
        bbox: ``(left, top, right, bottom)`` in source-image pixel coords.
        slot_num: Slot count passed to generate_grid_config().
        include_artifacts: Whether artifact templates take part in matching.
        min_score: Minimum NCC score for a match; lower scores yield "unknown".

    Returns:
        One CellResult per grid cell, in grid order. Empty when
        generate_grid_config() yields no rows.
    """
    left, top, right, bottom = bbox
    region = img.crop((left, top, right, bottom)).convert("RGB")

    layout = generate_grid_config(slot_num)
    if not layout:
        return []

    # Uniform cell size: width split over GRID_COLS, height over the row count.
    cell_w = (right - left) // GRID_COLS
    cell_h = (bottom - top) // len(layout)
    templates = _build_templates(include_artifacts=include_artifacts)

    results: List[CellResult] = []
    for entry in layout:
        # Each entry's "rows" value is used as the row index and its "cols"
        # value as the number of cells present in that row.
        row_idx = entry["rows"]
        for col_idx in range(entry["cols"]):
            x0 = col_idx * cell_w
            y0 = row_idx * cell_h
            tile = region.crop((x0, y0, x0 + cell_w, y0 + cell_h))
            kind, value, rot, score = _classify(
                tile, templates, min_score=min_score
            )
            results.append(
                CellResult(
                    f"{row_idx}-{col_idx}", row_idx, col_idx,
                    kind, value, rot, score,
                )
            )
    return results
def recognize_file(
    path: str,
    bbox: Tuple[int, int, int, int],
    *,
    slot_num: int = 34,
    include_artifacts: bool = True,
    min_score: float = 0.55,
) -> List[CellResult]:
    """Open the image at *path* and run recognize_image() on it.

    Args:
        path: Path of the image file to load.
        bbox: ``(left, top, right, bottom)`` in image pixel coords.
        slot_num: Forwarded to recognize_image().
        include_artifacts: Forwarded to recognize_image().
        min_score: Forwarded to recognize_image().

    Returns:
        The list of CellResult produced by recognize_image().
    """
    # The context manager releases the underlying file handle as soon as
    # recognition is done, instead of leaving it to garbage collection.
    with Image.open(path) as img:
        return recognize_image(
            img,
            bbox,
            slot_num=slot_num,
            include_artifacts=include_artifacts,
            min_score=min_score,
        )
def slab_values_from(results: List[CellResult]) -> List[str]:
    """Collect the values of all slab results, in order.

    Artifact, empty and unknown cells are skipped, as are slab results
    whose value is missing or empty.
    """
    values: List[str] = []
    for res in results:
        if res.kind != "slab" or not res.value:
            continue
        values.append(res.value)
    return values