User reports all 34 cells classified as 미인식 with score 0.00 even when the grid was correctly cropped. Multiple compounding issues: 1. _is_empty required mean<60 (dark) AND std<14. HDR/bright captures produce pinkish empty slots with mean ~150-180, so even empty cells fell through to template matching. Drop the mean check; uniformity alone (std<18 grayscale, std<22 per-channel) is the real signal. 2. Score 0.00 across the board strongly suggests templates list was empty (only path that returns exactly 0.0). Track per-bucket load counts (slabs_ok/fail, artifacts_ok/fail) and surface them in the GUI status bar so a CDN failure is immediately visible. Currently no signal at all on download failure. 3. min_score 0.55 was tuned against simulator-clean renders. Real game captures have decorative cell borders, stack-count badges in corners, HDR shader effects. Lower to 0.35 and inset cell crops by 16% on each side before matching to skip the decorative frame. 4. Add 디버그 저장 button + dump_debug() that saves screenshot.png, bbox_crop.png, cells/<row>-<col>.png, and report.txt with top-3 matches per cell to %LOCALAPPDATA%/sephiria_inv/debug/<timestamp>/. Lets us iterate on tuning from real captures without round-tripping raw screenshots through chat each time.
319 lines
10 KiB
Python
319 lines
10 KiB
Python
"""Cell-level recognition over the inventory grid.
|
||
|
||
Pipeline given a cropped inventory image:
|
||
1. Slice into 6-col rows per generate_grid_config().
|
||
2. Per cell, classify: empty / slab / artifact / unknown.
|
||
- "empty" = uniform pixels (low std-dev), at any brightness
|
||
- "slab" = best NCC match across all slabs × 4 rotations
|
||
- "artifact"= best NCC match across all artifacts (no rotation)
|
||
- "unknown" = nothing matched above the confidence floor →
|
||
likely a merged "?" slab box, surfaced to the user.
|
||
|
||
NCC (normalized cross-correlation) is used instead of MAE because it's
|
||
invariant to brightness/contrast shifts — the in-game render has subtle
|
||
shader effects (bloom, vignette) that MAE penalizes harshly.
|
||
|
||
Templates are fetched via renderer.fetch_slab_image / fetch_artifact_image
|
||
on first call and cached on disk.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from dataclasses import dataclass
|
||
from typing import Dict, List, Optional, Tuple
|
||
|
||
import numpy as np
|
||
from PIL import Image
|
||
|
||
from .artifacts import ARTIFACTS
|
||
from .renderer import fetch_slab_image, fetch_artifact_image
|
||
from .slabs import GRID_COLS, SLABS, SLABS_BY_VALUE, generate_grid_config
|
||
|
||
|
||
# ---------- types ----------
|
||
|
||
@dataclass
class CellResult:
    """Classification outcome for a single inventory grid cell.

    Instances are created positionally by the recognizer, so the field order
    below is part of the interface.
    """

    # Grid position key, formatted "<row>-<col>".
    slot_id: str
    # Row index of the cell within the grid.
    row: int
    # Column index of the cell within its row.
    col: int
    # One of "empty" | "slab" | "artifact" | "unknown".
    kind: str
    # Matched slab/artifact value, or None when nothing was matched.
    value: Optional[str]
    # Rotation step 0/1/2/3 for slabs; always 0 otherwise.
    rotation: int
    # NCC in [-1, 1] — higher is better.
    score: float
|
||
|
||
|
||
# ---------- template prep ----------
|
||
|
||
# Side length (pixels) of the square working image used by _to_feat. Both
# templates and cell crops are resized to this before being compared.
_TEMPLATE_SIZE = 64  # work at 64x64 — small enough to be fast, big enough to discriminate
|
||
|
||
|
||
def _on_dark(img: Image.Image) -> Image.Image:
    """Composite a possibly-transparent template onto a dark bag-slot color.

    Any image that carries transparency is flattened onto the slot color:
    modes with an alpha band ("RGBA", "LA", "PA") and images whose ``info``
    has a ``transparency`` entry (e.g. palette PNGs with a tRNS chunk).
    Previously only "RGBA" was composited; the other cases went through a
    plain ``convert("RGB")``, which discards the transparency instead of
    showing the slot color behind the icon.

    Args:
        img: Template icon in any PIL mode.

    Returns:
        A new "RGB" image. Opaque inputs are simply converted to "RGB".
    """
    has_alpha = img.mode in ("RGBA", "LA", "PA") or "transparency" in img.info
    if not has_alpha:
        return img.convert("RGB")
    # alpha_composite requires both images to be RGBA.
    rgba = img if img.mode == "RGBA" else img.convert("RGBA")
    bg = Image.new("RGBA", rgba.size, (38, 22, 42, 255))
    bg.alpha_composite(rgba)
    return bg.convert("RGB")
|
||
|
||
|
||
def _to_feat(img: Image.Image) -> np.ndarray:
    """Turn an image into a flat, zero-mean, unit-length grayscale vector.

    The image is converted to grayscale, resized (bilinear) to
    _TEMPLATE_SIZE x _TEMPLATE_SIZE and flattened to a 1-D float32 vector.
    The dot product of two such vectors is their normalized
    cross-correlation.

    Returns:
        The normalized vector, or the (all-zero) mean-subtracted vector when
        the image is uniform and has no norm to divide by.
    """
    side = _TEMPLATE_SIZE
    gray = img.convert("L").resize((side, side), Image.BILINEAR)
    vec = np.asarray(gray, dtype=np.float32).ravel()
    centered = vec - vec.mean()
    norm = np.linalg.norm(centered)
    # A uniform image has (near-)zero norm; return the zeros rather than divide.
    return centered if norm < 1e-6 else centered / norm
|
||
|
||
|
||
@dataclass
class _Template:
    """One reference icon prepared for matching.

    Built positionally as (kind, value, rotation, feat).
    """

    # "slab" | "artifact"
    kind: str
    # Identifier of the slab/artifact this template represents.
    value: str
    # Rotation step for slabs; artifacts are stored with 0.
    rotation: int
    # Feature vector produced by _to_feat for this icon.
    feat: np.ndarray
|
||
|
||
|
||
# Process-wide template cache. The list is mutated in place and handed out by
# reference, so callers must treat it as read-only.
_TEMPLATE_CACHE: List[_Template] = []
_CACHE_BUILT = False
# True when the cached build also attempted artifact templates (not only slabs).
_CACHE_INCLUDES_ARTIFACTS = False

# Counts from the most recent template build; exposed through load_stats().
_LAST_LOAD_STATS: Dict[str, int] = {
    "slabs_ok": 0,
    "slabs_fail": 0,
    "artifacts_ok": 0,
    "artifacts_fail": 0,
}


def _build_templates(*, include_artifacts: bool = True) -> List[_Template]:
    """Build (and cache) the full template list. Lazy because download is slow.

    The cache now honours ``include_artifacts``. Previously the first build
    was returned for every later call regardless of the flag, so a cache
    built with artifacts leaked artifact templates into slab-only requests,
    and a slab-only cache silently dropped artifacts when they were requested.

    Behaviour per call:
      * cache matches the requested flag -> the cached list is returned;
      * cache has artifacts, caller wants none -> a filtered copy is returned
        without re-fetching anything;
      * otherwise (nothing cached, or artifacts requested but never loaded)
        everything is rebuilt.

    An empty result is never treated as cached, so a build in which every
    fetch failed is retried on the next call.

    Args:
        include_artifacts: Also load one template per artifact.

    Returns:
        List of templates. Treat as read-only; it may be the shared cache.
    """
    global _CACHE_BUILT, _CACHE_INCLUDES_ARTIFACTS
    if _CACHE_BUILT and _TEMPLATE_CACHE:
        if include_artifacts == _CACHE_INCLUDES_ARTIFACTS:
            return _TEMPLATE_CACHE
        if not include_artifacts:
            # Cache is a superset of what was asked for: serve the slabs only.
            return [t for t in _TEMPLATE_CACHE if t.kind != "artifact"]
        # Artifacts requested but the cache was built without them: rebuild.

    out: List[_Template] = []
    s_ok = s_fail = a_ok = a_fail = 0

    # Slabs: 4 rotations for rotatable, 1 otherwise.
    for s in SLABS:
        img = fetch_slab_image(s.image)
        if img is None:
            s_fail += 1
            continue
        s_ok += 1
        base = _on_dark(img)
        rotations = (0, 1, 2, 3) if s.rotate else (0,)
        for r in rotations:
            rotated = base if r == 0 else base.rotate(-90 * r, expand=False)
            out.append(_Template("slab", s.value, r, _to_feat(rotated)))

    if include_artifacts:
        for a in ARTIFACTS:
            img = fetch_artifact_image(a.image)
            if img is None:
                a_fail += 1
                continue
            a_ok += 1
            base = _on_dark(img)
            out.append(_Template("artifact", a.value, 0, _to_feat(base)))

    _LAST_LOAD_STATS.update({
        "slabs_ok": s_ok,
        "slabs_fail": s_fail,
        "artifacts_ok": a_ok,
        "artifacts_fail": a_fail,
    })
    _TEMPLATE_CACHE.clear()
    _TEMPLATE_CACHE.extend(out)
    _CACHE_BUILT = True
    _CACHE_INCLUDES_ARTIFACTS = include_artifacts
    return _TEMPLATE_CACHE
|
||
|
||
|
||
def warm_templates(*, include_artifacts: bool = True) -> int:
    """Build the template list up front and report how many templates exist.

    Intended to be called once from the GUI before recognition so the slow
    icon download does not stall the first recognition call.

    Args:
        include_artifacts: Forwarded to _build_templates.

    Returns:
        Number of templates in the list returned by _build_templates.
    """
    templates = _build_templates(include_artifacts=include_artifacts)
    return len(templates)
|
||
|
||
|
||
def load_stats() -> Dict[str, int]:
    """Snapshot of the last template load counts.

    Keys: slabs_ok, slabs_fail, artifacts_ok, artifacts_fail. A copy is
    returned, so mutating it does not affect the module's own record.
    """
    return _LAST_LOAD_STATS.copy()
|
||
|
||
|
||
# ---------- cell classification ----------
|
||
|
||
def _is_empty(cell: Image.Image) -> bool:
    """Decide whether a cell is an empty slot, based on uniformity alone.

    Brightness is deliberately not considered: bright or HDR captures render
    empty slots with a pinkish background, and the stable signal is that an
    empty slot has little pixel variation whatever its hue.

    A cell counts as empty when its grayscale std-dev is below 18 and the
    mean of its per-channel (R, G, B) std-devs is below 22.
    """
    gray_std = float(np.asarray(cell.convert("L"), dtype=np.float32).std())
    pixels = np.asarray(cell.convert("RGB"), dtype=np.float32).reshape(-1, 3)
    mean_channel_std = float(pixels.std(axis=0).mean())
    return gray_std < 18.0 and mean_channel_std < 22.0
|
||
|
||
|
||
def _inset(cell: Image.Image, ratio: float = 0.16) -> Image.Image:
    """Crop a margin off every side of a cell before template matching.

    In-game slots carry chunky frame ornaments and a stack-count badge in a
    corner, while templates are clean icons. Dropping ``ratio`` of the width
    from the left and right, and of the height from the top and bottom,
    leaves the inner art that is comparable to a template.

    Args:
        cell: Cell crop.
        ratio: Fraction of each dimension removed per side; the pixel margin
            is truncated to an int.

    Returns:
        The cropped inner region as a new image.
    """
    width, height = cell.size
    margin_x, margin_y = int(width * ratio), int(height * ratio)
    box = (margin_x, margin_y, width - margin_x, height - margin_y)
    return cell.crop(box)
|
||
|
||
|
||
def _classify(
    cell: Image.Image,
    templates: List[_Template],
    *,
    min_score: float = 0.35,
) -> Tuple[str, Optional[str], int, float]:
    """Classify one cell and return (kind, value, rotation, score).

    Outcomes:
      * ("empty", None, 0, 1.0) when _is_empty accepts the whole cell;
      * ("unknown", None, 0, 0.0) when there are no templates to compare;
      * ("unknown", None, 0, best) when the best score is below ``min_score``;
      * the best template's (kind, value, rotation) and its score otherwise.

    Matching is done on the inset cell: its feature vector is dotted with
    every template vector at once, which yields NCC because both sides are
    mean-subtracted and unit-normalized.
    """
    if _is_empty(cell):
        return "empty", None, 0, 1.0

    query = _to_feat(_inset(cell))
    if not templates:
        return "unknown", None, 0, 0.0

    # One (N, D) matrix so all N comparisons are a single product.
    bank = np.stack([tpl.feat for tpl in templates], axis=0)
    similarities = bank @ query
    winner = int(np.argmax(similarities))
    top_score = float(similarities[winner])
    if top_score < min_score:
        return "unknown", None, 0, top_score

    match = templates[winner]
    return match.kind, match.value, match.rotation, top_score
|
||
|
||
|
||
def _classify_with_top(
    cell: Image.Image,
    templates: List[_Template],
    *,
    top_k: int = 3,
    min_score: float = 0.35,
) -> Tuple[str, Optional[str], int, float, List[Tuple[str, str, int, float]]]:
    """Like _classify but also returns the top-k matches for debug dumps.

    The scores are computed once and used for both the ranking and the final
    verdict. The earlier version ran the full pipeline a second time by
    calling _classify after it had already scored every template.

    Args:
        cell: Cell crop.
        templates: Templates to compare against.
        top_k: Number of best matches to report; negative values are treated
            as 0.
        min_score: Confidence floor for accepting the best match. The default
            equals _classify's default, so existing callers get the same
            verdicts as before.

    Returns:
        (kind, value, rotation, score, top) where the first four items follow
        _classify's rules and ``top`` is a list of
        (kind, value, rotation, score) tuples, best first. ``top`` is empty
        for empty cells and when there are no templates.
    """
    if _is_empty(cell):
        return "empty", None, 0, 1.0, []
    if not templates:
        return "unknown", None, 0, 0.0, []

    feat = _to_feat(_inset(cell))
    M = np.stack([t.feat for t in templates], axis=0)
    scores = M @ feat

    # Stable sort keeps the first-listed template ahead on ties, which is the
    # same template np.argmax picks, so top[0] always agrees with the verdict.
    order = np.argsort(-scores, kind="stable")[:max(top_k, 0)]
    top = [
        (templates[i].kind, templates[i].value, templates[i].rotation, float(scores[i]))
        for i in order
    ]

    idx = int(np.argmax(scores))
    best = float(scores[idx])
    if best < min_score:
        return "unknown", None, 0, best, top
    t = templates[idx]
    return t.kind, t.value, t.rotation, best, top
|
||
|
||
|
||
# ---------- public API ----------
|
||
|
||
def recognize_image(
    img: Image.Image,
    bbox: Tuple[int, int, int, int],
    *,
    slot_num: int = 34,
    include_artifacts: bool = True,
    min_score: float = 0.35,
) -> List[CellResult]:
    """Classify every inventory cell inside ``bbox`` of ``img``.

    The bbox region is cut into GRID_COLS columns and as many rows as
    generate_grid_config(slot_num) returns. Cell width and height use integer
    division, so leftover pixels at the right/bottom edge are not covered.

    Args:
        img: Source screenshot.
        bbox: (left, top, right, bottom) in source-image pixel coords.
        slot_num: Number of slots, passed to generate_grid_config.
        include_artifacts: Passed to _build_templates.
        min_score: Confidence floor passed to _classify.

    Returns:
        One CellResult per cell in grid order, or [] when the grid config is
        empty.
    """
    left, top, right, bottom = bbox
    region = img.crop((left, top, right, bottom)).convert("RGB")
    layout = generate_grid_config(slot_num)
    if not layout:
        return []

    cell_w = (right - left) // GRID_COLS
    cell_h = (bottom - top) // len(layout)
    templates = _build_templates(include_artifacts=include_artifacts)

    results: List[CellResult] = []
    for entry in layout:
        # Each entry supplies its row index ("rows") and cell count ("cols").
        r = entry["rows"]
        for c in range(entry["cols"]):
            x0, y0 = c * cell_w, r * cell_h
            tile = region.crop((x0, y0, x0 + cell_w, y0 + cell_h))
            kind, value, rot, score = _classify(tile, templates, min_score=min_score)
            results.append(CellResult(f"{r}-{c}", r, c, kind, value, rot, score))
    return results
|
||
|
||
|
||
def dump_debug(
    img: Image.Image,
    bbox: Tuple[int, int, int, int],
    out_dir: str,
    *,
    slot_num: int = 34,
    include_artifacts: bool = True,
) -> str:
    """Save a debug bundle for one capture and return the report path.

    Written to ``out_dir`` (created if missing):
      * screenshot.png      — the full input image
      * bbox_crop.png       — the bbox region, converted to RGB
      * cells/<row>-<col>.png — every cell crop
      * report.txt          — grid/template summary plus, per cell, the
                              verdict and the top matches from
                              _classify_with_top (called with its defaults)

    Cells are sliced with the same integer-division arithmetic as
    recognize_image. Used to iterate on recognizer tuning from real captures.

    The report is now written, and its path returned, even when the grid
    config is empty. That case used to return ``out_dir`` instead, with no
    report on disk, contradicting the documented return value.

    Returns:
        Path to report.txt.
    """
    import os

    os.makedirs(out_dir, exist_ok=True)
    img.save(os.path.join(out_dir, "screenshot.png"))
    L, T, R, B = bbox
    crop = img.crop((L, T, R, B)).convert("RGB")
    crop.save(os.path.join(out_dir, "bbox_crop.png"))

    report = os.path.join(out_dir, "report.txt")
    lines = [f"bbox: {bbox}"]

    grid = generate_grid_config(slot_num)
    if not grid:
        # Nothing to slice; still leave a report explaining why.
        lines.append(f"grid: empty (slot_num={slot_num}); no cells to classify")
    else:
        rows = len(grid)
        cell_w = (R - L) // GRID_COLS
        cell_h = (B - T) // rows
        templates = _build_templates(include_artifacts=include_artifacts)
        stats = load_stats()
        lines += [
            f"grid: {len(grid)} rows x {GRID_COLS} cols, slot_num={slot_num}",
            f"cell px: {cell_w} x {cell_h}",
            f"templates loaded: total={len(templates)} stats={stats}",
            "",
        ]
        cells_dir = os.path.join(out_dir, "cells")
        os.makedirs(cells_dir, exist_ok=True)
        for row in grid:
            y = row["rows"]
            for x in range(row["cols"]):
                cx0 = x * cell_w
                cy0 = y * cell_h
                cell = crop.crop((cx0, cy0, cx0 + cell_w, cy0 + cell_h))
                cell.save(os.path.join(cells_dir, f"{y}-{x}.png"))
                kind, value, rot, score, top = _classify_with_top(cell, templates)
                top_s = ", ".join(f"{k}:{v}@r{r}={s:.3f}" for k, v, r, s in top)
                lines.append(
                    f"  {y}-{x}: kind={kind} value={value} rot={rot} score={score:.3f} | top: {top_s}"
                )

    with open(report, "w", encoding="utf-8") as fh:
        fh.write("\n".join(lines))
    return report
|
||
|
||
|
||
def recognize_file(
    path: str,
    bbox: Tuple[int, int, int, int],
    *,
    slot_num: int = 34,
    include_artifacts: bool = True,
    min_score: float = 0.35,
) -> List[CellResult]:
    """Open the image at ``path`` and run recognize_image on it.

    Two fixes over the previous version:
      * ``min_score`` defaults to 0.35, the same floor recognize_image and
        _classify use. It was still 0.55, so file-based recognition rejected
        matches that image-based recognition accepted.
      * The file is opened in a ``with`` block so its handle is closed once
        recognition finishes. recognize_image crops and converts the pixels
        before returning, so nothing refers to the file afterwards.

    Args:
        path: Path of the screenshot file.
        bbox: (left, top, right, bottom) in source-image pixel coords.
        slot_num: Forwarded to recognize_image.
        include_artifacts: Forwarded to recognize_image.
        min_score: Forwarded to recognize_image.

    Returns:
        The list returned by recognize_image.
    """
    with Image.open(path) as img:
        return recognize_image(
            img,
            bbox,
            slot_num=slot_num,
            include_artifacts=include_artifacts,
            min_score=min_score,
        )
|
||
|
||
|
||
def slab_values_from(results: List[CellResult]) -> List[str]:
    """Collect the values of all slab results, in input order.

    Results of any other kind are skipped, as are slab results whose value
    is falsy (None or empty).
    """
    values: List[str] = []
    for res in results:
        if res.kind != "slab":
            continue
        if res.value:
            values.append(res.value)
    return values
|