sephiria_inv_program/sephiria_inv/recognizer.py

"""Cell-level recognition over the inventory grid.

Pipeline given a cropped inventory image:
  1. Slice into 6-col rows per generate_grid_config().
  2. Per cell, classify: empty / slab / artifact / unknown.
     - "empty"   = low std-dev / dark uniform pixels
     - "slab"    = best NCC match across all slabs (× 4 rotations when rotatable)
     - "artifact"= best NCC match across all artifacts (no rotation)
     - "unknown" = nothing matched above the confidence floor →
                   likely a merged "?" slab box, surfaced to the user.

NCC (normalized cross-correlation) is used instead of MAE because it's
invariant to brightness/contrast shifts — the in-game render has subtle
shader effects (bloom, vignette) that MAE penalizes harshly.

Templates are fetched via renderer.fetch_slab_image / fetch_artifact_image
on first call and cached on disk.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple

import numpy as np
from PIL import Image

from .artifacts import ARTIFACTS
from .renderer import fetch_slab_image, fetch_artifact_image
from .slabs import GRID_COLS, SLABS, SLABS_BY_VALUE, generate_grid_config


# ---------- types ----------

@dataclass
class CellResult:
    """Classification outcome for a single inventory grid cell."""

    slot_id: str          # "<row>-<col>"
    row: int              # row index of the cell (first half of slot_id)
    col: int              # column index of the cell (second half of slot_id)
    kind: str             # "empty" | "slab" | "artifact" | "unknown"
    value: Optional[str]  # slab/artifact value, or None
    rotation: int         # 0/1/2/3 for slabs; 0 otherwise
    score: float          # NCC in [-1, 1] — higher is better (1.0 is reported for empty cells)


# ---------- template prep ----------

# Side length in pixels of the square that images are resized to before
# matching: work at 64x64 — small enough to be fast, big enough to discriminate.
_TEMPLATE_SIZE = 64


def _on_dark(img: Image.Image) -> Image.Image:
    """Composite a possibly-transparent template onto a dark bag-slot color.

    Transparency is honoured for every way Pillow can carry it: a real alpha
    band ("RGBA", "LA", "PA") or a ``transparency`` entry in ``img.info``
    (palette / tRNS-style PNGs). Such images are promoted to RGBA and blended
    onto the slot color; fully opaque images are simply converted to RGB.

    Returns a new RGB image of the same size as ``img``.
    """
    has_alpha = img.mode in ("RGBA", "LA", "PA") or "transparency" in img.info
    if not has_alpha:
        return img.convert("RGB")
    # A plain convert("RGB") on these modes would discard the transparency
    # instead of blending it, so go through RGBA first.
    rgba = img if img.mode == "RGBA" else img.convert("RGBA")
    bg = Image.new("RGBA", rgba.size, (38, 22, 42, 255))
    bg.alpha_composite(rgba)
    return bg.convert("RGB")


def _to_feat(img: Image.Image) -> np.ndarray:
    """Turn an image into a flat, zero-mean, unit-length float32 feature vector.

    The image is converted to grayscale and bilinearly resized to a
    ``_TEMPLATE_SIZE`` square before flattening. A (near-)uniform image has
    no signal after mean subtraction, so its all-zero vector is returned
    without normalisation.
    """
    side = _TEMPLATE_SIZE
    gray = img.convert("L").resize((side, side), Image.BILINEAR)
    centered = np.asarray(gray, dtype=np.float32).reshape(-1)
    centered = centered - centered.mean()
    length = np.linalg.norm(centered)
    # Avoid dividing by ~0 for uniform cells; the vector is all zeros anyway.
    return centered if length < 1e-6 else centered / length


@dataclass
class _Template:
    """One reference feature vector that cells are compared against."""

    kind: str  # "slab" | "artifact"
    value: str  # slab/artifact value reported when this template wins
    rotation: int  # for slabs: number of 90-degree steps applied; 0 for artifacts
    feat: np.ndarray  # feature vector produced by _to_feat


_TEMPLATE_CACHE: List[_Template] = []
_CACHE_BUILT = False
# True once artifact templates have been appended to _TEMPLATE_CACHE.
_CACHE_HAS_ARTIFACTS = False


def _build_templates(*, include_artifacts: bool = True) -> List[_Template]:
    """Build (and cache) the template list. Lazy because download is slow.

    Slab templates are always built; artifact templates are added the first
    time they are requested. The cache remembers whether artifacts are
    present, so the result always matches ``include_artifacts`` no matter
    which value earlier calls used.

    Args:
        include_artifacts: When False, only slab templates are returned.

    Returns:
        Slab templates followed by artifact templates (if requested). This is
        the shared cache list itself unless artifacts have to be filtered
        out, in which case a new list is returned. Callers must not mutate it.
    """
    global _CACHE_BUILT, _CACHE_HAS_ARTIFACTS

    # An empty cache is treated as "not built" so failed fetches are retried.
    if not (_CACHE_BUILT and _TEMPLATE_CACHE):
        slab_templates: List[_Template] = []
        # Slabs: 4 rotations for rotatable, 1 otherwise
        for s in SLABS:
            img = fetch_slab_image(s.image)
            if img is None:
                continue
            base = _on_dark(img)
            rotations = (0, 1, 2, 3) if s.rotate else (0,)
            for r in rotations:
                # Pillow angles are counter-clockwise, so a negative angle
                # turns the icon clockwise in 90-degree steps.
                rotated = base if r == 0 else base.rotate(-90 * r, expand=False)
                slab_templates.append(_Template("slab", s.value, r, _to_feat(rotated)))
        # Commit only after the whole pass succeeded.
        _TEMPLATE_CACHE.clear()
        _TEMPLATE_CACHE.extend(slab_templates)
        _CACHE_BUILT = True
        _CACHE_HAS_ARTIFACTS = False

    if include_artifacts and not _CACHE_HAS_ARTIFACTS:
        artifact_templates: List[_Template] = []
        for a in ARTIFACTS:
            img = fetch_artifact_image(a.image)
            if img is None:
                continue
            base = _on_dark(img)
            artifact_templates.append(_Template("artifact", a.value, 0, _to_feat(base)))
        _TEMPLATE_CACHE.extend(artifact_templates)
        _CACHE_HAS_ARTIFACTS = True

    if include_artifacts or not _CACHE_HAS_ARTIFACTS:
        return _TEMPLATE_CACHE
    # Artifacts are cached from an earlier call but were not requested now.
    return [t for t in _TEMPLATE_CACHE if t.kind != "artifact"]


def warm_templates(*, include_artifacts: bool = True) -> int:
    """Eagerly build the template list and report how many templates it holds.

    Intended to be called once (e.g. from the GUI) before recognition so the
    slow icon fetch does not happen in the middle of classifying cells.
    """
    templates = _build_templates(include_artifacts=include_artifacts)
    return len(templates)


# ---------- cell classification ----------

def _is_empty(cell: Image.Image) -> bool:
    """Return True when the cell looks like an empty slot: dark and nearly uniform."""
    gray = np.asarray(cell.convert("L"), dtype=np.float32)
    is_dark = gray.mean() < 60.0
    if not is_dark:
        return False
    # Low spread of grayscale values means there is no icon drawn in the slot.
    return bool(gray.std() < 14.0)


def _classify(
    cell: Image.Image,
    templates: List[_Template],
    *,
    min_score: float = 0.55,
) -> Tuple[str, Optional[str], int, float]:
    """Classify one cell image and return ``(kind, value, rotation, score)``.

    Empty-looking cells short-circuit to ``("empty", None, 0, 1.0)``.
    Otherwise the cell is scored against every template at once, and the
    best match wins if it reaches ``min_score``. Anything below that floor,
    or an empty template list, is reported as "unknown".
    """
    if _is_empty(cell):
        return "empty", None, 0, 1.0

    query = _to_feat(cell)
    if not templates:
        return "unknown", None, 0, 0.0

    # One (N, D) matrix-vector product scores all templates together. Both
    # sides are mean-subtracted unit vectors, so each dot product is the NCC.
    bank = np.stack([tpl.feat for tpl in templates], axis=0)
    ncc = bank @ query
    winner = int(np.argmax(ncc))
    top = float(ncc[winner])

    if top < min_score:
        return "unknown", None, 0, top
    match = templates[winner]
    return match.kind, match.value, match.rotation, top


# ---------- public API ----------

def recognize_image(
    img: Image.Image,
    bbox: Tuple[int, int, int, int],
    *,
    slot_num: int = 34,
    include_artifacts: bool = True,
    min_score: float = 0.55,
) -> List[CellResult]:
    """Slice img[bbox] into a GRID_COLS-column grid and classify each cell.

    Args:
        img: Source image containing the inventory.
        bbox: ``(left, top, right, bottom)`` of the inventory grid, in
            source-image pixel coords.
        slot_num: Slot count passed to ``generate_grid_config`` to obtain
            the grid layout.
        include_artifacts: Whether artifact templates take part in matching.
        min_score: Minimum NCC score for a match to be accepted.

    Returns:
        One ``CellResult`` per grid slot, in grid order. Empty when the grid
        config is empty.

    Raises:
        ValueError: If ``bbox`` is too small to give every cell at least one
            pixel in each dimension.
    """
    L, T, R, B = bbox
    crop = img.crop((L, T, R, B)).convert("RGB")
    grid = generate_grid_config(slot_num)
    if not grid:
        return []
    rows = len(grid)
    # Integer division: remainder pixels at the right/bottom edge of the
    # bbox are not covered by any cell.
    cell_w = (R - L) // GRID_COLS
    cell_h = (B - T) // rows
    if cell_w <= 0 or cell_h <= 0:
        # Zero-sized cells cannot be resized or scored meaningfully; fail
        # loudly instead of feeding degenerate crops to the classifier.
        raise ValueError(
            f"bbox {bbox!r} is too small for a {GRID_COLS}x{rows} grid "
            f"(cell size would be {cell_w}x{cell_h})"
        )
    templates = _build_templates(include_artifacts=include_artifacts)

    out: List[CellResult] = []
    for row in grid:
        # Each grid entry supplies its row index ("rows") and the number of
        # cells in that row ("cols").
        y = row["rows"]
        for x in range(row["cols"]):
            cx0 = x * cell_w
            cy0 = y * cell_h
            cell = crop.crop((cx0, cy0, cx0 + cell_w, cy0 + cell_h))
            kind, value, rot, score = _classify(cell, templates, min_score=min_score)
            out.append(CellResult(f"{y}-{x}", y, x, kind, value, rot, score))
    return out


def recognize_file(
    path: str,
    bbox: Tuple[int, int, int, int],
    *,
    slot_num: int = 34,
    include_artifacts: bool = True,
    min_score: float = 0.55,
) -> List[CellResult]:
    """Open the image at ``path`` and run ``recognize_image`` on it.

    All keyword arguments are forwarded unchanged. The file is closed before
    returning; ``recognize_image`` finishes all pixel access before it
    returns, so the results do not depend on the open file.
    """
    # Image.open keeps the file handle open until the image is closed, so
    # scope it with a context manager to avoid leaking the handle.
    with Image.open(path) as img:
        return recognize_image(
            img, bbox,
            slot_num=slot_num,
            include_artifacts=include_artifacts,
            min_score=min_score,
        )


def slab_values_from(results: List[CellResult]) -> List[str]:
    """Collect the values of slab cells, in order.

    Artifact, empty and unknown cells are skipped, as are slab entries
    without a value.
    """
    values: List[str] = []
    for res in results:
        if res.kind != "slab":
            continue
        if not res.value:
            continue
        values.append(res.value)
    return values