# sephiria_inv_program/sephiria_inv/recognizer.py

"""Cell-level recognition over the inventory grid.

Pipeline given a cropped inventory image:
  1. Slice into 6-col rows per generate_grid_config().
  2. Per cell, classify: empty / slab / artifact / unknown.
     - "empty"   = low std-dev (uniform) pixels, at any brightness
     - "slab"    = best NCC match across all slabs (× 4 rotations for rotatable ones)
     - "artifact"= best NCC match across all artifacts (no rotation)
     - "unknown" = nothing matched above the confidence floor →
                   likely a merged "?" slab box, surfaced to the user.

NCC (normalized cross-correlation) is used instead of MAE because it's
invariant to brightness/contrast shifts — the in-game render has subtle
shader effects (bloom, vignette) that MAE penalizes harshly.

Templates are fetched via renderer.fetch_slab_image / fetch_artifact_image
on first call and cached on disk.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple

import numpy as np
from PIL import Image

from .artifacts import ARTIFACTS
from .renderer import fetch_slab_image, fetch_artifact_image
from .slabs import GRID_COLS, SLABS, SLABS_BY_VALUE, generate_grid_config


# ---------- types ----------

@dataclass
class CellResult:
    """Classification outcome for a single inventory grid cell.

    Instances are built positionally by recognize_image(), so the field
    order below is part of the contract.
    """
    slot_id: str          # "<row>-<col>", built from the row and col fields below
    row: int              # grid row index of the cell
    col: int              # grid column index of the cell
    kind: str             # "empty" | "slab" | "artifact" | "unknown"
    value: Optional[str]  # matched slab/artifact value; None for empty/unknown
    rotation: int         # 0/1/2/3 for slabs; 0 otherwise
    score: float          # NCC in [-1, 1] — higher is better; empty cells report 1.0


# ---------- template prep ----------

# Side length, in pixels, of the square that _to_feat() resizes every image to
# before matching — small enough to be fast, big enough to discriminate.
_TEMPLATE_SIZE = 64


def _on_dark(img: Image.Image) -> Image.Image:
    """Flatten a template onto the dark bag-slot colour and return it as RGB.

    Images in any mode other than RGBA are simply converted to RGB. RGBA
    images are first alpha-composited over an opaque (38, 22, 42) backdrop
    so transparent regions take the slot colour instead of black.
    """
    if img.mode == "RGBA":
        backdrop = Image.new("RGBA", img.size, (38, 22, 42, 255))
        backdrop.alpha_composite(img)  # in place: img is drawn over the backdrop
        flattened = backdrop
    else:
        flattened = img
    return flattened.convert("RGB")


def _to_feat(img: Image.Image) -> np.ndarray:
    """Turn an image into a 1-D matching feature vector.

    The image is converted to grayscale, resized to
    _TEMPLATE_SIZE x _TEMPLATE_SIZE with bilinear filtering, flattened to
    float32, mean-subtracted and scaled to unit L2 norm. The dot product of
    two such vectors is their normalized cross-correlation.
    """
    side = _TEMPLATE_SIZE
    gray = img.convert("L").resize((side, side), Image.BILINEAR)
    vec = np.asarray(gray, dtype=np.float32).reshape(-1)
    centered = vec - vec.mean()
    length = np.linalg.norm(centered)
    # A (near-)uniform image has nothing to normalize; hand back the
    # essentially-zero vector rather than dividing by ~0.
    return centered if length < 1e-6 else centered / length


@dataclass
class _Template:
    """One reference feature vector that cells are matched against."""
    kind: str  # "slab" | "artifact"
    value: str  # the slab's / artifact's `value` identifier
    rotation: int  # for slabs: number of 90-degree steps applied; 0 for artifacts
    feat: np.ndarray  # output of _to_feat() for the (possibly rotated) icon


_TEMPLATE_CACHE: List[_Template] = []
_CACHE_BUILT = False
# True once artifact templates have been appended to _TEMPLATE_CACHE. Tracked
# separately from _CACHE_BUILT so a slabs-only build can later be extended
# with artifacts without fetching the slabs again.
_ARTIFACTS_BUILT = False


_LAST_LOAD_STATS: Dict[str, int] = {"slabs_ok": 0, "slabs_fail": 0,
                                    "artifacts_ok": 0, "artifacts_fail": 0}


def _build_templates(*, include_artifacts: bool = True) -> List[_Template]:
    """Build (and cache) the template list. Lazy because download is slow.

    Slab templates are built on the first call; artifact templates are added
    the first time a caller asks for them. The returned list always honours
    ``include_artifacts``, whatever earlier calls requested: previously the
    first call's choice was cached and returned for every later call.

    Icons whose fetch returns None are skipped and counted as failures in
    _LAST_LOAD_STATS. If the cache is still empty after a build, the next
    call retries the whole build.

    Returns the shared cache list when artifacts are included (treat it as
    read-only), otherwise a new list holding only the non-artifact templates.
    """
    global _CACHE_BUILT, _ARTIFACTS_BUILT

    if not (_CACHE_BUILT and _TEMPLATE_CACHE):
        slab_templates: List[_Template] = []
        s_ok = s_fail = 0
        # Slabs: 4 rotations for rotatable, 1 otherwise
        for s in SLABS:
            img = fetch_slab_image(s.image)
            if img is None:
                s_fail += 1
                continue
            s_ok += 1
            base = _on_dark(img)
            rotations = (0, 1, 2, 3) if s.rotate else (0,)
            for r in rotations:
                rotated = base if r == 0 else base.rotate(-90 * r, expand=False)
                slab_templates.append(
                    _Template("slab", s.value, r, _to_feat(rotated)))
        _TEMPLATE_CACHE.clear()
        _TEMPLATE_CACHE.extend(slab_templates)
        _CACHE_BUILT = True
        # The cache was just reset, so any previously loaded artifacts are gone.
        _ARTIFACTS_BUILT = False
        _LAST_LOAD_STATS.update({"slabs_ok": s_ok, "slabs_fail": s_fail,
                                 "artifacts_ok": 0, "artifacts_fail": 0})

    if not include_artifacts:
        # The shared cache may already hold artifacts from an earlier call.
        return [t for t in _TEMPLATE_CACHE if t.kind != "artifact"]

    if not _ARTIFACTS_BUILT:
        artifact_templates: List[_Template] = []
        a_ok = a_fail = 0
        for a in ARTIFACTS:
            img = fetch_artifact_image(a.image)
            if img is None:
                a_fail += 1
                continue
            a_ok += 1
            base = _on_dark(img)
            artifact_templates.append(
                _Template("artifact", a.value, 0, _to_feat(base)))
        _TEMPLATE_CACHE.extend(artifact_templates)
        _ARTIFACTS_BUILT = True
        _LAST_LOAD_STATS.update({"artifacts_ok": a_ok, "artifacts_fail": a_fail})

    return _TEMPLATE_CACHE


def warm_templates(*, include_artifacts: bool = True) -> int:
    """Build the template cache up front and report how many templates it holds.

    Intended to be called once (e.g. from the GUI) before recognition so the
    slow icon downloads do not happen in the middle of a recognition pass.
    """
    templates = _build_templates(include_artifacts=include_artifacts)
    return len(templates)


def load_stats() -> Dict[str, int]:
    """Snapshot of the most recent template load counters.

    Keys: slabs_ok, slabs_fail, artifacts_ok, artifacts_fail. A copy is
    returned, so mutating it does not affect the module-level counters.
    """
    return _LAST_LOAD_STATS.copy()


# ---------- cell classification ----------

def _is_empty(cell: Image.Image) -> bool:
    """Decide whether a cell crop looks like an empty slot.

    The test is uniformity, not darkness: a cell counts as empty when both
    its grayscale std-dev is below 18 and the mean of its per-channel RGB
    std-devs is below 22. That keeps bright or tinted slot backgrounds
    (HDR / bright-monitor captures) detectable as empty.
    """
    gray_std = np.asarray(cell.convert("L"), dtype=np.float32).std()
    pixels = np.asarray(cell.convert("RGB"), dtype=np.float32).reshape(-1, 3)
    mean_channel_std = float(pixels.std(axis=0).mean())
    if not gray_std < 18.0:
        return False
    return bool(mean_channel_std < 22.0)


def _inset(cell: Image.Image, ratio: float = 0.16) -> Image.Image:
    """Crop a margin of `ratio` x size off every side of the cell.

    In-game slots carry chunky frame ornaments and a corner stack-count
    badge, whereas templates are clean icons. Trimming the border (16% per
    side by default) leaves the inner art that is actually comparable. The
    margins are truncated to whole pixels.
    """
    width, height = cell.size
    margin_x, margin_y = int(width * ratio), int(height * ratio)
    box = (margin_x, margin_y, width - margin_x, height - margin_y)
    return cell.crop(box)


def _classify(
    cell: Image.Image,
    templates: List[_Template],
    *,
    min_score: float = 0.35,
) -> Tuple[str, Optional[str], int, float]:
    """Classify one cell crop and return (kind, value, rotation, score).

    - Uniform cells are reported as ("empty", None, 0, 1.0).
    - With no templates available the result is ("unknown", None, 0, 0.0).
    - Otherwise the inset cell is scored against every template; the best
      match is returned unless its score is below `min_score`, in which
      case the cell is "unknown" and carries that best score.
    """
    if _is_empty(cell):
        return "empty", None, 0, 1.0

    query = _to_feat(_inset(cell))
    if not templates:
        return "unknown", None, 0, 0.0

    # One (N, D) @ (D,) product scores every template at once; each row and
    # the query are mean-subtracted unit vectors, so each entry is an NCC.
    bank = np.stack([tpl.feat for tpl in templates], axis=0)
    similarity = bank @ query
    winner = int(np.argmax(similarity))
    top_score = float(similarity[winner])

    if top_score < min_score:
        return "unknown", None, 0, top_score
    match = templates[winner]
    return match.kind, match.value, match.rotation, top_score


def _classify_with_top(
    cell: Image.Image,
    templates: List[_Template],
    *,
    top_k: int = 3,
    min_score: float = 0.35,
) -> Tuple[str, Optional[str], int, float, List[Tuple[str, str, int, float]]]:
    """Like _classify but also returns the top-k matches for debug dumps.

    Returns (kind, value, rotation, score, top), where `top` is a list of
    (kind, value, rotation, score) tuples for the `top_k` highest-scoring
    templates, best first. `top` is empty for empty cells and when there
    are no templates.

    The verdict and the top-k list come from the same score vector, so the
    cell is scored only once. `min_score` is the confidence floor below
    which the verdict becomes "unknown"; its default matches _classify's,
    and callers can pass the threshold used by the run they are debugging.
    """
    if _is_empty(cell):
        return "empty", None, 0, 1.0, []
    if not templates:
        return "unknown", None, 0, 0.0, []

    feat = _to_feat(_inset(cell))
    scores = np.stack([t.feat for t in templates], axis=0) @ feat

    # A negative top_k would slice from the wrong end; treat it as "none".
    order = np.argsort(-scores)[:max(top_k, 0)]
    top = [(templates[i].kind, templates[i].value, templates[i].rotation,
            float(scores[i])) for i in order]

    # argmax (first maximum) decides the verdict, exactly as _classify does.
    idx = int(np.argmax(scores))
    best = float(scores[idx])
    if best < min_score:
        return "unknown", None, 0, best, top
    t = templates[idx]
    return t.kind, t.value, t.rotation, best, top


# ---------- public API ----------

def recognize_image(
    img: Image.Image,
    bbox: Tuple[int, int, int, int],
    *,
    slot_num: int = 34,
    include_artifacts: bool = True,
    min_score: float = 0.35,
) -> List[CellResult]:
    """Classify every inventory cell inside `bbox` of `img`.

    `bbox` is (left, top, right, bottom) in source-image pixel coords. The
    cropped region is divided into GRID_COLS columns and one row per entry
    of generate_grid_config(slot_num); cell width and height use integer
    division, so leftover pixels at the right/bottom edge are ignored.
    Each grid entry supplies its row index ("rows") and how many columns
    of that row are populated ("cols").

    Returns one CellResult per populated cell in grid order, or an empty
    list when the grid config is empty.
    """
    left, top, right, bottom = bbox
    region = img.crop((left, top, right, bottom)).convert("RGB")
    layout = generate_grid_config(slot_num)
    if not layout:
        return []

    cell_w = (right - left) // GRID_COLS
    cell_h = (bottom - top) // len(layout)
    templates = _build_templates(include_artifacts=include_artifacts)

    def classify_at(r: int, c: int) -> CellResult:
        x0, y0 = c * cell_w, r * cell_h
        tile = region.crop((x0, y0, x0 + cell_w, y0 + cell_h))
        verdict = _classify(tile, templates, min_score=min_score)
        # verdict is (kind, value, rotation, score) — CellResult's last fields.
        return CellResult(f"{r}-{c}", r, c, *verdict)

    return [
        classify_at(entry["rows"], c)
        for entry in layout
        for c in range(entry["cols"])
    ]


def dump_debug(
    img: Image.Image,
    bbox: Tuple[int, int, int, int],
    out_dir: str,
    *,
    slot_num: int = 34,
    include_artifacts: bool = True,
) -> str:
    """Save full screenshot, bbox crop, every cell crop and a top-3 match
    report to out_dir. Returns the path to the report file. Used to iterate
    on recognizer tuning from real captures.

    Files written under out_dir (created if missing):
      screenshot.png   the full input image
      bbox_crop.png    img cropped to bbox, converted to RGB
      cells/<r>-<c>.png one crop per populated grid cell
      report.txt       header lines plus one verdict line per cell

    When generate_grid_config(slot_num) yields no rows, a short report.txt
    saying so is still written and its path returned, so the return value
    is always the report file (it used to be out_dir in that case).
    """
    import os
    os.makedirs(out_dir, exist_ok=True)
    img.save(os.path.join(out_dir, "screenshot.png"))
    left, top, right, bottom = bbox
    crop = img.crop((left, top, right, bottom)).convert("RGB")
    crop.save(os.path.join(out_dir, "bbox_crop.png"))
    report = os.path.join(out_dir, "report.txt")

    grid = generate_grid_config(slot_num)
    if not grid:
        # Nothing to slice; skip the (slow) template build but still honour
        # the "returns the report path" contract.
        lines = [
            f"bbox: {bbox}",
            f"grid: empty, slot_num={slot_num}",
        ]
    else:
        rows = len(grid)
        cell_w = (right - left) // GRID_COLS
        cell_h = (bottom - top) // rows
        templates = _build_templates(include_artifacts=include_artifacts)
        stats = load_stats()
        lines = [
            f"bbox: {bbox}",
            f"grid: {len(grid)} rows x {GRID_COLS} cols, slot_num={slot_num}",
            f"cell px: {cell_w} x {cell_h}",
            f"templates loaded: total={len(templates)} stats={stats}",
            "",
        ]
        cells_dir = os.path.join(out_dir, "cells")
        os.makedirs(cells_dir, exist_ok=True)
        for row in grid:
            y = row["rows"]
            for x in range(row["cols"]):
                cx0 = x * cell_w
                cy0 = y * cell_h
                cell = crop.crop((cx0, cy0, cx0 + cell_w, cy0 + cell_h))
                cell.save(os.path.join(cells_dir, f"{y}-{x}.png"))
                kind, value, rot, score, top_matches = _classify_with_top(cell, templates)
                top_s = ", ".join(
                    f"{k}:{v}@r{r}={s:.3f}" for k, v, r, s in top_matches)
                lines.append(
                    f"  {y}-{x}: kind={kind} value={value} rot={rot} score={score:.3f} | top: {top_s}"
                )

    with open(report, "w", encoding="utf-8") as fh:
        fh.write("\n".join(lines))
    return report


def recognize_file(
    path: str,
    bbox: Tuple[int, int, int, int],
    *,
    slot_num: int = 34,
    include_artifacts: bool = True,
    min_score: float = 0.55,
) -> List[CellResult]:
    """Open the image at `path` and run recognize_image() on it.

    All arguments other than `path` are forwarded unchanged. The image is
    opened in a `with` block so the underlying file handle is released even
    if recognition raises; recognize_image() works on its own crop of the
    image, so nothing returned depends on the file staying open.

    NOTE(review): the default `min_score` here (0.55) is stricter than
    recognize_image()'s 0.35 — confirm that the difference is intentional.
    """
    with Image.open(path) as img:
        return recognize_image(
            img, bbox,
            slot_num=slot_num,
            include_artifacts=include_artifacts,
            min_score=min_score,
        )


def slab_values_from(results: List[CellResult]) -> List[str]:
    """Collect the values of all slab cells, in input order.

    Artifact, empty and unknown cells are skipped, as are slab cells whose
    value is missing or empty.
    """
    values: List[str] = []
    for cell in results:
        if cell.kind != "slab":
            continue
        if cell.value:
            values.append(cell.value)
    return values