v0.3.0: game-window picker + NCC recognition + artifacts + ?-merged

- window_capture.py: enumerate top-level windows (pygetwindow) and
  capture a specific one via PrintWindow PW_RENDERFULLCONTENT (works
  on non-focused windows). Linux falls back to mss region grab.
- recognizer.py: replace MAE matcher with NCC over numpy vectors.
  Each rotatable slab generates 4 templates (0/90/180/270). Adds 248
  artifact templates and an empty-cell heuristic (low mean/std-dev).
  Cells below the confidence floor are tagged "unknown" — these are
  likely merged "?" boxes.
- gui.py: new ScreenshotFrame with [게임 창 선택] button → window
  picker dialog → bbox crop → recognize → editable preview grid with
  per-cell CellEditor that handles slab / artifact / merged(?) / empty.
  Merged cells let user pick which two slabs got combined + a level.
- artifacts.py + bundled _artifacts.json (248 entries from
  WhiteDog1004/sephiria) for matching and rendering.
- renderer.py: factored CDN fetch into _fetch_image; added
  fetch_artifact_image().
- requirements.txt: + numpy, pygetwindow (Win), pywin32 (Win).
- docker-build-cmd.sh: upgrade PyInstaller to 5.x inside the cdrx
  container so that numpy's DLL manifest reads work.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Claude
2026-05-14 09:36:49 +09:00
parent e388c965bc
commit 2e23ad5d2f
9 changed files with 4878 additions and 358 deletions

View File

@@ -1,122 +1,42 @@
"""Recognize slabs from a screenshot of the in-game inventory.
"""Backward-compatible thin wrapper over the new recognizer.
Approach: template matching against the cached CDN images. Given a screenshot
and the inventory bounding box, we divide it into a grid and compare each cell
against every slab template (resized to the cell). Mean absolute error in RGB
picks the best match; cells above a threshold are treated as empty.
The old API exposed `Recognition` (slot_id, value, score) and `recognize()`
returning slabs only. Existing CLI code (`__main__.py`) and tests use that
surface, so we keep it working by delegating to recognizer.py.
This is a best-effort fallback. Accuracy depends heavily on the screenshot
resolution and the slab images matching the in-game render style. The CDN
images are the same pixel-art assets the game uses, so accuracy is usually
fine when the screenshot is sharp.
New code should call `recognizer.recognize_image()` / `recognize_file()`
directly for richer (kind, rotation, artifact) results.
"""
from __future__ import annotations
import os
from dataclasses import dataclass
from typing import List, Optional, Tuple
from PIL import Image
from .renderer import fetch_slab_image
from .slabs import GRID_COLS, SLABS, generate_grid_config
from .recognizer import recognize_file
@dataclass
class Recognition:
    """Recognition outcome for a single inventory slot."""

    # Slot key; recognize() builds it as f"{y}-{x}", e.g. "0-3".
    slot_id: str
    # Matched slab value. None = empty
    value: Optional[str]
    # Match score. lower = better match
    score: float
def _mae(a: Image.Image, b: Image.Image) -> float:
    """Return the mean absolute per-channel RGB difference of two images.

    ``b`` is resized to the size of ``a`` when the sizes differ. Smaller
    results mean closer images. When there are no pixels to compare, the
    large sentinel ``1e9`` is returned.
    """
    other = b if a.size == b.size else b.resize(a.size)
    pixels_a = list(a.convert("RGB").getdata())
    pixels_b = list(other.convert("RGB").getdata())
    count = len(pixels_a)
    if count == 0:
        # Nothing to compare: report a huge error instead of dividing by zero.
        return 1e9
    diff_sum = sum(
        abs(ra - rb) + abs(ga - gb) + abs(ba - bb)
        for (ra, ga, ba), (rb, gb, bb) in zip(pixels_a, pixels_b)
    )
    # Three channels per pixel, hence the factor of 3 in the denominator.
    return diff_sum / (count * 3)
def _alpha_composite_on_dark(img: Image.Image) -> Image.Image:
    """Flatten an image onto a dark backdrop and return it in RGB mode.

    Slab templates are RGBA on a transparent background; compositing them
    onto a dark background gives a fairer comparison. Images in any other
    mode are only converted to RGB.
    """
    if img.mode == "RGBA":
        # Opaque dark backdrop of the same size as the template.
        backdrop = Image.new("RGBA", img.size, (50, 30, 50, 255))
        backdrop.alpha_composite(img)
        return backdrop.convert("RGB")
    return img.convert("RGB")
value: Optional[str] # slab value, or None if empty/unknown/artifact
score: float # NCC score in [-1, 1]; higher = better
# NOTE(review): this span is rendered from a unified diff (hunk "-1,122 +1,42")
# without +/- markers and without indentation. The removed MAE-based
# implementation and the added delegating implementation therefore appear back
# to back inside one `recognize` definition.
def recognize(
screenshot_path: str,
bbox: Tuple[int, int, int, int],
slot_num: int = 34,
# Old signature line: the threshold was compared against the best MAE score.
empty_threshold: float = 35.0,
# New signature line: same parameter, not used by the delegating body below.
empty_threshold: float = 35.0, # ignored; kept for arg-compat
) -> List[Recognition]:
"""Recognize slabs in the inventory area of a screenshot.
Args:
screenshot_path: Path to the game screenshot (PNG/JPG).
bbox: (left, top, right, bottom) pixel coords of the inventory grid.
Must enclose only the slot grid, not the surrounding UI.
slot_num: Total slot count (18..60). Used to compute row layout.
empty_threshold: MAE above this counts as empty.
Returns:
List of Recognition entries, one per slot in row-major order.
"""
# --- Removed implementation: grid template matching by mean absolute error ---
# Load the screenshot and keep only the inventory area.
img = Image.open(screenshot_path).convert("RGB")
left, top, right, bottom = bbox
img = img.crop((left, top, right, bottom))
grid = generate_grid_config(slot_num)
# No grid layout for this slot count -> nothing to recognize.
if not grid:
return []
rows = len(grid)
# Uniform cell size via integer division (remainder pixels are dropped).
cell_w = (right - left) // GRID_COLS
cell_h = (bottom - top) // rows
# Templates are square, sized to the shorter side of a cell.
template_size = (min(cell_w, cell_h), min(cell_w, cell_h))
# Pre-load and downscale templates
templates: List[Tuple[str, Image.Image]] = []
for slab in SLABS:
t = fetch_slab_image(slab.image)
# Skip slabs whose image could not be fetched (fetch returned None).
if t is None:
continue
t = _alpha_composite_on_dark(t).resize(template_size)
templates.append((slab.value, t))
results: List[Recognition] = []
for row in grid:
# NOTE(review): presumably row["rows"] is the row index and row["cols"] the
# number of cells in that row; confirm against generate_grid_config.
y = row["rows"]
for x in range(row["cols"]):
cx0 = x * cell_w
cy0 = y * cell_h
cell = img.crop((cx0, cy0, cx0 + cell_w, cy0 + cell_h)).resize(template_size)
best_value: Optional[str] = None
# Lower MAE is better, so start from a large sentinel.
best_score = 1e9
for v, t in templates:
s = _mae(cell, t)
if s < best_score:
best_score = s
best_value = v
# Best template still too far off -> report the slot as empty, keeping the score.
if best_score > empty_threshold:
results.append(Recognition(f"{y}-{x}", None, best_score))
else:
results.append(Recognition(f"{y}-{x}", best_value, best_score))
return results
# --- Added implementation: delegate to recognizer.recognize_file ---
"""Recognize slabs in the inventory area of a screenshot (slabs only)."""
cells = recognize_file(screenshot_path, bbox, slot_num=slot_num)
out: List[Recognition] = []
for c in cells:
# Only cells of kind "slab" keep their value; every other kind maps to None.
v = c.value if c.kind == "slab" else None
out.append(Recognition(c.slot_id, v, c.score))
return out
def recognized_values(recognitions: List[Recognition]) -> List[str]:
    """Return the values of all recognitions whose value is not None.

    The input order is preserved.
    """
    values: List[str] = []
    for entry in recognitions:
        if entry.value is None:
            continue
        values.append(entry.value)
    return values