"""OpenDART 공시 본문 수집. 키 없으면 SkippedMissingKey, 호출 측에서 skipped_missing_key 로 매핑. 사용 endpoint: - GET https://opendart.fss.or.kr/api/list.json : 공시 목록 (기간/종목별) - GET https://opendart.fss.or.kr/api/document.xml : 공시 원문 (zip in xml wrapper) 여기서는 list.json만 받아서 title + url 만 저장. 본문 다운로드는 후속 단계에서 선택적으로 추가 (용량/속도 트레이드오프). """ from __future__ import annotations import logging from dataclasses import dataclass from datetime import date, datetime from typing import Any import httpx from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential from app.config import settings logger = logging.getLogger(__name__) DART_BASE = "https://opendart.fss.or.kr/api" USER_AGENT = "stock_chart_site/0.1 (+personal)" class SkippedMissingKey(RuntimeError): pass @dataclass class Disclosure: code: str # corp 종목코드 (있을 경우) title: str url: str published_at: datetime def _has_key() -> bool: return bool(settings.dart_api_key) def _stock_code_to_corp_code_cache() -> dict[str, str]: """종목코드(6자리) -> corp_code(8자리) 매핑. 실제 운영에서는 corpCode.xml.zip 을 받아서 캐시해야 하지만, list.json 은 종목코드(stock_code)도 직접 받기 때문에 우선 빈 매핑으로 둔다. """ return {} @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=1, max=8), retry=retry_if_exception_type((httpx.HTTPError, httpx.TimeoutException)), reraise=True, ) def fetch_disclosures( code: str, start: date, end: date | None = None, *, page_count: int = 100, ) -> list[Disclosure]: """종목별 공시 목록 가져오기.""" if not _has_key(): raise SkippedMissingKey("dart api_key missing") end = end or date.today() params = { "crtfc_key": settings.dart_api_key, "bgn_de": start.strftime("%Y%m%d"), "end_de": end.strftime("%Y%m%d"), "page_count": str(page_count), "stock_code": code, } with httpx.Client(timeout=15.0, headers={"User-Agent": USER_AGENT}) as cli: resp = cli.get(f"{DART_BASE}/list.json", params=params) resp.raise_for_status() data = resp.json() status = data.get("status") # status: '000' OK, '013' no data, '020' rate limit, etc if status == "013": return [] if status and status != "000": raise RuntimeError(f"dart error: status={status} msg={data.get('message')}") out: list[Disclosure] = [] for row in data.get("list", []) or []: rcept_no = row.get("rcept_no") if not rcept_no: continue title = (row.get("report_nm") or "").strip() url = f"https://dart.fss.or.kr/dsaf001/main.do?rcpNo={rcept_no}" try: published_at = datetime.strptime(row.get("rcept_dt", ""), "%Y%m%d") except ValueError: continue out.append( Disclosure( code=row.get("stock_code") or code, title=title, url=url, published_at=published_at, ) ) return out def ping() -> dict[str, Any]: if not _has_key(): return {"status": "skipped_missing_key"} try: # 어제 기준 1건만 조회 (sanity) from datetime import timedelta items = fetch_disclosures("005930", date.today() - timedelta(days=30), date.today(), page_count=1) return {"status": "ok", "sample_count": len(items)} except SkippedMissingKey: return {"status": "skipped_missing_key"} except Exception as exc: # noqa: BLE001 return {"status": "failed", "error": str(exc)}