feat(phase-1a): external data fetchers + refresh pipeline + scheduler
10종목 시드 + pykrx OHLCV / 외인·기관 거래대금, KIS read-only EOD, OpenDART
공시, 네이버 금융 뉴스 스크레이퍼, 구글 뉴스 RSS, yfinance 거시(KOSPI/KOSDAQ/
USDKRW/US10Y) fetcher 를 추가하고 refresh_one / daily_batch / backfill /
APScheduler(16:00 KST) 파이프라인으로 묶음.
- backend/app/seed: 10종목 시드 (대형/고변동/테마/플랫폼/방어)
- backend/app/fetch: pykrx, kis, dart, news, macro, symbols_seed
- backend/app/pipelines: refresh_one, daily_batch, backfill(CLI), scheduler
- backend/app/api/refresh.py: POST /api/refresh/{code}?lookback_days=N
- backend/app/main.py: lifespan 으로 스케줄러 기동/종료, /health/keys 추가
- README: .env 복사 안내 보강
스모크 테스트 (실제 키 사용) 결과:
KIS token : ok (token 346자 발급)
KIS daily : 005930 11rows
DART list : 005930 30일 10건
Naver news : 005930 12건
Google RSS : "삼성전자" 92건
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
147
backend/app/pipelines/refresh_one.py
Normal file
147
backend/app/pipelines/refresh_one.py
Normal file
@@ -0,0 +1,147 @@
|
||||
"""한 종목에 대해 모든 소스를 갱신 + 구조화된 status 리턴.
|
||||
|
||||
POST /api/refresh/{code} 와 daily_batch 둘 다 이 함수를 호출.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import date, timedelta
|
||||
from typing import Any
|
||||
|
||||
from app.fetch import dart as dart_mod
|
||||
from app.fetch import kis as kis_mod
|
||||
from app.fetch import news as news_mod
|
||||
from app.fetch import pykrx_helper
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class SourceStatus:
    """Outcome of refreshing one data source for one symbol.

    Only ``status`` is required. The counters default to zero, ``extra``
    defaults to a fresh empty dict per instance, and ``error`` to ``None``.
    """

    # Outcome label: 'ok' / 'skipped_missing_key' / 'failed'
    status: str
    # Row counters reported by the source's fetch / upsert step.
    inserted: int = 0
    updated: int = 0
    skipped: int = 0
    # Free-form, source-specific details (e.g. number of fetched items).
    extra: dict[str, Any] = field(default_factory=dict)
    # Error message, if any.
    error: str | None = None
|
||||
|
||||
|
||||
@dataclass
class RefreshReport:
    """Per-symbol refresh result holding one ``SourceStatus`` per data source."""

    code: str
    pykrx_ohlcv: SourceStatus
    pykrx_trading_value: SourceStatus
    kis_daily: SourceStatus
    dart: SourceStatus
    naver_news: SourceStatus
    google_rss: SourceStatus

    def to_dict(self) -> dict[str, Any]:
        """Return a plain dict: ``code`` plus every source status run through ``asdict``.

        Keys appear in a fixed order: ``code`` first, then the sources in
        declaration order.
        """
        source_names = (
            "pykrx_ohlcv",
            "pykrx_trading_value",
            "kis_daily",
            "dart",
            "naver_news",
            "google_rss",
        )
        return {
            "code": self.code,
            **{name: asdict(getattr(self, name)) for name in source_names},
        }
|
||||
|
||||
|
||||
def _pykrx_ohlcv(code: str, start: date, end: date) -> SourceStatus:
    """Refresh daily OHLCV for ``code`` via pykrx and summarize the outcome.

    Args:
        code: Symbol code forwarded to ``pykrx_helper.fetch_ohlcv_daily``.
        start: Start of the date window forwarded to the helper.
        end: End of the date window forwarded to the helper.

    Returns:
        A ``SourceStatus`` copied from the helper's result (status, inserted,
        updated, error). If anything raises, the exception is logged with its
        traceback and a ``status="failed"`` entry is returned instead.
    """
    try:
        res = pykrx_helper.fetch_ohlcv_daily(code, start, end)
        return SourceStatus(
            status=res.status(),
            inserted=res.inserted,
            updated=res.updated,
            error=res.error,
        )
    except Exception as exc:  # noqa: BLE001 - one failing source must not abort the refresh
        # Keep the traceback in the logs; the report only carries a short message.
        logger.exception("pykrx OHLCV refresh failed for %s", code)
        # Some exceptions stringify to ""; fall back to repr so `error` is never blank.
        return SourceStatus(status="failed", error=str(exc) or repr(exc))
|
||||
|
||||
|
||||
def _pykrx_trading(code: str, start: date, end: date) -> SourceStatus:
    """Refresh trading-value data for ``code`` via pykrx and summarize the outcome.

    Args:
        code: Symbol code forwarded to ``pykrx_helper.fetch_trading_value``.
        start: Start of the date window forwarded to the helper.
        end: End of the date window forwarded to the helper.

    Returns:
        A ``SourceStatus`` copied from the helper's result (status, inserted,
        updated, error). If anything raises, the exception is logged with its
        traceback and a ``status="failed"`` entry is returned instead.
    """
    try:
        res = pykrx_helper.fetch_trading_value(code, start, end)
        return SourceStatus(
            status=res.status(),
            inserted=res.inserted,
            updated=res.updated,
            error=res.error,
        )
    except Exception as exc:  # noqa: BLE001 - one failing source must not abort the refresh
        # Keep the traceback in the logs; the report only carries a short message.
        logger.exception("pykrx trading-value refresh failed for %s", code)
        # Some exceptions stringify to ""; fall back to repr so `error` is never blank.
        return SourceStatus(status="failed", error=str(exc) or repr(exc))
|
||||
|
||||
|
||||
def _kis(code: str, start: date, end: date) -> SourceStatus:
    """KIS read-only EOD check.

    Nothing is written to the DB here: this is a sanity call that only
    reports how many sample rows came back. The data overlaps with pykrx,
    so KIS serves as a backup / real-time source while pykrx remains the
    primary source for daily loading.

    Args:
        code: Symbol code forwarded to ``kis_mod.fetch_daily_price``.
        start: Start of the date window forwarded to the fetcher.
        end: End of the date window forwarded to the fetcher.

    Returns:
        ``status="ok"`` with ``extra["sample_rows"]`` on success,
        ``status="skipped_missing_key"`` when the fetcher raises
        ``SkippedMissingKey``, or ``status="failed"`` (after logging the
        traceback) for any other exception.
    """
    try:
        rows = kis_mod.fetch_daily_price(code, start, end)
        return SourceStatus(status="ok", extra={"sample_rows": len(rows)})
    except kis_mod.SkippedMissingKey:
        # A missing key is an expected configuration state, not a failure.
        return SourceStatus(status="skipped_missing_key")
    except Exception as exc:  # noqa: BLE001 - one failing source must not abort the refresh
        # Keep the traceback in the logs; the report only carries a short message.
        logger.exception("KIS daily price check failed for %s", code)
        # Some exceptions stringify to ""; fall back to repr so `error` is never blank.
        return SourceStatus(status="failed", error=str(exc) or repr(exc))
|
||||
|
||||
|
||||
def _dart(code: str, start: date, end: date) -> SourceStatus:
    """Fetch DART disclosures for ``code`` and upsert them as news items.

    Args:
        code: Symbol code forwarded to ``dart_mod.fetch_disclosures``.
        start: Start of the date window forwarded to the fetcher.
        end: End of the date window forwarded to the fetcher.

    Returns:
        ``status="ok"`` with the inserted / skipped counts from
        ``news_mod.upsert_news`` and ``extra["fetched"]`` on success,
        ``status="skipped_missing_key"`` when the fetcher raises
        ``SkippedMissingKey``, or ``status="failed"`` (after logging the
        traceback) for any other exception.
    """
    try:
        items = dart_mod.fetch_disclosures(code, start, end)
        # Disclosures are upserted into the news table with source="dart".
        news_items = [
            news_mod.NewsItem(
                code=d.code,
                source="dart",
                title=d.title,
                url=d.url,
                published_at=d.published_at,
            )
            for d in items
        ]
        ins, skip = news_mod.upsert_news(news_items)
        return SourceStatus(status="ok", inserted=ins, skipped=skip, extra={"fetched": len(items)})
    except dart_mod.SkippedMissingKey:
        # A missing key is an expected configuration state, not a failure.
        return SourceStatus(status="skipped_missing_key")
    except Exception as exc:  # noqa: BLE001 - one failing source must not abort the refresh
        # Keep the traceback in the logs; the report only carries a short message.
        logger.exception("DART disclosure refresh failed for %s", code)
        # Some exceptions stringify to ""; fall back to repr so `error` is never blank.
        return SourceStatus(status="failed", error=str(exc) or repr(exc))
|
||||
|
||||
|
||||
def _naver_news(code: str) -> SourceStatus:
    """Fetch one page of Naver Finance news for ``code`` and upsert it.

    Args:
        code: Symbol code forwarded to ``news_mod.fetch_naver_finance_news``.

    Returns:
        ``status="ok"`` with the inserted / skipped counts from
        ``news_mod.upsert_news`` and ``extra["fetched"]`` on success, or
        ``status="failed"`` (after logging the traceback) if anything raises.
    """
    try:
        items = news_mod.fetch_naver_finance_news(code, max_pages=1)
        ins, skip = news_mod.upsert_news(items)
        return SourceStatus(status="ok", inserted=ins, skipped=skip, extra={"fetched": len(items)})
    except Exception as exc:  # noqa: BLE001 - one failing source must not abort the refresh
        # Keep the traceback in the logs; the report only carries a short message.
        logger.exception("Naver news refresh failed for %s", code)
        # Some exceptions stringify to ""; fall back to repr so `error` is never blank.
        return SourceStatus(status="failed", error=str(exc) or repr(exc))
|
||||
|
||||
|
||||
def _google_rss(code: str, name: str) -> SourceStatus:
    """Fetch Google News RSS items for a symbol and upsert them.

    Args:
        code: Symbol code; passed to the fetcher as ``code=`` and used as the
            search query when ``name`` is empty.
        name: Search query to use when non-empty.

    Returns:
        ``status="ok"`` with the inserted / skipped counts from
        ``news_mod.upsert_news`` and ``extra["fetched"]`` on success, or
        ``status="failed"`` (after logging the traceback) if anything raises.
    """
    try:
        # Prefer the name as the search term; fall back to the bare code.
        query = name or code
        items = news_mod.fetch_google_news_rss(query, code=code)
        ins, skip = news_mod.upsert_news(items)
        return SourceStatus(status="ok", inserted=ins, skipped=skip, extra={"fetched": len(items)})
    except Exception as exc:  # noqa: BLE001 - one failing source must not abort the refresh
        # Keep the traceback in the logs; the report only carries a short message.
        logger.exception("Google News RSS refresh failed for %s", code)
        # Some exceptions stringify to ""; fall back to repr so `error` is never blank.
        return SourceStatus(status="failed", error=str(exc) or repr(exc))
|
||||
|
||||
|
||||
def refresh_code(code: str, name: str, *, lookback_days: int = 7) -> RefreshReport:
    """Run a short-window refresh of every source for one symbol.

    Intended for daily_batch: only the most recent ``lookback_days`` are
    fetched. The sources run one after another in the order pykrx OHLCV,
    pykrx trading value, KIS, DART, Naver news, Google RSS.

    Args:
        code: Symbol code to refresh.
        name: Symbol name, used as the Google News search query.
        lookback_days: Size of the window in days, counted back from today
            (KST).

    Returns:
        A ``RefreshReport`` holding one ``SourceStatus`` per source.
    """
    # Local import: the module header only imports ``date`` and ``timedelta``.
    from datetime import datetime, timezone

    # Anchor "today" to Korea Standard Time (fixed UTC+9, no DST) rather than
    # the host's local zone, so a server running in UTC does not end the
    # window one day early during the Korean morning.
    kst = timezone(timedelta(hours=9))
    end = datetime.now(kst).date()
    start = end - timedelta(days=lookback_days)
    return RefreshReport(
        code=code,
        pykrx_ohlcv=_pykrx_ohlcv(code, start, end),
        pykrx_trading_value=_pykrx_trading(code, start, end),
        kis_daily=_kis(code, start, end),
        dart=_dart(code, start, end),
        naver_news=_naver_news(code),
        google_rss=_google_rss(code, name),
    )
|
||||
Reference in New Issue
Block a user