feat(phase-1a): external data fetchers + refresh pipeline + scheduler

10종목 시드 + pykrx OHLCV / 외인·기관 거래대금, KIS read-only EOD, OpenDART
공시, 네이버 금융 뉴스 스크레이퍼, 구글 뉴스 RSS, yfinance 거시(KOSPI/KOSDAQ/
USDKRW/US10Y) fetcher 를 추가하고 refresh_one / daily_batch / backfill /
APScheduler(16:00 KST) 파이프라인으로 묶음.

- backend/app/seed: 10종목 시드 (대형/고변동/테마/플랫폼/방어)
- backend/app/fetch: pykrx, kis, dart, news, macro, symbols_seed
- backend/app/pipelines: refresh_one, daily_batch, backfill(CLI), scheduler
- backend/app/api/refresh.py: POST /api/refresh/{code}?lookback_days=N
- backend/app/main.py: lifespan 으로 스케줄러 기동/종료, /health/keys 추가
- README: .env 복사 안내 보강

스모크 테스트 (실제 키 사용) 결과:
  KIS token  : ok (token 346자 발급)
  KIS daily  : 005930 11rows
  DART list  : 005930 30일 10건
  Naver news : 005930 12건
  Google RSS : "삼성전자" 92건

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
tkrmagid
2026-05-20 15:43:18 +09:00
parent cacddf5adf
commit 56f73a1f12
15 changed files with 1203 additions and 7 deletions

View File

@@ -0,0 +1,99 @@
"""KRX 전 종목 리스트를 symbols 테이블에 시드한다.
검색 UX 가 KRX 전체 종목명을 대상으로 동작해야 하므로 전 종목을 미리 적재한다.
10 개 SEED_TICKERS 는 is_seed=TRUE 로 마크.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass
from sqlalchemy import text
from app.db.connection import get_engine
from app.seed.seed_tickers import SEED_CODES, SEED_TICKERS
logger = logging.getLogger(__name__)
@dataclass
class SeedReport:
    """Counters returned by ``seed_symbols`` after one seeding run."""

    # Listing rows whose upsert reported a fresh insert.
    inserted: int
    # Listing rows whose upsert did not report a fresh insert.
    updated: int
    # Listing rows whose code was found in SEED_CODES.
    seed_marked: int
    # Number of listings fetched per market, keyed by market name.
    markets: dict[str, int]
def _fetch_market_listing(market: str) -> list[tuple[str, str]]:
    """Return ``(code, name)`` pairs for every ticker pykrx lists in *market*.

    Tickers for which pykrx yields no (or an empty) name are left out.

    pykrx talks to the external KRX servers; nothing is caught here, so any
    exception it raises propagates and must be handled by the caller.
    """
    # Imported lazily because pykrx is expensive to import.
    from pykrx import stock as krx

    listing: list[tuple[str, str]] = []
    for ticker in krx.get_market_ticker_list(market=market):
        ticker_name = krx.get_market_ticker_name(ticker)
        if ticker_name:
            listing.append((ticker, ticker_name))
    return listing
def seed_symbols() -> SeedReport:
    """Upsert all KOSPI and KOSDAQ listings into ``symbols`` and flag seeds.

    Listings are fetched for KOSPI first, then KOSDAQ, via
    ``_fetch_market_listing``; fetch errors are not caught here. Every
    listing is upserted inside a single ``engine.begin()`` transaction, and
    rows whose code is in ``SEED_CODES`` are written with ``is_seed`` true.
    An existing ``is_seed`` flag is never cleared by the upsert (it is OR-ed
    with the incoming value).

    Afterwards each entry of ``SEED_TICKERS`` is upserted with
    ``is_seed = TRUE`` so that a seed row exists even when the ticker is
    missing from the KRX listing (e.g. delisted). These fallback writes are
    not reflected in the returned counters.

    Returns:
        SeedReport with the inserted/updated counts for listing rows, the
        number of listing rows found in ``SEED_CODES``, and the per-market
        listing sizes.
    """
    # Both statements are loop-invariant, so they are built once up front.
    # `(xmax = 0) AS inserted` is what this function reads to tell a fresh
    # insert from an update of an existing row (PostgreSQL system column).
    upsert_listing = text(
        "\n"
        "INSERT INTO symbols (code, name, market, is_seed)\n"
        "VALUES (:code, :name, :market, :is_seed)\n"
        "ON CONFLICT (code) DO UPDATE\n"
        "SET name = EXCLUDED.name,\n"
        "market = EXCLUDED.market,\n"
        "is_seed = symbols.is_seed OR EXCLUDED.is_seed\n"
        "RETURNING (xmax = 0) AS inserted\n"
    )
    ensure_seed = text(
        "\n"
        "INSERT INTO symbols (code, name, market, is_seed)\n"
        "VALUES (:code, :name, :market, TRUE)\n"
        "ON CONFLICT (code) DO UPDATE SET is_seed = TRUE\n"
    )

    market_counts: dict[str, int] = {}
    rows: list[tuple[str, str, str]] = []  # (code, name, market)
    for market in ("KOSPI", "KOSDAQ"):
        listing = _fetch_market_listing(market)
        market_counts[market] = len(listing)
        rows.extend((code, name, market) for code, name in listing)

    inserted = 0
    updated = 0
    seed_marked = 0
    with get_engine().begin() as conn:
        for code, name, market in rows:
            is_seed = code in SEED_CODES
            params = {"code": code, "name": name, "market": market, "is_seed": is_seed}
            flag_row = conn.execute(upsert_listing, params).first()
            if flag_row and flag_row[0]:
                inserted += 1
            else:
                updated += 1
            seed_marked += int(is_seed)

        # Guarantee a seed row for every SEED_TICKERS entry, even if the
        # ticker no longer appears in the KRX listing (delisting etc.).
        for ticker in SEED_TICKERS:
            conn.execute(
                ensure_seed,
                {"code": ticker.code, "name": ticker.name, "market": ticker.market},
            )

    logger.info(
        "seed_symbols done: inserted=%d updated=%d seed_marked=%d markets=%s",
        inserted, updated, seed_marked, market_counts,
    )
    return SeedReport(
        inserted=inserted,
        updated=updated,
        seed_marked=seed_marked,
        markets=market_counts,
    )