- backend/app/nlp/finbert.py: snunlp/KR-FinBert-SC 어댑터.
  - score = P(pos) - P(neg) ∈ [-1, +1], label = argmax (neg/neu/pos)
  - 768d mean-pooled last hidden state → news.embedding (VECTOR) 저장
  - settings.huggingface_token 인증, lazy singleton, cuda/cpu auto
- backend/app/nlp/score_news.py: news 테이블에서 sentiment_score IS NULL 행을 배치 스코어 → UPDATE (... embedding=(:e)::vector). 종목 필터 + limit 옵션.
- backend/app/db/migrations/002_sentiment_view.sql: v_sentiment_daily 뷰. 종목·KST 일별 n_articles, mean_score, pos/neg/neu_ratio, weighted_score (naver_finance 1.0 / google_rss 0.7 / dart 0.5).
- backend/app/db/migrate.py: 이미 실행 중인 DB 에 새 SQL 마이그레이션 적용용 CLI. 모든 SQL 파일은 idempotent.
- refresh_one.py: refresh 끝에 종목당 200건까지 finbert 스코어, finbert SourceStatus 를 RefreshReport 에 추가.
- daily_batch.py: 모든 종목 처리 후 score_pending_news(limit=2000) 로 mop-up.

모델 캐시는 docker-compose hf_cache 볼륨(/root/.cache/huggingface).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
63 lines
2.0 KiB
Python
63 lines
2.0 KiB
Python
"""일별 배치: 16:00 KST 에 시드 10종목 + 거시 + 뉴스 + DART 갱신.
|
|
|
|
수동 실행:
|
|
python -m app.pipelines.daily_batch
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import time
|
|
from typing import Any
|
|
|
|
from app.fetch import macro as macro_mod
|
|
from app.nlp.score_news import score_pending_news
|
|
from app.pipelines.refresh_one import refresh_code
|
|
from app.seed.seed_tickers import SEED_TICKERS
|
|
|
|
# Module-level logger; it is named after this module's ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def run_daily_batch() -> dict[str, Any]:
    """Run the daily refresh pipeline and return a summary of what happened.

    Steps, in order:

    1. Call ``refresh_code`` for every entry of ``SEED_TICKERS`` with a
       7-day lookback and collect each report's ``to_dict()``.
    2. Call ``macro_mod.fetch_macro_daily(years=1)`` and summarise each
       returned item (key, status, inserted, updated, error).
    3. Call ``score_pending_news(limit=2000)`` as a best-effort sentiment
       mop-up; a failure here is logged and recorded, never raised.

    Returns:
        A dict with the keys ``duration_seconds`` (elapsed seconds, rounded
        to 2 decimals), ``tickers`` (list of per-ticker report dicts),
        ``macro`` (list of per-series summaries) and ``sentiment_mop``
        (summary of the mop-up scoring pass).

    Raises:
        Any exception raised by ``refresh_code`` or
        ``macro_mod.fetch_macro_daily`` propagates unchanged; only the
        sentiment mop-up step is guarded.
    """
    # Monotonic clock: the reported duration must not be skewed by wall-clock
    # adjustments (NTP sync, manual changes) while the batch is running.
    started = time.monotonic()

    reports: list[dict[str, Any]] = []
    for t in SEED_TICKERS:
        logger.info("daily_batch refresh %s %s", t.code, t.name)
        rep = refresh_code(t.code, t.name, lookback_days=7)
        reports.append(rep.to_dict())

    macros = macro_mod.fetch_macro_daily(years=1)
    macro_summary = [
        {
            "key": m.key,
            "status": m.status(),
            "inserted": m.inserted,
            "updated": m.updated,
            "error": m.error,
        }
        for m in macros
    ]

    # The per-ticker refresh only scores up to 200 articles per ticker.
    # Whatever is left over (more than 200 across sources, or e.g. google_rss
    # items that are not mapped to a ticker code) is mopped up here in one go.
    sentiment_summary: dict[str, Any]
    try:
        mop = score_pending_news(limit=2000)
        sentiment_summary = {
            "status": "ok" if mop.error is None else "failed",
            "fetched": mop.fetched,
            "scored": mop.scored,
            "failed": mop.failed,
            "error": mop.error,
        }
    except Exception as exc:  # noqa: BLE001
        # Best-effort step: keep the batch result, but do not lose the
        # traceback -- the summary only carries str(exc).
        logger.exception("daily_batch sentiment mop-up failed")
        sentiment_summary = {"status": "failed", "error": str(exc)}

    elapsed = time.monotonic() - started
    return {
        "duration_seconds": round(elapsed, 2),
        "tickers": reports,
        "macro": macro_summary,
        "sentiment_mop": sentiment_summary,
    }
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual entry point: configure logging, run the batch once, and print
    # the resulting summary as pretty-printed JSON.
    log_format = "%(asctime)s %(levelname)s %(name)s: %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)

    summary = run_daily_batch()
    # default=str stringifies any value json cannot serialise natively.
    rendered = json.dumps(summary, ensure_ascii=False, indent=2, default=str)
    print(rendered)
|