"""APScheduler job definitions and FastAPI lifespan integration.
|
|
|
|
Defines four scheduled jobs:
|
|
- Data Collector (OHLCV fetch for all tickers)
|
|
- Sentiment Collector (sentiment for all tickers)
|
|
- Fundamental Collector (fundamentals for all tickers)
|
|
- R:R Scanner (trade setup scan for all tickers)
|
|
|
|
Each job processes tickers independently, logs errors as structured JSON,
|
|
handles rate limits by recording the last successful ticker, and checks
|
|
SystemSetting for enabled/disabled state.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
from datetime import date, timedelta
|
|
|
|
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
|
from sqlalchemy import select
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.config import settings
|
|
from app.database import async_session_factory
|
|
from app.models.settings import SystemSetting
|
|
from app.models.ticker import Ticker
|
|
from app.providers.alpaca import AlpacaOHLCVProvider
|
|
from app.providers.fmp import FMPFundamentalProvider
|
|
from app.providers.openai_sentiment import OpenAISentimentProvider
|
|
from app.services import fundamental_service, ingestion_service, sentiment_service
|
|
from app.services.rr_scanner_service import scan_all_tickers
|
|
|
|
logger = logging.getLogger(__name__)

# Module-level scheduler instance
# Shared AsyncIOScheduler: jobs are registered by configure_scheduler()
# and the instance is started/stopped by the application lifespan.
scheduler = AsyncIOScheduler()

# Track last successful ticker per job for rate-limit resume
# Maps job name -> last symbol fully processed; None means "start from
# the top".  Process-local state only: it is reset on restart, so a
# rate-limit resume survives between runs of the same process but not
# across deployments.  rr_scanner has no entry because it does not use
# per-ticker resume.
_last_successful: dict[str, str | None] = {
    "data_collector": None,
    "sentiment_collector": None,
    "fundamental_collector": None,
}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _log_job_error(job_name: str, ticker: str, error: Exception) -> None:
    """Emit a structured-JSON error record for one failed per-ticker step."""
    payload = {
        "event": "job_error",
        "job": job_name,
        "ticker": ticker,
        "error_type": type(error).__name__,
        "message": str(error),
    }
    logger.error(json.dumps(payload))
|
|
|
|
|
|
async def _is_job_enabled(db: AsyncSession, job_name: str) -> bool:
    """Read the job's on/off flag from SystemSetting.

    A missing row means the job has never been configured, which is
    treated as enabled.  Any stored value other than the string "true"
    (case-insensitive) disables the job.
    """
    setting_key = f"job_{job_name}_enabled"
    query = select(SystemSetting).where(SystemSetting.key == setting_key)
    row = (await db.execute(query)).scalar_one_or_none()
    return True if row is None else row.value.lower() == "true"
|
|
|
|
|
|
async def _get_all_tickers(db: AsyncSession) -> list[str]:
    """Fetch every tracked ticker symbol, sorted alphabetically."""
    rows = await db.execute(select(Ticker.symbol).order_by(Ticker.symbol))
    return [symbol for symbol in rows.scalars()]
|
|
|
|
|
|
def _resume_tickers(symbols: list[str], job_name: str) -> list[str]:
    """Rotate *symbols* so processing resumes after the last successful one.

    When a prior run was cut short by a rate limit, the remembered symbol
    marks where to pick up: the rotated list begins with the ticker right
    after it and wraps around, so every symbol — including the remembered
    one — is still visited exactly once.  With no recorded progress, or a
    stale symbol that is no longer tracked, the list is returned as-is.
    """
    checkpoint = _last_successful.get(job_name)
    if checkpoint is None or checkpoint not in symbols:
        return symbols
    pivot = symbols.index(checkpoint) + 1
    return symbols[pivot:] + symbols[:pivot]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Job: Data Collector (OHLCV)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
async def collect_ohlcv() -> None:
    """Fetch latest daily OHLCV for all tracked tickers.

    Uses AlpacaOHLCVProvider. Processes each ticker independently.
    On rate limit, records last successful ticker for resume.
    """
    job_name = "data_collector"
    logger.info(json.dumps({"event": "job_start", "job": job_name}))

    # Short-lived session for the enabled check and ticker list only;
    # each ticker below gets its own session so one failure cannot
    # poison the others.
    async with async_session_factory() as db:
        if not await _is_job_enabled(db, job_name):
            logger.info(json.dumps({"event": "job_skipped", "job": job_name, "reason": "disabled"}))
            return

        symbols = await _get_all_tickers(db)
        if not symbols:
            logger.info(json.dumps({"event": "job_complete", "job": job_name, "tickers": 0}))
            return

    # Reorder for rate-limit resume
    symbols = _resume_tickers(symbols, job_name)

    # Build provider (skip if keys not configured)
    if not settings.alpaca_api_key or not settings.alpaca_api_secret:
        logger.warning(json.dumps({"event": "job_skipped", "job": job_name, "reason": "alpaca keys not configured"}))
        return

    try:
        provider = AlpacaOHLCVProvider(settings.alpaca_api_key, settings.alpaca_api_secret)
    except Exception as exc:
        # Provider construction failure aborts the whole run — without
        # credentials-backed access there is nothing to fetch.
        logger.error(json.dumps({"event": "job_error", "job": job_name, "error_type": type(exc).__name__, "message": str(exc)}))
        return

    # NOTE(review): date.today() is server-local — confirm whether UTC
    # or exchange-timezone dates are intended here.
    end_date = date.today()
    start_date = end_date - timedelta(days=5)  # Fetch last 5 days to catch up
    processed = 0

    for symbol in symbols:
        # Fresh session per ticker: an error on one symbol rolls back
        # only that symbol's work.
        async with async_session_factory() as db:
            try:
                result = await ingestion_service.fetch_and_ingest(
                    db, provider, symbol, start_date=start_date, end_date=end_date,
                )
                # Record progress BEFORE the partial check: a partial
                # (rate-limited) symbol is rotated to the end of the next
                # run's list by _resume_tickers, so it is retried last.
                _last_successful[job_name] = symbol
                processed += 1
                logger.info(json.dumps({
                    "event": "ticker_collected",
                    "job": job_name,
                    "ticker": symbol,
                    "status": result.status,
                    "records": result.records_ingested,
                }))
                if result.status == "partial":
                    # Rate limited — stop and resume next run
                    logger.warning(json.dumps({
                        "event": "rate_limited",
                        "job": job_name,
                        "ticker": symbol,
                        "processed": processed,
                    }))
                    return
            except Exception as exc:
                # Per-ticker isolation: log and keep going with the rest.
                _log_job_error(job_name, symbol, exc)

    # Reset resume pointer on full completion
    _last_successful[job_name] = None
    logger.info(json.dumps({"event": "job_complete", "job": job_name, "tickers": processed}))
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Job: Sentiment Collector
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
async def collect_sentiment() -> None:
    """Fetch sentiment for all tracked tickers via OpenAI.

    Processes each ticker independently. On rate limit, records last
    successful ticker for resume.
    """
    job_name = "sentiment_collector"
    logger.info(json.dumps({"event": "job_start", "job": job_name}))

    # One session for the enabled check and ticker list; per-ticker
    # sessions are opened in the loop below.
    async with async_session_factory() as db:
        if not await _is_job_enabled(db, job_name):
            logger.info(json.dumps({"event": "job_skipped", "job": job_name, "reason": "disabled"}))
            return

        symbols = await _get_all_tickers(db)
        if not symbols:
            logger.info(json.dumps({"event": "job_complete", "job": job_name, "tickers": 0}))
            return

    # Resume after the last successful ticker if the prior run was rate-limited.
    symbols = _resume_tickers(symbols, job_name)

    if not settings.openai_api_key:
        logger.warning(json.dumps({"event": "job_skipped", "job": job_name, "reason": "openai key not configured"}))
        return

    try:
        provider = OpenAISentimentProvider(settings.openai_api_key, settings.openai_model)
    except Exception as exc:
        # No provider, no run.
        logger.error(json.dumps({"event": "job_error", "job": job_name, "error_type": type(exc).__name__, "message": str(exc)}))
        return

    processed = 0

    for symbol in symbols:
        # Fresh session per ticker so failures are isolated.
        async with async_session_factory() as db:
            try:
                data = await provider.fetch_sentiment(symbol)
                await sentiment_service.store_sentiment(
                    db,
                    symbol=symbol,
                    classification=data.classification,
                    confidence=data.confidence,
                    source=data.source,
                    timestamp=data.timestamp,
                    reasoning=data.reasoning,
                    citations=data.citations,
                )
                _last_successful[job_name] = symbol
                processed += 1
                logger.info(json.dumps({
                    "event": "ticker_collected",
                    "job": job_name,
                    "ticker": symbol,
                    "classification": data.classification,
                    "confidence": data.confidence,
                }))
            except Exception as exc:
                # NOTE(review): rate limits are inferred from the error
                # message text — fragile; confirm whether the provider
                # exposes a typed rate-limit exception instead.
                msg = str(exc).lower()
                if "rate" in msg or "quota" in msg or "429" in msg:
                    # Stop here; _last_successful lets the next run resume.
                    logger.warning(json.dumps({
                        "event": "rate_limited",
                        "job": job_name,
                        "ticker": symbol,
                        "processed": processed,
                    }))
                    return
                _log_job_error(job_name, symbol, exc)

    # Full pass completed — clear the resume pointer.
    _last_successful[job_name] = None
    logger.info(json.dumps({"event": "job_complete", "job": job_name, "tickers": processed}))
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Job: Fundamental Collector
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
async def collect_fundamentals() -> None:
    """Fetch fundamentals for all tracked tickers via FMP.

    Processes each ticker independently. On rate limit, records last
    successful ticker for resume.
    """
    job_name = "fundamental_collector"
    logger.info(json.dumps({"event": "job_start", "job": job_name}))

    # One session for the enabled check and ticker list; per-ticker
    # sessions are opened in the loop below.
    async with async_session_factory() as db:
        if not await _is_job_enabled(db, job_name):
            logger.info(json.dumps({"event": "job_skipped", "job": job_name, "reason": "disabled"}))
            return

        symbols = await _get_all_tickers(db)
        if not symbols:
            logger.info(json.dumps({"event": "job_complete", "job": job_name, "tickers": 0}))
            return

    # Resume after the last successful ticker if the prior run was rate-limited.
    symbols = _resume_tickers(symbols, job_name)

    if not settings.fmp_api_key:
        logger.warning(json.dumps({"event": "job_skipped", "job": job_name, "reason": "fmp key not configured"}))
        return

    try:
        provider = FMPFundamentalProvider(settings.fmp_api_key)
    except Exception as exc:
        # No provider, no run.
        logger.error(json.dumps({"event": "job_error", "job": job_name, "error_type": type(exc).__name__, "message": str(exc)}))
        return

    processed = 0

    for symbol in symbols:
        # Fresh session per ticker so failures are isolated.
        async with async_session_factory() as db:
            try:
                data = await provider.fetch_fundamentals(symbol)
                await fundamental_service.store_fundamental(
                    db,
                    symbol=symbol,
                    pe_ratio=data.pe_ratio,
                    revenue_growth=data.revenue_growth,
                    earnings_surprise=data.earnings_surprise,
                    market_cap=data.market_cap,
                    unavailable_fields=data.unavailable_fields,
                )
                _last_successful[job_name] = symbol
                processed += 1
                logger.info(json.dumps({
                    "event": "ticker_collected",
                    "job": job_name,
                    "ticker": symbol,
                }))
            except Exception as exc:
                # NOTE(review): rate limits are inferred from the error
                # message text ("rate"/"429") — fragile; confirm whether
                # the provider exposes a typed rate-limit exception.
                msg = str(exc).lower()
                if "rate" in msg or "429" in msg:
                    # Stop here; _last_successful lets the next run resume.
                    logger.warning(json.dumps({
                        "event": "rate_limited",
                        "job": job_name,
                        "ticker": symbol,
                        "processed": processed,
                    }))
                    return
                _log_job_error(job_name, symbol, exc)

    # Full pass completed — clear the resume pointer.
    _last_successful[job_name] = None
    logger.info(json.dumps({"event": "job_complete", "job": job_name, "tickers": processed}))
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Job: R:R Scanner
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
async def scan_rr() -> None:
    """Scan all tickers for trade setups meeting the R:R threshold.

    Delegates to rr_scanner_service.scan_all_tickers, which already
    isolates per-ticker errors internally; this wrapper only handles
    the enabled flag and top-level structured logging.
    """
    job_name = "rr_scanner"
    logger.info(json.dumps({"event": "job_start", "job": job_name}))

    async with async_session_factory() as db:
        if not await _is_job_enabled(db, job_name):
            logger.info(json.dumps({"event": "job_skipped", "job": job_name, "reason": "disabled"}))
            return

        try:
            setups = await scan_all_tickers(
                db, rr_threshold=settings.default_rr_threshold,
            )
        except Exception as exc:
            # Whole-scan failure: log it; the next scheduled run retries.
            logger.error(json.dumps({
                "event": "job_error",
                "job": job_name,
                "error_type": type(exc).__name__,
                "message": str(exc),
            }))
        else:
            logger.info(json.dumps({
                "event": "job_complete",
                "job": job_name,
                "setups_found": len(setups),
            }))
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Frequency helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Maps a frequency name (as stored in settings) to the keyword
# arguments expected by APScheduler's "interval" trigger.  Names not
# listed here fall back to daily in _parse_frequency().
_FREQUENCY_MAP: dict[str, dict[str, int]] = {
    "hourly": {"hours": 1},
    "daily": {"hours": 24},
}
|
|
|
|
|
|
def _parse_frequency(freq: str) -> dict[str, int]:
    """Translate a frequency name into APScheduler interval kwargs.

    Matching is case-insensitive; unrecognized names fall back to daily.
    """
    normalized = freq.lower()
    if normalized in _FREQUENCY_MAP:
        return _FREQUENCY_MAP[normalized]
    return {"hours": 24}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Scheduler setup
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def configure_scheduler() -> None:
    """Register the four background jobs on the module-level scheduler.

    Safe to call repeatedly before scheduler.start(): the job table is
    cleared first, so each call leaves exactly one copy of every job.
    """
    scheduler.remove_all_jobs()

    # (job id, display name, coroutine, interval trigger kwargs).
    # The sentiment collector is minute-based from settings; the other
    # three use the hourly/daily frequency strings.
    job_specs = [
        ("data_collector", "Data Collector (OHLCV)", collect_ohlcv,
         _parse_frequency(settings.data_collector_frequency)),
        ("sentiment_collector", "Sentiment Collector", collect_sentiment,
         {"minutes": settings.sentiment_poll_interval_minutes}),
        ("fundamental_collector", "Fundamental Collector", collect_fundamentals,
         _parse_frequency(settings.fundamental_fetch_frequency)),
        ("rr_scanner", "R:R Scanner", scan_rr,
         _parse_frequency(settings.rr_scan_frequency)),
    ]

    for job_id, display_name, coroutine, interval_kwargs in job_specs:
        scheduler.add_job(
            coroutine,
            "interval",
            id=job_id,
            name=display_name,
            replace_existing=True,
            **interval_kwargs,
        )

    logger.info(
        json.dumps({
            "event": "scheduler_configured",
            "jobs": {job_id: interval_kwargs
                     for job_id, _, _, interval_kwargs in job_specs},
        })
    )
|