# signal-platform/app/scheduler.py
"""APScheduler job definitions and FastAPI lifespan integration.
Defines four scheduled jobs:
- Data Collector (OHLCV fetch for all tickers)
- Sentiment Collector (sentiment for all tickers)
- Fundamental Collector (fundamentals for all tickers)
- R:R Scanner (trade setup scan for all tickers)
Each job processes tickers independently, logs errors as structured JSON,
handles rate limits by recording the last successful ticker, and checks
SystemSetting for enabled/disabled state.
"""
from __future__ import annotations

import json
import logging
from datetime import date, timedelta

from apscheduler.schedulers.asyncio import AsyncIOScheduler
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.config import settings
from app.database import async_session_factory
from app.models.settings import SystemSetting
from app.models.ticker import Ticker
from app.providers.alpaca import AlpacaOHLCVProvider
from app.providers.fmp import FMPFundamentalProvider
from app.providers.gemini_sentiment import GeminiSentimentProvider
from app.services import fundamental_service, ingestion_service, sentiment_service
from app.services.rr_scanner_service import scan_all_tickers

logger = logging.getLogger(__name__)

# Module-level scheduler instance
scheduler = AsyncIOScheduler()
# Track last successful ticker per job for rate-limit resume
_last_successful: dict[str, str | None] = {
    "data_collector": None,
    "sentiment_collector": None,
    "fundamental_collector": None,
}


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _log_job_error(job_name: str, ticker: str, error: Exception) -> None:
    """Log a job error as structured JSON."""
    logger.error(
        json.dumps({
            "event": "job_error",
            "job": job_name,
            "ticker": ticker,
            "error_type": type(error).__name__,
            "message": str(error),
        })
    )
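
# Illustrative output of _log_job_error; the ticker and error values shown
# here are hypothetical, not taken from a real run:
#   {"event": "job_error", "job": "data_collector", "ticker": "AAPL",
#    "error_type": "TimeoutError", "message": "request timed out"}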


async def _is_job_enabled(db: AsyncSession, job_name: str) -> bool:
    """Check SystemSetting for job enabled state. Defaults to True."""
    key = f"job_{job_name}_enabled"
    result = await db.execute(
        select(SystemSetting).where(SystemSetting.key == key)
    )
    setting = result.scalar_one_or_none()
    if setting is None:
        return True
    return setting.value.lower() == "true"
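
# Example of the convention this helper reads (hypothetical row): a
# SystemSetting with key="job_data_collector_enabled" and value="false"
# disables the Data Collector; a missing row leaves the job enabled.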


async def _get_all_tickers(db: AsyncSession) -> list[str]:
    """Return all tracked ticker symbols sorted alphabetically."""
    result = await db.execute(select(Ticker.symbol).order_by(Ticker.symbol))
    return list(result.scalars().all())


def _resume_tickers(symbols: list[str], job_name: str) -> list[str]:
    """Reorder tickers to resume after the last successful one (rate-limit resume).

    If a previous run was rate-limited, start from the ticker after the last
    successful one. Otherwise return the full list.
    """
    last = _last_successful.get(job_name)
    if last is None or last not in symbols:
        return symbols
    idx = symbols.index(last)
    # Start from the next ticker, then wrap around
    return symbols[idx + 1:] + symbols[:idx + 1]
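
# Worked example (hypothetical symbols): with
#   _last_successful["data_collector"] = "MSFT"
# the call
#   _resume_tickers(["AAPL", "MSFT", "TSLA"], "data_collector")
# returns ["TSLA", "AAPL", "MSFT"]: the scan resumes at the ticker after the
# last successful one and wraps around, so every symbol is still visited once.
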
# ---------------------------------------------------------------------------
# Job: Data Collector (OHLCV)
# ---------------------------------------------------------------------------
async def collect_ohlcv() -> None:
    """Fetch latest daily OHLCV for all tracked tickers.

    Uses AlpacaOHLCVProvider. Processes each ticker independently.
    On rate limit, records last successful ticker for resume.
    """
    job_name = "data_collector"
    logger.info(json.dumps({"event": "job_start", "job": job_name}))

    async with async_session_factory() as db:
        if not await _is_job_enabled(db, job_name):
            logger.info(json.dumps({"event": "job_skipped", "job": job_name, "reason": "disabled"}))
            return
        symbols = await _get_all_tickers(db)

    if not symbols:
        logger.info(json.dumps({"event": "job_complete", "job": job_name, "tickers": 0}))
        return

    # Reorder for rate-limit resume
    symbols = _resume_tickers(symbols, job_name)

    # Build provider (skip if keys not configured)
    if not settings.alpaca_api_key or not settings.alpaca_api_secret:
        logger.warning(json.dumps({"event": "job_skipped", "job": job_name, "reason": "alpaca keys not configured"}))
        return
    try:
        provider = AlpacaOHLCVProvider(settings.alpaca_api_key, settings.alpaca_api_secret)
    except Exception as exc:
        logger.error(json.dumps({"event": "job_error", "job": job_name, "error_type": type(exc).__name__, "message": str(exc)}))
        return

    end_date = date.today()
    start_date = end_date - timedelta(days=5)  # Fetch last 5 days to catch up

    processed = 0
    for symbol in symbols:
        async with async_session_factory() as db:
            try:
                result = await ingestion_service.fetch_and_ingest(
                    db, provider, symbol, start_date=start_date, end_date=end_date,
                )
                _last_successful[job_name] = symbol
                processed += 1
                logger.info(json.dumps({
                    "event": "ticker_collected",
                    "job": job_name,
                    "ticker": symbol,
                    "status": result.status,
                    "records": result.records_ingested,
                }))
                if result.status == "partial":
                    # Rate limited: stop and resume next run
                    logger.warning(json.dumps({
                        "event": "rate_limited",
                        "job": job_name,
                        "ticker": symbol,
                        "processed": processed,
                    }))
                    return
            except Exception as exc:
                _log_job_error(job_name, symbol, exc)

    # Reset resume pointer on full completion
    _last_successful[job_name] = None
    logger.info(json.dumps({"event": "job_complete", "job": job_name, "tickers": processed}))


# ---------------------------------------------------------------------------
# Job: Sentiment Collector
# ---------------------------------------------------------------------------
async def collect_sentiment() -> None:
    """Fetch sentiment for all tracked tickers via Gemini.

    Processes each ticker independently. On rate limit, records last
    successful ticker for resume.
    """
    job_name = "sentiment_collector"
    logger.info(json.dumps({"event": "job_start", "job": job_name}))

    async with async_session_factory() as db:
        if not await _is_job_enabled(db, job_name):
            logger.info(json.dumps({"event": "job_skipped", "job": job_name, "reason": "disabled"}))
            return
        symbols = await _get_all_tickers(db)

    if not symbols:
        logger.info(json.dumps({"event": "job_complete", "job": job_name, "tickers": 0}))
        return

    symbols = _resume_tickers(symbols, job_name)

    if not settings.gemini_api_key:
        logger.warning(json.dumps({"event": "job_skipped", "job": job_name, "reason": "gemini key not configured"}))
        return
    try:
        provider = GeminiSentimentProvider(settings.gemini_api_key, settings.gemini_model)
    except Exception as exc:
        logger.error(json.dumps({"event": "job_error", "job": job_name, "error_type": type(exc).__name__, "message": str(exc)}))
        return

    processed = 0
    for symbol in symbols:
        async with async_session_factory() as db:
            try:
                data = await provider.fetch_sentiment(symbol)
                await sentiment_service.store_sentiment(
                    db,
                    symbol=symbol,
                    classification=data.classification,
                    confidence=data.confidence,
                    source=data.source,
                    timestamp=data.timestamp,
                )
                _last_successful[job_name] = symbol
                processed += 1
                logger.info(json.dumps({
                    "event": "ticker_collected",
                    "job": job_name,
                    "ticker": symbol,
                    "classification": data.classification,
                    "confidence": data.confidence,
                }))
            except Exception as exc:
                msg = str(exc).lower()
                if "rate" in msg or "quota" in msg or "429" in msg:
                    logger.warning(json.dumps({
                        "event": "rate_limited",
                        "job": job_name,
                        "ticker": symbol,
                        "processed": processed,
                    }))
                    return
                _log_job_error(job_name, symbol, exc)

    _last_successful[job_name] = None
    logger.info(json.dumps({"event": "job_complete", "job": job_name, "tickers": processed}))


# ---------------------------------------------------------------------------
# Job: Fundamental Collector
# ---------------------------------------------------------------------------
async def collect_fundamentals() -> None:
    """Fetch fundamentals for all tracked tickers via FMP.

    Processes each ticker independently. On rate limit, records last
    successful ticker for resume.
    """
    job_name = "fundamental_collector"
    logger.info(json.dumps({"event": "job_start", "job": job_name}))

    async with async_session_factory() as db:
        if not await _is_job_enabled(db, job_name):
            logger.info(json.dumps({"event": "job_skipped", "job": job_name, "reason": "disabled"}))
            return
        symbols = await _get_all_tickers(db)

    if not symbols:
        logger.info(json.dumps({"event": "job_complete", "job": job_name, "tickers": 0}))
        return

    symbols = _resume_tickers(symbols, job_name)

    if not settings.fmp_api_key:
        logger.warning(json.dumps({"event": "job_skipped", "job": job_name, "reason": "fmp key not configured"}))
        return
    try:
        provider = FMPFundamentalProvider(settings.fmp_api_key)
    except Exception as exc:
        logger.error(json.dumps({"event": "job_error", "job": job_name, "error_type": type(exc).__name__, "message": str(exc)}))
        return

    processed = 0
    for symbol in symbols:
        async with async_session_factory() as db:
            try:
                data = await provider.fetch_fundamentals(symbol)
                await fundamental_service.store_fundamental(
                    db,
                    symbol=symbol,
                    pe_ratio=data.pe_ratio,
                    revenue_growth=data.revenue_growth,
                    earnings_surprise=data.earnings_surprise,
                    market_cap=data.market_cap,
                )
                _last_successful[job_name] = symbol
                processed += 1
                logger.info(json.dumps({
                    "event": "ticker_collected",
                    "job": job_name,
                    "ticker": symbol,
                }))
            except Exception as exc:
                msg = str(exc).lower()
                if "rate" in msg or "429" in msg:
                    logger.warning(json.dumps({
                        "event": "rate_limited",
                        "job": job_name,
                        "ticker": symbol,
                        "processed": processed,
                    }))
                    return
                _log_job_error(job_name, symbol, exc)

    _last_successful[job_name] = None
    logger.info(json.dumps({"event": "job_complete", "job": job_name, "tickers": processed}))


# ---------------------------------------------------------------------------
# Job: R:R Scanner
# ---------------------------------------------------------------------------
async def scan_rr() -> None:
    """Scan all tickers for trade setups meeting the R:R threshold.

    Uses rr_scanner_service.scan_all_tickers which already handles
    per-ticker error isolation internally.
    """
    job_name = "rr_scanner"
    logger.info(json.dumps({"event": "job_start", "job": job_name}))

    async with async_session_factory() as db:
        if not await _is_job_enabled(db, job_name):
            logger.info(json.dumps({"event": "job_skipped", "job": job_name, "reason": "disabled"}))
            return
        try:
            setups = await scan_all_tickers(
                db, rr_threshold=settings.default_rr_threshold,
            )
            logger.info(json.dumps({
                "event": "job_complete",
                "job": job_name,
                "setups_found": len(setups),
            }))
        except Exception as exc:
            logger.error(json.dumps({
                "event": "job_error",
                "job": job_name,
                "error_type": type(exc).__name__,
                "message": str(exc),
            }))


# ---------------------------------------------------------------------------
# Frequency helpers
# ---------------------------------------------------------------------------
_FREQUENCY_MAP: dict[str, dict[str, int]] = {
    "hourly": {"hours": 1},
    "daily": {"hours": 24},
}


def _parse_frequency(freq: str) -> dict[str, int]:
    """Convert a frequency string to APScheduler interval kwargs.

    Unrecognized values fall back to daily ({"hours": 24}).
    """
    return _FREQUENCY_MAP.get(freq.lower(), {"hours": 24})
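
# Usage (the values follow directly from _FREQUENCY_MAP above):
#   _parse_frequency("hourly")  -> {"hours": 1}
#   _parse_frequency("HOURLY")  -> {"hours": 1}   (lookup is case-insensitive)
#   _parse_frequency("weekly")  -> {"hours": 24}  (unknown: daily fallback)
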
# ---------------------------------------------------------------------------
# Scheduler setup
# ---------------------------------------------------------------------------
def configure_scheduler() -> None:
    """Add all jobs to the scheduler with configured intervals.

    Call this once before scheduler.start(). Removes any existing jobs first
    to ensure idempotency.
    """
    scheduler.remove_all_jobs()

    # Data Collector: configurable frequency (default: hourly)
    ohlcv_interval = _parse_frequency(settings.data_collector_frequency)
    scheduler.add_job(
        collect_ohlcv,
        "interval",
        **ohlcv_interval,
        id="data_collector",
        name="Data Collector (OHLCV)",
        replace_existing=True,
    )

    # Sentiment Collector: default 30 min
    scheduler.add_job(
        collect_sentiment,
        "interval",
        minutes=settings.sentiment_poll_interval_minutes,
        id="sentiment_collector",
        name="Sentiment Collector",
        replace_existing=True,
    )

    # Fundamental Collector: configurable frequency (default: daily)
    fund_interval = _parse_frequency(settings.fundamental_fetch_frequency)
    scheduler.add_job(
        collect_fundamentals,
        "interval",
        **fund_interval,
        id="fundamental_collector",
        name="Fundamental Collector",
        replace_existing=True,
    )

    # R:R Scanner: configurable frequency (default: hourly)
    rr_interval = _parse_frequency(settings.rr_scan_frequency)
    scheduler.add_job(
        scan_rr,
        "interval",
        **rr_interval,
        id="rr_scanner",
        name="R:R Scanner",
        replace_existing=True,
    )

    logger.info(
        json.dumps({
            "event": "scheduler_configured",
            "jobs": {
                "data_collector": ohlcv_interval,
                "sentiment_collector": {"minutes": settings.sentiment_poll_interval_minutes},
                "fundamental_collector": fund_interval,
                "rr_scanner": rr_interval,
            },
        })
    )
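

# ---------------------------------------------------------------------------
# FastAPI lifespan integration
# ---------------------------------------------------------------------------
# A minimal sketch of the lifespan wiring the module docstring refers to. The
# exact shape below is an assumption, not code from this file; it shows one
# plausible way to start and stop the module-level scheduler with the app.
from contextlib import asynccontextmanager

from fastapi import FastAPI


@asynccontextmanager
async def lifespan(app: FastAPI):
    # Register all jobs, then run the scheduler for the app's lifetime.
    configure_scheduler()
    scheduler.start()
    try:
        yield
    finally:
        # Shut down cleanly so no job is left mid-flight on exit.
        scheduler.shutdown(wait=False)


# Hypothetical usage in the application entry point:
#   app = FastAPI(lifespan=lifespan)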