# signal-platform/app/services/breadth_service.py

"""Market-breadth early-warning indicator (from the stored universe OHLCV).

Breadth is a genuinely *leading* construct: a few mega-caps can keep an index
rising while participation narrows underneath — the classic pre-top divergence.
We measure it from the OHLCV we already store for the whole universe, so it costs
no new data source.

Two layers:
  - breadth = % of the universe trading above its own 200-DMA (0-100).
  - divergence = an early-warning score (0-100, high = fragile): the benchmark
    price rising *while* breadth falls, plus a nudge for already-low breadth.

This module only *computes* the indicator. It is deliberately NOT wired into the
live regime index yet — the event study measures whether it actually leads before
it earns any weight.
"""

from __future__ import annotations

import logging
from datetime import date

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.models.ticker import Ticker
from app.services.price_service import query_ohlcv

logger = logging.getLogger(__name__)

Series = list[tuple[date, float]]


def _breadth_from_closes(
    closes_by_symbol: dict[str, Series], window: int = 200, min_tickers: int = 20
) -> dict[date, float]:
    """Return, per date, the share (0-100) of symbols closing above their SMA.

    Pure function (no I/O). For every symbol the bars are sorted by date and a
    simple moving average over ``window`` bars is maintained with a running
    sum, so each bar is visited once. A symbol contributes to a date only once
    it has a full window of history ending on that date.

    Args:
        closes_by_symbol: symbol -> list of ``(date, close)`` pairs; the pairs
            may arrive in any order.
        window: number of bars in the moving average.
        min_tickers: minimum number of contributing symbols a date needs to be
            kept in the result.

    Returns:
        date -> percentage of contributing symbols whose close is strictly
        above their own SMA, rounded to two decimals.
    """
    eligible: dict[date, int] = {}   # date -> symbols with a full window
    above_sma: dict[date, int] = {}  # date -> symbols closing above their SMA

    for bars in closes_by_symbol.values():
        chronological = sorted(bars, key=lambda bar: bar[0])
        days = [day for day, _ in chronological]
        prices = [price for _, price in chronological]
        if len(prices) < window:
            # Not enough history for even a single SMA value.
            continue

        window_sum = sum(prices[:window])
        for idx in range(window - 1, len(prices)):
            if idx >= window:
                # Slide the window: add the newest bar, drop the oldest one.
                window_sum += prices[idx] - prices[idx - window]
            average = window_sum / window
            day = days[idx]
            eligible[day] = eligible.get(day, 0) + 1
            if prices[idx] > average:
                above_sma[day] = above_sma.get(day, 0) + 1

    return {
        day: round(above_sma.get(day, 0) / n_names * 100.0, 2)
        for day, n_names in eligible.items()
        if n_names >= min_tickers
    }


def compute_divergence_series(
    breadth: dict[date, float], benchmark_closes: Series, lookback: int = 20
) -> dict[date, float]:
    """Score price/breadth divergence per date (0-100, higher = more fragile).

    Only dates present in both ``breadth`` and ``benchmark_closes`` are used,
    and ``lookback`` counts steps along that shared, sorted date list (not
    calendar days). For each date the score is::

        50 + 2 * (benchmark % return - breadth change in points)
           + 0.4 * (50 - current breadth)

    rounded to two decimals and clamped to [0, 100]. A rising benchmark with
    falling breadth therefore pushes the score up, and a breadth level below
    50 adds a further nudge.

    Args:
        breadth: date -> breadth percentage.
        benchmark_closes: ``(date, close)`` pairs for the benchmark.
        lookback: distance, in shared observations, between the two points
            that are compared.

    Returns:
        date -> score. Dates whose reference close ``lookback`` steps earlier
        is zero or negative are omitted.
    """
    close_on = dict(benchmark_closes)
    shared_days = sorted(close_on.keys() & breadth.keys())

    scores: dict[date, float] = {}
    for idx in range(lookback, len(shared_days)):
        today = shared_days[idx]
        past = shared_days[idx - lookback]

        base_price = close_on[past]
        if base_price <= 0:
            # A return against a non-positive base is meaningless; skip it.
            continue

        pct_return = (close_on[today] / base_price - 1.0) * 100.0  # percent
        breadth_delta = breadth[today] - breadth[past]             # percentage points
        gap = pct_return - breadth_delta  # large when price is up and breadth is down

        divergence_term = gap * 2.0
        level_term = (50.0 - breadth[today]) * 0.4
        score = 50.0 + divergence_term + level_term
        scores[today] = max(0.0, min(100.0, round(score, 2)))
    return scores


async def _load_universe_closes(db: AsyncSession) -> dict[str, Series]:
    """Load ``(date, close)`` series for every stored ticker, keyed by symbol.

    Tickers are read ordered by symbol and their OHLCV is fetched one symbol
    at a time through ``query_ohlcv``. The load is best effort: a symbol whose
    fetch raises is logged and skipped, and a symbol with no records is left
    out of the result.

    Args:
        db: session used both for the ticker query and for ``query_ohlcv``.

    Returns:
        symbol -> list of ``(date, close)`` pairs with ``close`` as a float.
    """
    universe = (await db.execute(select(Ticker).order_by(Ticker.symbol))).scalars().all()
    loaded: dict[str, Series] = {}
    for ticker in universe:
        try:
            bars = await query_ohlcv(db, ticker.symbol)
        except Exception:
            # One failing symbol must not abort the whole universe load.
            logger.exception("Breadth: OHLCV load failed for %s", ticker.symbol)
        else:
            # Conversion stays outside the guarded region, so a malformed row
            # still surfaces as an error instead of being swallowed.
            if bars:
                loaded[ticker.symbol] = [(bar.date, float(bar.close)) for bar in bars]
    return loaded


async def compute_breadth_series(
    db: AsyncSession, window: int = 200, min_tickers: int = 20
) -> dict[date, float]:
    """Compute the historical breadth series over the stored universe.

    Loads every symbol's closes through ``_load_universe_closes`` and hands
    them to ``_breadth_from_closes``.

    Args:
        db: session used to read tickers and their OHLCV.
        window: moving-average length, in bars.
        min_tickers: minimum number of contributing symbols a date needs to be
            kept.

    Returns:
        date -> breadth percentage, as produced by ``_breadth_from_closes``.
    """
    universe_closes = await _load_universe_closes(db)
    return _breadth_from_closes(
        universe_closes, window=window, min_tickers=min_tickers
    )


async def compute_breadth_today(db: AsyncSession) -> float | None:
    """Return the most recent breadth reading, or ``None`` if there is none.

    Computes the full series with default parameters via
    ``compute_breadth_series`` and picks the value at the latest date.
    """
    history = await compute_breadth_series(db)
    return history[max(history)] if history else None