deepen OHLCV history + make the factor-IC pass honest about overlap/regime

Two changes so the cross-sectional signal results can actually be trusted.

(a) History depth — the binding constraint. Ingestion defaulted to 365 days, so long-lookback factors (12-month momentum, 52-week high) were only computable on a handful of weeks at the tail, and every IC reflected a single market regime.
- New `settings.ohlcv_history_days` (default 1825 ≈ 5y); new tickers backfill this far instead of 1 year.
- New manual "data_backfill" job (Admin → Jobs) re-fetches the full window for every ticker, ignoring incremental resume — run once to deepen existing 1-year histories. Idempotent (upsert); resumes after rate limits.

(b) Factor-IC honesty. The IC was averaged over weekly rebalances whose 30-day forward windows overlap, inflating the t-stat ~sqrt(6)x.
- IC now measured on NON-OVERLAPPING windows (weeks thinned to ~HORIZON apart).
- Each signal carries a `reliable` flag (>= 12 independent windows); BacktestPanel greys out and de-stars thin signals so a lucky 9-week IC of 0.3 can't masquerade as an edge.

332 backend tests pass; frontend build clean. No migration (config + job + an added JSON field on the cached backtest report).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-23 18:20:59 +02:00
parent 402025692a
commit 099846513b
9 changed files with 148 additions and 38 deletions
@@ -538,6 +538,7 @@ async def get_pipeline_readiness(db: AsyncSession) -> list[dict]:

 VALID_JOB_NAMES = {
    "data_collector",
+    "data_backfill",
    "sentiment_collector",
    "fundamental_collector",
    "rr_scanner",
@@ -552,6 +553,7 @@ VALID_JOB_NAMES = {

 JOB_LABELS = {
    "data_collector": "Data Collector (OHLCV)",
+    "data_backfill": "Data Backfill (deep history)",
    "sentiment_collector": "Sentiment Collector",
    "fundamental_collector": "Fundamental Collector",
    "rr_scanner": "R:R Scanner",
@@ -79,7 +79,8 @@ _CAL_BUCKETS = [(0, 20), (20, 40), (40, 60), (60, 80), (80, 100.01)]
 # ranking stocks by this signal sort tomorrow's winners from losers. This is the
 # test the per-setup hit-rate report can't do: it measures predictive power of a
 # signal, not the outcome of a target/stop structure built on top of one.
-MIN_CROSS_SECTION = 20   # min tickers present in a week to score that week
+MIN_CROSS_SECTION = 20    # min tickers present in a week to score that week
+MIN_RELIABLE_PERIODS = 12 # min non-overlapping windows before a signal's IC is trusted


 def _wrap_levels(level_dicts: list[dict]) -> list[Any]:
@@ -407,26 +408,53 @@ def _quintile_spread(pairs: list[tuple[float, float]]) -> float | None:
    return sum(p[1] for p in top) / k - sum(p[1] for p in bottom) / k


+def _week_ordinal(week_key: tuple[int, int]) -> int:
+    """Absolute week index for an (ISO year, ISO week) key.
+
+    Consecutive ISO weeks always differ by exactly 1, including across a year
+    boundary, so differences between two ordinals are true week gaps.
+    (``year * 53 + week`` is monotonic but jumps by 2 whenever the earlier
+    year has only 52 ISO weeks, which overstates the spacing between weeks.)
+
+    Raises ``ValueError`` if the key is not a valid ISO year/week pair.
+    """
+    from datetime import date  # function-scope import keeps the helper self-contained
+
+    year, week = week_key
+    # Proleptic ordinal 1 (0001-01-01) is a Monday, so every ISO-week Monday
+    # has ordinal == 1 (mod 7); floor-dividing by 7 yields a gap-free index.
+    return date.fromisocalendar(year, week, 1).toordinal() // 7
+
+
+def _nonoverlapping_weeks(
+    week_keys: list[tuple[int, int]], stride: int
+) -> list[tuple[int, int]]:
+    """Greedily pick weeks spaced at least ``stride`` week-ordinals apart.
+
+    Keys are visited in ascending ``_week_ordinal`` order; a week is kept only
+    when it lies ``stride`` or more past the previously kept one. Spacing the
+    rebalance weeks this way keeps their forward windows from overlapping,
+    which would otherwise inflate the IC t-stat through autocorrelation.
+    """
+    # Stable sort on the ordinal alone, so ties keep their input order.
+    ranked = sorted(
+        ((_week_ordinal(key), key) for key in week_keys),
+        key=lambda pair: pair[0],
+    )
+    selected: list[tuple[int, int]] = []
+    prev_ordinal: int | None = None
+    for ordinal, key in ranked:
+        if prev_ordinal is not None and ordinal - prev_ordinal < stride:
+            continue  # too close to the last kept week
+        selected.append(key)
+        prev_ordinal = ordinal
+    return selected
+
+
 def _signal_evaluation(collected: dict) -> list[dict]:
    """Per-signal factor diagnostics, one row per candidate signal:

-      mean_ic               average weekly rank-IC (Spearman of signal vs fwd ret)
+      mean_ic               average rank-IC (Spearman of signal vs fwd ret)
      ic_t_stat             mean_ic / stderr — is the IC reliably non-zero?
-      ic_positive_pct       share of weeks the IC is positive (consistency)
+      ic_positive_pct       share of windows the IC is positive (consistency)
      mean_quintile_spread  avg top-minus-bottom-quintile forward return
+      reliable              True once there are >= MIN_RELIABLE_PERIODS windows

-    A signal with no edge lands near IC 0 and spread 0. Caveat: weekly rebalances
-    with a HORIZON-day forward window overlap, so the t-stat overstates
-    significance — read it as directional, alongside ic_positive_pct.
+    IC is measured on NON-OVERLAPPING forward windows (weeks thinned to ~HORIZON
+    apart) so the t-stat isn't inflated by autocorrelation. A signal with no edge
+    lands near IC 0 / spread 0; one with too few independent windows is flagged
+    unreliable rather than trusted on a lucky handful.
    """
+    stride = max(1, round(HORIZON / 5))  # ISO weeks spanned by the forward window
    rows: list[dict] = []
    for name in sorted(collected):
+        weeks_map = collected[name]
+        usable = [wk for wk, recs in weeks_map.items() if len(recs) >= MIN_CROSS_SECTION]
+        kept = _nonoverlapping_weeks(usable, stride)
        ics: list[float] = []
        spreads: list[float] = []
        sizes: list[int] = []
-        for recs in collected[name].values():
-            if len(recs) < MIN_CROSS_SECTION:
-                continue
+        for wk in kept:
+            recs = weeks_map[wk]
            ic = _spearman([r[0] for r in recs], [r[1] for r in recs])
            if ic is not None:
                ics.append(ic)
@@ -450,6 +478,7 @@ def _signal_evaluation(collected: dict) -> list[dict]:
            "ic_t_stat": round(t_stat, 2) if t_stat is not None else None,
            "ic_positive_pct": round(sum(1 for x in ics if x > 0) / len(ics) * 100, 1),
            "mean_quintile_spread": round(sum(spreads) / len(spreads), 4) if spreads else None,
+            "reliable": len(ics) >= MIN_RELIABLE_PERIODS,
        })
    rows.sort(key=lambda r: r["mean_ic"], reverse=True)
    return rows
@@ -518,12 +547,13 @@ async def run_backtest(
        "signal_eval": _signal_evaluation(collected),
        "signal_eval_note": (
            "Cross-sectional rank-IC of price-only signals vs the forward "
-            f"{HORIZON}-day return (weekly rebalance, min {MIN_CROSS_SECTION} "
-            "names/week). |IC| ≳ 0.03 with a consistent sign is a real (if small) "
-            "edge; near 0 means ranking on it sorts nothing. Momentum factors and "
-            "high_52w are expected positive; reversal_1m and vol_6m are expected "
-            "negative (mean-reversion / low-vol anomaly). Overlapping windows inflate "
-            "the t-stat — read directionally."
+            f"{HORIZON}-day return (min {MIN_CROSS_SECTION} names/window). |IC| ≳ "
+            "0.03 with a consistent sign is a real (if small) edge; near 0 means "
+            "ranking on it sorts nothing. Momentum factors and high_52w are expected "
+            "positive; reversal_1m and vol_6m expected negative (mean-reversion / "
+            "low-vol anomaly). IC is measured on non-overlapping windows; signals "
+            f"with fewer than {MIN_RELIABLE_PERIODS} independent windows are flagged "
+            "unreliable (too few regimes — deepen history with the Data Backfill job)."
        ),
        "note": (
            "Sentiment & fundamentals held neutral (no point-in-time history). "
@@ -12,6 +12,7 @@ from datetime import date, timedelta
 from sqlalchemy import func, select
 from sqlalchemy.ext.asyncio import AsyncSession

+from app.config import settings
 from app.exceptions import NotFoundError, ProviderError, RateLimitError
 from app.models.ohlcv import OHLCVRecord
 from app.models.settings import IngestionProgress
@@ -92,20 +93,23 @@ async def fetch_and_ingest(
    if end_date is None:
        end_date = date.today()

-    # Resolve start_date: use progress resume or default to 1 year ago.
-    # If we have too little history, force a one-year backfill even if
-    # ingestion progress exists (upsert makes this safe and idempotent).
+    # Resolve start_date: use progress resume or backfill the configured history
+    # window. If we have too little history, force a full backfill even if
+    # ingestion progress exists (upsert makes this safe and idempotent). A caller
+    # that passes an explicit start_date (e.g. the manual deep-backfill job)
+    # bypasses this entirely.
    if start_date is None:
        progress = await _get_progress(db, ticker.id)
        bar_count = await _get_ohlcv_bar_count(db, ticker.id)
        minimum_backfill_bars = 200
+        backfill_start = end_date - timedelta(days=settings.ohlcv_history_days)

        if bar_count < minimum_backfill_bars:
-            start_date = end_date - timedelta(days=365)
+            start_date = backfill_start
        elif progress is not None:
            start_date = progress.last_ingested_date + timedelta(days=1)
        else:
-            start_date = end_date - timedelta(days=365)
+            start_date = backfill_start

    # If start > end, nothing to fetch
    if start_date > end_date: