deepen OHLCV history + make the factor-IC pass honest about overlap/regime
Deploy / lint (push) Successful in 7s
Deploy / test (push) Successful in 39s
Deploy / deploy (push) Successful in 25s

Two changes so the cross-sectional signal results can actually be trusted.

(a) History depth — the binding constraint. Ingestion defaulted to 365 days, so
long-lookback factors (12-month momentum, 52-week high) were only computable on a
handful of weeks at the tail, and every IC reflected a single market regime.
- New `settings.ohlcv_history_days` (default 1825 ≈ 5y); new tickers backfill this
  far instead of 1 year.
- New manual "data_backfill" job (Admin → Jobs) re-fetches the full window for
  every ticker, ignoring incremental resume — run once to deepen existing
  1-year histories. Idempotent (upsert); resumes after rate limits.

(b) Factor-IC honesty. The IC was averaged over weekly rebalances whose 30-day
forward windows overlap (each window spans ~6 weekly rebalances), inflating the
t-stat by roughly sqrt(6) ≈ 2.4x.
- IC now measured on NON-OVERLAPPING windows (weeks thinned to ~HORIZON apart).
- Each signal carries a `reliable` flag (>= 12 independent windows); BacktestPanel
  greys out and de-stars thin signals so a lucky 9-week IC of 0.3 can't masquerade
  as an edge.

332 backend tests pass; frontend build clean. No migration (config + job + an
added JSON field on the cached backtest report).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-23 18:20:59 +02:00
parent 402025692a
commit 099846513b
9 changed files with 148 additions and 38 deletions
+2
View File
@@ -538,6 +538,7 @@ async def get_pipeline_readiness(db: AsyncSession) -> list[dict]:
VALID_JOB_NAMES = {
"data_collector",
"data_backfill",
"sentiment_collector",
"fundamental_collector",
"rr_scanner",
@@ -552,6 +553,7 @@ VALID_JOB_NAMES = {
JOB_LABELS = {
"data_collector": "Data Collector (OHLCV)",
"data_backfill": "Data Backfill (deep history)",
"sentiment_collector": "Sentiment Collector",
"fundamental_collector": "Fundamental Collector",
"rr_scanner": "R:R Scanner",
+45 -15
View File
@@ -79,7 +79,8 @@ _CAL_BUCKETS = [(0, 20), (20, 40), (40, 60), (60, 80), (80, 100.01)]
# ranking stocks by this signal sort tomorrow's winners from losers. This is the
# test the per-setup hit-rate report can't do: it measures predictive power of a
# signal, not the outcome of a target/stop structure built on top of one.
MIN_CROSS_SECTION = 20 # min tickers present in a week to score that week
MIN_CROSS_SECTION = 20 # min tickers present in a week to score that week
MIN_RELIABLE_PERIODS = 12 # min non-overlapping windows before a signal's IC is trusted
def _wrap_levels(level_dicts: list[dict]) -> list[Any]:
@@ -407,26 +408,53 @@ def _quintile_spread(pairs: list[tuple[float, float]]) -> float | None:
return sum(p[1] for p in top) / k - sum(p[1] for p in bottom) / k
def _week_ordinal(week_key: tuple[int, int]) -> int:
"""Monotonic absolute week number from an (ISO year, ISO week) key."""
year, week = week_key
return year * 53 + week
def _nonoverlapping_weeks(
    week_keys: list[tuple[int, int]], stride: int
) -> list[tuple[int, int]]:
    """Greedily pick weeks whose ordinals are at least ``stride`` apart.

    Keys are visited in ascending ``_week_ordinal`` order; the earliest key is
    always kept, and each later key is kept only if its ordinal is at least
    ``stride`` past the most recently kept one. Spacing the rebalance weeks
    this way keeps their forward windows from overlapping, which would
    otherwise inflate the IC t-stat through autocorrelation.

    Args:
        week_keys: ``(ISO year, ISO week)`` keys, in any order.
        stride: minimum ordinal gap required between two kept weeks.

    Returns:
        The kept keys, in ascending ordinal order.
    """
    # Pair each key with its ordinal once, then order by ordinal only (stable,
    # so keys sharing an ordinal keep their input order).
    ranked = sorted(
        ((_week_ordinal(key), key) for key in week_keys),
        key=lambda pair: pair[0],
    )
    selected: list[tuple[int, int]] = []
    previous_ordinal: int | None = None
    for ordinal, key in ranked:
        # Too close to the last kept week — its forward window would overlap.
        if previous_ordinal is not None and ordinal - previous_ordinal < stride:
            continue
        selected.append(key)
        previous_ordinal = ordinal
    return selected
def _signal_evaluation(collected: dict) -> list[dict]:
"""Per-signal factor diagnostics, one row per candidate signal:
mean_ic average weekly rank-IC (Spearman of signal vs fwd ret)
mean_ic average rank-IC (Spearman of signal vs fwd ret)
ic_t_stat mean_ic / stderr — is the IC reliably non-zero?
ic_positive_pct share of weeks the IC is positive (consistency)
ic_positive_pct share of windows the IC is positive (consistency)
mean_quintile_spread avg top-minus-bottom-quintile forward return
reliable True once there are >= MIN_RELIABLE_PERIODS windows
A signal with no edge lands near IC 0 and spread 0. Caveat: weekly rebalances
with a HORIZON-day forward window overlap, so the t-stat overstates
significance — read it as directional, alongside ic_positive_pct.
IC is measured on NON-OVERLAPPING forward windows (weeks thinned to ~HORIZON
apart) so the t-stat isn't inflated by autocorrelation. A signal with no edge
lands near IC 0 / spread 0; one with too few independent windows is flagged
unreliable rather than trusted on a lucky handful.
"""
stride = max(1, round(HORIZON / 5)) # ISO weeks spanned by the forward window
rows: list[dict] = []
for name in sorted(collected):
weeks_map = collected[name]
usable = [wk for wk, recs in weeks_map.items() if len(recs) >= MIN_CROSS_SECTION]
kept = _nonoverlapping_weeks(usable, stride)
ics: list[float] = []
spreads: list[float] = []
sizes: list[int] = []
for recs in collected[name].values():
if len(recs) < MIN_CROSS_SECTION:
continue
for wk in kept:
recs = weeks_map[wk]
ic = _spearman([r[0] for r in recs], [r[1] for r in recs])
if ic is not None:
ics.append(ic)
@@ -450,6 +478,7 @@ def _signal_evaluation(collected: dict) -> list[dict]:
"ic_t_stat": round(t_stat, 2) if t_stat is not None else None,
"ic_positive_pct": round(sum(1 for x in ics if x > 0) / len(ics) * 100, 1),
"mean_quintile_spread": round(sum(spreads) / len(spreads), 4) if spreads else None,
"reliable": len(ics) >= MIN_RELIABLE_PERIODS,
})
rows.sort(key=lambda r: r["mean_ic"], reverse=True)
return rows
@@ -518,12 +547,13 @@ async def run_backtest(
"signal_eval": _signal_evaluation(collected),
"signal_eval_note": (
"Cross-sectional rank-IC of price-only signals vs the forward "
f"{HORIZON}-day return (weekly rebalance, min {MIN_CROSS_SECTION} "
"names/week). |IC| ≳ 0.03 with a consistent sign is a real (if small) "
"edge; near 0 means ranking on it sorts nothing. Momentum factors and "
"high_52w are expected positive; reversal_1m and vol_6m are expected "
"negative (mean-reversion / low-vol anomaly). Overlapping windows inflate "
"the t-stat — read directionally."
f"{HORIZON}-day return (min {MIN_CROSS_SECTION} names/window). |IC| ≳ "
"0.03 with a consistent sign is a real (if small) edge; near 0 means "
"ranking on it sorts nothing. Momentum factors and high_52w are expected "
"positive; reversal_1m and vol_6m expected negative (mean-reversion / "
"low-vol anomaly). IC is measured on non-overlapping windows; signals "
f"with fewer than {MIN_RELIABLE_PERIODS} independent windows are flagged "
"unreliable (too few regimes — deepen history with the Data Backfill job)."
),
"note": (
"Sentiment & fundamentals held neutral (no point-in-time history). "
+9 -5
View File
@@ -12,6 +12,7 @@ from datetime import date, timedelta
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import settings
from app.exceptions import NotFoundError, ProviderError, RateLimitError
from app.models.ohlcv import OHLCVRecord
from app.models.settings import IngestionProgress
@@ -92,20 +93,23 @@ async def fetch_and_ingest(
if end_date is None:
end_date = date.today()
# Resolve start_date: use progress resume or default to 1 year ago.
# If we have too little history, force a one-year backfill even if
# ingestion progress exists (upsert makes this safe and idempotent).
# Resolve start_date: use progress resume or backfill the configured history
# window. If we have too little history, force a full backfill even if
# ingestion progress exists (upsert makes this safe and idempotent). A caller
# that passes an explicit start_date (e.g. the manual deep-backfill job)
# bypasses this entirely.
if start_date is None:
progress = await _get_progress(db, ticker.id)
bar_count = await _get_ohlcv_bar_count(db, ticker.id)
minimum_backfill_bars = 200
backfill_start = end_date - timedelta(days=settings.ohlcv_history_days)
if bar_count < minimum_backfill_bars:
start_date = end_date - timedelta(days=365)
start_date = backfill_start
elif progress is not None:
start_date = progress.last_ingested_date + timedelta(days=1)
else:
start_date = end_date - timedelta(days=365)
start_date = backfill_start
# If start > end, nothing to fetch
if start_date > end_date: