add cross-sectional signal evaluation (factor rank-IC) to the backtest
The per-setup hit-rate report can't tell whether a signal predicts returns — only how a target/stop structure built on one performs. This adds a cross-sectional factor-IC pass: each week the universe is ranked by a price-only signal and graded by its rank correlation (Spearman IC) and top-minus-bottom-quintile spread against the forward 30-day return. Candidate signals (point-in-time from price; sentiment/fundamentals have no history in the replay): 12-1/6-1/3-1 month momentum, 1-month reversal, price-vs-200d SMA, proximity to the 52-week high (George/Hwang), and 126-day realized volatility (low-vol anomaly). Reuses the existing per-ticker replay loop (no new data, no second DB pass); results land in the cached backtest_report as `signal_eval` and render as a "Signal edge" table in BacktestPanel beside the calibration curve. 330 backend tests pass (10 new in test_signal_eval); frontend build clean. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -16,6 +16,8 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
from collections import defaultdict
|
||||
from collections.abc import Callable
|
||||
from datetime import datetime, timezone
|
||||
from types import SimpleNamespace
|
||||
@@ -70,6 +72,15 @@ ATR_MULTIPLIER = 1.5
|
||||
|
||||
# NOTE(review): each pair looks like a (low, high) bucket edge on a 0-100 scale;
# the 100.01 upper edge presumably lets a value of exactly 100 land in the last
# bucket — confirm against _calibration, which is not visible in this excerpt.
_CAL_BUCKETS = [(0, 20), (20, 40), (40, 60), (60, 80), (80, 100.01)]
|
||||
|
||||
# Cross-sectional signal evaluation (factor IC). Each candidate signal is a
|
||||
# point-in-time number computed from closes alone (sentiment/fundamentals have no
|
||||
# history here), sampled one as-of per ISO week, and graded by how its rank
|
||||
# correlates with the forward HORIZON-day return ACROSS the universe — i.e. does
|
||||
# ranking stocks by this signal sort tomorrow's winners from losers. This is the
|
||||
# test the per-setup hit-rate report can't do: it measures predictive power of a
|
||||
# signal, not the outcome of a target/stop structure built on top of one.
|
||||
# A week whose cross-section holds fewer (signal, forward-return) pairs than this
# is skipped by _signal_evaluation; the value is also quoted to the reader in the
# report's "signal_eval_note" text.
MIN_CROSS_SECTION = 20 # min tickers present in a week to score that week
|
||||
|
||||
|
||||
def _wrap_levels(level_dicts: list[dict]) -> list[Any]:
|
||||
return [
|
||||
@@ -270,6 +281,180 @@ def _calibration(cands: list[dict]) -> list[dict]:
|
||||
return rows
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cross-sectional signal evaluation (factor information-coefficient)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _weekly_asof_indices(records: list) -> list[int]:
    """Return, in ascending order, the position of the final bar of each ISO week.

    Bars are grouped by the ``(ISO year, ISO week)`` of their ``.date``; within a
    group the bar that appears latest in ``records`` wins. Because the grouping
    key is the calendar week rather than the bar position, two tickers with
    different histories still produce as-of bars that fall in the same weeks.

    Args:
        records: Sequence of bars, each exposing a ``.date`` with an
            ``isocalendar()`` method.
            NOTE(review): presumably already in chronological order — "last bar
            of the week" means last in list order, not latest by date.

    Returns:
        Sorted list of indices into ``records`` (empty for empty input).
    """
    # Later entries overwrite earlier ones, so each week ends up mapped to the
    # highest position seen for it.
    final_position = {
        tuple(bar.date.isocalendar()[:2]): position
        for position, bar in enumerate(records)
    }
    return sorted(final_position.values())
|
||||
|
||||
|
||||
def _signal_values(closes: list[float], highs: list[float], i: int) -> dict[str, float]:
    """Compute the price-only candidate signals available at as-of index ``i``.

    Only bars at or before ``i`` are read, so every value is point-in-time. A
    signal is omitted from the result when there is not enough history behind
    ``i`` or when its denominator is not strictly positive.

    Signals (in the order they are inserted):

    * ``mom_12_1`` / ``mom_6_1`` / ``mom_3_1`` — return from 252 / 126 / 63 bars
      ago up to 21 bars ago (the usual "skip the most recent month" momentum).
    * ``reversal_1m`` — return over the most recent 21 bars, i.e. exactly the
      stretch the momentum signals skip.
    * ``trend_200`` — close relative to the mean of the last 200 closes, minus 1.
    * ``high_52w`` — close divided by the highest high of the last 252 bars
      (George/Hwang anchoring: larger means nearer the high).
    * ``vol_6m`` — sample standard deviation of the last 126 daily simple
      returns (not annualized); returns with a non-positive base are dropped.

    Args:
        closes: Closing prices, oldest first.
        highs: High prices aligned index-for-index with ``closes``.
        i: As-of bar index.

    Returns:
        Mapping of signal name to value for every signal computable at ``i``.
    """
    signals: dict[str, float] = {}

    # The three momentum horizons share the same end point (21 bars back) and
    # differ only in where the look-back window starts.
    for label, lookback in (("mom_12_1", 252), ("mom_6_1", 126), ("mom_3_1", 63)):
        origin = i - lookback
        if origin >= 0 and closes[origin] > 0:
            signals[label] = closes[i - 21] / closes[origin] - 1.0

    month_ago = i - 21
    if month_ago >= 0 and closes[month_ago] > 0:
        signals["reversal_1m"] = closes[i] / closes[month_ago] - 1.0

    if i >= 199:
        window_mean = sum(closes[i - 199 : i + 1]) / 200.0
        if window_mean > 0:
            signals["trend_200"] = closes[i] / window_mean - 1.0

    if i >= 251:
        peak = max(highs[i - 251 : i + 1])
        if peak > 0:
            signals["high_52w"] = closes[i] / peak

    if i >= 126:
        daily: list[float] = []
        for k in range(i - 125, i + 1):
            base = closes[k - 1]
            if base > 0:
                daily.append(closes[k] / base - 1.0)
        count = len(daily)
        # A sample variance needs at least two observations.
        if count >= 2:
            avg = sum(daily) / count
            variance = sum((r - avg) ** 2 for r in daily) / (count - 1)
            signals["vol_6m"] = math.sqrt(variance)

    return signals
|
||||
|
||||
|
||||
def _accumulate_signal_series(records: list, collected: dict) -> None:
    """Append one ticker's weekly (signal value, forward return) pairs to ``collected``.

    For every weekly as-of bar the forward return is the close-to-close change
    over the next ``HORIZON`` bars. Each signal computable at that bar is stored
    under ``collected[signal_name][(iso_year, iso_week)]``. As-of bars without a
    full forward window, or with a non-positive close, are skipped. Histories
    shorter than ``HORIZON + 21`` bars contribute nothing.

    Args:
        records: One ticker's bars, each exposing ``.date``, ``.close`` and
            ``.high``.
        collected: Two-level mapping that is mutated in place. Missing keys are
            indexed directly, so it must create inner containers on demand
            (the caller passes a defaultdict of defaultdict(list)).
    """
    total = len(records)
    if total < HORIZON + 21:
        return

    closes = [float(bar.close) for bar in records]
    highs = [float(bar.high) for bar in records]

    for asof in _weekly_asof_indices(records):
        exit_idx = asof + HORIZON
        entry = closes[asof]
        if exit_idx >= total or entry <= 0:
            continue
        forward_return = closes[exit_idx] / entry - 1.0
        iso_year, iso_week = records[asof].date.isocalendar()[:2]
        bucket_key = (iso_year, iso_week)
        for signal_name, value in _signal_values(closes, highs, asof).items():
            collected[signal_name][bucket_key].append((value, forward_return))
|
||||
|
||||
|
||||
def _rank(xs: list[float]) -> list[float]:
    """Return 1-based ranks of ``xs``; tied values share the mean of their ranks.

    Args:
        xs: Values to rank.

    Returns:
        A list the same length as ``xs`` where element ``k`` is the rank of
        ``xs[k]`` (smallest value gets rank 1.0). Empty input gives ``[]``.
    """
    size = len(xs)
    by_value = sorted(range(size), key=lambda pos: xs[pos])
    result = [0.0] * size

    run_start = 0
    for cursor in range(1, size + 1):
        # Keep extending the current run while the next value equals the run's
        # first value; close the run at the end of the data or on a new value.
        if cursor < size and xs[by_value[cursor]] == xs[by_value[run_start]]:
            continue
        shared = (run_start + cursor - 1) / 2.0 + 1.0
        for member in by_value[run_start:cursor]:
            result[member] = shared
        run_start = cursor
    return result
|
||||
|
||||
|
||||
def _pearson(a: list[float], b: list[float]) -> float | None:
    """Pearson correlation coefficient of two equal-length samples.

    Args:
        a: First sample; its length determines how many pairs are used.
        b: Second sample, read index-for-index alongside ``a``.

    Returns:
        The correlation, or ``None`` when ``a`` has fewer than three points or
        when either sample has no variation (the coefficient is undefined).
    """
    count = len(a)
    if count < 3:
        return None

    mean_a = sum(a) / count
    mean_b = sum(b) / count
    spread_a = sum((value - mean_a) ** 2 for value in a)
    spread_b = sum((value - mean_b) ** 2 for value in b)
    # A constant series has zero spread, which would put zero in the denominator.
    if spread_a <= 0 or spread_b <= 0:
        return None

    co_movement = sum(
        (value - mean_a) * (b[pos] - mean_b) for pos, value in enumerate(a)
    )
    return co_movement / math.sqrt(spread_a * spread_b)
|
||||
|
||||
|
||||
def _spearman(xs: list[float], ys: list[float]) -> float | None:
    """Spearman rank correlation: the Pearson correlation of the two rank vectors.

    Args:
        xs: First sample.
        ys: Second sample, paired index-for-index with ``xs``.

    Returns:
        The rank correlation, or ``None`` when ``xs`` has fewer than three
        points or the underlying Pearson computation is undefined.
    """
    if len(xs) >= 3:
        x_ranks = _rank(xs)
        y_ranks = _rank(ys)
        return _pearson(x_ranks, y_ranks)
    return None
|
||||
|
||||
|
||||
def _quintile_spread(pairs: list[tuple[float, float]]) -> float | None:
    """Top-quintile minus bottom-quintile mean forward return.

    Args:
        pairs: ``(signal_value, forward_return)`` tuples for one cross-section.

    Returns:
        Mean forward return of the ``len(pairs) // 5`` highest-signal entries
        minus that of the same number of lowest-signal entries, or ``None`` when
        fewer than 10 pairs are supplied.
    """
    count = len(pairs)
    if count < 10:
        return None

    bucket = count // 5
    # Forward returns laid out from lowest signal to highest.
    returns_by_signal = [
        item[1] for item in sorted(pairs, key=lambda item: item[0])
    ]
    top_mean = sum(returns_by_signal[-bucket:]) / bucket
    bottom_mean = sum(returns_by_signal[:bucket]) / bucket
    return top_mean - bottom_mean
|
||||
|
||||
|
||||
def _signal_evaluation(collected: dict) -> list[dict]:
    """Summarise each candidate signal's cross-sectional predictive power.

    Every week holding at least ``MIN_CROSS_SECTION`` pairs is scored with a
    Spearman rank-IC and a top-minus-bottom quintile spread; the weekly values
    are then aggregated into one row per signal:

        signal                 signal name
        weeks                  number of weeks that produced an IC
        avg_cross_section      mean pair count over the weeks large enough to score
        mean_ic                average weekly rank-IC
        ic_t_stat              mean_ic / (std / sqrt(weeks)); None if std is 0
        ic_positive_pct        percentage of weeks with a positive IC
        mean_quintile_spread   average weekly quintile spread

    Signals with no scorable week are left out. Weekly samples overlap whenever
    the forward window is longer than a week, so the t-stat overstates
    significance; read it directionally, together with ``ic_positive_pct``.

    Args:
        collected: ``collected[signal][week_key]`` -> list of
            ``(signal_value, forward_return)`` pairs.

    Returns:
        Rows sorted by ``mean_ic`` (already rounded), highest first.
    """
    report: list[dict] = []

    for signal_name in sorted(collected):
        weekly_ics: list[float] = []
        weekly_spreads: list[float] = []
        universe_sizes: list[int] = []

        for pairs in collected[signal_name].values():
            pair_count = len(pairs)
            if pair_count < MIN_CROSS_SECTION:
                continue
            signal_side = [pair[0] for pair in pairs]
            return_side = [pair[1] for pair in pairs]
            week_ic = _spearman(signal_side, return_side)
            if week_ic is not None:
                weekly_ics.append(week_ic)
            week_spread = _quintile_spread(pairs)
            if week_spread is not None:
                weekly_spreads.append(week_spread)
            universe_sizes.append(pair_count)

        if not weekly_ics:
            continue

        n_weeks = len(weekly_ics)
        mean_ic = sum(weekly_ics) / n_weeks
        if n_weeks > 1:
            squared_dev = sum((x - mean_ic) ** 2 for x in weekly_ics)
            std = math.sqrt(squared_dev / (n_weeks - 1))
        else:
            std = 0.0

        # With zero dispersion (or a single week) the t-stat is undefined.
        if std > 0:
            t_stat = mean_ic / std * math.sqrt(n_weeks)
        else:
            t_stat = None

        positive_weeks = sum(1 for x in weekly_ics if x > 0)

        if universe_sizes:
            avg_cross_section = round(sum(universe_sizes) / len(universe_sizes), 1)
        else:
            avg_cross_section = None

        if weekly_spreads:
            mean_spread = round(sum(weekly_spreads) / len(weekly_spreads), 4)
        else:
            mean_spread = None

        report.append({
            "signal": signal_name,
            "weeks": n_weeks,
            "avg_cross_section": avg_cross_section,
            "mean_ic": round(mean_ic, 4),
            "ic_t_stat": round(t_stat, 2) if t_stat is not None else None,
            "ic_positive_pct": round(positive_weeks / n_weeks * 100, 1),
            "mean_quintile_spread": mean_spread,
        })

    report.sort(key=lambda row: row["mean_ic"], reverse=True)
    return report
|
||||
|
||||
|
||||
async def run_backtest(
|
||||
db: AsyncSession,
|
||||
progress_cb: Callable[[int, int, str], None] | None = None,
|
||||
@@ -283,12 +468,15 @@ async def run_backtest(
|
||||
total = len(tickers)
|
||||
|
||||
candidates: list[dict] = []
|
||||
# collected[signal_name][iso_week] -> list of (signal_value, forward_return)
|
||||
collected: dict = defaultdict(lambda: defaultdict(list))
|
||||
for index, ticker in enumerate(tickers):
|
||||
if progress_cb is not None:
|
||||
progress_cb(index, total, ticker.symbol)
|
||||
try:
|
||||
records = await query_ohlcv(db, ticker.symbol)
|
||||
candidates.extend(_replay_ticker(ticker.symbol, records, config, activation))
|
||||
_accumulate_signal_series(records, collected)
|
||||
except Exception:
|
||||
logger.exception("Backtest replay failed for %s", ticker.symbol)
|
||||
|
||||
@@ -327,6 +515,16 @@ async def run_backtest(
|
||||
"min_expected_value": current_min_ev,
|
||||
"sweep": sweep,
|
||||
"calibration": _calibration(candidates),
|
||||
"signal_eval": _signal_evaluation(collected),
|
||||
"signal_eval_note": (
|
||||
"Cross-sectional rank-IC of price-only signals vs the forward "
|
||||
f"{HORIZON}-day return (weekly rebalance, min {MIN_CROSS_SECTION} "
|
||||
"names/week). |IC| ≳ 0.03 with a consistent sign is a real (if small) "
|
||||
"edge; near 0 means ranking on it sorts nothing. Momentum factors and "
|
||||
"high_52w are expected positive; reversal_1m and vol_6m are expected "
|
||||
"negative (mean-reversion / low-vol anomaly). Overlapping windows inflate "
|
||||
"the t-stat — read directionally."
|
||||
),
|
||||
"note": (
|
||||
"Sentiment & fundamentals held neutral (no point-in-time history). "
|
||||
"~6 months ≈ one market regime — treat as directional, not gospel."
|
||||
|
||||
Reference in New Issue
Block a user