feat: portfolio simulation + per-trade stats (gaps, hold time, best/worst)
Deploy / lint (push) Successful in 6s
Deploy / test (push) Successful in 55s
Deploy / deploy (push) Successful in 38s

Per-trade additions to the report:
- Gap-through-stop fills: stops now fill at the worse of the stop or the
  bar's open across every exit model (target, TP, trailing, time), so a
  loss can exceed -1R; targets never fill better than their level.
- best_r / worst_r, avg holding days, and net R per day of capital
  deployed on the summary buckets and the time-exit sweep.

Portfolio simulation (the stats a per-setup replay cannot give):
- One capital-constrained book over the qualified setups: 10k start, max
  10 concurrent positions (one per ticker, best momentum first), 1%
  fixed-fractional risk with a 20% no-leverage notional cap, entries at
  the detection close, 0.1%/side costs, daily mark-to-market.
- Two exit policies compared: S/R target race vs hold-to-horizon.
- Equity-curve stats: final equity, total return, CAGR, max drawdown,
  annualized daily Sharpe, win rate, avg P&L, best/worst trade, avg
  hold, entries skipped on a full book, and SPY price return over the
  same window (benchmark history refreshed to cover the replay span).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-07-02 11:56:29 +02:00
parent 942a22ce65
commit 0f43e755f4
4 changed files with 634 additions and 33 deletions
+356 -22
View File
@@ -216,12 +216,27 @@ def _window_setups(
return out
def _stop_fill_r(direction: str, entry: float, stop: float, bar) -> float:
"""Realized R when the stop is hit on ``bar``: filled at the stop, or at the
bar's open when price gapped through it — so a gap can lose more than 1R,
matching real fills. Targets are never filled better than their level, so
gap modeling only ever makes results more conservative."""
risk = abs(entry - stop)
if risk <= 0 or entry <= 0:
return -1.0
if direction == "long":
fill = min(stop, bar.open)
return (fill - entry) / risk
fill = max(stop, bar.open)
return (entry - fill) / risk
def _tp_primitives(
direction: str, entry: float, stop: float, forward: list, horizon: int
) -> tuple[float, bool, float, float]:
) -> tuple[float, bool, float, float, int | None, float]:
"""Primitives for the take-profit exit model, from the bars after detection.
Returns ``(risk_pct, stopped, mfe_pct, close_pct)``:
Returns ``(risk_pct, stopped, mfe_pct, close_pct, stop_day, stop_r)``:
- ``risk_pct`` fraction from entry to stop (the 1R distance)
- ``stopped`` whether the stop was hit within the horizon
- ``mfe_pct`` best favourable excursion (fraction) reachable *before* the
@@ -229,27 +244,34 @@ def _tp_primitives(
counts as a loss (matching the conservative target model);
over the whole horizon if the stop is never hit
- ``close_pct`` directional return at the horizon-end close (the timeout exit)
- ``stop_day`` 1-based trading day the stop was pierced, None if never
- ``stop_r`` realized R at the stop fill (≤ -1; below -1 when the bar gapped
through the stop — see _stop_fill_r); -1.0 when unused
From these any fixed take-profit level can be scored without re-walking bars:
tp reached before stop (``mfe_pct >= tp``) → +tp; else stop → 1R; else the
horizon-close move.
tp reached before stop (``mfe_pct >= tp``) → +tp; else stop → ``stop_r``;
else the horizon-close move.
"""
long = direction == "long"
risk_pct = abs(entry - stop) / entry if entry else 0.0
bars = forward[:horizon]
if not bars:
return risk_pct, False, 0.0, 0.0
return risk_pct, False, 0.0, 0.0, None, -1.0
mfe = 0.0
stopped = False
for r in bars:
stop_day: int | None = None
stop_r = -1.0
for i, r in enumerate(bars):
if (r.low <= stop) if long else (r.high >= stop):
stopped = True
stop_day = i + 1
stop_r = _stop_fill_r(direction, entry, stop, r)
break
fav = (r.high - entry) / entry if long else (entry - r.low) / entry
if fav > mfe:
mfe = fav
close_pct = ((bars[-1].close - entry) / entry) * (1.0 if long else -1.0)
return risk_pct, stopped, mfe, close_pct
return risk_pct, stopped, mfe, close_pct, stop_day, stop_r
def _trailing_exits(
@@ -281,12 +303,14 @@ def _trailing_exits(
if long:
stop_level = max(init_stop, peak * (1 - f))
if r.low <= stop_level:
result[round(f * 100)] = ((stop_level - entry) / entry) / risk
fill = min(stop_level, r.open) # gap through fills at the open
result[round(f * 100)] = ((fill - entry) / entry) / risk
continue
else:
stop_level = min(init_stop, peak * (1 + f))
if r.high >= stop_level:
result[round(f * 100)] = ((entry - stop_level) / entry) / risk
fill = max(stop_level, r.open)
result[round(f * 100)] = ((entry - fill) / entry) / risk
continue
remaining.append(f)
active = remaining
@@ -325,10 +349,12 @@ def _time_exits(
return {int(n): 0.0 for n in horizons}
stop_day: int | None = None # 1-based trading day the stop was pierced
stop_r = -1.0
closes: list[float] = []
for i, r in enumerate(bars):
if (r.low <= stop) if long else (r.high >= stop):
stop_day = i + 1
stop_r = _stop_fill_r(direction, entry, stop, r)
break
closes.append(r.close)
@@ -336,7 +362,7 @@ def _time_exits(
for h in horizons:
n = int(h)
if stop_day is not None and stop_day <= n:
result[n] = -1.0
result[n] = stop_r
else:
# closes can't be empty here: an empty closes means the stop hit on
# day 1, which the branch above catches for every n >= 1.
@@ -359,21 +385,29 @@ def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) -
forward_bars = [Bar(date=r.date, high=r.high, low=r.low) for r in forward]
for s in _window_setups(window, config, activation):
outcome, _ = evaluate_setup_against_bars(
outcome, outcome_date = evaluate_setup_against_bars(
s["direction"], s["stop"], s["target"], forward_bars, HORIZON
)
if outcome is None:
continue
# Trading days from detection to resolution (expired = full horizon).
hold_days = next(
(idx + 1 for idx, r in enumerate(forward[:HORIZON]) if r.date == outcome_date),
min(HORIZON, len(forward)),
)
target_hit = outcome == OUTCOME_TARGET_HIT
if outcome == OUTCOME_TARGET_HIT:
realized_r = s["rr"]
elif outcome in (OUTCOME_STOP_HIT, OUTCOME_AMBIGUOUS):
realized_r = -1.0
# Fill at the stop, or at the open when the bar gapped through it.
realized_r = _stop_fill_r(
s["direction"], s["entry"], s["stop"], forward[hold_days - 1]
)
else: # expired
realized_r = 0.0
# Take-profit exit primitives (parallel to the target-vs-stop outcome
# above; aggregated separately into the take-profit sweep).
risk_pct, tp_stopped, mfe_pct, tp_close_pct = _tp_primitives(
risk_pct, tp_stopped, mfe_pct, tp_close_pct, stop_day, tp_stop_r = _tp_primitives(
s["direction"], s["entry"], s["stop"], forward, HORIZON
)
trail_r = _trailing_exits(
@@ -388,6 +422,9 @@ def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) -
"date": records[i].date.isoformat(),
"iso_week": (iso[0], iso[1]),
"direction": s["direction"],
"entry": s["entry"],
"stop": s["stop"],
"target": s["target"],
"rr": s["rr"],
"confidence": s["confidence"],
"primary_prob": s["primary_prob"],
@@ -401,8 +438,11 @@ def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) -
"outcome": outcome,
"target_hit": target_hit,
"realized_r": realized_r,
"hold_days": hold_days,
"stop_day": stop_day,
"risk_pct": risk_pct,
"tp_stopped": tp_stopped,
"tp_stop_r": tp_stop_r,
"mfe_pct": mfe_pct,
"tp_close_pct": tp_close_pct,
"trail_r": trail_r,
@@ -418,6 +458,9 @@ def _bucket_stats(cands: list[dict]) -> dict:
decided = wins + losses
rs = [c["realized_r"] for c in cands]
net_rs = [c["realized_r"] - _cost_r(c) for c in cands]
holds = [c["hold_days"] for c in cands if c.get("hold_days")]
avg_hold = sum(holds) / len(holds) if holds else None
net_avg = sum(net_rs) / len(net_rs) if net_rs else None
return {
"total": len(cands),
"wins": wins,
@@ -426,8 +469,15 @@ def _bucket_stats(cands: list[dict]) -> dict:
"hit_rate": round(wins / decided * 100, 1) if decided else None,
"avg_r": round(sum(rs) / len(rs), 3) if rs else None,
"total_r": round(sum(rs), 2) if rs else None,
"net_avg_r": round(sum(net_rs) / len(net_rs), 3) if net_rs else None,
"net_avg_r": round(net_avg, 3) if net_avg is not None else None,
"net_total_r": round(sum(net_rs), 2) if net_rs else None,
"best_r": round(max(rs), 2) if rs else None,
"worst_r": round(min(rs), 2) if rs else None,
"avg_hold_days": round(avg_hold, 1) if avg_hold is not None else None,
# Capital efficiency: net expectancy per trading day the capital is tied up.
"net_r_per_day": (
round(net_avg / avg_hold, 4) if net_avg is not None and avg_hold else None
),
}
@@ -473,7 +523,7 @@ def _take_profit_bucket(cands: list[dict], tp: float) -> dict:
r = tp / risk
wins += 1
elif c.get("tp_stopped"):
r = -1.0
r = c.get("tp_stop_r", -1.0) # gap-aware stop fill, ≤ 1R
else:
r = (c.get("tp_close_pct", 0.0)) / risk
rs.append(r)
@@ -519,16 +569,24 @@ def _trailing_bucket(cands: list[dict], trail_pct: int) -> dict:
def _time_exit_bucket(cands: list[dict], hold_days: int) -> dict:
"""Stats for the hold-``hold_days`` exit: initial stop active, otherwise out
at the day-N close. Each candidate carries its realized R per hold length in
``time_r``; a "win" is an exit in profit (R > 0)."""
pairs = [
(c["time_r"][hold_days], _cost_r(c))
``time_r``; a "win" is an exit in profit (R > 0). The realized hold is the
full N days unless the stop cut it short (``stop_day``)."""
rows = [
(
c["time_r"][hold_days],
_cost_r(c),
min(hold_days, c.get("stop_day") or hold_days),
)
for c in cands
if c.get("time_r", {}).get(hold_days) is not None
]
total = len(pairs)
rs = [r for r, _ in pairs]
net_rs = [r - cost for r, cost in pairs]
total = len(rows)
rs = [r for r, _, _ in rows]
net_rs = [r - cost for r, cost, _ in rows]
holds = [h for _, _, h in rows]
wins = sum(1 for r in rs if r > 0)
avg_hold = sum(holds) / total if total else None
net_avg = sum(net_rs) / total if total else None
return {
"hold_days": hold_days,
"total": total,
@@ -536,8 +594,14 @@ def _time_exit_bucket(cands: list[dict], hold_days: int) -> dict:
"win_rate": round(wins / total * 100, 1) if total else None,
"avg_r": round(sum(rs) / total, 3) if total else None,
"total_r": round(sum(rs), 2) if total else None,
"net_avg_r": round(sum(net_rs) / total, 3) if total else None,
"net_avg_r": round(net_avg, 3) if net_avg is not None else None,
"net_total_r": round(sum(net_rs), 2) if total else None,
"best_r": round(max(rs), 2) if rs else None,
"worst_r": round(min(rs), 2) if rs else None,
"avg_hold_days": round(avg_hold, 1) if avg_hold is not None else None,
"net_r_per_day": (
round(net_avg / avg_hold, 4) if net_avg is not None and avg_hold else None
),
}
@@ -934,6 +998,214 @@ def _gate_ablation(candidates: list[dict], activation: dict, threshold: float) -
return rows
# ---------------------------------------------------------------------------
# Portfolio simulation
# ---------------------------------------------------------------------------
# Book parameters: fixed starting capital, a capped number of concurrent
# positions (one per ticker), fixed-fractional risk sizing with a no-leverage
# notional cap, and the same per-side cost as the per-trade tables. Entries are
# the QUALIFIED setups at their detection close, best momentum first while
# slots and cash allow.
SIM_STARTING_CAPITAL = 10_000.0
SIM_MAX_POSITIONS = 10
SIM_RISK_PER_TRADE = 0.01 # fraction of equity risked per position (entry→stop)
SIM_NOTIONAL_CAP = 0.20 # max fraction of equity per position (no margin)
def _simulate_portfolio(
    candidates: list[dict],
    prices: dict[str, tuple],
    spy_closes: dict | None,
    exit_policy: str,
    hold_days: int,
) -> dict | None:
    """Replay the qualified long setups as ONE capital-constrained book.

    Reports portfolio economics from the daily equity curve (return, CAGR,
    drawdown, Sharpe) — the numbers the per-setup tables cannot give, because
    they grade every setup as if capital were infinite.

    Only candidates that are ``qualified``, have ``direction == "long"`` and
    carry a truthy ``entry`` and ``stop`` are traded. Candidates whose symbol
    has no entry in ``prices`` are skipped: such a position could never be
    marked or exited and would only pin a slot and its cash until the end.

    Args:
        candidates: replay candidates; the keys read here are ``qualified``,
            ``direction``, ``entry``, ``stop``, ``target``, ``date`` (ISO
            string), ``symbol`` and ``momentum_percentile``.
        prices: symbol -> column tuple, indexed positionally as
            ``(date ordinals, open, high, low, close)``.
        spy_closes: benchmark closes handed to ``benchmark_return_pct``;
            a falsy value disables the benchmark figure.
        exit_policy: ``"target"`` races the S/R target against the stop with a
            timeout at ``hold_days``; any other value (the caller passes
            ``"hold"``) keeps only the initial stop and exits at the
            ``hold_days``-th close.
        hold_days: number of held bars after which a position is closed at
            that bar's close.

    Stops fill at the worse of stop or open (gaps modeled); targets fill at
    the target level; positions still open at the end are closed at their
    last mark.

    Returns:
        A dict of book statistics, or None when there is nothing to trade
        (no eligible candidate, or no price bar on/after the first entry day).
    """
    entries_by_ord: dict[int, list[dict]] = defaultdict(list)
    for c in candidates:
        if not c.get("qualified") or c.get("direction") != "long":
            continue
        if not c.get("entry") or not c.get("stop"):
            continue
        entries_by_ord[date.fromisoformat(c["date"]).toordinal()].append(c)
    if not entries_by_ord:
        return None

    # Per-symbol bar lookup: date ordinal -> index into the column arrays.
    index_of: dict[str, dict[int, int]] = {
        sym: {o: i for i, o in enumerate(cols[0])} for sym, cols in prices.items()
    }
    first_ord = min(entries_by_ord)
    # Trading calendar: every date any symbol has a bar for, from the first entry on.
    calendar = sorted({o for cols in prices.values() for o in cols[0] if o >= first_ord})
    if not calendar:
        return None

    cash = SIM_STARTING_CAPITAL
    positions: dict[str, dict] = {}
    curve: list[tuple[int, float]] = []
    trades: list[dict] = []
    skipped_full = 0

    def _bar(sym: str, o: int):
        """OHLC bar of ``sym`` on ordinal ``o``, or None when it has no bar that day."""
        idx = index_of.get(sym, {}).get(o)
        if idx is None:
            return None
        cols = prices[sym]
        return SimpleNamespace(
            open=cols[1][idx], high=cols[2][idx], low=cols[3][idx], close=cols[4][idx]
        )

    def _close_trade(sym: str, fill: float, reason: str) -> None:
        """Sell the whole position at ``fill``, charge exit costs, record the trade."""
        nonlocal cash
        pos = positions.pop(sym)
        proceeds = pos["shares"] * fill
        cost = proceeds * COST_PER_SIDE
        cash += proceeds - cost
        risk = pos["entry"] - pos["stop"]
        trades.append({
            # P&L is net of both the entry-side and the exit-side cost.
            "pnl": proceeds - pos["shares"] * pos["entry"] - cost - pos["entry_cost"],
            # R is gross of costs, measured against the entry→stop distance.
            "r": (fill - pos["entry"]) / risk if risk > 0 else 0.0,
            "hold": pos["bars_held"],
            "reason": reason,
        })

    def _marked_equity() -> float:
        """Cash plus every open position valued at its last seen close."""
        return cash + sum(p["shares"] * p["last_close"] for p in positions.values())

    for o in calendar:
        # 1) exits on today's bars (stop intraday, target intraday, time at close)
        for sym in list(positions):
            pos = positions[sym]
            bar = _bar(sym, o)
            if bar is None:
                # No bar for this symbol today: keep the position at its last mark.
                continue
            pos["bars_held"] += 1
            pos["last_close"] = bar.close
            if bar.low <= pos["stop"]:
                # Same-bar stop+target resolves as the loss (conservative, like
                # the evaluator); gap through the stop fills at the open.
                _close_trade(sym, min(pos["stop"], bar.open), "stop")
                continue
            if exit_policy == "target" and pos["target"] and bar.high >= pos["target"]:
                _close_trade(sym, pos["target"], "target")
                continue
            if pos["bars_held"] >= hold_days:
                _close_trade(sym, bar.close, "time")

        # 2) entries at today's close, best momentum first
        equity = _marked_equity()
        todays = sorted(
            entries_by_ord.get(o, ()),
            key=lambda c: c.get("momentum_percentile") or 0.0,
            reverse=True,
        )
        for c in todays:
            sym = c["symbol"]
            if sym not in index_of:
                # No price columns for this symbol: the position could never be
                # marked, stopped or timed out, so it would hold a slot and its
                # cash until the end of the run. Do not open it.
                continue
            if sym in positions:
                continue
            if len(positions) >= SIM_MAX_POSITIONS:
                skipped_full += 1
                continue
            entry, stop = float(c["entry"]), float(c["stop"])
            risk_ps = entry - stop
            if risk_ps <= 0 or entry <= 0:
                continue
            # Size is the tightest of: fixed-fractional risk, the notional cap,
            # and what the remaining cash can fund including the entry cost.
            shares = min(
                (equity * SIM_RISK_PER_TRADE) / risk_ps,
                (equity * SIM_NOTIONAL_CAP) / entry,
                max(cash, 0.0) / (entry * (1.0 + COST_PER_SIDE)),
            )
            if shares * entry < 1.0:  # can't fund a meaningful position
                continue
            entry_cost = shares * entry * COST_PER_SIDE
            cash -= shares * entry + entry_cost
            positions[sym] = {
                "shares": shares,
                "entry": entry,
                "stop": stop,
                "target": float(c["target"]) if c.get("target") else None,
                "entry_cost": entry_cost,
                "bars_held": 0,
                "last_close": entry,
            }
            # Re-mark so the next entry today is sized on equity net of this cost.
            equity = _marked_equity()

        curve.append((o, _marked_equity()))

    # Close whatever is still open at its last mark so final equity is realized.
    for sym in list(positions):
        _close_trade(sym, positions[sym]["last_close"], "open_at_end")
    final_equity = cash
    # The last curve point becomes the realized (post-exit-cost) equity.
    curve[-1] = (calendar[-1], final_equity)

    total_return_pct = (final_equity / SIM_STARTING_CAPITAL - 1.0) * 100.0
    years = (calendar[-1] - calendar[0]) / 365.25
    # CAGR is only reported for windows longer than a quarter of a year.
    cagr_pct = (
        ((final_equity / SIM_STARTING_CAPITAL) ** (1.0 / years) - 1.0) * 100.0
        if years > 0.25 and final_equity > 0
        else None
    )

    # Max drawdown: largest fractional drop from the running equity peak.
    peak = float("-inf")
    max_dd = 0.0
    for _, eq in curve:
        peak = max(peak, eq)
        if peak > 0:
            max_dd = max(max_dd, (peak - eq) / peak)

    # Sharpe from day-over-day curve returns (sample variance), scaled by sqrt(252).
    rets = [b / a - 1.0 for (_, a), (_, b) in zip(curve, curve[1:]) if a > 0]
    sharpe = None
    if len(rets) > 2:
        mean = sum(rets) / len(rets)
        var = sum((x - mean) ** 2 for x in rets) / (len(rets) - 1)
        if var > 0:
            sharpe = mean / math.sqrt(var) * math.sqrt(252)

    pnls = [t["pnl"] for t in trades]
    wins = sum(1 for p in pnls if p > 0)

    spy_pct = None
    if spy_closes:
        from app.services.benchmark_service import benchmark_return_pct

        spy_pct = benchmark_return_pct(
            spy_closes, date.fromordinal(calendar[0]), date.fromordinal(calendar[-1])
        )

    return {
        "starting_capital": SIM_STARTING_CAPITAL,
        "final_equity": round(final_equity, 2),
        "total_return_pct": round(total_return_pct, 1),
        "cagr_pct": round(cagr_pct, 1) if cagr_pct is not None else None,
        "max_drawdown_pct": round(max_dd * 100.0, 1),
        "sharpe": round(sharpe, 2) if sharpe is not None else None,
        "trades": len(trades),
        "win_rate": round(wins / len(trades) * 100.0, 1) if trades else None,
        "avg_trade_pnl": round(sum(pnls) / len(pnls), 2) if pnls else None,
        "best_trade_r": round(max(t["r"] for t in trades), 2) if trades else None,
        "worst_trade_r": round(min(t["r"] for t in trades), 2) if trades else None,
        "best_trade_pnl": round(max(pnls), 2) if pnls else None,
        "worst_trade_pnl": round(min(pnls), 2) if pnls else None,
        "avg_hold_days": (
            round(sum(t["hold"] for t in trades) / len(trades), 1) if trades else None
        ),
        "skipped_book_full": skipped_full,
        "spy_return_pct": round(spy_pct, 1) if spy_pct is not None else None,
        "start_date": date.fromordinal(calendar[0]).isoformat(),
        "end_date": date.fromordinal(calendar[-1]).isoformat(),
    }
async def run_backtest(
db: AsyncSession,
progress_cb: Callable[[int, int, str], None] | None = None,
@@ -1037,6 +1309,43 @@ async def run_backtest(
cands = [c for c in candidates if _momentum_qualifies(c, threshold)]
sweep.append({"min_momentum_percentile": threshold, **_bucket_stats(cands)})
# Portfolio simulation: re-fetch bars for just the qualified symbols (memory-
# light vs retaining every ticker's columns through the replay) and replay
# the book once per exit policy. Best-effort — the report stands without it.
hold_horizon = max(TIME_EXIT_DAYS)
sim_policies: list[dict] = []
try:
qual_symbols = sorted({c["symbol"] for c in candidates if c.get("qualified")})
price_columns: dict[str, tuple] = {}
for sym in qual_symbols:
cols = await _fetch_columns(db, sym)
if cols is not None:
price_columns[sym] = cols
spy_closes: dict | None = None
try:
from app.services.benchmark_service import (
load_benchmark_closes,
refresh_benchmark_prices,
)
oldest = min((cols[0][0] for cols in price_columns.values()), default=None)
if oldest is not None:
days_needed = (date.today() - date.fromordinal(oldest)).days + 30
await refresh_benchmark_prices(db, days=days_needed)
spy_closes = await load_benchmark_closes(db)
except Exception:
logger.exception("Benchmark load for the portfolio sim failed")
for policy in ("target", "hold"):
sim = _simulate_portfolio(
candidates, price_columns, spy_closes, policy, hold_horizon
)
if sim is not None:
sim_policies.append({"policy": policy, **sim})
except Exception:
logger.exception("Portfolio simulation failed")
return {
"generated_at": datetime.now(timezone.utc).isoformat(),
"tickers": total,
@@ -1070,6 +1379,28 @@ async def run_backtest(
"take_profit_sweep": [_take_profit_bucket(qualified, tp) for tp in TP_LEVELS],
"trailing_sweep": [_trailing_bucket(qualified, round(f * 100)) for f in TRAIL_LEVELS],
"time_exit_sweep": [_time_exit_bucket(qualified, n) for n in TIME_EXIT_DAYS],
"portfolio_sim": {
"params": {
"starting_capital": SIM_STARTING_CAPITAL,
"max_positions": SIM_MAX_POSITIONS,
"risk_per_trade_pct": round(SIM_RISK_PER_TRADE * 100, 2),
"notional_cap_pct": round(SIM_NOTIONAL_CAP * 100, 1),
"cost_per_side_pct": round(COST_PER_SIDE * 100, 3),
"hold_days": hold_horizon,
},
"policies": sim_policies,
"note": (
"One capital-constrained book over the same qualified setups the "
"tables above grade per-setup: at most "
f"{SIM_MAX_POSITIONS} concurrent positions (one per ticker), best "
"momentum first, fixed-fractional risk sizing with a no-leverage "
"cap, entries at the detection close, stops filled at the worse "
"of stop or open. 'target' races the S/R target against the stop "
"(timeout at the horizon); 'hold' keeps the initial stop and "
"exits at the horizon close. SPY return is price-only over the "
"same window. In-sample; no dividends."
),
},
"calibration": _calibration(candidates),
"signal_eval": _signal_evaluation(collected),
"signal_eval_note": (
@@ -1084,6 +1415,9 @@ async def run_backtest(
),
"note": (
"Sentiment & fundamentals held neutral (no point-in-time history). "
"Stops fill at the worse of the stop or the bar's open (gaps through "
"the stop are modeled, so a loss can exceed 1R); targets never fill "
"better than their level. "
"~6 months ≈ one market regime — treat as directional, not gospel."
),
}
@@ -6,15 +6,30 @@ import { Callout } from '../ui/Callout';
import { Disclosure } from '../ui/Disclosure';
import { Section } from '../ui/Section';
import { useToast } from '../ui/Toast';
import type { BacktestBucket } from '../../lib/types';
import type { BacktestBucket, BacktestPortfolioPolicy } from '../../lib/types';
function fmtR(v: number | null): string {
if (v === null) return '—';
function fmtR(v: number | null | undefined): string {
if (v === null || v === undefined) return '—';
return `${v > 0 ? '+' : ''}${v.toFixed(2)}R`;
}
function fmtPct(v: number | null): string {
return v === null ? '—' : `${v.toFixed(1)}%`;
}
/** Formats an amount with en-US grouping and exactly two decimals; '—' when absent. */
function fmtMoney(v: number | null | undefined): string {
  if (typeof v !== 'number') return '—';
  const twoDecimals = { minimumFractionDigits: 2, maximumFractionDigits: 2 };
  return v.toLocaleString('en-US', twoDecimals);
}
/** Formats a percentage to one decimal with an explicit '+' on gains; '—' when absent. */
function fmtSignedPct(v: number | null | undefined): string {
  if (typeof v !== 'number') return '—';
  const sign = v > 0 ? '+' : '';
  return `${sign}${v.toFixed(1)}%`;
}
/** Formats a day count to one decimal with a 'd' suffix; '—' when absent. */
function fmtDays(v: number | null | undefined): string {
  if (typeof v !== 'number') return '—';
  return `${v.toFixed(1)}d`;
}
/** Formats an R-per-day figure to three decimals with an explicit '+' on gains; '—' when absent. */
function fmtRPerDay(v: number | null | undefined): string {
  if (typeof v !== 'number') return '—';
  const sign = v > 0 ? '+' : '';
  return `${sign}${v.toFixed(3)}R`;
}
function rColor(v: number | null): string {
if (v === null) return 'text-gray-400';
if (v > 0) return 'text-emerald-400';
@@ -40,6 +55,11 @@ const ABLATION_LABELS: Record<string, string> = {
momentum_only: 'Momentum only (no floors)',
};
const POLICY_LABELS: Record<string, string> = {
target: 'S/R target exit',
hold: 'Hold to horizon',
};
// Prefer the net-of-costs number when the report carries it; older cached
// reports (pre-cost model) fall back to gross.
function netOrGross(r: { avg_r: number | null; net_avg_r?: number | null }): number | null {
@@ -91,6 +111,10 @@ function BucketRow({ label, b }: { label: string; b: BacktestBucket }) {
<td className="num px-4 py-2.5 text-right text-gray-200">{fmtPct(b.hit_rate)}</td>
<td className={`num px-4 py-2.5 text-right ${rColor(b.avg_r)}`}>{fmtR(b.avg_r)}</td>
<td className={`num px-4 py-2.5 text-right ${rColor(b.net_avg_r ?? null)}`}>{fmtR(b.net_avg_r ?? null)}</td>
<td className="num px-4 py-2.5 text-right text-emerald-400">{fmtR(b.best_r)}</td>
<td className="num px-4 py-2.5 text-right text-red-400">{fmtR(b.worst_r)}</td>
<td className="num px-4 py-2.5 text-right text-gray-400">{fmtDays(b.avg_hold_days)}</td>
<td className={`num px-4 py-2.5 text-right ${rColor(b.net_r_per_day ?? null)}`}>{fmtRPerDay(b.net_r_per_day)}</td>
</tr>
);
}
@@ -112,6 +136,7 @@ export function BacktestPanel() {
report?.time_exit_sweep && report.time_exit_sweep.length > 0
? Math.max(...report.time_exit_sweep.map((r) => netOrGross(r) ?? -Infinity))
: null;
const sim = report?.portfolio_sim ?? null;
const run = useMutation({
mutationFn: () => triggerJob('backtest'),
@@ -202,6 +227,10 @@ export function BacktestPanel() {
<th className="px-4 py-2.5 text-right">Hit Rate</th>
<th className="px-4 py-2.5 text-right">Avg R</th>
<th className="px-4 py-2.5 text-right">Net Avg R</th>
<th className="px-4 py-2.5 text-right">Best R</th>
<th className="px-4 py-2.5 text-right">Worst R</th>
<th className="px-4 py-2.5 text-right">Avg Hold</th>
<th className="px-4 py-2.5 text-right">Net R/d</th>
</tr>
</thead>
<tbody>
@@ -326,8 +355,9 @@ export function BacktestPanel() {
</p>
<p className="mb-2 text-[11px] text-gray-500">
Models a realistic exit instead of waiting for the far S/R target: bank{' '}
<span className="text-gray-300">+X%</span> if price reaches it before the stop, else 1R on
the stop, else exit at the {report.params.horizon_days}-day close. In R, so it compares to the
<span className="text-gray-300">+X%</span> if price reaches it before the stop, else the
stop-fill loss (a gap through the stop fills at the open, so it can exceed 1R), else exit
at the {report.params.horizon_days}-day close. In R, so it compares to the
target model above. <span className="text-gray-300">Hit Rate = how often you'd have banked
+X%</span> (how far winners actually run) — no top-ticking, it's the level you'd really set.
The setup's own S/R target is <em>not</em> used here (exiting at that target is the model
@@ -440,6 +470,10 @@ export function BacktestPanel() {
<th className="px-4 py-2.5 text-right">Avg R</th>
<th className="px-4 py-2.5 text-right">Net Avg R</th>
<th className="px-4 py-2.5 text-right">Total R</th>
<th className="px-4 py-2.5 text-right">Best R</th>
<th className="px-4 py-2.5 text-right">Worst R</th>
<th className="px-4 py-2.5 text-right">Avg Hold</th>
<th className="px-4 py-2.5 text-right">Net R/d</th>
</tr>
</thead>
<tbody>
@@ -457,6 +491,10 @@ export function BacktestPanel() {
<td className={`num px-4 py-2.5 text-right ${rColor(row.avg_r)}`}>{fmtR(row.avg_r)}</td>
<td className={`num px-4 py-2.5 text-right font-semibold ${rColor(row.net_avg_r ?? null)}`}>{fmtR(row.net_avg_r ?? null)}</td>
<td className={`num px-4 py-2.5 text-right ${rColor(row.total_r)}`}>{fmtR(row.total_r)}</td>
<td className="num px-4 py-2.5 text-right text-emerald-400">{fmtR(row.best_r)}</td>
<td className="num px-4 py-2.5 text-right text-red-400">{fmtR(row.worst_r)}</td>
<td className="num px-4 py-2.5 text-right text-gray-400">{fmtDays(row.avg_hold_days)}</td>
<td className={`num px-4 py-2.5 text-right ${rColor(row.net_r_per_day ?? null)}`}>{fmtRPerDay(row.net_r_per_day)}</td>
</tr>
);
})}
@@ -466,6 +504,63 @@ export function BacktestPanel() {
</div>
)}
{sim && sim.policies.length > 0 && (
<div>
<p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
Portfolio simulation
</p>
<p className="mb-2 text-[11px] text-gray-500">
{sim.note ?? 'One capital-constrained book over the qualified setups.'}{' '}
<span className="text-gray-300">
Start {fmtMoney(sim.params.starting_capital)} · max {sim.params.max_positions} positions ·{' '}
{sim.params.risk_per_trade_pct}% risk/trade · {sim.params.notional_cap_pct}% notional cap ·{' '}
{sim.params.cost_per_side_pct}%/side costs · {sim.policies[0].start_date} → {sim.policies[0].end_date}
</span>
</p>
<div className="glass overflow-x-auto">
<table className="w-full text-sm">
<thead>
<tr className="border-b border-white/[0.06] text-left text-xs uppercase tracking-wider text-gray-500">
<th className="px-4 py-2.5">Metric</th>
{sim.policies.map((p) => (
<th key={p.policy} className="px-4 py-2.5 text-right">
{POLICY_LABELS[p.policy] ?? p.policy}
</th>
))}
</tr>
</thead>
<tbody>
{(
[
['Final equity', (p) => fmtMoney(p.final_equity), (p) => rColor(p.final_equity - p.starting_capital)],
['Total return', (p) => fmtSignedPct(p.total_return_pct), (p) => rColor(p.total_return_pct)],
['SPY return (same window)', (p) => fmtSignedPct(p.spy_return_pct), () => 'text-gray-300'],
['CAGR', (p) => fmtSignedPct(p.cagr_pct), (p) => rColor(p.cagr_pct)],
['Max drawdown', (p) => `${p.max_drawdown_pct.toFixed(1)}%`, () => 'text-amber-400'],
['Sharpe (daily, annualized)', (p) => (p.sharpe === null ? '' : p.sharpe.toFixed(2)), () => 'text-gray-200'],
['Trades', (p) => String(p.trades), () => 'text-gray-300'],
['Win rate', (p) => fmtPct(p.win_rate), () => 'text-gray-200'],
['Avg P&L / trade', (p) => fmtMoney(p.avg_trade_pnl), (p) => rColor(p.avg_trade_pnl)],
['Best / worst trade', (p) => `${fmtR(p.best_trade_r)} / ${fmtR(p.worst_trade_r)}`, () => 'text-gray-300'],
['Avg holding time', (p) => fmtDays(p.avg_hold_days), () => 'text-gray-300'],
['Entries skipped (book full)', (p) => String(p.skipped_book_full), () => 'text-gray-500'],
] as [string, (p: BacktestPortfolioPolicy) => string, (p: BacktestPortfolioPolicy) => string][]
).map(([label, fmt, color]) => (
<tr key={label} className="border-b border-white/[0.04]">
<td className="px-4 py-2.5 font-medium text-gray-200">{label}</td>
{sim.policies.map((p) => (
<td key={p.policy} className={`num px-4 py-2.5 text-right ${color(p)}`}>
{fmt(p)}
</td>
))}
</tr>
))}
</tbody>
</table>
</div>
</div>
)}
<div>
<p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
Probability calibration
+44
View File
@@ -232,6 +232,10 @@ export interface BacktestBucket {
// Net of transaction costs — optional so a stale cached report still renders.
net_avg_r?: number | null;
net_total_r?: number | null;
best_r?: number | null;
worst_r?: number | null;
avg_hold_days?: number | null;
net_r_per_day?: number | null;
}
export interface BacktestCalibrationRow {
@@ -276,6 +280,45 @@ export interface BacktestTimeExitRow {
total_r: number | null;
net_avg_r?: number | null;
net_total_r?: number | null;
best_r?: number | null;
worst_r?: number | null;
avg_hold_days?: number | null;
net_r_per_day?: number | null;
}
export interface BacktestPortfolioPolicy {
policy: string;
starting_capital: number;
final_equity: number;
total_return_pct: number;
cagr_pct: number | null;
max_drawdown_pct: number;
sharpe: number | null;
trades: number;
win_rate: number | null;
avg_trade_pnl: number | null;
best_trade_r: number | null;
worst_trade_r: number | null;
best_trade_pnl: number | null;
worst_trade_pnl: number | null;
avg_hold_days: number | null;
skipped_book_full: number;
spy_return_pct: number | null;
start_date: string;
end_date: string;
}
export interface BacktestPortfolioSim {
params: {
starting_capital: number;
max_positions: number;
risk_per_trade_pct: number;
notional_cap_pct: number;
cost_per_side_pct: number;
hold_days: number;
};
policies: BacktestPortfolioPolicy[];
note?: string;
}
export interface BacktestGateAblationRow extends BacktestBucket {
@@ -319,6 +362,7 @@ export interface BacktestReport {
take_profit_sweep?: BacktestTakeProfitRow[];
trailing_sweep?: BacktestTrailingRow[];
time_exit_sweep?: BacktestTimeExitRow[];
portfolio_sim?: BacktestPortfolioSim;
calibration: BacktestCalibrationRow[];
signal_eval?: BacktestSignalEvalRow[];
signal_eval_note?: string;
+134 -6
View File
@@ -32,6 +32,7 @@ def _cand(
qualified: bool = True,
direction: str = "long",
risk_pct: float = 0.05,
hold_days: int = 10,
) -> dict:
target_hit = outcome == OUTCOME_TARGET_HIT
realized = rr if target_hit else (0.0 if outcome == OUTCOME_EXPIRED else -1.0)
@@ -44,6 +45,7 @@ def _cand(
"qualified": qualified,
"direction": direction,
"risk_pct": risk_pct,
"hold_days": hold_days,
}
@@ -51,35 +53,64 @@ def _cand(
_COST_R_005 = 2 * bt.COST_PER_SIDE / 0.05
def _bar(high: float, low: float, close: float) -> SimpleNamespace:
return SimpleNamespace(high=high, low=low, close=close)
def _bar(high: float, low: float, close: float, open_: float | None = None) -> SimpleNamespace:
"""Synthetic daily bar. ``open`` defaults to the high so a stop is pierced
intraday (fill at the stop level); pass an explicit open beyond the stop to
model a gap through it."""
return SimpleNamespace(
high=high, low=low, close=close, open=open_ if open_ is not None else high
)
class TestStopFillR:
    """Stop fills: at the stop when pierced intraday, at the open on a gap."""

    def test_intraday_fill_at_stop(self):
        # No explicit open is passed, so the fill is the 95 stop itself: exactly -1R.
        pierced = _bar(101, 94, 96)
        realized = bt._stop_fill_r("long", 100.0, 95.0, pierced)
        assert realized == pytest.approx(-1.0)

    def test_gap_fill_at_open(self):
        # Opens at 92, below the 95 stop → filled at the open, worse than 1R.
        gapped_down = _bar(93, 90, 91, open_=92)
        realized = bt._stop_fill_r("long", 100.0, 95.0, gapped_down)
        assert realized == pytest.approx(-1.6)

    def test_short_gap_fill_at_open(self):
        # Short stop 105; opens at 107 above it → fill 107.
        gapped_up = _bar(110, 104, 108, open_=107)
        realized = bt._stop_fill_r("short", 100.0, 105.0, gapped_up)
        assert realized == pytest.approx(-1.4)
class TestTakeProfitPrimitives:
def test_long_tp_reachable_before_stop(self):
    """A long that rallies without touching its stop reports MFE, close and no stop day."""
    # ``_tp_primitives`` returns six values; unpacking it into four names
    # would raise ValueError, so only the six-name form is kept. The last
    # value is not asserted here.
    risk, stopped, mfe, close_pct, stop_day, _ = bt._tp_primitives(
        "long", 100.0, 95.0, [_bar(109, 101, 108)], 30
    )
    assert risk == pytest.approx(0.05)
    assert stopped is False
    assert mfe == pytest.approx(0.09)
    assert close_pct == pytest.approx(0.08)
    assert stop_day is None
def test_long_stop_zeroes_mfe(self):
    """A stop hit on the only bar leaves zero MFE and a -1R intraday stop fill."""
    # Low pierces the stop on the only bar → loss, nothing banked before it.
    # ``_tp_primitives`` returns six values; the stale four-name unpack would
    # raise ValueError, so only the six-name form is kept. ``risk`` is unused.
    _, stopped, mfe, close_pct, stop_day, stop_r = bt._tp_primitives(
        "long", 100.0, 95.0, [_bar(101, 94, 96)], 30
    )
    assert stopped is True
    assert mfe == pytest.approx(0.0)
    assert close_pct == pytest.approx(-0.04)
    assert stop_day == 1
    assert stop_r == pytest.approx(-1.0)
def test_gap_through_stop_loses_more_than_1r(self):
    """A bar opening below the long stop is filled at the open, so the loss exceeds 1R."""
    gapped_bars = [_bar(93, 90, 91, open_=92)]
    primitives = bt._tp_primitives("long", 100.0, 95.0, gapped_bars, 30)
    _, stopped, _, _, stop_day, stop_r = primitives
    assert stopped is True
    assert stop_day == 1
    # Filled at the 92 open, not the 95 stop.
    assert stop_r == pytest.approx(-1.6)
def test_long_drift_no_trigger(self):
    """Two quiet bars that never reach the stop: MFE is the best high, close is the last close."""
    bars = [_bar(102, 99, 101), _bar(103, 100, 102)]
    # ``_tp_primitives`` returns six values; the stale four-name unpack would
    # raise ValueError, so only the six-name form is kept. Risk and the two
    # stop fields are not asserted here.
    _, stopped, mfe, close_pct, _, _ = bt._tp_primitives("long", 100.0, 95.0, bars, 30)
    assert stopped is False
    assert mfe == pytest.approx(0.03)
    assert close_pct == pytest.approx(0.02)
def test_short_direction(self):
# short entry 100, stop 105; price falls → favourable = (entry - low)/entry
risk, stopped, mfe, close_pct = bt._tp_primitives("short", 100.0, 105.0, [_bar(101, 92, 93)], 30)
risk, stopped, mfe, close_pct, _, _ = bt._tp_primitives("short", 100.0, 105.0, [_bar(101, 92, 93)], 30)
assert risk == pytest.approx(0.05)
assert stopped is False
assert mfe == pytest.approx(0.08)
@@ -131,6 +162,12 @@ class TestTrailingExits:
assert res[10] == pytest.approx(0.8)
assert res[5] == pytest.approx(1.4)
def test_gap_through_stop_fills_at_open(self):
    """A gap below the governing stop exits a trailing trade at the bar's open."""
    # Initial stop 90 governs (20% trail from peak 100 is lower); the bar
    # opens at 85, below it → fill at the open.
    gapped_bars = [_bar(88, 84, 86, open_=85)]
    by_trail = bt._trailing_exits("long", 100.0, 90.0, (0.20,), gapped_bars, 30)
    assert by_trail[20] == pytest.approx(-1.5)
class TestTrailingBucket:
def test_bucket(self):
@@ -177,6 +214,10 @@ class TestTimeExits:
res = bt._time_exits("long", 100.0, 100.0, [_bar(103, 99, 102)], (5,))
assert res[5] == 0.0
def test_gap_through_stop_fills_at_open(self):
    """A bar opening at 92, below the 95 stop, realizes -1.6R for the 5-day hold."""
    gapped_bars = [_bar(93, 90, 91, open_=92)]
    by_hold = bt._time_exits("long", 100.0, 95.0, gapped_bars, (5,))
    assert by_hold[5] == pytest.approx(-1.6)
class TestTimeExitBucket:
def test_bucket(self):
@@ -192,6 +233,11 @@ class TestTimeExitBucket:
assert b["win_rate"] == pytest.approx(66.7, abs=0.1)
assert b["avg_r"] == pytest.approx(0.3, abs=0.01)
assert b["net_avg_r"] == pytest.approx(0.28, abs=0.01)
assert b["best_r"] == pytest.approx(1.4)
assert b["worst_r"] == pytest.approx(-1.0)
# No stop_day on any candidate → every hold runs the full 5 days.
assert b["avg_hold_days"] == 5.0
assert b["net_r_per_day"] == pytest.approx(0.28 / 5.0, abs=0.001)
def test_missing_hold_skipped(self):
b = bt._time_exit_bucket([{"time_r": {5: 1.0}}], 21)
@@ -263,6 +309,78 @@ class TestGateAblation:
assert rows["all_floors"]["total"] == 2
def _sim_prices(start_ord: int, closes: list[float]) -> tuple:
    """Build gap-free column arrays for a run of consecutive daily bars.

    Returns a 6-tuple of equal-length lists: day ordinals counting up from
    ``start_ord``, opens (equal to the closes, so no gaps), highs (close + 1),
    lows (close - 1), closes, and a constant 1_000_000 column (presumably
    volume).
    """
    n_bars = len(closes)
    day_ordinals = [start_ord + offset for offset in range(n_bars)]
    # Opens and closes are separate copies so callers can mutate one safely.
    opens = list(closes)
    highs = [px + 1.0 for px in closes]
    lows = [px - 1.0 for px in closes]
    last_column = [1_000_000 for _ in range(n_bars)]
    return (day_ordinals, opens, highs, lows, list(closes), last_column)
def _sim_cand(
    sym: str, day_ord: int, entry: float, stop: float, target: float, mp: float = 90.0
) -> dict:
    """Build a qualified long candidate dict for the portfolio-simulation tests.

    ``day_ord`` is a proleptic Gregorian ordinal and is stored as an ISO date
    string under ``"date"``; ``mp`` is stored under ``"momentum_percentile"``.
    """
    detected_on = date.fromordinal(day_ord)
    candidate = {
        "qualified": True,
        "direction": "long",
        "symbol": sym,
        "date": detected_on.isoformat(),
    }
    # Price levels and ranking score follow the fixed fields, in the same key order.
    candidate["entry"] = entry
    candidate["stop"] = stop
    candidate["target"] = target
    candidate["momentum_percentile"] = mp
    return candidate
class TestSimulatePortfolio:
    """Single-candidate checks of ``bt._simulate_portfolio`` under the hold and target policies."""

    # Ordinal of the first simulated bar (2025-01-06).
    ORD = date(2025, 1, 6).toordinal()

    def test_hold_policy_accounting(self):
        rising = [100.0, 102.0, 104.0, 106.0, 108.0, 110.0]
        setup = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=130.0)
        book = {"AAA": _sim_prices(self.ORD, rising)}
        result = bt._simulate_portfolio([setup], book, None, "hold", 3)
        assert result is not None
        assert result["trades"] == 1
        # 20 shares (1% risk / $5 stop distance), exit at the day-3 close 106:
        # pnl = 2120 - 2000 - 2.00 entry cost - 2.12 exit cost = 115.88
        assert result["final_equity"] == pytest.approx(10_115.88, abs=0.01)
        assert result["win_rate"] == 100.0
        assert result["best_trade_r"] == pytest.approx(1.2)
        assert result["avg_hold_days"] == 3.0
        assert result["max_drawdown_pct"] == 0.0
        # Window far too short to annualize.
        assert result["cagr_pct"] is None
        assert result["spy_return_pct"] is None

    def test_target_policy_exits_at_target(self):
        rising = [100.0, 102.0, 104.0, 106.0, 108.0, 110.0]
        setup = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=105.0)
        book = {"AAA": _sim_prices(self.ORD, rising)}
        result = bt._simulate_portfolio([setup], book, None, "target", 30)
        assert result is not None
        assert result["trades"] == 1
        # Filled exactly at 105.
        assert result["best_trade_r"] == pytest.approx(1.0)

    def test_stop_gap_fills_at_open(self):
        # Day-1 bar gaps to a 90 open, below the 95 stop → fill at the open.
        two_days = [self.ORD, self.ORD + 1]
        gapped_columns = (
            two_days,
            [100.0, 90.0],
            [101.0, 92.0],
            [99.0, 88.0],
            [100.0, 91.0],
            [1, 1],
        )
        book = {"AAA": gapped_columns}
        setup = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=120.0)
        result = bt._simulate_portfolio([setup], book, None, "hold", 30)
        assert result is not None
        assert result["trades"] == 1
        # (90 - 100) / 5
        assert result["worst_trade_r"] == pytest.approx(-2.0)

    def test_nothing_qualified_returns_none(self):
        outcome = bt._simulate_portfolio([], {}, None, "hold", 30)
        assert outcome is None
def test_bucket_stats_counts_and_expectancy():
cands = [
_cand(70, OUTCOME_TARGET_HIT, 3.0), # +3R win
@@ -283,6 +401,10 @@ def test_bucket_stats_counts_and_expectancy():
# net = gross minus a 0.04R round trip per candidate (risk_pct 0.05)
assert s["net_avg_r"] == pytest.approx(1.0 - _COST_R_005, abs=0.001)
assert s["net_total_r"] == pytest.approx(4.0 - 4 * _COST_R_005, abs=0.01)
assert s["best_r"] == 3.0
assert s["worst_r"] == -1.0
assert s["avg_hold_days"] == 10.0
assert s["net_r_per_day"] == pytest.approx((1.0 - _COST_R_005) / 10.0, abs=0.001)
def test_bucket_stats_empty():
@@ -394,6 +516,12 @@ async def test_run_backtest_smoke(session):
# time-exit sweep covers the configured hold lengths
assert [r["hold_days"] for r in report["time_exit_sweep"]] == list(bt.TIME_EXIT_DAYS)
# portfolio simulation section is always present (policies may be empty
# when nothing qualifies)
assert "portfolio_sim" in report
assert isinstance(report["portfolio_sim"]["policies"], list)
assert report["portfolio_sim"]["params"]["max_positions"] == bt.SIM_MAX_POSITIONS
# sweep: lowering the momentum-percentile cutoff can only add qualifiers
sweep = sorted(report["sweep"], key=lambda r: r["min_momentum_percentile"], reverse=True)
counts = [r["total"] for r in sweep]