diff --git a/app/services/backtest_service.py b/app/services/backtest_service.py index 1dcbdbf..b17e410 100644 --- a/app/services/backtest_service.py +++ b/app/services/backtest_service.py @@ -216,12 +216,27 @@ def _window_setups( return out +def _stop_fill_r(direction: str, entry: float, stop: float, bar) -> float: + """Realized R when the stop is hit on ``bar``: filled at the stop, or at the + bar's open when price gapped through it — so a gap can lose more than −1R, + matching real fills. Targets are never filled better than their level, so + gap modeling only ever makes results more conservative.""" + risk = abs(entry - stop) + if risk <= 0 or entry <= 0: + return -1.0 + if direction == "long": + fill = min(stop, bar.open) + return (fill - entry) / risk + fill = max(stop, bar.open) + return (entry - fill) / risk + + def _tp_primitives( direction: str, entry: float, stop: float, forward: list, horizon: int -) -> tuple[float, bool, float, float]: +) -> tuple[float, bool, float, float, int | None, float]: """Primitives for the take-profit exit model, from the bars after detection. - Returns ``(risk_pct, stopped, mfe_pct, close_pct)``: + Returns ``(risk_pct, stopped, mfe_pct, close_pct, stop_day, stop_r)``: - ``risk_pct`` fraction from entry to stop (the 1R distance) - ``stopped`` whether the stop was hit within the horizon - ``mfe_pct`` best favourable excursion (fraction) reachable *before* the @@ -229,27 +244,34 @@ def _tp_primitives( counts as a loss (matching the conservative target model); over the whole horizon if the stop is never hit - ``close_pct`` directional return at the horizon-end close (the timeout exit) + - ``stop_day`` 1-based trading day the stop was pierced, None if never + - ``stop_r`` realized R at the stop fill (≤ −1 when the bar gapped + through the stop — see _stop_fill_r); −1.0 when unused From these any fixed take-profit level can be scored without re-walking bars: - tp reached before stop (``mfe_pct >= tp``) → +tp; else stop → −1R; else the - horizon-close move. + tp reached before stop (``mfe_pct >= tp``) → +tp; else stop → ``stop_r``; + else the horizon-close move. """ long = direction == "long" risk_pct = abs(entry - stop) / entry if entry else 0.0 bars = forward[:horizon] if not bars: - return risk_pct, False, 0.0, 0.0 + return risk_pct, False, 0.0, 0.0, None, -1.0 mfe = 0.0 stopped = False - for r in bars: + stop_day: int | None = None + stop_r = -1.0 + for i, r in enumerate(bars): if (r.low <= stop) if long else (r.high >= stop): stopped = True + stop_day = i + 1 + stop_r = _stop_fill_r(direction, entry, stop, r) break fav = (r.high - entry) / entry if long else (entry - r.low) / entry if fav > mfe: mfe = fav close_pct = ((bars[-1].close - entry) / entry) * (1.0 if long else -1.0) - return risk_pct, stopped, mfe, close_pct + return risk_pct, stopped, mfe, close_pct, stop_day, stop_r def _trailing_exits( @@ -281,12 +303,14 @@ def _trailing_exits( if long: stop_level = max(init_stop, peak * (1 - f)) if r.low <= stop_level: - result[round(f * 100)] = ((stop_level - entry) / entry) / risk + fill = min(stop_level, r.open) # gap through fills at the open + result[round(f * 100)] = ((fill - entry) / entry) / risk continue else: stop_level = min(init_stop, peak * (1 + f)) if r.high >= stop_level: - result[round(f * 100)] = ((entry - stop_level) / entry) / risk + fill = max(stop_level, r.open) + result[round(f * 100)] = ((entry - fill) / entry) / risk continue remaining.append(f) active = remaining @@ -325,10 +349,12 @@ def _time_exits( return {int(n): 0.0 for n in horizons} stop_day: int | None = None # 1-based trading day the stop was pierced + stop_r = -1.0 closes: list[float] = [] for i, r in enumerate(bars): if (r.low <= stop) if long else (r.high >= stop): stop_day = i + 1 + stop_r = _stop_fill_r(direction, entry, stop, r) break closes.append(r.close) @@ -336,7 +362,7 @@ def _time_exits( for h in horizons: n = int(h) if stop_day is not None and stop_day <= n: - result[n] = -1.0 + result[n] = stop_r else: # closes can't be empty here: an empty closes means the stop hit on # day 1, which the branch above catches for every n >= 1. @@ -359,21 +385,29 @@ def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) - forward_bars = [Bar(date=r.date, high=r.high, low=r.low) for r in forward] for s in _window_setups(window, config, activation): - outcome, _ = evaluate_setup_against_bars( + outcome, outcome_date = evaluate_setup_against_bars( s["direction"], s["stop"], s["target"], forward_bars, HORIZON ) if outcome is None: continue + # Trading days from detection to resolution (expired = full horizon). + hold_days = next( + (idx + 1 for idx, r in enumerate(forward[:HORIZON]) if r.date == outcome_date), + min(HORIZON, len(forward)), + ) target_hit = outcome == OUTCOME_TARGET_HIT if outcome == OUTCOME_TARGET_HIT: realized_r = s["rr"] elif outcome in (OUTCOME_STOP_HIT, OUTCOME_AMBIGUOUS): - realized_r = -1.0 + # Fill at the stop, or at the open when the bar gapped through it. + realized_r = _stop_fill_r( + s["direction"], s["entry"], s["stop"], forward[hold_days - 1] + ) else: # expired realized_r = 0.0 # Take-profit exit primitives (parallel to the target-vs-stop outcome # above; aggregated separately into the take-profit sweep). - risk_pct, tp_stopped, mfe_pct, tp_close_pct = _tp_primitives( + risk_pct, tp_stopped, mfe_pct, tp_close_pct, stop_day, tp_stop_r = _tp_primitives( s["direction"], s["entry"], s["stop"], forward, HORIZON ) trail_r = _trailing_exits( @@ -388,6 +422,9 @@ def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) - "date": records[i].date.isoformat(), "iso_week": (iso[0], iso[1]), "direction": s["direction"], + "entry": s["entry"], + "stop": s["stop"], + "target": s["target"], "rr": s["rr"], "confidence": s["confidence"], "primary_prob": s["primary_prob"], @@ -401,8 +438,11 @@ def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) - "outcome": outcome, "target_hit": target_hit, "realized_r": realized_r, + "hold_days": hold_days, + "stop_day": stop_day, "risk_pct": risk_pct, "tp_stopped": tp_stopped, + "tp_stop_r": tp_stop_r, "mfe_pct": mfe_pct, "tp_close_pct": tp_close_pct, "trail_r": trail_r, @@ -418,6 +458,9 @@ def _bucket_stats(cands: list[dict]) -> dict: decided = wins + losses rs = [c["realized_r"] for c in cands] net_rs = [c["realized_r"] - _cost_r(c) for c in cands] + holds = [c["hold_days"] for c in cands if c.get("hold_days")] + avg_hold = sum(holds) / len(holds) if holds else None + net_avg = sum(net_rs) / len(net_rs) if net_rs else None return { "total": len(cands), "wins": wins, @@ -426,8 +469,15 @@ def _bucket_stats(cands: list[dict]) -> dict: "hit_rate": round(wins / decided * 100, 1) if decided else None, "avg_r": round(sum(rs) / len(rs), 3) if rs else None, "total_r": round(sum(rs), 2) if rs else None, - "net_avg_r": round(sum(net_rs) / len(net_rs), 3) if net_rs else None, + "net_avg_r": round(net_avg, 3) if net_avg is not None else None, "net_total_r": round(sum(net_rs), 2) if net_rs else None, + "best_r": round(max(rs), 2) if rs else None, + "worst_r": round(min(rs), 2) if rs else None, + "avg_hold_days": round(avg_hold, 1) if avg_hold is not None else None, + # Capital efficiency: net expectancy per trading day the capital is tied up. + "net_r_per_day": ( + round(net_avg / avg_hold, 4) if net_avg is not None and avg_hold else None + ), } @@ -473,7 +523,7 @@ def _take_profit_bucket(cands: list[dict], tp: float) -> dict: r = tp / risk wins += 1 elif c.get("tp_stopped"): - r = -1.0 + r = c.get("tp_stop_r", -1.0) # gap-aware stop fill, ≤ −1R else: r = (c.get("tp_close_pct", 0.0)) / risk rs.append(r) @@ -519,16 +569,24 @@ def _trailing_bucket(cands: list[dict], trail_pct: int) -> dict: def _time_exit_bucket(cands: list[dict], hold_days: int) -> dict: """Stats for the hold-``hold_days`` exit: initial stop active, otherwise out at the day-N close. Each candidate carries its realized R per hold length in - ``time_r``; a "win" is an exit in profit (R > 0).""" - pairs = [ - (c["time_r"][hold_days], _cost_r(c)) + ``time_r``; a "win" is an exit in profit (R > 0). The realized hold is the + full N days unless the stop cut it short (``stop_day``).""" + rows = [ + ( + c["time_r"][hold_days], + _cost_r(c), + min(hold_days, c.get("stop_day") or hold_days), + ) for c in cands if c.get("time_r", {}).get(hold_days) is not None ] - total = len(pairs) - rs = [r for r, _ in pairs] - net_rs = [r - cost for r, cost in pairs] + total = len(rows) + rs = [r for r, _, _ in rows] + net_rs = [r - cost for r, cost, _ in rows] + holds = [h for _, _, h in rows] wins = sum(1 for r in rs if r > 0) + avg_hold = sum(holds) / total if total else None + net_avg = sum(net_rs) / total if total else None return { "hold_days": hold_days, "total": total, @@ -536,8 +594,14 @@ def _time_exit_bucket(cands: list[dict], hold_days: int) -> dict: "win_rate": round(wins / total * 100, 1) if total else None, "avg_r": round(sum(rs) / total, 3) if total else None, "total_r": round(sum(rs), 2) if total else None, - "net_avg_r": round(sum(net_rs) / total, 3) if total else None, + "net_avg_r": round(net_avg, 3) if net_avg is not None else None, "net_total_r": round(sum(net_rs), 2) if total else None, + "best_r": round(max(rs), 2) if rs else None, + "worst_r": round(min(rs), 2) if rs else None, + "avg_hold_days": round(avg_hold, 1) if avg_hold is not None else None, + "net_r_per_day": ( + round(net_avg / avg_hold, 4) if net_avg is not None and avg_hold else None + ), } @@ -934,6 +998,214 @@ def _gate_ablation(candidates: list[dict], activation: dict, threshold: float) - return rows +# --------------------------------------------------------------------------- +# Portfolio simulation +# --------------------------------------------------------------------------- + +# Book parameters: fixed starting capital, a capped number of concurrent +# positions (one per ticker), fixed-fractional risk sizing with a no-leverage +# notional cap, and the same per-side cost as the per-trade tables. Entries are +# the QUALIFIED setups at their detection close, best momentum first while +# slots and cash allow. +SIM_STARTING_CAPITAL = 10_000.0 +SIM_MAX_POSITIONS = 10 +SIM_RISK_PER_TRADE = 0.01 # fraction of equity risked per position (entry→stop) +SIM_NOTIONAL_CAP = 0.20 # max fraction of equity per position (no margin) + + +def _simulate_portfolio( + candidates: list[dict], + prices: dict[str, tuple], + spy_closes: dict | None, + exit_policy: str, + hold_days: int, +) -> dict | None: + """Replay the qualified setups as ONE capital-constrained book and report + portfolio economics from the daily equity curve (return, CAGR, drawdown, + Sharpe) — the numbers the per-setup tables cannot give, because they grade + every setup as if capital were infinite. + + ``exit_policy``: "target" races the S/R target against the stop with a + timeout at ``hold_days``; "hold" keeps only the initial stop and exits at + the ``hold_days``-th close. Stops fill at the worse of stop or open (gaps + modeled); positions still open at the end are closed at their last mark. + Returns None when there is nothing to trade. + """ + entries_by_ord: dict[int, list[dict]] = defaultdict(list) + for c in candidates: + if not c.get("qualified") or c.get("direction") != "long": + continue + if not c.get("entry") or not c.get("stop"): + continue + entries_by_ord[date.fromisoformat(c["date"]).toordinal()].append(c) + if not entries_by_ord: + return None + + # Per-symbol bar lookup: date ordinal -> index into the column arrays. + index_of: dict[str, dict[int, int]] = { + sym: {o: i for i, o in enumerate(cols[0])} for sym, cols in prices.items() + } + + first_ord = min(entries_by_ord) + calendar = sorted({o for cols in prices.values() for o in cols[0] if o >= first_ord}) + if not calendar: + return None + + cash = SIM_STARTING_CAPITAL + positions: dict[str, dict] = {} + curve: list[tuple[int, float]] = [] + trades: list[dict] = [] + skipped_full = 0 + + def _bar(sym: str, o: int): + idx = index_of.get(sym, {}).get(o) + if idx is None: + return None + cols = prices[sym] + return SimpleNamespace( + open=cols[1][idx], high=cols[2][idx], low=cols[3][idx], close=cols[4][idx] + ) + + def _close_trade(sym: str, fill: float, reason: str) -> None: + nonlocal cash + pos = positions.pop(sym) + proceeds = pos["shares"] * fill + cost = proceeds * COST_PER_SIDE + cash += proceeds - cost + risk = pos["entry"] - pos["stop"] + trades.append({ + "pnl": proceeds - pos["shares"] * pos["entry"] - cost - pos["entry_cost"], + "r": (fill - pos["entry"]) / risk if risk > 0 else 0.0, + "hold": pos["bars_held"], + "reason": reason, + }) + + def _marked_equity() -> float: + return cash + sum(p["shares"] * p["last_close"] for p in positions.values()) + + for o in calendar: + # 1) exits on today's bars (stop intraday, target intraday, time at close) + for sym in list(positions): + pos = positions[sym] + bar = _bar(sym, o) + if bar is None: + continue + pos["bars_held"] += 1 + pos["last_close"] = bar.close + if bar.low <= pos["stop"]: + # Same-bar stop+target resolves as the loss (conservative, like + # the evaluator); gap through the stop fills at the open. + _close_trade(sym, min(pos["stop"], bar.open), "stop") + continue + if exit_policy == "target" and pos["target"] and bar.high >= pos["target"]: + _close_trade(sym, pos["target"], "target") + continue + if pos["bars_held"] >= hold_days: + _close_trade(sym, bar.close, "time") + + # 2) entries at today's close, best momentum first + equity = _marked_equity() + todays = sorted( + entries_by_ord.get(o, ()), + key=lambda c: c.get("momentum_percentile") or 0.0, + reverse=True, + ) + for c in todays: + sym = c["symbol"] + if sym in positions: + continue + if len(positions) >= SIM_MAX_POSITIONS: + skipped_full += 1 + continue + entry, stop = float(c["entry"]), float(c["stop"]) + risk_ps = entry - stop + if risk_ps <= 0 or entry <= 0: + continue + shares = min( + (equity * SIM_RISK_PER_TRADE) / risk_ps, + (equity * SIM_NOTIONAL_CAP) / entry, + max(cash, 0.0) / (entry * (1.0 + COST_PER_SIDE)), + ) + if shares * entry < 1.0: # can't fund a meaningful position + continue + entry_cost = shares * entry * COST_PER_SIDE + cash -= shares * entry + entry_cost + positions[sym] = { + "shares": shares, + "entry": entry, + "stop": stop, + "target": float(c["target"]) if c.get("target") else None, + "entry_cost": entry_cost, + "bars_held": 0, + "last_close": entry, + } + equity = _marked_equity() + + curve.append((o, _marked_equity())) + + # Close whatever is still open at its last mark so final equity is realized. + for sym in list(positions): + _close_trade(sym, positions[sym]["last_close"], "open_at_end") + final_equity = cash + curve[-1] = (calendar[-1], final_equity) + + total_return_pct = (final_equity / SIM_STARTING_CAPITAL - 1.0) * 100.0 + years = (calendar[-1] - calendar[0]) / 365.25 + cagr_pct = ( + ((final_equity / SIM_STARTING_CAPITAL) ** (1.0 / years) - 1.0) * 100.0 + if years > 0.25 and final_equity > 0 + else None + ) + + peak = float("-inf") + max_dd = 0.0 + for _, eq in curve: + peak = max(peak, eq) + if peak > 0: + max_dd = max(max_dd, (peak - eq) / peak) + + rets = [b / a - 1.0 for (_, a), (_, b) in zip(curve, curve[1:]) if a > 0] + sharpe = None + if len(rets) > 2: + mean = sum(rets) / len(rets) + var = sum((x - mean) ** 2 for x in rets) / (len(rets) - 1) + if var > 0: + sharpe = mean / math.sqrt(var) * math.sqrt(252) + + pnls = [t["pnl"] for t in trades] + wins = sum(1 for p in pnls if p > 0) + spy_pct = None + if spy_closes: + from app.services.benchmark_service import benchmark_return_pct + + spy_pct = benchmark_return_pct( + spy_closes, date.fromordinal(calendar[0]), date.fromordinal(calendar[-1]) + ) + + return { + "starting_capital": SIM_STARTING_CAPITAL, + "final_equity": round(final_equity, 2), + "total_return_pct": round(total_return_pct, 1), + "cagr_pct": round(cagr_pct, 1) if cagr_pct is not None else None, + "max_drawdown_pct": round(max_dd * 100.0, 1), + "sharpe": round(sharpe, 2) if sharpe is not None else None, + "trades": len(trades), + "win_rate": round(wins / len(trades) * 100.0, 1) if trades else None, + "avg_trade_pnl": round(sum(pnls) / len(pnls), 2) if pnls else None, + "best_trade_r": round(max(t["r"] for t in trades), 2) if trades else None, + "worst_trade_r": round(min(t["r"] for t in trades), 2) if trades else None, + "best_trade_pnl": round(max(pnls), 2) if pnls else None, + "worst_trade_pnl": round(min(pnls), 2) if pnls else None, + "avg_hold_days": ( + round(sum(t["hold"] for t in trades) / len(trades), 1) if trades else None + ), + "skipped_book_full": skipped_full, + "spy_return_pct": round(spy_pct, 1) if spy_pct is not None else None, + "start_date": date.fromordinal(calendar[0]).isoformat(), + "end_date": date.fromordinal(calendar[-1]).isoformat(), + } + + async def run_backtest( db: AsyncSession, progress_cb: Callable[[int, int, str], None] | None = None, @@ -1037,6 +1309,43 @@ async def run_backtest( cands = [c for c in candidates if _momentum_qualifies(c, threshold)] sweep.append({"min_momentum_percentile": threshold, **_bucket_stats(cands)}) + # Portfolio simulation: re-fetch bars for just the qualified symbols (memory- + # light vs retaining every ticker's columns through the replay) and replay + # the book once per exit policy. Best-effort — the report stands without it. + hold_horizon = max(TIME_EXIT_DAYS) + sim_policies: list[dict] = [] + try: + qual_symbols = sorted({c["symbol"] for c in candidates if c.get("qualified")}) + price_columns: dict[str, tuple] = {} + for sym in qual_symbols: + cols = await _fetch_columns(db, sym) + if cols is not None: + price_columns[sym] = cols + + spy_closes: dict | None = None + try: + from app.services.benchmark_service import ( + load_benchmark_closes, + refresh_benchmark_prices, + ) + + oldest = min((cols[0][0] for cols in price_columns.values()), default=None) + if oldest is not None: + days_needed = (date.today() - date.fromordinal(oldest)).days + 30 + await refresh_benchmark_prices(db, days=days_needed) + spy_closes = await load_benchmark_closes(db) + except Exception: + logger.exception("Benchmark load for the portfolio sim failed") + + for policy in ("target", "hold"): + sim = _simulate_portfolio( + candidates, price_columns, spy_closes, policy, hold_horizon + ) + if sim is not None: + sim_policies.append({"policy": policy, **sim}) + except Exception: + logger.exception("Portfolio simulation failed") + return { "generated_at": datetime.now(timezone.utc).isoformat(), "tickers": total, @@ -1070,6 +1379,28 @@ async def run_backtest( "take_profit_sweep": [_take_profit_bucket(qualified, tp) for tp in TP_LEVELS], "trailing_sweep": [_trailing_bucket(qualified, round(f * 100)) for f in TRAIL_LEVELS], "time_exit_sweep": [_time_exit_bucket(qualified, n) for n in TIME_EXIT_DAYS], + "portfolio_sim": { + "params": { + "starting_capital": SIM_STARTING_CAPITAL, + "max_positions": SIM_MAX_POSITIONS, + "risk_per_trade_pct": round(SIM_RISK_PER_TRADE * 100, 2), + "notional_cap_pct": round(SIM_NOTIONAL_CAP * 100, 1), + "cost_per_side_pct": round(COST_PER_SIDE * 100, 3), + "hold_days": hold_horizon, + }, + "policies": sim_policies, + "note": ( + "One capital-constrained book over the same qualified setups the " + "tables above grade per-setup: at most " + f"{SIM_MAX_POSITIONS} concurrent positions (one per ticker), best " + "momentum first, fixed-fractional risk sizing with a no-leverage " + "cap, entries at the detection close, stops filled at the worse " + "of stop or open. 'target' races the S/R target against the stop " + "(timeout at the horizon); 'hold' keeps the initial stop and " + "exits at the horizon close. SPY return is price-only over the " + "same window. In-sample; no dividends." + ), + }, "calibration": _calibration(candidates), "signal_eval": _signal_evaluation(collected), "signal_eval_note": ( @@ -1084,6 +1415,9 @@ async def run_backtest( ), "note": ( "Sentiment & fundamentals held neutral (no point-in-time history). " + "Stops fill at the worse of the stop or the bar's open (gaps through " + "the stop are modeled, so a loss can exceed −1R); targets never fill " + "better than their level. " "~6 months ≈ one market regime — treat as directional, not gospel." ), } diff --git a/frontend/src/components/signals/BacktestPanel.tsx b/frontend/src/components/signals/BacktestPanel.tsx index c4aa152..cb3a4fb 100644 --- a/frontend/src/components/signals/BacktestPanel.tsx +++ b/frontend/src/components/signals/BacktestPanel.tsx @@ -6,15 +6,30 @@ import { Callout } from '../ui/Callout'; import { Disclosure } from '../ui/Disclosure'; import { Section } from '../ui/Section'; import { useToast } from '../ui/Toast'; -import type { BacktestBucket } from '../../lib/types'; +import type { BacktestBucket, BacktestPortfolioPolicy } from '../../lib/types'; -function fmtR(v: number | null): string { - if (v === null) return '—'; +function fmtR(v: number | null | undefined): string { + if (v === null || v === undefined) return '—'; return `${v > 0 ? '+' : ''}${v.toFixed(2)}R`; } function fmtPct(v: number | null): string { return v === null ? '—' : `${v.toFixed(1)}%`; } +function fmtMoney(v: number | null | undefined): string { + if (v === null || v === undefined) return '—'; + return v.toLocaleString('en-US', { minimumFractionDigits: 2, maximumFractionDigits: 2 }); +} +function fmtSignedPct(v: number | null | undefined): string { + if (v === null || v === undefined) return '—'; + return `${v > 0 ? '+' : ''}${v.toFixed(1)}%`; +} +function fmtDays(v: number | null | undefined): string { + return v === null || v === undefined ? '—' : `${v.toFixed(1)}d`; +} +function fmtRPerDay(v: number | null | undefined): string { + if (v === null || v === undefined) return '—'; + return `${v > 0 ? '+' : ''}${v.toFixed(3)}R`; +} function rColor(v: number | null): string { if (v === null) return 'text-gray-400'; if (v > 0) return 'text-emerald-400'; @@ -40,6 +55,11 @@ const ABLATION_LABELS: Record = { momentum_only: 'Momentum only (no floors)', }; +const POLICY_LABELS: Record = { + target: 'S/R target exit', + hold: 'Hold to horizon', +}; + // Prefer the net-of-costs number when the report carries it; older cached // reports (pre-cost model) fall back to gross. function netOrGross(r: { avg_r: number | null; net_avg_r?: number | null }): number | null { @@ -91,6 +111,10 @@ function BucketRow({ label, b }: { label: string; b: BacktestBucket }) { {fmtPct(b.hit_rate)} {fmtR(b.avg_r)} {fmtR(b.net_avg_r ?? null)} + {fmtR(b.best_r)} + {fmtR(b.worst_r)} + {fmtDays(b.avg_hold_days)} + {fmtRPerDay(b.net_r_per_day)} ); } @@ -112,6 +136,7 @@ export function BacktestPanel() { report?.time_exit_sweep && report.time_exit_sweep.length > 0 ? Math.max(...report.time_exit_sweep.map((r) => netOrGross(r) ?? -Infinity)) : null; + const sim = report?.portfolio_sim ?? null; const run = useMutation({ mutationFn: () => triggerJob('backtest'), @@ -202,6 +227,10 @@ export function BacktestPanel() { Hit Rate Avg R Net Avg R + Best R + Worst R + Avg Hold + Net R/d @@ -326,8 +355,9 @@ export function BacktestPanel() {

Models a realistic exit instead of waiting for the far S/R target: bank{' '} - +X% if price reaches it before the stop, else −1R on - the stop, else exit at the {report.params.horizon_days}-day close. In R, so it compares to the + +X% if price reaches it before the stop, else the + stop-fill loss (a gap through the stop fills at the open, so it can exceed −1R), else exit + at the {report.params.horizon_days}-day close. In R, so it compares to the target model above. Hit Rate = how often you'd have banked +X% (how far winners actually run) — no top-ticking, it's the level you'd really set. The setup's own S/R target is not used here (exiting at that target is the model @@ -440,6 +470,10 @@ export function BacktestPanel() { Avg R Net Avg R Total R + Best R + Worst R + Avg Hold + Net R/d @@ -457,6 +491,10 @@ export function BacktestPanel() { {fmtR(row.avg_r)} {fmtR(row.net_avg_r ?? null)} {fmtR(row.total_r)} + {fmtR(row.best_r)} + {fmtR(row.worst_r)} + {fmtDays(row.avg_hold_days)} + {fmtRPerDay(row.net_r_per_day)} ); })} @@ -466,6 +504,63 @@ export function BacktestPanel() { )} + {sim && sim.policies.length > 0 && ( +

+

+ Portfolio simulation +

+

+ {sim.note ?? 'One capital-constrained book over the qualified setups.'}{' '} + + Start {fmtMoney(sim.params.starting_capital)} · max {sim.params.max_positions} positions ·{' '} + {sim.params.risk_per_trade_pct}% risk/trade · {sim.params.notional_cap_pct}% notional cap ·{' '} + {sim.params.cost_per_side_pct}%/side costs · {sim.policies[0].start_date} → {sim.policies[0].end_date} + +

+
+ + + + + {sim.policies.map((p) => ( + + ))} + + + + {( + [ + ['Final equity', (p) => fmtMoney(p.final_equity), (p) => rColor(p.final_equity - p.starting_capital)], + ['Total return', (p) => fmtSignedPct(p.total_return_pct), (p) => rColor(p.total_return_pct)], + ['SPY return (same window)', (p) => fmtSignedPct(p.spy_return_pct), () => 'text-gray-300'], + ['CAGR', (p) => fmtSignedPct(p.cagr_pct), (p) => rColor(p.cagr_pct)], + ['Max drawdown', (p) => `−${p.max_drawdown_pct.toFixed(1)}%`, () => 'text-amber-400'], + ['Sharpe (daily, annualized)', (p) => (p.sharpe === null ? '—' : p.sharpe.toFixed(2)), () => 'text-gray-200'], + ['Trades', (p) => String(p.trades), () => 'text-gray-300'], + ['Win rate', (p) => fmtPct(p.win_rate), () => 'text-gray-200'], + ['Avg P&L / trade', (p) => fmtMoney(p.avg_trade_pnl), (p) => rColor(p.avg_trade_pnl)], + ['Best / worst trade', (p) => `${fmtR(p.best_trade_r)} / ${fmtR(p.worst_trade_r)}`, () => 'text-gray-300'], + ['Avg holding time', (p) => fmtDays(p.avg_hold_days), () => 'text-gray-300'], + ['Entries skipped (book full)', (p) => String(p.skipped_book_full), () => 'text-gray-500'], + ] as [string, (p: BacktestPortfolioPolicy) => string, (p: BacktestPortfolioPolicy) => string][] + ).map(([label, fmt, color]) => ( + + + {sim.policies.map((p) => ( + + ))} + + ))} + +
Metric + {POLICY_LABELS[p.policy] ?? p.policy} +
{label} + {fmt(p)} +
+
+
+ )} +

Probability calibration diff --git a/frontend/src/lib/types.ts b/frontend/src/lib/types.ts index c8d45a1..ea8a8c0 100644 --- a/frontend/src/lib/types.ts +++ b/frontend/src/lib/types.ts @@ -232,6 +232,10 @@ export interface BacktestBucket { // Net of transaction costs — optional so a stale cached report still renders. net_avg_r?: number | null; net_total_r?: number | null; + best_r?: number | null; + worst_r?: number | null; + avg_hold_days?: number | null; + net_r_per_day?: number | null; } export interface BacktestCalibrationRow { @@ -276,6 +280,45 @@ export interface BacktestTimeExitRow { total_r: number | null; net_avg_r?: number | null; net_total_r?: number | null; + best_r?: number | null; + worst_r?: number | null; + avg_hold_days?: number | null; + net_r_per_day?: number | null; +} + +export interface BacktestPortfolioPolicy { + policy: string; + starting_capital: number; + final_equity: number; + total_return_pct: number; + cagr_pct: number | null; + max_drawdown_pct: number; + sharpe: number | null; + trades: number; + win_rate: number | null; + avg_trade_pnl: number | null; + best_trade_r: number | null; + worst_trade_r: number | null; + best_trade_pnl: number | null; + worst_trade_pnl: number | null; + avg_hold_days: number | null; + skipped_book_full: number; + spy_return_pct: number | null; + start_date: string; + end_date: string; +} + +export interface BacktestPortfolioSim { + params: { + starting_capital: number; + max_positions: number; + risk_per_trade_pct: number; + notional_cap_pct: number; + cost_per_side_pct: number; + hold_days: number; + }; + policies: BacktestPortfolioPolicy[]; + note?: string; } export interface BacktestGateAblationRow extends BacktestBucket { @@ -319,6 +362,7 @@ export interface BacktestReport { take_profit_sweep?: BacktestTakeProfitRow[]; trailing_sweep?: BacktestTrailingRow[]; time_exit_sweep?: BacktestTimeExitRow[]; + portfolio_sim?: BacktestPortfolioSim; calibration: BacktestCalibrationRow[]; signal_eval?: BacktestSignalEvalRow[]; signal_eval_note?: string; diff --git a/tests/unit/test_backtest_service.py b/tests/unit/test_backtest_service.py index fbe6750..53c31a5 100644 --- a/tests/unit/test_backtest_service.py +++ b/tests/unit/test_backtest_service.py @@ -32,6 +32,7 @@ def _cand( qualified: bool = True, direction: str = "long", risk_pct: float = 0.05, + hold_days: int = 10, ) -> dict: target_hit = outcome == OUTCOME_TARGET_HIT realized = rr if target_hit else (0.0 if outcome == OUTCOME_EXPIRED else -1.0) @@ -44,6 +45,7 @@ def _cand( "qualified": qualified, "direction": direction, "risk_pct": risk_pct, + "hold_days": hold_days, } @@ -51,35 +53,64 @@ def _cand( _COST_R_005 = 2 * bt.COST_PER_SIDE / 0.05 -def _bar(high: float, low: float, close: float) -> SimpleNamespace: - return SimpleNamespace(high=high, low=low, close=close) +def _bar(high: float, low: float, close: float, open_: float | None = None) -> SimpleNamespace: + """Synthetic daily bar. ``open`` defaults to the high so a stop is pierced + intraday (fill at the stop level); pass an explicit open beyond the stop to + model a gap through it.""" + return SimpleNamespace( + high=high, low=low, close=close, open=open_ if open_ is not None else high + ) + + +class TestStopFillR: + def test_intraday_fill_at_stop(self): + assert bt._stop_fill_r("long", 100.0, 95.0, _bar(101, 94, 96)) == pytest.approx(-1.0) + + def test_gap_fill_at_open(self): + # Opens at 92, below the 95 stop → filled at the open, worse than −1R. + assert bt._stop_fill_r("long", 100.0, 95.0, _bar(93, 90, 91, open_=92)) == pytest.approx(-1.6) + + def test_short_gap_fill_at_open(self): + # Short stop 105; opens at 107 above it → fill 107. + assert bt._stop_fill_r("short", 100.0, 105.0, _bar(110, 104, 108, open_=107)) == pytest.approx(-1.4) class TestTakeProfitPrimitives: def test_long_tp_reachable_before_stop(self): - risk, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, [_bar(109, 101, 108)], 30) + risk, stopped, mfe, close_pct, stop_day, _ = bt._tp_primitives("long", 100.0, 95.0, [_bar(109, 101, 108)], 30) assert risk == pytest.approx(0.05) assert stopped is False assert mfe == pytest.approx(0.09) assert close_pct == pytest.approx(0.08) + assert stop_day is None def test_long_stop_zeroes_mfe(self): # Low pierces the stop on the only bar → loss, nothing banked before it. - risk, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, [_bar(101, 94, 96)], 30) + risk, stopped, mfe, close_pct, stop_day, stop_r = bt._tp_primitives("long", 100.0, 95.0, [_bar(101, 94, 96)], 30) assert stopped is True assert mfe == pytest.approx(0.0) assert close_pct == pytest.approx(-0.04) + assert stop_day == 1 + assert stop_r == pytest.approx(-1.0) + + def test_gap_through_stop_loses_more_than_1r(self): + _, stopped, _, _, stop_day, stop_r = bt._tp_primitives( + "long", 100.0, 95.0, [_bar(93, 90, 91, open_=92)], 30 + ) + assert stopped is True + assert stop_day == 1 + assert stop_r == pytest.approx(-1.6) # filled at the 92 open, not the 95 stop def test_long_drift_no_trigger(self): bars = [_bar(102, 99, 101), _bar(103, 100, 102)] - risk, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, bars, 30) + risk, stopped, mfe, close_pct, _, _ = bt._tp_primitives("long", 100.0, 95.0, bars, 30) assert stopped is False assert mfe == pytest.approx(0.03) assert close_pct == pytest.approx(0.02) def test_short_direction(self): # short entry 100, stop 105; price falls → favourable = (entry - low)/entry - risk, stopped, mfe, close_pct = bt._tp_primitives("short", 100.0, 105.0, [_bar(101, 92, 93)], 30) + risk, stopped, mfe, close_pct, _, _ = bt._tp_primitives("short", 100.0, 105.0, [_bar(101, 92, 93)], 30) assert risk == pytest.approx(0.05) assert stopped is False assert mfe == pytest.approx(0.08) @@ -131,6 +162,12 @@ class TestTrailingExits: assert res[10] == pytest.approx(0.8) assert res[5] == pytest.approx(1.4) + def test_gap_through_stop_fills_at_open(self): + # Initial stop 90 governs (20% trail from peak 100 is lower); the bar + # opens at 85, below it → fill at the open. + res = bt._trailing_exits("long", 100.0, 90.0, (0.20,), [_bar(88, 84, 86, open_=85)], 30) + assert res[20] == pytest.approx(-1.5) + class TestTrailingBucket: def test_bucket(self): @@ -177,6 +214,10 @@ class TestTimeExits: res = bt._time_exits("long", 100.0, 100.0, [_bar(103, 99, 102)], (5,)) assert res[5] == 0.0 + def test_gap_through_stop_fills_at_open(self): + res = bt._time_exits("long", 100.0, 95.0, [_bar(93, 90, 91, open_=92)], (5,)) + assert res[5] == pytest.approx(-1.6) + class TestTimeExitBucket: def test_bucket(self): @@ -192,6 +233,11 @@ class TestTimeExitBucket: assert b["win_rate"] == pytest.approx(66.7, abs=0.1) assert b["avg_r"] == pytest.approx(0.3, abs=0.01) assert b["net_avg_r"] == pytest.approx(0.28, abs=0.01) + assert b["best_r"] == pytest.approx(1.4) + assert b["worst_r"] == pytest.approx(-1.0) + # No stop_day on any candidate → every hold runs the full 5 days. + assert b["avg_hold_days"] == 5.0 + assert b["net_r_per_day"] == pytest.approx(0.28 / 5.0, abs=0.001) def test_missing_hold_skipped(self): b = bt._time_exit_bucket([{"time_r": {5: 1.0}}], 21) @@ -263,6 +309,78 @@ class TestGateAblation: assert rows["all_floors"]["total"] == 2 +def _sim_prices(start_ord: int, closes: list[float]) -> tuple: + """Column arrays for consecutive daily bars: open = close (no gaps), + high/low = close ± 1.""" + ords = list(range(start_ord, start_ord + len(closes))) + return ( + ords, + list(closes), + [c + 1.0 for c in closes], + [c - 1.0 for c in closes], + list(closes), + [1_000_000] * len(closes), + ) + + +def _sim_cand( + sym: str, day_ord: int, entry: float, stop: float, target: float, mp: float = 90.0 +) -> dict: + return { + "qualified": True, + "direction": "long", + "symbol": sym, + "date": date.fromordinal(day_ord).isoformat(), + "entry": entry, + "stop": stop, + "target": target, + "momentum_percentile": mp, + } + + +class TestSimulatePortfolio: + ORD = date(2025, 1, 6).toordinal() + + def test_hold_policy_accounting(self): + closes = [100.0, 102.0, 104.0, 106.0, 108.0, 110.0] + prices = {"AAA": _sim_prices(self.ORD, closes)} + cand = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=130.0) + sim = bt._simulate_portfolio([cand], prices, None, "hold", 3) + assert sim is not None + assert sim["trades"] == 1 + # 20 shares (1% risk / $5 stop distance), exit at the day-3 close 106: + # pnl = 2120 − 2000 − 2.00 entry cost − 2.12 exit cost = 115.88 + assert sim["final_equity"] == pytest.approx(10_115.88, abs=0.01) + assert sim["win_rate"] == 100.0 + assert sim["best_trade_r"] == pytest.approx(1.2) + assert sim["avg_hold_days"] == 3.0 + assert sim["max_drawdown_pct"] == 0.0 + assert sim["cagr_pct"] is None # window far too short to annualize + assert sim["spy_return_pct"] is None + + def test_target_policy_exits_at_target(self): + closes = [100.0, 102.0, 104.0, 106.0, 108.0, 110.0] + prices = {"AAA": _sim_prices(self.ORD, closes)} + cand = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=105.0) + sim = bt._simulate_portfolio([cand], prices, None, "target", 30) + assert sim is not None + assert sim["trades"] == 1 + assert sim["best_trade_r"] == pytest.approx(1.0) # filled exactly at 105 + + def test_stop_gap_fills_at_open(self): + # Day-1 bar gaps to a 90 open, below the 95 stop → fill at the open. + ords = list(range(self.ORD, self.ORD + 2)) + prices = {"AAA": (ords, [100.0, 90.0], [101.0, 92.0], [99.0, 88.0], [100.0, 91.0], [1, 1])} + cand = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=120.0) + sim = bt._simulate_portfolio([cand], prices, None, "hold", 30) + assert sim is not None + assert sim["trades"] == 1 + assert sim["worst_trade_r"] == pytest.approx(-2.0) # (90 − 100) / 5 + + def test_nothing_qualified_returns_none(self): + assert bt._simulate_portfolio([], {}, None, "hold", 30) is None + + def test_bucket_stats_counts_and_expectancy(): cands = [ _cand(70, OUTCOME_TARGET_HIT, 3.0), # +3R win @@ -283,6 +401,10 @@ def test_bucket_stats_counts_and_expectancy(): # net = gross minus a 0.04R round trip per candidate (risk_pct 0.05) assert s["net_avg_r"] == pytest.approx(1.0 - _COST_R_005, abs=0.001) assert s["net_total_r"] == pytest.approx(4.0 - 4 * _COST_R_005, abs=0.01) + assert s["best_r"] == 3.0 + assert s["worst_r"] == -1.0 + assert s["avg_hold_days"] == 10.0 + assert s["net_r_per_day"] == pytest.approx((1.0 - _COST_R_005) / 10.0, abs=0.001) def test_bucket_stats_empty(): @@ -394,6 +516,12 @@ async def test_run_backtest_smoke(session): # time-exit sweep covers the configured hold lengths assert [r["hold_days"] for r in report["time_exit_sweep"]] == list(bt.TIME_EXIT_DAYS) + # portfolio simulation section is always present (policies may be empty + # when nothing qualifies) + assert "portfolio_sim" in report + assert isinstance(report["portfolio_sim"]["policies"], list) + assert report["portfolio_sim"]["params"]["max_positions"] == bt.SIM_MAX_POSITIONS + # sweep: lowering the momentum-percentile cutoff can only add qualifiers sweep = sorted(report["sweep"], key=lambda r: r["min_momentum_percentile"], reverse=True) counts = [r["total"] for r in sweep]