feat: portfolio simulation + per-trade stats (gaps, hold time, best/worst)

Per-trade additions to the report:
- Gap-through-stop fills: stops now fill at the worse of the stop or the bar's open across every exit model (target, TP, trailing, time), so a loss can exceed -1R; targets never fill better than their level.
- best_r / worst_r, avg holding days, and net R per day of capital deployed on the summary buckets and the time-exit sweep.

Portfolio simulation (the stats a per-setup replay cannot give):
- One capital-constrained book over the qualified setups: 10k start, max 10 concurrent positions (one per ticker, best momentum first), 1% fixed-fractional risk with a 20% no-leverage notional cap, entries at the detection close, 0.1%/side costs, daily mark-to-market.
- Two exit policies compared: S/R target race vs hold-to-horizon.
- Equity-curve stats: final equity, total return, CAGR, max drawdown, annualized daily Sharpe, win rate, avg P&L, best/worst trade, avg hold, entries skipped on a full book, and SPY price return over the same window (benchmark history refreshed to cover the replay span).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-07-02 11:56:29 +02:00
parent 942a22ce65
commit 0f43e755f4
4 changed files with 634 additions and 33 deletions
@@ -216,12 +216,27 @@ def _window_setups(
    return out


+def _stop_fill_r(direction: str, entry: float, stop: float, bar) -> float:
+    """Realized R when the stop is hit on ``bar``: filled at the stop, or at the
+    bar's open when price gapped through it — so a gap can lose more than −1R,
+    matching real fills. Targets are never filled better than their level, so
+    gap modeling only ever makes results more conservative."""
+    risk = abs(entry - stop)
+    if risk <= 0 or entry <= 0:
+        return -1.0
+    if direction == "long":
+        fill = min(stop, bar.open)
+        return (fill - entry) / risk
+    fill = max(stop, bar.open)
+    return (entry - fill) / risk
+
+
 def _tp_primitives(
    direction: str, entry: float, stop: float, forward: list, horizon: int
-) -> tuple[float, bool, float, float]:
+) -> tuple[float, bool, float, float, int | None, float]:
    """Primitives for the take-profit exit model, from the bars after detection.

-    Returns ``(risk_pct, stopped, mfe_pct, close_pct)``:
+    Returns ``(risk_pct, stopped, mfe_pct, close_pct, stop_day, stop_r)``:
      - ``risk_pct``  fraction from entry to stop (the 1R distance)
      - ``stopped``   whether the stop was hit within the horizon
      - ``mfe_pct``   best favourable excursion (fraction) reachable *before* the
@@ -229,27 +244,34 @@ def _tp_primitives(
                      counts as a loss (matching the conservative target model);
                      over the whole horizon if the stop is never hit
      - ``close_pct`` directional return at the horizon-end close (the timeout exit)
+      - ``stop_day``  1-based trading day the stop was pierced, None if never
+      - ``stop_r``    realized R at the stop fill (≤ −1 when the bar gapped
+                      through the stop — see _stop_fill_r); −1.0 when unused

    From these any fixed take-profit level can be scored without re-walking bars:
-    tp reached before stop (``mfe_pct >= tp``) → +tp; else stop → −1R; else the
-    horizon-close move.
+    tp reached before stop (``mfe_pct >= tp``) → +tp; else stop → ``stop_r``;
+    else the horizon-close move.
    """
    long = direction == "long"
    risk_pct = abs(entry - stop) / entry if entry else 0.0
    bars = forward[:horizon]
    if not bars:
-        return risk_pct, False, 0.0, 0.0
+        return risk_pct, False, 0.0, 0.0, None, -1.0
    mfe = 0.0
    stopped = False
-    for r in bars:
+    stop_day: int | None = None
+    stop_r = -1.0
+    for i, r in enumerate(bars):
        if (r.low <= stop) if long else (r.high >= stop):
            stopped = True
+            stop_day = i + 1
+            stop_r = _stop_fill_r(direction, entry, stop, r)
            break
        fav = (r.high - entry) / entry if long else (entry - r.low) / entry
        if fav > mfe:
            mfe = fav
    close_pct = ((bars[-1].close - entry) / entry) * (1.0 if long else -1.0)
-    return risk_pct, stopped, mfe, close_pct
+    return risk_pct, stopped, mfe, close_pct, stop_day, stop_r


 def _trailing_exits(
@@ -281,12 +303,14 @@ def _trailing_exits(
            if long:
                stop_level = max(init_stop, peak * (1 - f))
                if r.low <= stop_level:
-                    result[round(f * 100)] = ((stop_level - entry) / entry) / risk
+                    fill = min(stop_level, r.open)  # gap through fills at the open
+                    result[round(f * 100)] = ((fill - entry) / entry) / risk
                    continue
            else:
                stop_level = min(init_stop, peak * (1 + f))
                if r.high >= stop_level:
-                    result[round(f * 100)] = ((entry - stop_level) / entry) / risk
+                    fill = max(stop_level, r.open)
+                    result[round(f * 100)] = ((entry - fill) / entry) / risk
                    continue
            remaining.append(f)
        active = remaining
@@ -325,10 +349,12 @@ def _time_exits(
        return {int(n): 0.0 for n in horizons}

    stop_day: int | None = None  # 1-based trading day the stop was pierced
+    stop_r = -1.0
    closes: list[float] = []
    for i, r in enumerate(bars):
        if (r.low <= stop) if long else (r.high >= stop):
            stop_day = i + 1
+            stop_r = _stop_fill_r(direction, entry, stop, r)
            break
        closes.append(r.close)

@@ -336,7 +362,7 @@ def _time_exits(
    for h in horizons:
        n = int(h)
        if stop_day is not None and stop_day <= n:
-            result[n] = -1.0
+            result[n] = stop_r
        else:
            # closes can't be empty here: an empty closes means the stop hit on
            # day 1, which the branch above catches for every n >= 1.
@@ -359,21 +385,29 @@ def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) -
        forward_bars = [Bar(date=r.date, high=r.high, low=r.low) for r in forward]

        for s in _window_setups(window, config, activation):
-            outcome, _ = evaluate_setup_against_bars(
+            outcome, outcome_date = evaluate_setup_against_bars(
                s["direction"], s["stop"], s["target"], forward_bars, HORIZON
            )
            if outcome is None:
                continue
+            # Trading days from detection to resolution (expired = full horizon).
+            hold_days = next(
+                (idx + 1 for idx, r in enumerate(forward[:HORIZON]) if r.date == outcome_date),
+                min(HORIZON, len(forward)),
+            )
            target_hit = outcome == OUTCOME_TARGET_HIT
            if outcome == OUTCOME_TARGET_HIT:
                realized_r = s["rr"]
            elif outcome in (OUTCOME_STOP_HIT, OUTCOME_AMBIGUOUS):
-                realized_r = -1.0
+                # Fill at the stop, or at the open when the bar gapped through it.
+                realized_r = _stop_fill_r(
+                    s["direction"], s["entry"], s["stop"], forward[hold_days - 1]
+                )
            else:  # expired
                realized_r = 0.0
            # Take-profit exit primitives (parallel to the target-vs-stop outcome
            # above; aggregated separately into the take-profit sweep).
-            risk_pct, tp_stopped, mfe_pct, tp_close_pct = _tp_primitives(
+            risk_pct, tp_stopped, mfe_pct, tp_close_pct, stop_day, tp_stop_r = _tp_primitives(
                s["direction"], s["entry"], s["stop"], forward, HORIZON
            )
            trail_r = _trailing_exits(
@@ -388,6 +422,9 @@ def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) -
                "date": records[i].date.isoformat(),
                "iso_week": (iso[0], iso[1]),
                "direction": s["direction"],
+                "entry": s["entry"],
+                "stop": s["stop"],
+                "target": s["target"],
                "rr": s["rr"],
                "confidence": s["confidence"],
                "primary_prob": s["primary_prob"],
@@ -401,8 +438,11 @@ def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) -
                "outcome": outcome,
                "target_hit": target_hit,
                "realized_r": realized_r,
+                "hold_days": hold_days,
+                "stop_day": stop_day,
                "risk_pct": risk_pct,
                "tp_stopped": tp_stopped,
+                "tp_stop_r": tp_stop_r,
                "mfe_pct": mfe_pct,
                "tp_close_pct": tp_close_pct,
                "trail_r": trail_r,
@@ -418,6 +458,9 @@ def _bucket_stats(cands: list[dict]) -> dict:
    decided = wins + losses
    rs = [c["realized_r"] for c in cands]
    net_rs = [c["realized_r"] - _cost_r(c) for c in cands]
+    holds = [c["hold_days"] for c in cands if c.get("hold_days")]
+    avg_hold = sum(holds) / len(holds) if holds else None
+    net_avg = sum(net_rs) / len(net_rs) if net_rs else None
    return {
        "total": len(cands),
        "wins": wins,
@@ -426,8 +469,15 @@ def _bucket_stats(cands: list[dict]) -> dict:
        "hit_rate": round(wins / decided * 100, 1) if decided else None,
        "avg_r": round(sum(rs) / len(rs), 3) if rs else None,
        "total_r": round(sum(rs), 2) if rs else None,
-        "net_avg_r": round(sum(net_rs) / len(net_rs), 3) if net_rs else None,
+        "net_avg_r": round(net_avg, 3) if net_avg is not None else None,
        "net_total_r": round(sum(net_rs), 2) if net_rs else None,
+        "best_r": round(max(rs), 2) if rs else None,
+        "worst_r": round(min(rs), 2) if rs else None,
+        "avg_hold_days": round(avg_hold, 1) if avg_hold is not None else None,
+        # Capital efficiency: net expectancy per trading day the capital is tied up.
+        "net_r_per_day": (
+            round(net_avg / avg_hold, 4) if net_avg is not None and avg_hold else None
+        ),
    }


@@ -473,7 +523,7 @@ def _take_profit_bucket(cands: list[dict], tp: float) -> dict:
            r = tp / risk
            wins += 1
        elif c.get("tp_stopped"):
-            r = -1.0
+            r = c.get("tp_stop_r", -1.0)  # gap-aware stop fill, ≤ −1R
        else:
            r = (c.get("tp_close_pct", 0.0)) / risk
        rs.append(r)
@@ -519,16 +569,24 @@ def _trailing_bucket(cands: list[dict], trail_pct: int) -> dict:
 def _time_exit_bucket(cands: list[dict], hold_days: int) -> dict:
    """Stats for the hold-``hold_days`` exit: initial stop active, otherwise out
    at the day-N close. Each candidate carries its realized R per hold length in
-    ``time_r``; a "win" is an exit in profit (R > 0)."""
-    pairs = [
-        (c["time_r"][hold_days], _cost_r(c))
+    ``time_r``; a "win" is an exit in profit (R > 0). The realized hold is the
+    full N days unless the stop cut it short (``stop_day``)."""
+    rows = [
+        (
+            c["time_r"][hold_days],
+            _cost_r(c),
+            min(hold_days, c.get("stop_day") or hold_days),
+        )
        for c in cands
        if c.get("time_r", {}).get(hold_days) is not None
    ]
-    total = len(pairs)
-    rs = [r for r, _ in pairs]
-    net_rs = [r - cost for r, cost in pairs]
+    total = len(rows)
+    rs = [r for r, _, _ in rows]
+    net_rs = [r - cost for r, cost, _ in rows]
+    holds = [h for _, _, h in rows]
    wins = sum(1 for r in rs if r > 0)
+    avg_hold = sum(holds) / total if total else None
+    net_avg = sum(net_rs) / total if total else None
    return {
        "hold_days": hold_days,
        "total": total,
@@ -536,8 +594,14 @@ def _time_exit_bucket(cands: list[dict], hold_days: int) -> dict:
        "win_rate": round(wins / total * 100, 1) if total else None,
        "avg_r": round(sum(rs) / total, 3) if total else None,
        "total_r": round(sum(rs), 2) if total else None,
-        "net_avg_r": round(sum(net_rs) / total, 3) if total else None,
+        "net_avg_r": round(net_avg, 3) if net_avg is not None else None,
        "net_total_r": round(sum(net_rs), 2) if total else None,
+        "best_r": round(max(rs), 2) if rs else None,
+        "worst_r": round(min(rs), 2) if rs else None,
+        "avg_hold_days": round(avg_hold, 1) if avg_hold is not None else None,
+        "net_r_per_day": (
+            round(net_avg / avg_hold, 4) if net_avg is not None and avg_hold else None
+        ),
    }


@@ -934,6 +998,214 @@ def _gate_ablation(candidates: list[dict], activation: dict, threshold: float) -
    return rows


+# ---------------------------------------------------------------------------
+# Portfolio simulation
+# ---------------------------------------------------------------------------
+
+# Book parameters: fixed starting capital, a capped number of concurrent
+# positions (one per ticker), fixed-fractional risk sizing with a no-leverage
+# notional cap, and the same per-side cost as the per-trade tables. Entries are
+# the QUALIFIED setups at their detection close, best momentum first while
+# slots and cash allow.
+SIM_STARTING_CAPITAL = 10_000.0
+SIM_MAX_POSITIONS = 10
+SIM_RISK_PER_TRADE = 0.01  # fraction of equity risked per position (entry→stop)
+SIM_NOTIONAL_CAP = 0.20    # max fraction of equity per position (no margin)
+
+
+def _simulate_portfolio(
+    candidates: list[dict],
+    prices: dict[str, tuple],
+    spy_closes: dict | None,
+    exit_policy: str,
+    hold_days: int,
+) -> dict | None:
+    """Replay the qualified setups as ONE capital-constrained book and report
+    portfolio economics from the daily equity curve (return, CAGR, drawdown,
+    Sharpe) — the numbers the per-setup tables cannot give, because they grade
+    every setup as if capital were infinite.
+
+    ``exit_policy``: "target" races the S/R target against the stop with a
+    timeout at ``hold_days``; "hold" keeps only the initial stop and exits at
+    the ``hold_days``-th close. Stops fill at the worse of stop or open (gaps
+    modeled); positions still open at the end are closed at their last mark.
+    Long-only: candidates whose direction is not "long" are skipped. Returns
+    None when there is nothing to trade.
+    """
+    entries_by_ord: dict[int, list[dict]] = defaultdict(list)
+    for c in candidates:
+        if not c.get("qualified") or c.get("direction") != "long":
+            continue
+        if not c.get("entry") or not c.get("stop"):
+            continue
+        entries_by_ord[date.fromisoformat(c["date"]).toordinal()].append(c)
+    if not entries_by_ord:
+        return None
+
+    # Per-symbol bar lookup: date ordinal -> index into the column arrays.
+    index_of: dict[str, dict[int, int]] = {
+        sym: {o: i for i, o in enumerate(cols[0])} for sym, cols in prices.items()
+    }
+
+    first_ord = min(entries_by_ord)
+    calendar = sorted({o for cols in prices.values() for o in cols[0] if o >= first_ord})
+    if not calendar:
+        return None
+
+    cash = SIM_STARTING_CAPITAL
+    positions: dict[str, dict] = {}
+    curve: list[tuple[int, float]] = []
+    trades: list[dict] = []
+    skipped_full = 0
+
+    def _bar(sym: str, o: int):
+        idx = index_of.get(sym, {}).get(o)
+        if idx is None:
+            return None
+        cols = prices[sym]
+        return SimpleNamespace(
+            open=cols[1][idx], high=cols[2][idx], low=cols[3][idx], close=cols[4][idx]
+        )
+
+    def _close_trade(sym: str, fill: float, reason: str) -> None:
+        nonlocal cash
+        pos = positions.pop(sym)
+        proceeds = pos["shares"] * fill
+        cost = proceeds * COST_PER_SIDE
+        cash += proceeds - cost
+        risk = pos["entry"] - pos["stop"]
+        trades.append({
+            "pnl": proceeds - pos["shares"] * pos["entry"] - cost - pos["entry_cost"],
+            "r": (fill - pos["entry"]) / risk if risk > 0 else 0.0,
+            "hold": pos["bars_held"],
+            "reason": reason,
+        })
+
+    def _marked_equity() -> float:
+        return cash + sum(p["shares"] * p["last_close"] for p in positions.values())
+
+    for o in calendar:
+        # 1) exits on today's bars (stop intraday, target intraday, time at close)
+        for sym in list(positions):
+            pos = positions[sym]
+            bar = _bar(sym, o)
+            if bar is None:
+                continue
+            pos["bars_held"] += 1
+            pos["last_close"] = bar.close
+            if bar.low <= pos["stop"]:
+                # Same-bar stop+target resolves as the loss (conservative, like
+                # the evaluator); gap through the stop fills at the open.
+                _close_trade(sym, min(pos["stop"], bar.open), "stop")
+                continue
+            if exit_policy == "target" and pos["target"] and bar.high >= pos["target"]:
+                _close_trade(sym, pos["target"], "target")
+                continue
+            if pos["bars_held"] >= hold_days:
+                _close_trade(sym, bar.close, "time")
+
+        # 2) entries at today's close, best momentum first
+        equity = _marked_equity()
+        todays = sorted(
+            entries_by_ord.get(o, ()),
+            key=lambda c: c.get("momentum_percentile") or 0.0,
+            reverse=True,
+        )
+        for c in todays:
+            sym = c["symbol"]
+            if sym in positions:
+                continue
+            if len(positions) >= SIM_MAX_POSITIONS:
+                skipped_full += 1
+                continue
+            entry, stop = float(c["entry"]), float(c["stop"])
+            risk_ps = entry - stop
+            if risk_ps <= 0 or entry <= 0:
+                continue
+            shares = min(
+                (equity * SIM_RISK_PER_TRADE) / risk_ps,
+                (equity * SIM_NOTIONAL_CAP) / entry,
+                max(cash, 0.0) / (entry * (1.0 + COST_PER_SIDE)),
+            )
+            if shares * entry < 1.0:  # can't fund a meaningful position
+                continue
+            entry_cost = shares * entry * COST_PER_SIDE
+            cash -= shares * entry + entry_cost
+            positions[sym] = {
+                "shares": shares,
+                "entry": entry,
+                "stop": stop,
+                "target": float(c["target"]) if c.get("target") else None,
+                "entry_cost": entry_cost,
+                "bars_held": 0,
+                "last_close": entry,
+            }
+            equity = _marked_equity()
+
+        curve.append((o, _marked_equity()))
+
+    # Close whatever is still open at its last mark so final equity is realized.
+    for sym in list(positions):
+        _close_trade(sym, positions[sym]["last_close"], "open_at_end")
+    final_equity = cash
+    curve[-1] = (calendar[-1], final_equity)
+
+    total_return_pct = (final_equity / SIM_STARTING_CAPITAL - 1.0) * 100.0
+    years = (calendar[-1] - calendar[0]) / 365.25
+    cagr_pct = (
+        ((final_equity / SIM_STARTING_CAPITAL) ** (1.0 / years) - 1.0) * 100.0
+        if years > 0.25 and final_equity > 0
+        else None
+    )
+
+    peak = float("-inf")
+    max_dd = 0.0
+    for _, eq in curve:
+        peak = max(peak, eq)
+        if peak > 0:
+            max_dd = max(max_dd, (peak - eq) / peak)
+
+    rets = [b / a - 1.0 for (_, a), (_, b) in zip(curve, curve[1:]) if a > 0]
+    sharpe = None
+    if len(rets) > 2:
+        mean = sum(rets) / len(rets)
+        var = sum((x - mean) ** 2 for x in rets) / (len(rets) - 1)
+        if var > 0:
+            sharpe = mean / math.sqrt(var) * math.sqrt(252)
+
+    pnls = [t["pnl"] for t in trades]
+    wins = sum(1 for p in pnls if p > 0)
+    spy_pct = None
+    if spy_closes:
+        from app.services.benchmark_service import benchmark_return_pct
+
+        spy_pct = benchmark_return_pct(
+            spy_closes, date.fromordinal(calendar[0]), date.fromordinal(calendar[-1])
+        )
+
+    return {
+        "starting_capital": SIM_STARTING_CAPITAL,
+        "final_equity": round(final_equity, 2),
+        "total_return_pct": round(total_return_pct, 1),
+        "cagr_pct": round(cagr_pct, 1) if cagr_pct is not None else None,
+        "max_drawdown_pct": round(max_dd * 100.0, 1),
+        "sharpe": round(sharpe, 2) if sharpe is not None else None,
+        "trades": len(trades),
+        "win_rate": round(wins / len(trades) * 100.0, 1) if trades else None,
+        "avg_trade_pnl": round(sum(pnls) / len(pnls), 2) if pnls else None,
+        "best_trade_r": round(max(t["r"] for t in trades), 2) if trades else None,
+        "worst_trade_r": round(min(t["r"] for t in trades), 2) if trades else None,
+        "best_trade_pnl": round(max(pnls), 2) if pnls else None,
+        "worst_trade_pnl": round(min(pnls), 2) if pnls else None,
+        "avg_hold_days": (
+            round(sum(t["hold"] for t in trades) / len(trades), 1) if trades else None
+        ),
+        "skipped_book_full": skipped_full,
+        "spy_return_pct": round(spy_pct, 1) if spy_pct is not None else None,
+        "start_date": date.fromordinal(calendar[0]).isoformat(),
+        "end_date": date.fromordinal(calendar[-1]).isoformat(),
+    }
+
+
 async def run_backtest(
    db: AsyncSession,
    progress_cb: Callable[[int, int, str], None] | None = None,
@@ -1037,6 +1309,43 @@ async def run_backtest(
        cands = [c for c in candidates if _momentum_qualifies(c, threshold)]
        sweep.append({"min_momentum_percentile": threshold, **_bucket_stats(cands)})

+    # Portfolio simulation: re-fetch bars for just the qualified symbols (memory-
+    # light vs retaining every ticker's columns through the replay) and replay
+    # the book once per exit policy. Best-effort — the report stands without it.
+    hold_horizon = max(TIME_EXIT_DAYS)
+    sim_policies: list[dict] = []
+    try:
+        qual_symbols = sorted({c["symbol"] for c in candidates if c.get("qualified")})
+        price_columns: dict[str, tuple] = {}
+        for sym in qual_symbols:
+            cols = await _fetch_columns(db, sym)
+            if cols is not None:
+                price_columns[sym] = cols
+
+        spy_closes: dict | None = None
+        try:
+            from app.services.benchmark_service import (
+                load_benchmark_closes,
+                refresh_benchmark_prices,
+            )
+
+            oldest = min((cols[0][0] for cols in price_columns.values()), default=None)
+            if oldest is not None:
+                days_needed = (date.today() - date.fromordinal(oldest)).days + 30
+                await refresh_benchmark_prices(db, days=days_needed)
+            spy_closes = await load_benchmark_closes(db)
+        except Exception:
+            logger.exception("Benchmark load for the portfolio sim failed")
+
+        for policy in ("target", "hold"):
+            sim = _simulate_portfolio(
+                candidates, price_columns, spy_closes, policy, hold_horizon
+            )
+            if sim is not None:
+                sim_policies.append({"policy": policy, **sim})
+    except Exception:
+        logger.exception("Portfolio simulation failed")
+
    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "tickers": total,
@@ -1070,6 +1379,28 @@ async def run_backtest(
        "take_profit_sweep": [_take_profit_bucket(qualified, tp) for tp in TP_LEVELS],
        "trailing_sweep": [_trailing_bucket(qualified, round(f * 100)) for f in TRAIL_LEVELS],
        "time_exit_sweep": [_time_exit_bucket(qualified, n) for n in TIME_EXIT_DAYS],
+        "portfolio_sim": {
+            "params": {
+                "starting_capital": SIM_STARTING_CAPITAL,
+                "max_positions": SIM_MAX_POSITIONS,
+                "risk_per_trade_pct": round(SIM_RISK_PER_TRADE * 100, 2),
+                "notional_cap_pct": round(SIM_NOTIONAL_CAP * 100, 1),
+                "cost_per_side_pct": round(COST_PER_SIDE * 100, 3),
+                "hold_days": hold_horizon,
+            },
+            "policies": sim_policies,
+            "note": (
+                "One capital-constrained book over the same qualified setups the "
+                "tables above grade per-setup: at most "
+                f"{SIM_MAX_POSITIONS} concurrent positions (one per ticker), best "
+                "momentum first, fixed-fractional risk sizing with a no-leverage "
+                "cap, entries at the detection close, stops filled at the worse "
+                "of stop or open. 'target' races the S/R target against the stop "
+                "(timeout at the horizon); 'hold' keeps the initial stop and "
+                "exits at the horizon close. SPY return is price-only over the "
+                "same window. In-sample; no dividends."
+            ),
+        },
        "calibration": _calibration(candidates),
        "signal_eval": _signal_evaluation(collected),
        "signal_eval_note": (
@@ -1084,6 +1415,9 @@ async def run_backtest(
        ),
        "note": (
            "Sentiment & fundamentals held neutral (no point-in-time history). "
+            "Stops fill at the worse of the stop or the bar's open (gaps through "
+            "the stop are modeled, so a loss can exceed −1R); targets never fill "
+            "better than their level. "
            "~6 months ≈ one market regime — treat as directional, not gospel."
        ),
    }
@@ -6,15 +6,30 @@ import { Callout } from '../ui/Callout';
 import { Disclosure } from '../ui/Disclosure';
 import { Section } from '../ui/Section';
 import { useToast } from '../ui/Toast';
-import type { BacktestBucket } from '../../lib/types';
+import type { BacktestBucket, BacktestPortfolioPolicy } from '../../lib/types';

-function fmtR(v: number | null): string {
-  if (v === null) return '—';
+function fmtR(v: number | null | undefined): string {
+  if (v === null || v === undefined) return '—';
  return `${v > 0 ? '+' : ''}${v.toFixed(2)}R`;
 }
 function fmtPct(v: number | null): string {
  return v === null ? '—' : `${v.toFixed(1)}%`;
 }
+function fmtMoney(v: number | null | undefined): string {
+  if (v === null || v === undefined) return '—';
+  return v.toLocaleString('en-US', { minimumFractionDigits: 2, maximumFractionDigits: 2 });
+}
+function fmtSignedPct(v: number | null | undefined): string {
+  if (v === null || v === undefined) return '—';
+  return `${v > 0 ? '+' : ''}${v.toFixed(1)}%`;
+}
+function fmtDays(v: number | null | undefined): string {
+  return v === null || v === undefined ? '—' : `${v.toFixed(1)}d`;
+}
+function fmtRPerDay(v: number | null | undefined): string {
+  if (v === null || v === undefined) return '—';
+  return `${v > 0 ? '+' : ''}${v.toFixed(3)}R`;
+}
 function rColor(v: number | null): string {
  if (v === null) return 'text-gray-400';
  if (v > 0) return 'text-emerald-400';
@@ -40,6 +55,11 @@ const ABLATION_LABELS: Record<string, string> = {
  momentum_only: 'Momentum only (no floors)',
 };

+const POLICY_LABELS: Record<string, string> = {
+  target: 'S/R target exit',
+  hold: 'Hold to horizon',
+};
+
 // Prefer the net-of-costs number when the report carries it; older cached
 // reports (pre-cost model) fall back to gross.
 function netOrGross(r: { avg_r: number | null; net_avg_r?: number | null }): number | null {
@@ -91,6 +111,10 @@ function BucketRow({ label, b }: { label: string; b: BacktestBucket }) {
      <td className="num px-4 py-2.5 text-right text-gray-200">{fmtPct(b.hit_rate)}</td>
      <td className={`num px-4 py-2.5 text-right ${rColor(b.avg_r)}`}>{fmtR(b.avg_r)}</td>
      <td className={`num px-4 py-2.5 text-right ${rColor(b.net_avg_r ?? null)}`}>{fmtR(b.net_avg_r ?? null)}</td>
+      <td className="num px-4 py-2.5 text-right text-emerald-400">{fmtR(b.best_r)}</td>
+      <td className="num px-4 py-2.5 text-right text-red-400">{fmtR(b.worst_r)}</td>
+      <td className="num px-4 py-2.5 text-right text-gray-400">{fmtDays(b.avg_hold_days)}</td>
+      <td className={`num px-4 py-2.5 text-right ${rColor(b.net_r_per_day ?? null)}`}>{fmtRPerDay(b.net_r_per_day)}</td>
    </tr>
  );
 }
@@ -112,6 +136,7 @@ export function BacktestPanel() {
    report?.time_exit_sweep && report.time_exit_sweep.length > 0
      ? Math.max(...report.time_exit_sweep.map((r) => netOrGross(r) ?? -Infinity))
      : null;
+  const sim = report?.portfolio_sim ?? null;

  const run = useMutation({
    mutationFn: () => triggerJob('backtest'),
@@ -202,6 +227,10 @@ export function BacktestPanel() {
                    <th className="px-4 py-2.5 text-right">Hit Rate</th>
                    <th className="px-4 py-2.5 text-right">Avg R</th>
                    <th className="px-4 py-2.5 text-right">Net Avg R</th>
+                    <th className="px-4 py-2.5 text-right">Best R</th>
+                    <th className="px-4 py-2.5 text-right">Worst R</th>
+                    <th className="px-4 py-2.5 text-right">Avg Hold</th>
+                    <th className="px-4 py-2.5 text-right">Net R/d</th>
                  </tr>
                </thead>
                <tbody>
@@ -326,8 +355,9 @@ export function BacktestPanel() {
                </p>
                <p className="mb-2 text-[11px] text-gray-500">
                  Models a realistic exit instead of waiting for the far S/R target: bank{' '}
-                  <span className="text-gray-300">+X%</span> if price reaches it before the stop, else −1R on
-                  the stop, else exit at the {report.params.horizon_days}-day close. In R, so it compares to the
+                  <span className="text-gray-300">+X%</span> if price reaches it before the stop, else the
+                  stop-fill loss (a gap through the stop fills at the open, so it can exceed −1R), else exit
+                  at the {report.params.horizon_days}-day close. In R, so it compares to the
                  target model above. <span className="text-gray-300">Hit Rate = how often you'd have banked
                  +X%</span> (how far winners actually run) — no top-ticking, it's the level you'd really set.
                  The setup's own S/R target is <em>not</em> used here (exiting at that target is the model
@@ -440,6 +470,10 @@ export function BacktestPanel() {
                        <th className="px-4 py-2.5 text-right">Avg R</th>
                        <th className="px-4 py-2.5 text-right">Net Avg R</th>
                        <th className="px-4 py-2.5 text-right">Total R</th>
+                        <th className="px-4 py-2.5 text-right">Best R</th>
+                        <th className="px-4 py-2.5 text-right">Worst R</th>
+                        <th className="px-4 py-2.5 text-right">Avg Hold</th>
+                        <th className="px-4 py-2.5 text-right">Net R/d</th>
                      </tr>
                    </thead>
                    <tbody>
@@ -457,6 +491,10 @@ export function BacktestPanel() {
                            <td className={`num px-4 py-2.5 text-right ${rColor(row.avg_r)}`}>{fmtR(row.avg_r)}</td>
                            <td className={`num px-4 py-2.5 text-right font-semibold ${rColor(row.net_avg_r ?? null)}`}>{fmtR(row.net_avg_r ?? null)}</td>
                            <td className={`num px-4 py-2.5 text-right ${rColor(row.total_r)}`}>{fmtR(row.total_r)}</td>
+                            <td className="num px-4 py-2.5 text-right text-emerald-400">{fmtR(row.best_r)}</td>
+                            <td className="num px-4 py-2.5 text-right text-red-400">{fmtR(row.worst_r)}</td>
+                            <td className="num px-4 py-2.5 text-right text-gray-400">{fmtDays(row.avg_hold_days)}</td>
+                            <td className={`num px-4 py-2.5 text-right ${rColor(row.net_r_per_day ?? null)}`}>{fmtRPerDay(row.net_r_per_day)}</td>
                          </tr>
                        );
                      })}
@@ -466,6 +504,63 @@ export function BacktestPanel() {
              </div>
            )}

+            {sim && sim.policies.length > 0 && (
+              <div>
+                <p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
+                  Portfolio simulation
+                </p>
+                <p className="mb-2 text-[11px] text-gray-500">
+                  {sim.note ?? 'One capital-constrained book over the qualified setups.'}{' '}
+                  <span className="text-gray-300">
+                    Start {fmtMoney(sim.params.starting_capital)} · max {sim.params.max_positions} positions ·{' '}
+                    {sim.params.risk_per_trade_pct}% risk/trade · {sim.params.notional_cap_pct}% notional cap ·{' '}
+                    {sim.params.cost_per_side_pct}%/side costs · {sim.policies[0].start_date} → {sim.policies[0].end_date}
+                  </span>
+                </p>
+                <div className="glass overflow-x-auto">
+                  <table className="w-full text-sm">
+                    <thead>
+                      <tr className="border-b border-white/[0.06] text-left text-xs uppercase tracking-wider text-gray-500">
+                        <th className="px-4 py-2.5">Metric</th>
+                        {sim.policies.map((p) => (
+                          <th key={p.policy} className="px-4 py-2.5 text-right">
+                            {POLICY_LABELS[p.policy] ?? p.policy}
+                          </th>
+                        ))}
+                      </tr>
+                    </thead>
+                    <tbody>
+                      {(
+                        [
+                          ['Final equity', (p) => fmtMoney(p.final_equity), (p) => rColor(p.final_equity - p.starting_capital)],
+                          ['Total return', (p) => fmtSignedPct(p.total_return_pct), (p) => rColor(p.total_return_pct)],
+                          ['SPY return (same window)', (p) => fmtSignedPct(p.spy_return_pct), () => 'text-gray-300'],
+                          ['CAGR', (p) => fmtSignedPct(p.cagr_pct), (p) => rColor(p.cagr_pct)],
+                          ['Max drawdown', (p) => `−${p.max_drawdown_pct.toFixed(1)}%`, () => 'text-amber-400'],
+                          ['Sharpe (daily, annualized)', (p) => (p.sharpe === null ? '—' : p.sharpe.toFixed(2)), () => 'text-gray-200'],
+                          ['Trades', (p) => String(p.trades), () => 'text-gray-300'],
+                          ['Win rate', (p) => fmtPct(p.win_rate), () => 'text-gray-200'],
+                          ['Avg P&L / trade', (p) => fmtMoney(p.avg_trade_pnl), (p) => rColor(p.avg_trade_pnl)],
+                          ['Best / worst trade', (p) => `${fmtR(p.best_trade_r)} / ${fmtR(p.worst_trade_r)}`, () => 'text-gray-300'],
+                          ['Avg holding time', (p) => fmtDays(p.avg_hold_days), () => 'text-gray-300'],
+                          ['Entries skipped (book full)', (p) => String(p.skipped_book_full), () => 'text-gray-500'],
+                        ] as [string, (p: BacktestPortfolioPolicy) => string, (p: BacktestPortfolioPolicy) => string][]
+                      ).map(([label, fmt, color]) => (
+                        <tr key={label} className="border-b border-white/[0.04]">
+                          <td className="px-4 py-2.5 font-medium text-gray-200">{label}</td>
+                          {sim.policies.map((p) => (
+                            <td key={p.policy} className={`num px-4 py-2.5 text-right ${color(p)}`}>
+                              {fmt(p)}
+                            </td>
+                          ))}
+                        </tr>
+                      ))}
+                    </tbody>
+                  </table>
+                </div>
+              </div>
+            )}
+
            <div>
              <p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
                Probability calibration
@@ -232,6 +232,10 @@ export interface BacktestBucket {
  // Net of transaction costs — optional so a stale cached report still renders.
  net_avg_r?: number | null;
  net_total_r?: number | null;
+  best_r?: number | null;
+  worst_r?: number | null;
+  avg_hold_days?: number | null;
+  net_r_per_day?: number | null;
 }

 export interface BacktestCalibrationRow {
@@ -276,6 +280,45 @@ export interface BacktestTimeExitRow {
  total_r: number | null;
  net_avg_r?: number | null;
  net_total_r?: number | null;
+  best_r?: number | null;
+  worst_r?: number | null;
+  avg_hold_days?: number | null;
+  net_r_per_day?: number | null;
+}
+
+export interface BacktestPortfolioPolicy {
+  // Equity-curve summary for one exit policy of the portfolio simulation.
+  // Policy key; the panel maps it through POLICY_LABELS and falls back to the raw key.
+  policy: string;
+  // Currency amounts (rendered with fmtMoney).
+  starting_capital: number;
+  final_equity: number;
+  // Percent values (rendered with fmtSignedPct).
+  total_return_pct: number;
+  // null when the replay window is too short to annualize (see the hold-policy test).
+  cagr_pct: number | null;
+  // The panel prepends "−" and calls toFixed directly, so this is expected to be
+  // a non-negative magnitude and never null.
+  max_drawdown_pct: number;
+  // Labelled "Sharpe (daily, annualized)" in the panel; null renders as "—".
+  sharpe: number | null;
+  trades: number;
+  // Percent on a 0–100 scale (the tests assert 100.0 for an all-winner run).
+  win_rate: number | null;
+  avg_trade_pnl: number | null;
+  // Best / worst single trade in R multiples (rendered with fmtR).
+  best_trade_r: number | null;
+  worst_trade_r: number | null;
+  best_trade_pnl: number | null;
+  worst_trade_pnl: number | null;
+  avg_hold_days: number | null;
+  // Labelled "Entries skipped (book full)" in the panel.
+  skipped_book_full: number;
+  // Labelled "SPY return (same window)"; nullable.
+  spy_return_pct: number | null;
+  // Rendered as "start_date → end_date" from the first policy in the list.
+  start_date: string;
+  end_date: string;
+}
+
+export interface BacktestPortfolioSim {
+  // Simulation settings echoed back for display. The *_pct fields are rendered
+  // with a literal "%" suffix, so they are already in percent units.
+  params: {
+    starting_capital: number;
+    max_positions: number;
+    risk_per_trade_pct: number;
+    notional_cap_pct: number;
+    cost_per_side_pct: number;
+    // Not rendered by the panel; presumably the hold-to-horizon length — confirm against the backend.
+    hold_days: number;
+  };
+  // One entry per exit policy; the panel hides the whole section when this is empty.
+  policies: BacktestPortfolioPolicy[];
+  // Optional description; the panel falls back to a built-in sentence when absent.
+  note?: string;
 }

 export interface BacktestGateAblationRow extends BacktestBucket {
@@ -319,6 +362,7 @@ export interface BacktestReport {
  take_profit_sweep?: BacktestTakeProfitRow[];
  trailing_sweep?: BacktestTrailingRow[];
  time_exit_sweep?: BacktestTimeExitRow[];
+  portfolio_sim?: BacktestPortfolioSim;
  calibration: BacktestCalibrationRow[];
  signal_eval?: BacktestSignalEvalRow[];
  signal_eval_note?: string;
@@ -32,6 +32,7 @@ def _cand(
    qualified: bool = True,
    direction: str = "long",
    risk_pct: float = 0.05,
+    hold_days: int = 10,
 ) -> dict:
    target_hit = outcome == OUTCOME_TARGET_HIT
    realized = rr if target_hit else (0.0 if outcome == OUTCOME_EXPIRED else -1.0)
@@ -44,6 +45,7 @@ def _cand(
        "qualified": qualified,
        "direction": direction,
        "risk_pct": risk_pct,
+        "hold_days": hold_days,
    }


@@ -51,35 +53,64 @@ def _cand(
 _COST_R_005 = 2 * bt.COST_PER_SIDE / 0.05


-def _bar(high: float, low: float, close: float) -> SimpleNamespace:
-    return SimpleNamespace(high=high, low=low, close=close)
+def _bar(high: float, low: float, close: float, open_: float | None = None) -> SimpleNamespace:
+    """Synthetic daily bar. ``open`` defaults to the high so a stop is pierced
+    intraday (fill at the stop level); pass an explicit open beyond the stop to
+    model a gap through it."""
+    # NOTE(review): the keyword is ``open_``; the resulting bar attribute is ``open``.
+    # The "fill at the stop level" default holds for a long whose high is at or
+    # above the stop (``_stop_fill_r`` takes min(stop, open)). For a short it
+    # takes max(stop, open), so a default open at a high above the stop fills at
+    # that high — pass ``open_`` explicitly in short stop-out tests.
+    return SimpleNamespace(
+        high=high, low=low, close=close, open=open_ if open_ is not None else high
+    )
+
+
+class TestStopFillR:
+    # Every case uses entry 100 with a 5-point stop distance, so R = P&L / 5.
+    def test_intraday_fill_at_stop(self):
+        # _bar's default open is the high (101), above the 95 stop → the fill is
+        # the stop itself, exactly −1R.
+        assert bt._stop_fill_r("long", 100.0, 95.0, _bar(101, 94, 96)) == pytest.approx(-1.0)
+
+    def test_gap_fill_at_open(self):
+        # Opens at 92, below the 95 stop → filled at the open, worse than −1R.
+        assert bt._stop_fill_r("long", 100.0, 95.0, _bar(93, 90, 91, open_=92)) == pytest.approx(-1.6)
+
+    def test_short_gap_fill_at_open(self):
+        # Short stop 105; opens at 107 above it → fill 107.
+        assert bt._stop_fill_r("short", 100.0, 105.0, _bar(110, 104, 108, open_=107)) == pytest.approx(-1.4)


 class TestTakeProfitPrimitives:
    def test_long_tp_reachable_before_stop(self):
-        risk, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, [_bar(109, 101, 108)], 30)
+        risk, stopped, mfe, close_pct, stop_day, _ = bt._tp_primitives("long", 100.0, 95.0, [_bar(109, 101, 108)], 30)
        assert risk == pytest.approx(0.05)
        assert stopped is False
        assert mfe == pytest.approx(0.09)
        assert close_pct == pytest.approx(0.08)
+        assert stop_day is None

    def test_long_stop_zeroes_mfe(self):
        # Low pierces the stop on the only bar → loss, nothing banked before it.
-        risk, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, [_bar(101, 94, 96)], 30)
+        risk, stopped, mfe, close_pct, stop_day, stop_r = bt._tp_primitives("long", 100.0, 95.0, [_bar(101, 94, 96)], 30)
        assert stopped is True
        assert mfe == pytest.approx(0.0)
        assert close_pct == pytest.approx(-0.04)
+        assert stop_day == 1
+        assert stop_r == pytest.approx(-1.0)
+
+    def test_gap_through_stop_loses_more_than_1r(self):
+        _, stopped, _, _, stop_day, stop_r = bt._tp_primitives(
+            "long", 100.0, 95.0, [_bar(93, 90, 91, open_=92)], 30
+        )
+        assert stopped is True
+        assert stop_day == 1
+        assert stop_r == pytest.approx(-1.6)  # filled at the 92 open, not the 95 stop

    def test_long_drift_no_trigger(self):
        bars = [_bar(102, 99, 101), _bar(103, 100, 102)]
-        risk, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, bars, 30)
+        risk, stopped, mfe, close_pct, _, _ = bt._tp_primitives("long", 100.0, 95.0, bars, 30)
        assert stopped is False
        assert mfe == pytest.approx(0.03)
        assert close_pct == pytest.approx(0.02)

    def test_short_direction(self):
        # short entry 100, stop 105; price falls → favourable = (entry - low)/entry
-        risk, stopped, mfe, close_pct = bt._tp_primitives("short", 100.0, 105.0, [_bar(101, 92, 93)], 30)
+        risk, stopped, mfe, close_pct, _, _ = bt._tp_primitives("short", 100.0, 105.0, [_bar(101, 92, 93)], 30)
        assert risk == pytest.approx(0.05)
        assert stopped is False
        assert mfe == pytest.approx(0.08)
@@ -131,6 +162,12 @@ class TestTrailingExits:
        assert res[10] == pytest.approx(0.8)
        assert res[5] == pytest.approx(1.4)

+    def test_gap_through_stop_fills_at_open(self):
+        # Initial stop 90 governs (20% trail from peak 100 is lower); the bar
+        # opens at 85, below it → fill at the open.
+        # Expected R: (85 − 100) / 10 = −1.5, risk being the 10-point stop distance.
+        res = bt._trailing_exits("long", 100.0, 90.0, (0.20,), [_bar(88, 84, 86, open_=85)], 30)
+        assert res[20] == pytest.approx(-1.5)
+

 class TestTrailingBucket:
    def test_bucket(self):
@@ -177,6 +214,10 @@ class TestTimeExits:
        res = bt._time_exits("long", 100.0, 100.0, [_bar(103, 99, 102)], (5,))
        assert res[5] == 0.0

+    def test_gap_through_stop_fills_at_open(self):
+        # The only bar opens at 92, below the 95 stop, so the 5-day result is the
+        # gap fill at the open: (92 − 100) / 5 = −1.6R.
+        res = bt._time_exits("long", 100.0, 95.0, [_bar(93, 90, 91, open_=92)], (5,))
+        assert res[5] == pytest.approx(-1.6)
+

 class TestTimeExitBucket:
    def test_bucket(self):
@@ -192,6 +233,11 @@ class TestTimeExitBucket:
        assert b["win_rate"] == pytest.approx(66.7, abs=0.1)
        assert b["avg_r"] == pytest.approx(0.3, abs=0.01)
        assert b["net_avg_r"] == pytest.approx(0.28, abs=0.01)
+        assert b["best_r"] == pytest.approx(1.4)
+        assert b["worst_r"] == pytest.approx(-1.0)
+        # No stop_day on any candidate → every hold runs the full 5 days.
+        assert b["avg_hold_days"] == 5.0
+        assert b["net_r_per_day"] == pytest.approx(0.28 / 5.0, abs=0.001)

    def test_missing_hold_skipped(self):
        b = bt._time_exit_bucket([{"time_r": {5: 1.0}}], 21)
@@ -263,6 +309,78 @@ class TestGateAblation:
        assert rows["all_floors"]["total"] == 2


+def _sim_prices(start_ord: int, closes: list[float]) -> tuple:
+    """Column arrays for consecutive daily bars: open = close (no gaps),
+    high/low = close ± 1."""
+    # One integer per bar, consecutive from start_ord. Callers pass a
+    # date.toordinal() value, so these are calendar days (weekends included).
+    ords = list(range(start_ord, start_ord + len(closes)))
+    # Column order appears to be (ordinals, open, high, low, close, volume) —
+    # inferred from the docstring and the hand-built tuple in the gap test;
+    # confirm against _simulate_portfolio.
+    return (
+        ords,
+        list(closes),
+        [c + 1.0 for c in closes],
+        [c - 1.0 for c in closes],
+        list(closes),
+        [1_000_000] * len(closes),
+    )
+
+
+def _sim_cand(
+    sym: str, day_ord: int, entry: float, stop: float, target: float, mp: float = 90.0
+) -> dict:
+    # Minimal qualified long candidate for the portfolio simulator. day_ord is a
+    # date ordinal stored as an ISO date string; mp is the momentum percentile.
+    return {
+        "qualified": True,
+        "direction": "long",
+        "symbol": sym,
+        "date": date.fromordinal(day_ord).isoformat(),
+        "entry": entry,
+        "stop": stop,
+        "target": target,
+        "momentum_percentile": mp,
+    }
+
+
+class TestSimulatePortfolio:
+    # First bar / detection day shared by every scenario (2025-01-06 as an ordinal).
+    ORD = date(2025, 1, 6).toordinal()
+
+    def test_hold_policy_accounting(self):
+        # Single long candidate under the "hold" policy with a 3-day horizon.
+        closes = [100.0, 102.0, 104.0, 106.0, 108.0, 110.0]
+        prices = {"AAA": _sim_prices(self.ORD, closes)}
+        cand = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=130.0)
+        sim = bt._simulate_portfolio([cand], prices, None, "hold", 3)
+        assert sim is not None
+        assert sim["trades"] == 1
+        # 20 shares (1% risk / $5 stop distance), exit at the day-3 close 106:
+        # pnl = 2120 − 2000 − 2.00 entry cost − 2.12 exit cost = 115.88
+        # (the 2000 notional is exactly 20% of the 10k start, i.e. at the cap
+        # described for the simulation, not above it)
+        assert sim["final_equity"] == pytest.approx(10_115.88, abs=0.01)
+        assert sim["win_rate"] == 100.0
+        # (106 − 100) / 5 = 1.2R, gross of costs.
+        assert sim["best_trade_r"] == pytest.approx(1.2)
+        assert sim["avg_hold_days"] == 3.0
+        assert sim["max_drawdown_pct"] == 0.0
+        assert sim["cagr_pct"] is None  # window far too short to annualize
+        # Presumably None because the third argument (benchmark data?) is None —
+        # confirm against _simulate_portfolio.
+        assert sim["spy_return_pct"] is None
+
+    def test_target_policy_exits_at_target(self):
+        closes = [100.0, 102.0, 104.0, 106.0, 108.0, 110.0]
+        prices = {"AAA": _sim_prices(self.ORD, closes)}
+        # _sim_prices sets high = close + 1, so the third bar's high is 105 (the
+        # target) while every low stays above the 95 stop.
+        cand = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=105.0)
+        sim = bt._simulate_portfolio([cand], prices, None, "target", 30)
+        assert sim is not None
+        assert sim["trades"] == 1
+        assert sim["best_trade_r"] == pytest.approx(1.0)  # filled exactly at 105
+
+    def test_stop_gap_fills_at_open(self):
+        # Day-1 bar gaps to a 90 open, below the 95 stop → fill at the open.
+        ords = list(range(self.ORD, self.ORD + 2))
+        prices = {"AAA": (ords, [100.0, 90.0], [101.0, 92.0], [99.0, 88.0], [100.0, 91.0], [1, 1])}
+        cand = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=120.0)
+        sim = bt._simulate_portfolio([cand], prices, None, "hold", 30)
+        assert sim is not None
+        assert sim["trades"] == 1
+        assert sim["worst_trade_r"] == pytest.approx(-2.0)  # (90 − 100) / 5
+
+    def test_nothing_qualified_returns_none(self):
+        # No candidates and no price data → the simulator reports no result.
+        assert bt._simulate_portfolio([], {}, None, "hold", 30) is None
+
+
 def test_bucket_stats_counts_and_expectancy():
    cands = [
        _cand(70, OUTCOME_TARGET_HIT, 3.0),   # +3R win
@@ -283,6 +401,10 @@ def test_bucket_stats_counts_and_expectancy():
    # net = gross minus a 0.04R round trip per candidate (risk_pct 0.05)
    assert s["net_avg_r"] == pytest.approx(1.0 - _COST_R_005, abs=0.001)
    assert s["net_total_r"] == pytest.approx(4.0 - 4 * _COST_R_005, abs=0.01)
+    assert s["best_r"] == 3.0
+    assert s["worst_r"] == -1.0
+    assert s["avg_hold_days"] == 10.0
+    assert s["net_r_per_day"] == pytest.approx((1.0 - _COST_R_005) / 10.0, abs=0.001)


 def test_bucket_stats_empty():
@@ -394,6 +516,12 @@ async def test_run_backtest_smoke(session):
    # time-exit sweep covers the configured hold lengths
    assert [r["hold_days"] for r in report["time_exit_sweep"]] == list(bt.TIME_EXIT_DAYS)

+    # portfolio simulation section is always present (policies may be empty
+    # when nothing qualifies)
+    assert "portfolio_sim" in report
+    assert isinstance(report["portfolio_sim"]["policies"], list)
+    assert report["portfolio_sim"]["params"]["max_positions"] == bt.SIM_MAX_POSITIONS
+
    # sweep: lowering the momentum-percentile cutoff can only add qualifiers
    sweep = sorted(report["sweep"], key=lambda r: r["min_momentum_percentile"], reverse=True)
    counts = [r["total"] for r in sweep]