feat: portfolio simulation + per-trade stats (gaps, hold time, best/worst)

Per-trade additions to the report:
- Gap-through-stop fills: stops now fill at the worse of the stop or the bar's open across every exit model (target, TP, trailing, time), so a loss can exceed -1R; targets never fill better than their level.
- best_r / worst_r, avg holding days, and net R per day of capital deployed on the summary buckets and the time-exit sweep.

Portfolio simulation (the stats a per-setup replay cannot give):
- One capital-constrained book over the qualified setups: 10k start, max 10 concurrent positions (one per ticker, best momentum first), 1% fixed-fractional risk with a 20% no-leverage notional cap, entries at the detection close, 0.1%/side costs, daily mark-to-market.
- Two exit policies compared: S/R target race vs hold-to-horizon.
- Equity-curve stats: final equity, total return, CAGR, max drawdown, annualized daily Sharpe, win rate, avg P&L, best/worst trade, avg hold, entries skipped on a full book, and SPY price return over the same window (benchmark history refreshed to cover the replay span).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-07-02 11:56:29 +02:00
parent 942a22ce65
commit 0f43e755f4
4 changed files with 634 additions and 33 deletions
@@ -32,6 +32,7 @@ def _cand(
    qualified: bool = True,
    direction: str = "long",
    risk_pct: float = 0.05,
+    hold_days: int = 10,
 ) -> dict:
    target_hit = outcome == OUTCOME_TARGET_HIT
    realized = rr if target_hit else (0.0 if outcome == OUTCOME_EXPIRED else -1.0)
@@ -44,6 +45,7 @@ def _cand(
        "qualified": qualified,
        "direction": direction,
        "risk_pct": risk_pct,
+        "hold_days": hold_days,
    }


@@ -51,35 +53,64 @@ def _cand(
 _COST_R_005 = 2 * bt.COST_PER_SIDE / 0.05


-def _bar(high: float, low: float, close: float) -> SimpleNamespace:
-    return SimpleNamespace(high=high, low=low, close=close)
+def _bar(high: float, low: float, close: float, open_: float | None = None) -> SimpleNamespace:
+    """Synthetic daily bar. ``open`` defaults to the high so a stop is pierced
+    intraday (fill at the stop level); pass an explicit open beyond the stop to
+    model a gap through it."""
+    return SimpleNamespace(
+        high=high, low=low, close=close, open=open_ if open_ is not None else high
+    )
+
+
+class TestStopFillR:
+    def test_intraday_fill_at_stop(self):
+        assert bt._stop_fill_r("long", 100.0, 95.0, _bar(101, 94, 96)) == pytest.approx(-1.0)
+
+    def test_gap_fill_at_open(self):
+        # Opens at 92, below the 95 stop → filled at the open, worse than −1R.
+        assert bt._stop_fill_r("long", 100.0, 95.0, _bar(93, 90, 91, open_=92)) == pytest.approx(-1.6)
+
+    def test_short_gap_fill_at_open(self):
+        # Short stop 105; opens at 107 above it → fill 107.
+        assert bt._stop_fill_r("short", 100.0, 105.0, _bar(110, 104, 108, open_=107)) == pytest.approx(-1.4)


 class TestTakeProfitPrimitives:
    def test_long_tp_reachable_before_stop(self):
-        risk, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, [_bar(109, 101, 108)], 30)
+        risk, stopped, mfe, close_pct, stop_day, _ = bt._tp_primitives("long", 100.0, 95.0, [_bar(109, 101, 108)], 30)
        assert risk == pytest.approx(0.05)
        assert stopped is False
        assert mfe == pytest.approx(0.09)
        assert close_pct == pytest.approx(0.08)
+        assert stop_day is None

    def test_long_stop_zeroes_mfe(self):
        # Low pierces the stop on the only bar → loss, nothing banked before it.
-        risk, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, [_bar(101, 94, 96)], 30)
+        risk, stopped, mfe, close_pct, stop_day, stop_r = bt._tp_primitives("long", 100.0, 95.0, [_bar(101, 94, 96)], 30)
        assert stopped is True
        assert mfe == pytest.approx(0.0)
        assert close_pct == pytest.approx(-0.04)
+        assert stop_day == 1
+        assert stop_r == pytest.approx(-1.0)
+
+    def test_gap_through_stop_loses_more_than_1r(self):
+        _, stopped, _, _, stop_day, stop_r = bt._tp_primitives(
+            "long", 100.0, 95.0, [_bar(93, 90, 91, open_=92)], 30
+        )
+        assert stopped is True
+        assert stop_day == 1
+        assert stop_r == pytest.approx(-1.6)  # filled at the 92 open, not the 95 stop

    def test_long_drift_no_trigger(self):
        bars = [_bar(102, 99, 101), _bar(103, 100, 102)]
-        risk, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, bars, 30)
+        risk, stopped, mfe, close_pct, _, _ = bt._tp_primitives("long", 100.0, 95.0, bars, 30)
        assert stopped is False
        assert mfe == pytest.approx(0.03)
        assert close_pct == pytest.approx(0.02)

    def test_short_direction(self):
        # short entry 100, stop 105; price falls → favourable = (entry - low)/entry
-        risk, stopped, mfe, close_pct = bt._tp_primitives("short", 100.0, 105.0, [_bar(101, 92, 93)], 30)
+        risk, stopped, mfe, close_pct, _, _ = bt._tp_primitives("short", 100.0, 105.0, [_bar(101, 92, 93)], 30)
        assert risk == pytest.approx(0.05)
        assert stopped is False
        assert mfe == pytest.approx(0.08)
@@ -131,6 +162,12 @@ class TestTrailingExits:
        assert res[10] == pytest.approx(0.8)
        assert res[5] == pytest.approx(1.4)

+    def test_gap_through_stop_fills_at_open(self):
+        # Initial stop 90 governs (20% trail from peak 100 is lower); the bar
+        # opens at 85, below it → fill at the open.
+        res = bt._trailing_exits("long", 100.0, 90.0, (0.20,), [_bar(88, 84, 86, open_=85)], 30)
+        assert res[20] == pytest.approx(-1.5)
+

 class TestTrailingBucket:
    def test_bucket(self):
@@ -177,6 +214,10 @@ class TestTimeExits:
        res = bt._time_exits("long", 100.0, 100.0, [_bar(103, 99, 102)], (5,))
        assert res[5] == 0.0

+    def test_gap_through_stop_fills_at_open(self):
+        res = bt._time_exits("long", 100.0, 95.0, [_bar(93, 90, 91, open_=92)], (5,))
+        assert res[5] == pytest.approx(-1.6)
+

 class TestTimeExitBucket:
    def test_bucket(self):
@@ -192,6 +233,11 @@ class TestTimeExitBucket:
        assert b["win_rate"] == pytest.approx(66.7, abs=0.1)
        assert b["avg_r"] == pytest.approx(0.3, abs=0.01)
        assert b["net_avg_r"] == pytest.approx(0.28, abs=0.01)
+        assert b["best_r"] == pytest.approx(1.4)
+        assert b["worst_r"] == pytest.approx(-1.0)
+        # No stop_day on any candidate → every hold runs the full 5 days.
+        assert b["avg_hold_days"] == 5.0
+        assert b["net_r_per_day"] == pytest.approx(0.28 / 5.0, abs=0.001)

    def test_missing_hold_skipped(self):
        b = bt._time_exit_bucket([{"time_r": {5: 1.0}}], 21)
@@ -263,6 +309,78 @@ class TestGateAblation:
        assert rows["all_floors"]["total"] == 2


+def _sim_prices(start_ord: int, closes: list[float]) -> tuple:
+    """Column arrays for consecutive daily bars: open = close (no gaps),
+    high/low = close ± 1."""
+    ords = list(range(start_ord, start_ord + len(closes)))
+    return (
+        ords,
+        list(closes),
+        [c + 1.0 for c in closes],
+        [c - 1.0 for c in closes],
+        list(closes),
+        [1_000_000] * len(closes),
+    )
+
+
+def _sim_cand(
+    sym: str, day_ord: int, entry: float, stop: float, target: float, mp: float = 90.0
+) -> dict:
+    return {
+        "qualified": True,
+        "direction": "long",
+        "symbol": sym,
+        "date": date.fromordinal(day_ord).isoformat(),
+        "entry": entry,
+        "stop": stop,
+        "target": target,
+        "momentum_percentile": mp,
+    }
+
+
+class TestSimulatePortfolio:
+    ORD = date(2025, 1, 6).toordinal()
+
+    def test_hold_policy_accounting(self):
+        closes = [100.0, 102.0, 104.0, 106.0, 108.0, 110.0]
+        prices = {"AAA": _sim_prices(self.ORD, closes)}
+        cand = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=130.0)
+        sim = bt._simulate_portfolio([cand], prices, None, "hold", 3)
+        assert sim is not None
+        assert sim["trades"] == 1
+        # 20 shares (1% risk / $5 stop distance), exit at the day-3 close 106:
+        # pnl = 2120 − 2000 − 2.00 entry cost − 2.12 exit cost = 115.88
+        assert sim["final_equity"] == pytest.approx(10_115.88, abs=0.01)
+        assert sim["win_rate"] == 100.0
+        assert sim["best_trade_r"] == pytest.approx(1.2)
+        assert sim["avg_hold_days"] == 3.0
+        assert sim["max_drawdown_pct"] == 0.0
+        assert sim["cagr_pct"] is None  # window far too short to annualize
+        assert sim["spy_return_pct"] is None
+
+    def test_target_policy_exits_at_target(self):
+        closes = [100.0, 102.0, 104.0, 106.0, 108.0, 110.0]
+        prices = {"AAA": _sim_prices(self.ORD, closes)}
+        cand = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=105.0)
+        sim = bt._simulate_portfolio([cand], prices, None, "target", 30)
+        assert sim is not None
+        assert sim["trades"] == 1
+        assert sim["best_trade_r"] == pytest.approx(1.0)  # filled exactly at 105
+
+    def test_stop_gap_fills_at_open(self):
+        # Day-1 bar gaps to a 90 open, below the 95 stop → fill at the open.
+        ords = list(range(self.ORD, self.ORD + 2))
+        prices = {"AAA": (ords, [100.0, 90.0], [101.0, 92.0], [99.0, 88.0], [100.0, 91.0], [1, 1])}
+        cand = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=120.0)
+        sim = bt._simulate_portfolio([cand], prices, None, "hold", 30)
+        assert sim is not None
+        assert sim["trades"] == 1
+        assert sim["worst_trade_r"] == pytest.approx(-2.0)  # (90 − 100) / 5
+
+    def test_nothing_qualified_returns_none(self):
+        assert bt._simulate_portfolio([], {}, None, "hold", 30) is None
+
+
 def test_bucket_stats_counts_and_expectancy():
    cands = [
        _cand(70, OUTCOME_TARGET_HIT, 3.0),   # +3R win
@@ -283,6 +401,10 @@ def test_bucket_stats_counts_and_expectancy():
    # net = gross minus a 0.04R round trip per candidate (risk_pct 0.05)
    assert s["net_avg_r"] == pytest.approx(1.0 - _COST_R_005, abs=0.001)
    assert s["net_total_r"] == pytest.approx(4.0 - 4 * _COST_R_005, abs=0.01)
+    assert s["best_r"] == 3.0
+    assert s["worst_r"] == -1.0
+    assert s["avg_hold_days"] == 10.0
+    assert s["net_r_per_day"] == pytest.approx((1.0 - _COST_R_005) / 10.0, abs=0.001)


 def test_bucket_stats_empty():
@@ -394,6 +516,12 @@ async def test_run_backtest_smoke(session):
    # time-exit sweep covers the configured hold lengths
    assert [r["hold_days"] for r in report["time_exit_sweep"]] == list(bt.TIME_EXIT_DAYS)

+    # portfolio simulation section is always present (policies may be empty
+    # when nothing qualifies)
+    assert "portfolio_sim" in report
+    assert isinstance(report["portfolio_sim"]["policies"], list)
+    assert report["portfolio_sim"]["params"]["max_positions"] == bt.SIM_MAX_POSITIONS
+
    # sweep: lowering the momentum-percentile cutoff can only add qualifiers
    sweep = sorted(report["sweep"], key=lambda r: r["min_momentum_percentile"], reverse=True)
    counts = [r["total"] for r in sweep]