feat: portfolio simulation + per-trade stats (gaps, hold time, best/worst)
Deploy / lint (push) Successful in 6s
Deploy / test (push) Successful in 55s
Deploy / deploy (push) Successful in 38s

Per-trade additions to the report:
- Gap-through-stop fills: stops now fill at the worse of the stop or the
  bar's open across every exit model (target, TP, trailing, time), so a
  loss can exceed -1R; targets never fill better than their level.
- best_r / worst_r, avg holding days, and net R per day of capital
  deployed on the summary buckets and the time-exit sweep.

Portfolio simulation (the stats a per-setup replay cannot give):
- One capital-constrained book over the qualified setups: 10k start, max
  10 concurrent positions (one per ticker, best momentum first), 1%
  fixed-fractional risk with a 20% no-leverage notional cap, entries at
  the detection close, 0.1%/side costs, daily mark-to-market.
- Two exit policies compared: S/R target race vs hold-to-horizon.
- Equity-curve stats: final equity, total return, CAGR, max drawdown,
  annualized daily Sharpe, win rate, avg P&L, best/worst trade, avg
  hold, entries skipped on a full book, and SPY price return over the
  same window (benchmark history refreshed to cover the replay span).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-07-02 11:56:29 +02:00
parent 942a22ce65
commit 0f43e755f4
4 changed files with 634 additions and 33 deletions
+134 -6
View File
@@ -32,6 +32,7 @@ def _cand(
qualified: bool = True,
direction: str = "long",
risk_pct: float = 0.05,
hold_days: int = 10,
) -> dict:
target_hit = outcome == OUTCOME_TARGET_HIT
realized = rr if target_hit else (0.0 if outcome == OUTCOME_EXPIRED else -1.0)
@@ -44,6 +45,7 @@ def _cand(
"qualified": qualified,
"direction": direction,
"risk_pct": risk_pct,
"hold_days": hold_days,
}
@@ -51,35 +53,64 @@ def _cand(
_COST_R_005 = 2 * bt.COST_PER_SIDE / 0.05
def _bar(high: float, low: float, close: float) -> SimpleNamespace:
return SimpleNamespace(high=high, low=low, close=close)
def _bar(high: float, low: float, close: float, open_: float | None = None) -> SimpleNamespace:
"""Synthetic daily bar. ``open`` defaults to the high so a stop is pierced
intraday (fill at the stop level); pass an explicit open beyond the stop to
model a gap through it."""
return SimpleNamespace(
high=high, low=low, close=close, open=open_ if open_ is not None else high
)
class TestStopFillR:
    """``bt._stop_fill_r`` fills at the stop intraday, or at the open on a gap."""

    def test_intraday_fill_at_stop(self):
        # Defaulted open equals the 101 high, above the 95 stop; the 94 low
        # pierces it during the session, so the fill is the stop itself: -1R.
        pierced = _bar(101, 94, 96)
        fill_r = bt._stop_fill_r("long", 100.0, 95.0, pierced)
        assert fill_r == pytest.approx(-1.0)

    def test_gap_fill_at_open(self):
        # Opens at 92, below the 95 stop -> filled at the open, worse than 1R:
        # (92 - 100) / 5 = -1.6.
        gapped = _bar(93, 90, 91, open_=92)
        fill_r = bt._stop_fill_r("long", 100.0, 95.0, gapped)
        assert fill_r == pytest.approx(-1.6)

    def test_short_gap_fill_at_open(self):
        # Short stop 105; opens at 107 above it -> fill 107:
        # (100 - 107) / 5 = -1.4.
        gapped = _bar(110, 104, 108, open_=107)
        fill_r = bt._stop_fill_r("short", 100.0, 105.0, gapped)
        assert fill_r == pytest.approx(-1.4)
class TestTakeProfitPrimitives:
def test_long_tp_reachable_before_stop(self):
risk, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, [_bar(109, 101, 108)], 30)
risk, stopped, mfe, close_pct, stop_day, _ = bt._tp_primitives("long", 100.0, 95.0, [_bar(109, 101, 108)], 30)
assert risk == pytest.approx(0.05)
assert stopped is False
assert mfe == pytest.approx(0.09)
assert close_pct == pytest.approx(0.08)
assert stop_day is None
def test_long_stop_zeroes_mfe(self):
# Low pierces the stop on the only bar → loss, nothing banked before it.
risk, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, [_bar(101, 94, 96)], 30)
risk, stopped, mfe, close_pct, stop_day, stop_r = bt._tp_primitives("long", 100.0, 95.0, [_bar(101, 94, 96)], 30)
assert stopped is True
assert mfe == pytest.approx(0.0)
assert close_pct == pytest.approx(-0.04)
assert stop_day == 1
assert stop_r == pytest.approx(-1.0)
def test_gap_through_stop_loses_more_than_1r(self):
_, stopped, _, _, stop_day, stop_r = bt._tp_primitives(
"long", 100.0, 95.0, [_bar(93, 90, 91, open_=92)], 30
)
assert stopped is True
assert stop_day == 1
assert stop_r == pytest.approx(-1.6) # filled at the 92 open, not the 95 stop
def test_long_drift_no_trigger(self):
bars = [_bar(102, 99, 101), _bar(103, 100, 102)]
risk, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, bars, 30)
risk, stopped, mfe, close_pct, _, _ = bt._tp_primitives("long", 100.0, 95.0, bars, 30)
assert stopped is False
assert mfe == pytest.approx(0.03)
assert close_pct == pytest.approx(0.02)
def test_short_direction(self):
# short entry 100, stop 105; price falls → favourable = (entry - low)/entry
risk, stopped, mfe, close_pct = bt._tp_primitives("short", 100.0, 105.0, [_bar(101, 92, 93)], 30)
risk, stopped, mfe, close_pct, _, _ = bt._tp_primitives("short", 100.0, 105.0, [_bar(101, 92, 93)], 30)
assert risk == pytest.approx(0.05)
assert stopped is False
assert mfe == pytest.approx(0.08)
@@ -131,6 +162,12 @@ class TestTrailingExits:
assert res[10] == pytest.approx(0.8)
assert res[5] == pytest.approx(1.4)
def test_gap_through_stop_fills_at_open(self):
# Initial stop 90 governs (20% trail from peak 100 is lower); the bar
# opens at 85, below it → fill at the open.
res = bt._trailing_exits("long", 100.0, 90.0, (0.20,), [_bar(88, 84, 86, open_=85)], 30)
assert res[20] == pytest.approx(-1.5)
class TestTrailingBucket:
def test_bucket(self):
@@ -177,6 +214,10 @@ class TestTimeExits:
res = bt._time_exits("long", 100.0, 100.0, [_bar(103, 99, 102)], (5,))
assert res[5] == 0.0
def test_gap_through_stop_fills_at_open(self):
res = bt._time_exits("long", 100.0, 95.0, [_bar(93, 90, 91, open_=92)], (5,))
assert res[5] == pytest.approx(-1.6)
class TestTimeExitBucket:
def test_bucket(self):
@@ -192,6 +233,11 @@ class TestTimeExitBucket:
assert b["win_rate"] == pytest.approx(66.7, abs=0.1)
assert b["avg_r"] == pytest.approx(0.3, abs=0.01)
assert b["net_avg_r"] == pytest.approx(0.28, abs=0.01)
assert b["best_r"] == pytest.approx(1.4)
assert b["worst_r"] == pytest.approx(-1.0)
# No stop_day on any candidate → every hold runs the full 5 days.
assert b["avg_hold_days"] == 5.0
assert b["net_r_per_day"] == pytest.approx(0.28 / 5.0, abs=0.001)
def test_missing_hold_skipped(self):
b = bt._time_exit_bucket([{"time_r": {5: 1.0}}], 21)
@@ -263,6 +309,78 @@ class TestGateAblation:
assert rows["all_floors"]["total"] == 2
def _sim_prices(start_ord: int, closes: list[float]) -> tuple:
    """Build the column arrays for a gap-free run of consecutive daily bars.

    Returns a 6-tuple of parallel lists, one entry per close:
    ordinals counting up from ``start_ord``, opens (a copy of ``closes``, so
    open = close and there are no gaps), highs (close + 1), lows (close - 1),
    closes (a second, independent copy), and a constant 1_000_000 column
    (presumably volume - confirm against the simulator's column layout).
    """
    n_bars = len(closes)
    ordinals = [start_ord + offset for offset in range(n_bars)]
    opens = list(closes)
    highs = [px + 1.0 for px in closes]
    lows = [px - 1.0 for px in closes]
    closing = list(closes)
    filler = [1_000_000 for _ in range(n_bars)]
    return ordinals, opens, highs, lows, closing, filler
def _sim_cand(
    sym: str, day_ord: int, entry: float, stop: float, target: float, mp: float = 90.0
) -> dict:
    """Build a qualified long candidate dict for the portfolio-simulation tests.

    ``day_ord`` is a proleptic Gregorian ordinal; it is stored under ``"date"``
    as an ISO ``YYYY-MM-DD`` string. ``mp`` is stored under
    ``"momentum_percentile"``.
    """
    iso_day = date.fromordinal(day_ord).isoformat()
    setup: dict = {"qualified": True, "direction": "long", "symbol": sym, "date": iso_day}
    # Price levels and ranking input are appended in the original key order.
    setup.update({"entry": entry, "stop": stop, "target": target, "momentum_percentile": mp})
    return setup
class TestSimulatePortfolio:
    """Checks of ``bt._simulate_portfolio`` on small single-symbol books."""

    # Ordinal of the first synthetic bar (2025-01-06).
    ORD = date(2025, 1, 6).toordinal()

    def test_hold_policy_accounting(self):
        """A single trade held to the 3-day horizon produces the expected book."""
        rising = [100.0, 102.0, 104.0, 106.0, 108.0, 110.0]
        book_prices = {"AAA": _sim_prices(self.ORD, rising)}
        setup = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=130.0)

        result = bt._simulate_portfolio([setup], book_prices, None, "hold", 3)

        assert result is not None
        assert result["trades"] == 1
        # 20 shares (1% risk / $5 stop distance), exit at the day-3 close 106:
        # pnl = 2120 - 2000 - 2.00 entry cost - 2.12 exit cost = 115.88
        assert result["final_equity"] == pytest.approx(10_115.88, abs=0.01)
        assert result["win_rate"] == 100.0
        assert result["best_trade_r"] == pytest.approx(1.2)
        assert result["avg_hold_days"] == 3.0
        assert result["max_drawdown_pct"] == 0.0
        assert result["cagr_pct"] is None  # window far too short to annualize
        assert result["spy_return_pct"] is None

    def test_target_policy_exits_at_target(self):
        """Under the target policy the trade closes at the target level."""
        rising = [100.0, 102.0, 104.0, 106.0, 108.0, 110.0]
        book_prices = {"AAA": _sim_prices(self.ORD, rising)}
        setup = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=105.0)

        result = bt._simulate_portfolio([setup], book_prices, None, "target", 30)

        assert result is not None
        assert result["trades"] == 1
        assert result["best_trade_r"] == pytest.approx(1.0)  # filled exactly at 105

    def test_stop_gap_fills_at_open(self):
        """A gap below the stop fills at the open, so the loss exceeds -1R."""
        # Day-1 bar gaps to a 90 open, below the 95 stop -> fill at the open.
        two_days = [self.ORD, self.ORD + 1]
        book_prices = {
            "AAA": (
                two_days,
                [100.0, 90.0],
                [101.0, 92.0],
                [99.0, 88.0],
                [100.0, 91.0],
                [1, 1],
            )
        }
        setup = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=120.0)

        result = bt._simulate_portfolio([setup], book_prices, None, "hold", 30)

        assert result is not None
        assert result["trades"] == 1
        assert result["worst_trade_r"] == pytest.approx(-2.0)  # (90 - 100) / 5

    def test_nothing_qualified_returns_none(self):
        """With no candidates and no prices the simulator returns ``None``."""
        outcome = bt._simulate_portfolio([], {}, None, "hold", 30)
        assert outcome is None
def test_bucket_stats_counts_and_expectancy():
cands = [
_cand(70, OUTCOME_TARGET_HIT, 3.0), # +3R win
@@ -283,6 +401,10 @@ def test_bucket_stats_counts_and_expectancy():
# net = gross minus a 0.04R round trip per candidate (risk_pct 0.05)
assert s["net_avg_r"] == pytest.approx(1.0 - _COST_R_005, abs=0.001)
assert s["net_total_r"] == pytest.approx(4.0 - 4 * _COST_R_005, abs=0.01)
assert s["best_r"] == 3.0
assert s["worst_r"] == -1.0
assert s["avg_hold_days"] == 10.0
assert s["net_r_per_day"] == pytest.approx((1.0 - _COST_R_005) / 10.0, abs=0.001)
def test_bucket_stats_empty():
@@ -394,6 +516,12 @@ async def test_run_backtest_smoke(session):
# time-exit sweep covers the configured hold lengths
assert [r["hold_days"] for r in report["time_exit_sweep"]] == list(bt.TIME_EXIT_DAYS)
# portfolio simulation section is always present (policies may be empty
# when nothing qualifies)
assert "portfolio_sim" in report
assert isinstance(report["portfolio_sim"]["policies"], list)
assert report["portfolio_sim"]["params"]["max_positions"] == bt.SIM_MAX_POSITIONS
# sweep: lowering the momentum-percentile cutoff can only add qualifiers
sweep = sorted(report["sweep"], key=lambda r: r["min_momentum_percentile"], reverse=True)
counts = [r["total"] for r in sweep]