feat: net-of-cost backtest, gate ablation + time-exit sweeps, longer tails
Deploy / lint (push) Successful in 7s
Deploy / test (push) Successful in 57s
Deploy / deploy (push) Successful in 32s

Phase 1 of the strategy-measurement plan — report-only, no production
trading behavior changes:

- Cost haircut: every bucket/sweep now reports net_avg_r/net_total_r
  alongside gross (COST_PER_SIDE=0.1% of notional, converted to R via
  each setup's stop distance); params carry cost_per_side_pct.
- Gate ablation table: re-qualifies candidates at the current momentum
  cutoff with one floor removed per row (confidence / R:R / NEUTRAL /
  momentum-only) to show which floors earn their keep.
- Time-based exit sweep: hold 5/10/21/30 days with the initial ATR stop,
  exit at the day-N close — the classic momentum implementation, to
  disambiguate the wide-trailing result.
- TP sweep extended to +40/+50%, trailing to 25/30% so the optima are
  interior instead of starred at the sweep edge.
- BacktestPanel: Net Avg R columns everywhere, gate-ablation and
  time-exit tables, stars now mark best net avg R; stale cached reports
  still render (all new fields optional/guarded).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-07-02 07:50:37 +02:00
parent 84ce7c5c26
commit 29b1a9a28c
5 changed files with 505 additions and 24 deletions
+156 -5
View File
@@ -25,7 +25,14 @@ async def session():
yield s
def _cand(prob: float, outcome: str, rr: float, qualified: bool = True, direction: str = "long") -> dict:
def _cand(
prob: float,
outcome: str,
rr: float,
qualified: bool = True,
direction: str = "long",
risk_pct: float = 0.05,
) -> dict:
target_hit = outcome == OUTCOME_TARGET_HIT
realized = rr if target_hit else (0.0 if outcome == OUTCOME_EXPIRED else -1.0)
return {
@@ -36,9 +43,14 @@ def _cand(prob: float, outcome: str, rr: float, qualified: bool = True, directio
"realized_r": realized,
"qualified": qualified,
"direction": direction,
"risk_pct": risk_pct,
}
# Round-trip cost in R for the default _cand risk_pct: 2 * 0.001 / 0.05 = 0.04R.
_COST_R_005 = 2 * bt.COST_PER_SIDE / 0.05
def _bar(high: float, low: float, close: float) -> SimpleNamespace:
return SimpleNamespace(high=high, low=low, close=close)
@@ -87,6 +99,9 @@ class TestTakeProfitBucket:
assert b["hit_rate"] == pytest.approx(33.3, abs=0.1)
assert b["total_r"] == pytest.approx(0.8, abs=0.01)
assert b["avg_r"] == pytest.approx(0.267, abs=0.01)
# net: minus a 0.04R round trip per candidate (risk_pct 0.05)
assert b["net_total_r"] == pytest.approx(0.8 - 3 * _COST_R_005, abs=0.01)
assert b["net_avg_r"] == pytest.approx((0.8 - 3 * _COST_R_005) / 3, abs=0.01)
def test_zero_risk_skipped(self):
cands = [{"risk_pct": 0.0, "mfe_pct": 0.2, "tp_stopped": False, "tp_close_pct": 0.1}]
@@ -120,9 +135,9 @@ class TestTrailingExits:
class TestTrailingBucket:
def test_bucket(self):
cands = [
{"trail_r": {5: 1.4, 10: 0.8}},
{"trail_r": {5: -1.0, 10: -1.0}},
{"trail_r": {5: 0.5, 10: 0.5}},
{"trail_r": {5: 1.4, 10: 0.8}, "risk_pct": 0.10},
{"trail_r": {5: -1.0, 10: -1.0}, "risk_pct": 0.10},
{"trail_r": {5: 0.5, 10: 0.5}, "risk_pct": 0.10},
]
b = bt._trailing_bucket(cands, 5)
assert b["total"] == 3
@@ -130,6 +145,116 @@ class TestTrailingBucket:
assert b["win_rate"] == pytest.approx(66.7, abs=0.1)
assert b["total_r"] == pytest.approx(0.9, abs=0.01)
assert b["avg_r"] == pytest.approx(0.3, abs=0.01)
# net: 0.02R round trip per candidate (risk_pct 0.10)
assert b["net_total_r"] == pytest.approx(0.9 - 3 * 0.02, abs=0.01)
assert b["net_avg_r"] == pytest.approx(0.28, abs=0.01)
class TestTimeExits:
def test_long_exits_at_horizon_close(self):
bars = [_bar(103, 99, 102), _bar(105, 101, 104), _bar(107, 103, 106)]
res = bt._time_exits("long", 100.0, 95.0, bars, (2, 5))
assert res[2] == pytest.approx(0.8) # close 104 → +4% / 5% risk
assert res[5] == pytest.approx(1.2) # only 3 bars → last close 106
def test_stop_on_first_bar_loses_everywhere(self):
res = bt._time_exits("long", 100.0, 95.0, [_bar(101, 94, 96), _bar(105, 101, 104)], (1, 5))
assert res[1] == pytest.approx(-1.0)
assert res[5] == pytest.approx(-1.0)
def test_stop_after_short_horizon_only_hits_long_hold(self):
# Day-2 close banked by the 2-day hold; the stop on day 3 only hits n=5.
bars = [_bar(103, 99, 102), _bar(104, 100, 103), _bar(101, 94, 95)]
res = bt._time_exits("long", 100.0, 95.0, bars, (2, 5))
assert res[2] == pytest.approx(0.6) # close 103 → +3% / 5% risk
assert res[5] == pytest.approx(-1.0)
def test_short_direction(self):
res = bt._time_exits("short", 100.0, 105.0, [_bar(101, 95, 96)], (1,))
assert res[1] == pytest.approx(0.8) # close 96 → +4% / 5% risk
def test_zero_risk_returns_zero(self):
res = bt._time_exits("long", 100.0, 100.0, [_bar(103, 99, 102)], (5,))
assert res[5] == 0.0
class TestTimeExitBucket:
def test_bucket(self):
cands = [
{"time_r": {5: 1.4, 21: 0.8}, "risk_pct": 0.10},
{"time_r": {5: -1.0, 21: -1.0}, "risk_pct": 0.10},
{"time_r": {5: 0.5, 21: 0.5}, "risk_pct": 0.10},
]
b = bt._time_exit_bucket(cands, 5)
assert b["hold_days"] == 5
assert b["total"] == 3
assert b["wins"] == 2
assert b["win_rate"] == pytest.approx(66.7, abs=0.1)
assert b["avg_r"] == pytest.approx(0.3, abs=0.01)
assert b["net_avg_r"] == pytest.approx(0.28, abs=0.01)
def test_missing_hold_skipped(self):
b = bt._time_exit_bucket([{"time_r": {5: 1.0}}], 21)
assert b["total"] == 0
assert b["avg_r"] is None
def _acand(
rr: float = 2.0,
conf: float = 60.0,
action: str = "LONG_MODERATE",
mp: float | None = 90.0,
direction: str = "long",
) -> dict:
"""Ablation candidate: meets_core mirrors the default floors (min_rr 1.2,
min_confidence 55, exclude_neutral on)."""
meets = rr >= 1.2 and conf >= 55.0 and action != "NEUTRAL"
return {
"rr": rr,
"confidence": conf,
"action": action,
"momentum_percentile": mp,
"direction": direction,
"meets_core": meets,
"risk_level": "Low",
"target_hit": True,
"outcome": OUTCOME_TARGET_HIT,
"realized_r": rr,
"risk_pct": 0.05,
}
class TestGateAblation:
ACTIVATION = {
"min_rr": 1.2,
"min_confidence": 55.0,
"exclude_neutral": True,
"require_high_conviction": False,
"exclude_conflicts": False,
}
def test_variant_counts(self):
cands = [
_acand(), # clears everything
_acand(conf=40.0), # fails confidence floor
_acand(rr=1.0), # fails R:R floor
_acand(action="NEUTRAL"), # fails NEUTRAL exclusion
_acand(mp=50.0), # fails the momentum cutoff
_acand(direction="short", mp=95.0), # short — gated out
]
rows = {r["variant"]: r for r in bt._gate_ablation(cands, self.ACTIVATION, 80.0)}
assert rows["all_floors"]["total"] == 1
assert rows["no_confidence_floor"]["total"] == 2
assert rows["no_rr_floor"]["total"] == 2
assert rows["no_neutral_exclusion"]["total"] == 2
assert rows["momentum_only"]["total"] == 4
assert rows["all_floors"]["net_avg_r"] is not None
def test_threshold_zero_disables_momentum_gate(self):
# Floors only: the short and the low-momentum long both pass all_floors.
cands = [_acand(mp=50.0), _acand(direction="short", mp=None)]
rows = {r["variant"]: r for r in bt._gate_ablation(cands, self.ACTIVATION, 0.0)}
assert rows["all_floors"]["total"] == 2
def test_bucket_stats_counts_and_expectancy():
@@ -149,6 +274,9 @@ def test_bucket_stats_counts_and_expectancy():
# avg R = (3 + 2 - 1 + 0) / 4 = 1.0
assert s["avg_r"] == 1.0
assert s["total_r"] == 4.0
# net = gross minus a 0.04R round trip per candidate (risk_pct 0.05)
assert s["net_avg_r"] == pytest.approx(1.0 - _COST_R_005, abs=0.001)
assert s["net_total_r"] == pytest.approx(4.0 - 4 * _COST_R_005, abs=0.01)
def test_bucket_stats_empty():
@@ -156,6 +284,15 @@ def test_bucket_stats_empty():
assert s["total"] == 0
assert s["hit_rate"] is None
assert s["avg_r"] is None
assert s["net_avg_r"] is None
def test_bucket_stats_no_risk_pct_means_no_cost():
c = _cand(50, OUTCOME_TARGET_HIT, 2.0)
del c["risk_pct"]
s = bt._bucket_stats([c])
assert s["net_avg_r"] == s["avg_r"]
assert s["net_total_r"] == s["total_r"]
def test_calibration_buckets():
@@ -202,11 +339,25 @@ async def test_run_backtest_smoke(session):
# well-formed report
assert report["tickers"] == 1
assert isinstance(report["candidates"], int)
for key in ("overall_qualified", "overall_all", "by_direction", "calibration", "sweep"):
for key in (
"overall_qualified", "overall_all", "by_direction", "calibration", "sweep",
"gate_ablation", "time_exit_sweep",
):
assert key in report
# the oscillating series should yield at least some resolved setups
assert report["candidates"] >= 1
# cost assumption is reported, and every bucket carries net numbers
assert report["params"]["cost_per_side_pct"] == pytest.approx(bt.COST_PER_SIDE * 100)
assert "net_avg_r" in report["overall_all"]
# ablation baseline reproduces the qualified set exactly
ablation = {r["variant"]: r for r in report["gate_ablation"]}
assert ablation["all_floors"]["total"] == report["overall_qualified"]["total"]
# time-exit sweep covers the configured hold lengths
assert [r["hold_days"] for r in report["time_exit_sweep"]] == list(bt.TIME_EXIT_DAYS)
# sweep: lowering the momentum-percentile cutoff can only add qualifiers
sweep = sorted(report["sweep"], key=lambda r: r["min_momentum_percentile"], reverse=True)
counts = [r["total"] for r in sweep]