feat: net-of-cost backtest, gate ablation + time-exit sweeps, longer tails
Phase 1 of the strategy-measurement plan — report-only, no production trading behavior changes:

- Cost haircut: every bucket/sweep now reports net_avg_r/net_total_r alongside gross (COST_PER_SIDE=0.1% of notional, converted to R via each setup's stop distance); params carry cost_per_side_pct.
- Gate ablation table: re-qualifies candidates at the current momentum cutoff with one floor removed per row (confidence / R:R / NEUTRAL / momentum-only) to show which floors earn their keep.
- Time-based exit sweep: hold 5/10/21/30 days with the initial ATR stop, exit at the day-N close — the classic momentum implementation, to disambiguate the wide-trailing result.
- TP sweep extended to +40/+50%, trailing to 25/30% so the optima are interior instead of starred at the sweep edge.
- BacktestPanel: Net Avg R columns everywhere, gate-ablation and time-exit tables, stars now mark best net avg R; stale cached reports still render (all new fields optional/guarded).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -25,7 +25,14 @@ async def session():
|
||||
yield s
|
||||
|
||||
|
||||
def _cand(prob: float, outcome: str, rr: float, qualified: bool = True, direction: str = "long") -> dict:
|
||||
def _cand(
|
||||
prob: float,
|
||||
outcome: str,
|
||||
rr: float,
|
||||
qualified: bool = True,
|
||||
direction: str = "long",
|
||||
risk_pct: float = 0.05,
|
||||
) -> dict:
|
||||
target_hit = outcome == OUTCOME_TARGET_HIT
|
||||
realized = rr if target_hit else (0.0 if outcome == OUTCOME_EXPIRED else -1.0)
|
||||
return {
|
||||
@@ -36,9 +43,14 @@ def _cand(prob: float, outcome: str, rr: float, qualified: bool = True, directio
|
||||
"realized_r": realized,
|
||||
"qualified": qualified,
|
||||
"direction": direction,
|
||||
"risk_pct": risk_pct,
|
||||
}
|
||||
|
||||
|
||||
# Round-trip cost in R for the default _cand risk_pct: 2 * 0.001 / 0.05 = 0.04R.
|
||||
_COST_R_005 = 2 * bt.COST_PER_SIDE / 0.05
|
||||
|
||||
|
||||
def _bar(high: float, low: float, close: float) -> SimpleNamespace:
|
||||
return SimpleNamespace(high=high, low=low, close=close)
|
||||
|
||||
@@ -87,6 +99,9 @@ class TestTakeProfitBucket:
|
||||
assert b["hit_rate"] == pytest.approx(33.3, abs=0.1)
|
||||
assert b["total_r"] == pytest.approx(0.8, abs=0.01)
|
||||
assert b["avg_r"] == pytest.approx(0.267, abs=0.01)
|
||||
# net: minus a 0.04R round trip per candidate (risk_pct 0.05)
|
||||
assert b["net_total_r"] == pytest.approx(0.8 - 3 * _COST_R_005, abs=0.01)
|
||||
assert b["net_avg_r"] == pytest.approx((0.8 - 3 * _COST_R_005) / 3, abs=0.01)
|
||||
|
||||
def test_zero_risk_skipped(self):
|
||||
cands = [{"risk_pct": 0.0, "mfe_pct": 0.2, "tp_stopped": False, "tp_close_pct": 0.1}]
|
||||
@@ -120,9 +135,9 @@ class TestTrailingExits:
|
||||
class TestTrailingBucket:
|
||||
def test_bucket(self):
|
||||
cands = [
|
||||
{"trail_r": {5: 1.4, 10: 0.8}},
|
||||
{"trail_r": {5: -1.0, 10: -1.0}},
|
||||
{"trail_r": {5: 0.5, 10: 0.5}},
|
||||
{"trail_r": {5: 1.4, 10: 0.8}, "risk_pct": 0.10},
|
||||
{"trail_r": {5: -1.0, 10: -1.0}, "risk_pct": 0.10},
|
||||
{"trail_r": {5: 0.5, 10: 0.5}, "risk_pct": 0.10},
|
||||
]
|
||||
b = bt._trailing_bucket(cands, 5)
|
||||
assert b["total"] == 3
|
||||
@@ -130,6 +145,116 @@ class TestTrailingBucket:
|
||||
assert b["win_rate"] == pytest.approx(66.7, abs=0.1)
|
||||
assert b["total_r"] == pytest.approx(0.9, abs=0.01)
|
||||
assert b["avg_r"] == pytest.approx(0.3, abs=0.01)
|
||||
# net: 0.02R round trip per candidate (risk_pct 0.10)
|
||||
assert b["net_total_r"] == pytest.approx(0.9 - 3 * 0.02, abs=0.01)
|
||||
assert b["net_avg_r"] == pytest.approx(0.28, abs=0.01)
|
||||
|
||||
|
||||
class TestTimeExits:
    """Expectations for ``bt._time_exits`` on small hand-built bar lists.

    Every case passes a direction, an entry price, a stop price, the bars
    and a tuple of hold lengths, then reads the R value back out of the
    result by hold length.
    """

    def test_long_exits_at_horizon_close(self):
        """A long that is never stopped exits at the close of its hold day."""
        hlc_rows = ((103, 99, 102), (105, 101, 104), (107, 103, 106))
        bars = [_bar(high, low, close) for high, low, close in hlc_rows]
        by_hold = bt._time_exits("long", 100.0, 95.0, bars, (2, 5))
        # 2-day hold: close 104 → +4% / 5% risk.
        assert by_hold[2] == pytest.approx(0.8)
        # 5-day hold: only 3 bars → last close 106.
        assert by_hold[5] == pytest.approx(1.2)

    def test_stop_on_first_bar_loses_everywhere(self):
        """A first-bar low through the stop costs a full R at every hold."""
        bars = [_bar(101, 94, 96), _bar(105, 101, 104)]
        holds = (1, 5)
        by_hold = bt._time_exits("long", 100.0, 95.0, bars, holds)
        for hold in holds:
            assert by_hold[hold] == pytest.approx(-1.0)

    def test_stop_after_short_horizon_only_hits_long_hold(self):
        """A stop that lands after the short hold ended only hurts the long hold."""
        # Day-2 close banked by the 2-day hold; the stop on day 3 only hits n=5.
        day_one = _bar(103, 99, 102)
        day_two = _bar(104, 100, 103)
        day_three = _bar(101, 94, 95)
        by_hold = bt._time_exits(
            "long", 100.0, 95.0, [day_one, day_two, day_three], (2, 5)
        )
        # close 103 → +3% / 5% risk
        assert by_hold[2] == pytest.approx(0.6)
        assert by_hold[5] == pytest.approx(-1.0)

    def test_short_direction(self):
        """A short with its stop above entry profits when the close falls."""
        only_bar = _bar(101, 95, 96)
        by_hold = bt._time_exits("short", 100.0, 105.0, [only_bar], (1,))
        # close 96 → +4% / 5% risk
        assert by_hold[1] == pytest.approx(0.8)

    def test_zero_risk_returns_zero(self):
        """Entry equal to the stop (zero risk) yields exactly 0.0."""
        only_bar = _bar(103, 99, 102)
        by_hold = bt._time_exits("long", 100.0, 100.0, [only_bar], (5,))
        assert by_hold[5] == 0.0
|
||||
|
||||
|
||||
class TestTimeExitBucket:
    """Expectations for ``bt._time_exit_bucket`` aggregates at one hold length."""

    def test_bucket(self):
        """Three candidates at hold 5: two winners, gross 0.3R and net 0.28R average."""
        r_by_hold = (
            {5: 1.4, 21: 0.8},
            {5: -1.0, 21: -1.0},
            {5: 0.5, 21: 0.5},
        )
        cands = [{"time_r": per_hold, "risk_pct": 0.10} for per_hold in r_by_hold]
        bucket = bt._time_exit_bucket(cands, 5)
        assert bucket["hold_days"] == 5
        assert bucket["total"] == 3
        assert bucket["wins"] == 2
        assert bucket["win_rate"] == pytest.approx(66.7, abs=0.1)
        assert bucket["avg_r"] == pytest.approx(0.3, abs=0.01)
        assert bucket["net_avg_r"] == pytest.approx(0.28, abs=0.01)

    def test_missing_hold_skipped(self):
        """A candidate with no entry for the requested hold is not counted."""
        only_five_day = {"time_r": {5: 1.0}}
        bucket = bt._time_exit_bucket([only_five_day], 21)
        assert bucket["total"] == 0
        assert bucket["avg_r"] is None
|
||||
|
||||
|
||||
def _acand(
    rr: float = 2.0,
    conf: float = 60.0,
    action: str = "LONG_MODERATE",
    mp: float | None = 90.0,
    direction: str = "long",
) -> dict:
    """Build a gate-ablation candidate that is always a target-hit winner.

    ``meets_core`` mirrors the default floors (min_rr 1.2, min_confidence 55,
    exclude_neutral on); the realized R equals ``rr`` and ``risk_pct`` is
    fixed at 0.05.
    """
    clears_rr = rr >= 1.2
    clears_confidence = conf >= 55.0
    is_neutral = action == "NEUTRAL"
    candidate = {
        "rr": rr,
        "confidence": conf,
        "action": action,
        "momentum_percentile": mp,
        "direction": direction,
        "meets_core": clears_rr and clears_confidence and not is_neutral,
        "risk_level": "Low",
        "target_hit": True,
        "outcome": OUTCOME_TARGET_HIT,
        "realized_r": rr,
        "risk_pct": 0.05,
    }
    return candidate
|
||||
|
||||
|
||||
class TestGateAblation:
    """Expectations for ``bt._gate_ablation`` row counts per variant."""

    # Activation settings handed to every ablation call in this class.
    ACTIVATION = {
        "min_rr": 1.2,
        "min_confidence": 55.0,
        "exclude_neutral": True,
        "require_high_conviction": False,
        "exclude_conflicts": False,
    }

    def test_variant_counts(self):
        """Each variant admits exactly the candidates its removed floor was blocking."""
        clears_everything = _acand()
        fails_confidence_floor = _acand(conf=40.0)
        fails_rr_floor = _acand(rr=1.0)
        fails_neutral_exclusion = _acand(action="NEUTRAL")
        fails_momentum_cutoff = _acand(mp=50.0)
        gated_out_short = _acand(direction="short", mp=95.0)
        cands = [
            clears_everything,
            fails_confidence_floor,
            fails_rr_floor,
            fails_neutral_exclusion,
            fails_momentum_cutoff,
            gated_out_short,
        ]
        by_variant = {}
        for row in bt._gate_ablation(cands, self.ACTIVATION, 80.0):
            by_variant[row["variant"]] = row
        expected_totals = (
            ("all_floors", 1),
            ("no_confidence_floor", 2),
            ("no_rr_floor", 2),
            ("no_neutral_exclusion", 2),
            ("momentum_only", 4),
        )
        for variant, total in expected_totals:
            assert by_variant[variant]["total"] == total
        assert by_variant["all_floors"]["net_avg_r"] is not None

    def test_threshold_zero_disables_momentum_gate(self):
        """With a 0.0 momentum threshold only the floors decide qualification."""
        # Floors only: the short and the low-momentum long both pass all_floors.
        low_momentum_long = _acand(mp=50.0)
        short_without_percentile = _acand(direction="short", mp=None)
        by_variant = {}
        for row in bt._gate_ablation(
            [low_momentum_long, short_without_percentile], self.ACTIVATION, 0.0
        ):
            by_variant[row["variant"]] = row
        assert by_variant["all_floors"]["total"] == 2
|
||||
|
||||
|
||||
def test_bucket_stats_counts_and_expectancy():
|
||||
@@ -149,6 +274,9 @@ def test_bucket_stats_counts_and_expectancy():
|
||||
# avg R = (3 + 2 - 1 + 0) / 4 = 1.0
|
||||
assert s["avg_r"] == 1.0
|
||||
assert s["total_r"] == 4.0
|
||||
# net = gross minus a 0.04R round trip per candidate (risk_pct 0.05)
|
||||
assert s["net_avg_r"] == pytest.approx(1.0 - _COST_R_005, abs=0.001)
|
||||
assert s["net_total_r"] == pytest.approx(4.0 - 4 * _COST_R_005, abs=0.01)
|
||||
|
||||
|
||||
def test_bucket_stats_empty():
|
||||
@@ -156,6 +284,15 @@ def test_bucket_stats_empty():
|
||||
assert s["total"] == 0
|
||||
assert s["hit_rate"] is None
|
||||
assert s["avg_r"] is None
|
||||
assert s["net_avg_r"] is None
|
||||
|
||||
|
||||
def test_bucket_stats_no_risk_pct_means_no_cost():
    """Without a ``risk_pct`` key, net figures equal the gross ones."""
    cand = _cand(50, OUTCOME_TARGET_HIT, 2.0)
    # Drop the key outright; raises KeyError if _cand ever stops setting it.
    cand.pop("risk_pct")
    stats = bt._bucket_stats([cand])
    for net_key, gross_key in (("net_avg_r", "avg_r"), ("net_total_r", "total_r")):
        assert stats[net_key] == stats[gross_key]
|
||||
|
||||
|
||||
def test_calibration_buckets():
|
||||
@@ -202,11 +339,25 @@ async def test_run_backtest_smoke(session):
|
||||
# well-formed report
|
||||
assert report["tickers"] == 1
|
||||
assert isinstance(report["candidates"], int)
|
||||
for key in ("overall_qualified", "overall_all", "by_direction", "calibration", "sweep"):
|
||||
for key in (
|
||||
"overall_qualified", "overall_all", "by_direction", "calibration", "sweep",
|
||||
"gate_ablation", "time_exit_sweep",
|
||||
):
|
||||
assert key in report
|
||||
# the oscillating series should yield at least some resolved setups
|
||||
assert report["candidates"] >= 1
|
||||
|
||||
# cost assumption is reported, and every bucket carries net numbers
|
||||
assert report["params"]["cost_per_side_pct"] == pytest.approx(bt.COST_PER_SIDE * 100)
|
||||
assert "net_avg_r" in report["overall_all"]
|
||||
|
||||
# ablation baseline reproduces the qualified set exactly
|
||||
ablation = {r["variant"]: r for r in report["gate_ablation"]}
|
||||
assert ablation["all_floors"]["total"] == report["overall_qualified"]["total"]
|
||||
|
||||
# time-exit sweep covers the configured hold lengths
|
||||
assert [r["hold_days"] for r in report["time_exit_sweep"]] == list(bt.TIME_EXIT_DAYS)
|
||||
|
||||
# sweep: lowering the momentum-percentile cutoff can only add qualifiers
|
||||
sweep = sorted(report["sweep"], key=lambda r: r["min_momentum_percentile"], reverse=True)
|
||||
counts = [r["total"] for r in sweep]
|
||||
|
||||
Reference in New Issue
Block a user