From 942a22ce653e0757bb24e3b242ce54b68f159d8d Mon Sep 17 00:00:00 2001 From: Dennis Thiessen Date: Thu, 2 Jul 2026 11:34:41 +0200 Subject: [PATCH] feat: grade gate-ablation variants under the hold-to-horizon exit too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ablation judged floors under the target/stop model, but the exit sweeps point at replacing that exit with a fixed hold — under which the R:R floor's rationale (bigger payoff at the target) may not apply. Each ablation row now also carries hold_avg_r / hold_net_avg_r / hold_total_r (30d hold, initial stop only), so the Phase 3 gate decision can be read under the exit policy that would actually be used. Co-Authored-By: Claude Fable 5 --- app/services/backtest_service.py | 34 +++++++++++++------ .../src/components/signals/BacktestPanel.tsx | 8 +++++ frontend/src/lib/types.ts | 5 +++ tests/unit/test_backtest_service.py | 11 +++++- 4 files changed, 47 insertions(+), 11 deletions(-) diff --git a/app/services/backtest_service.py b/app/services/backtest_service.py index 71331ab..1dcbdbf 100644 --- a/app/services/backtest_service.py +++ b/app/services/backtest_service.py @@ -911,16 +911,27 @@ def _gate_ablation(candidates: list[dict], activation: dict, threshold: float) - ("no_neutral_exclusion", [rr_ok, conf_ok, tighteners_ok]), ("momentum_only", []), ] - return [ - { + # Grade each variant under BOTH exit models: the target/stop outcome + # (_bucket_stats) and the hold-to-horizon time exit. A floor that pays under + # the target model may be meaningless once the exit is a fixed hold — the + # hold_* columns are what a time-exit gate decision should read. + hold_days = max(TIME_EXIT_DAYS) + rows: list[dict] = [] + for name, checks in variants: + matching = [ + c for c in candidates + if momentum_ok(c) and all(check(c) for check in checks) + ] + hold = _time_exit_bucket(matching, hold_days) + rows.append({ "variant": name, - **_bucket_stats([ - c for c in candidates - if momentum_ok(c) and all(check(c) for check in checks) - ]), - } - for name, checks in variants - ] + **_bucket_stats(matching), + "hold_days": hold_days, + "hold_avg_r": hold["avg_r"], + "hold_net_avg_r": hold["net_avg_r"], + "hold_total_r": hold["total_r"], + }) + return rows async def run_backtest( @@ -1051,7 +1062,10 @@ async def run_backtest( "Each row re-qualifies the same candidates at the current momentum " f"cutoff ({current_min_pct:.0f}) with one floor removed (long-only " "while the momentum gate is active). If dropping a floor doesn't " - "hurt net expectancy, that floor isn't pulling its weight." + "hurt net expectancy, that floor isn't pulling its weight. The Hold " + "columns grade the same variants under the hold-to-horizon time exit " + "instead of the S/R target — the view that matters if the exit " + "policy moves to a fixed hold." ), "take_profit_sweep": [_take_profit_bucket(qualified, tp) for tp in TP_LEVELS], "trailing_sweep": [_trailing_bucket(qualified, round(f * 100)) for f in TRAIL_LEVELS], diff --git a/frontend/src/components/signals/BacktestPanel.tsx b/frontend/src/components/signals/BacktestPanel.tsx index d80d88b..c4aa152 100644 --- a/frontend/src/components/signals/BacktestPanel.tsx +++ b/frontend/src/components/signals/BacktestPanel.tsx @@ -285,6 +285,8 @@ export function BacktestPanel() { Avg R Net Avg R Total R + Hold Net Avg R + Hold Total R @@ -303,6 +305,12 @@ export function BacktestPanel() { {fmtR(row.net_avg_r ?? null)} {fmtR(row.total_r)} + + {fmtR(row.hold_net_avg_r ?? null)} + + + {fmtR(row.hold_total_r ?? null)} + ))} diff --git a/frontend/src/lib/types.ts b/frontend/src/lib/types.ts index 0b2a3d1..c8d45a1 100644 --- a/frontend/src/lib/types.ts +++ b/frontend/src/lib/types.ts @@ -280,6 +280,11 @@ export interface BacktestTimeExitRow { export interface BacktestGateAblationRow extends BacktestBucket { variant: string; + // The same variant graded under the hold-to-horizon time exit. + hold_days?: number; + hold_avg_r?: number | null; + hold_net_avg_r?: number | null; + hold_total_r?: number | null; } export interface BacktestSignalEvalRow { diff --git a/tests/unit/test_backtest_service.py b/tests/unit/test_backtest_service.py index 62eaa4b..fbe6750 100644 --- a/tests/unit/test_backtest_service.py +++ b/tests/unit/test_backtest_service.py @@ -221,6 +221,7 @@ def _acand( "outcome": OUTCOME_TARGET_HIT, "realized_r": rr, "risk_pct": 0.05, + "time_r": {d: 0.5 for d in bt.TIME_EXIT_DAYS}, } @@ -249,6 +250,11 @@ class TestGateAblation: assert rows["no_neutral_exclusion"]["total"] == 2 assert rows["momentum_only"]["total"] == 4 assert rows["all_floors"]["net_avg_r"] is not None + # Every variant is also graded under the hold-to-horizon exit. + assert rows["all_floors"]["hold_days"] == max(bt.TIME_EXIT_DAYS) + assert rows["all_floors"]["hold_avg_r"] == pytest.approx(0.5) + assert rows["all_floors"]["hold_net_avg_r"] is not None + assert rows["momentum_only"]["hold_total_r"] == pytest.approx(4 * 0.5, abs=0.01) def test_threshold_zero_disables_momentum_gate(self): # Floors only: the short and the low-momentum long both pass all_floors. @@ -378,9 +384,12 @@ async def test_run_backtest_smoke(session): assert report["params"]["cost_per_side_pct"] == pytest.approx(bt.COST_PER_SIDE * 100) assert "net_avg_r" in report["overall_all"] - # ablation baseline reproduces the qualified set exactly + # ablation baseline reproduces the qualified set exactly, and every row + # carries the hold-to-horizon grading alongside the target model ablation = {r["variant"]: r for r in report["gate_ablation"]} assert ablation["all_floors"]["total"] == report["overall_qualified"]["total"] + for row in report["gate_ablation"]: + assert "hold_net_avg_r" in row # time-exit sweep covers the configured hold lengths assert [r["hold_days"] for r in report["time_exit_sweep"]] == list(bt.TIME_EXIT_DAYS)