From 942a22ce653e0757bb24e3b242ce54b68f159d8d Mon Sep 17 00:00:00 2001
From: Dennis Thiessen <dennis@thiessen.io>
Date: Thu, 2 Jul 2026 11:34:41 +0200
Subject: [PATCH] feat: grade gate-ablation variants under the hold-to-horizon
 exit too
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ablation judged floors under the target/stop model, but the exit
sweeps point at replacing that exit with a fixed hold — under which the
R:R floor's rationale (bigger payoff at the target) may not apply. Each
ablation row now also carries hold_days / hold_avg_r / hold_net_avg_r /
hold_total_r, graded at the longest configured hold
(max(TIME_EXIT_DAYS), 30d today; initial stop only), so the Phase 3 gate
decision can be read under the exit policy that would actually be used.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 app/services/backtest_service.py              | 34 +++++++++++++------
 .../src/components/signals/BacktestPanel.tsx  |  8 +++++
 frontend/src/lib/types.ts                     |  5 +++
 tests/unit/test_backtest_service.py           | 11 +++++-
 4 files changed, 47 insertions(+), 11 deletions(-)
diff --git a/app/services/backtest_service.py b/app/services/backtest_service.py
index 71331ab..1dcbdbf 100644
--- a/app/services/backtest_service.py
+++ b/app/services/backtest_service.py
@@ -911,16 +911,27 @@ def _gate_ablation(candidates: list[dict], activation: dict, threshold: float) -
         ("no_neutral_exclusion", [rr_ok, conf_ok, tighteners_ok]),
         ("momentum_only", []),
     ]
-    return [
-        {
+    # Grade each variant under BOTH exit models: the target/stop outcome
+    # (_bucket_stats) and the hold-to-horizon time exit. A floor that pays under
+    # the target model may be meaningless once the exit is a fixed hold — the
+    # hold_* columns are what a time-exit gate decision should read.
+    hold_days = max(TIME_EXIT_DAYS)
+    rows: list[dict] = []
+    for name, checks in variants:
+        matching = [
+            c for c in candidates
+            if momentum_ok(c) and all(check(c) for check in checks)
+        ]
+        hold = _time_exit_bucket(matching, hold_days)
+        rows.append({
             "variant": name,
-            **_bucket_stats([
-                c for c in candidates
-                if momentum_ok(c) and all(check(c) for check in checks)
-            ]),
-        }
-        for name, checks in variants
-    ]
+            **_bucket_stats(matching),
+            "hold_days": hold_days,
+            "hold_avg_r": hold["avg_r"],
+            "hold_net_avg_r": hold["net_avg_r"],
+            "hold_total_r": hold["total_r"],
+        })
+    return rows
 
 
 async def run_backtest(
@@ -1051,7 +1062,10 @@ async def run_backtest(
             "Each row re-qualifies the same candidates at the current momentum "
             f"cutoff ({current_min_pct:.0f}) with one floor removed (long-only "
             "while the momentum gate is active). If dropping a floor doesn't "
-            "hurt net expectancy, that floor isn't pulling its weight."
+            "hurt net expectancy, that floor isn't pulling its weight. The Hold "
+            "columns grade the same variants under the hold-to-horizon time exit "
+            "instead of the S/R target — the view that matters if the exit "
+            "policy moves to a fixed hold."
         ),
         "take_profit_sweep": [_take_profit_bucket(qualified, tp) for tp in TP_LEVELS],
         "trailing_sweep": [_trailing_bucket(qualified, round(f * 100)) for f in TRAIL_LEVELS],
diff --git a/frontend/src/components/signals/BacktestPanel.tsx b/frontend/src/components/signals/BacktestPanel.tsx
index d80d88b..c4aa152 100644
--- a/frontend/src/components/signals/BacktestPanel.tsx
+++ b/frontend/src/components/signals/BacktestPanel.tsx
@@ -285,6 +285,8 @@ export function BacktestPanel() {
                         <th className="px-4 py-2.5 text-right">Avg R</th>
                         <th className="px-4 py-2.5 text-right">Net Avg R</th>
                         <th className="px-4 py-2.5 text-right">Total R</th>
+                        <th className="px-4 py-2.5 text-right">Hold Net Avg R</th>
+                        <th className="px-4 py-2.5 text-right">Hold Total R</th>
                       </tr>
                     </thead>
                     <tbody>
@@ -303,6 +305,12 @@ export function BacktestPanel() {
                             {fmtR(row.net_avg_r ?? null)}
                           </td>
                           <td className={`num px-4 py-2.5 text-right ${rColor(row.total_r)}`}>{fmtR(row.total_r)}</td>
+                          <td className={`num px-4 py-2.5 text-right font-semibold ${rColor(row.hold_net_avg_r ?? null)}`}>
+                            {fmtR(row.hold_net_avg_r ?? null)}
+                          </td>
+                          <td className={`num px-4 py-2.5 text-right ${rColor(row.hold_total_r ?? null)}`}>
+                            {fmtR(row.hold_total_r ?? null)}
+                          </td>
                         </tr>
                       ))}
                     </tbody>
diff --git a/frontend/src/lib/types.ts b/frontend/src/lib/types.ts
index 0b2a3d1..c8d45a1 100644
--- a/frontend/src/lib/types.ts
+++ b/frontend/src/lib/types.ts
@@ -280,6 +280,11 @@ export interface BacktestTimeExitRow {
 
 export interface BacktestGateAblationRow extends BacktestBucket {
   variant: string;
+  // The same variant graded under the hold-to-horizon time exit.
+  hold_days?: number;
+  hold_avg_r?: number | null;
+  hold_net_avg_r?: number | null;
+  hold_total_r?: number | null;
 }
 
 export interface BacktestSignalEvalRow {
diff --git a/tests/unit/test_backtest_service.py b/tests/unit/test_backtest_service.py
index 62eaa4b..fbe6750 100644
--- a/tests/unit/test_backtest_service.py
+++ b/tests/unit/test_backtest_service.py
@@ -221,6 +221,7 @@ def _acand(
         "outcome": OUTCOME_TARGET_HIT,
         "realized_r": rr,
         "risk_pct": 0.05,
+        "time_r": {d: 0.5 for d in bt.TIME_EXIT_DAYS},
     }
 
 
@@ -249,6 +250,11 @@ class TestGateAblation:
         assert rows["no_neutral_exclusion"]["total"] == 2
         assert rows["momentum_only"]["total"] == 4
         assert rows["all_floors"]["net_avg_r"] is not None
+        # Every variant is also graded under the hold-to-horizon exit.
+        assert rows["all_floors"]["hold_days"] == max(bt.TIME_EXIT_DAYS)
+        assert rows["all_floors"]["hold_avg_r"] == pytest.approx(0.5)
+        assert rows["all_floors"]["hold_net_avg_r"] is not None
+        assert rows["momentum_only"]["hold_total_r"] == pytest.approx(4 * 0.5, abs=0.01)
 
     def test_threshold_zero_disables_momentum_gate(self):
         # Floors only: the short and the low-momentum long both pass all_floors.
@@ -378,9 +384,12 @@ async def test_run_backtest_smoke(session):
     assert report["params"]["cost_per_side_pct"] == pytest.approx(bt.COST_PER_SIDE * 100)
     assert "net_avg_r" in report["overall_all"]
 
-    # ablation baseline reproduces the qualified set exactly
+    # ablation baseline reproduces the qualified set exactly, and every row
+    # carries the hold-to-horizon grading alongside the target model
     ablation = {r["variant"]: r for r in report["gate_ablation"]}
     assert ablation["all_floors"]["total"] == report["overall_qualified"]["total"]
+    for row in report["gate_ablation"]:
+        assert "hold_net_avg_r" in row
 
     # time-exit sweep covers the configured hold lengths
     assert [r["hold_days"] for r in report["time_exit_sweep"]] == list(bt.TIME_EXIT_DAYS)