feat: net-of-cost backtest, gate ablation + time-exit sweeps, longer tails

Phase 1 of the strategy-measurement plan — report-only, no production trading behavior changes:

- Cost haircut: every bucket/sweep now reports net_avg_r/net_total_r alongside gross (COST_PER_SIDE=0.1% of notional, converted to R via each setup's stop distance); params carry cost_per_side_pct.
- Gate ablation table: re-qualifies candidates at the current momentum cutoff with one floor removed per row (confidence / R:R / NEUTRAL / momentum-only) to show which floors earn their keep.
- Time-based exit sweep: hold 5/10/21/30 days with the initial ATR stop, exit at the day-N close — the classic momentum implementation, to disambiguate the wide-trailing result.
- TP sweep extended to +40/+50%, trailing to 25/30% so the optima are interior instead of starred at the sweep edge.
- BacktestPanel: Net Avg R columns everywhere, gate-ablation and time-exit tables, stars now mark best net avg R; stale cached reports still render (all new fields optional/guarded).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-07-02 07:50:37 +02:00
parent 84ce7c5c26
commit 29b1a9a28c
5 changed files with 505 additions and 24 deletions
@@ -25,7 +25,14 @@ async def session():
        yield s


-def _cand(prob: float, outcome: str, rr: float, qualified: bool = True, direction: str = "long") -> dict:
+def _cand(
+    prob: float,
+    outcome: str,
+    rr: float,
+    qualified: bool = True,
+    direction: str = "long",
+    risk_pct: float = 0.05,
+) -> dict:
    target_hit = outcome == OUTCOME_TARGET_HIT
    realized = rr if target_hit else (0.0 if outcome == OUTCOME_EXPIRED else -1.0)
    return {
@@ -36,9 +43,14 @@ def _cand(prob: float, outcome: str, rr: float, qualified: bool = True, directio
        "realized_r": realized,
        "qualified": qualified,
        "direction": direction,
+        "risk_pct": risk_pct,
    }


+# Round-trip cost in R for the default _cand risk_pct: 2 * 0.001 / 0.05 = 0.04R.
+_COST_R_005 = 2 * bt.COST_PER_SIDE / 0.05
+
+
 def _bar(high: float, low: float, close: float) -> SimpleNamespace:
    return SimpleNamespace(high=high, low=low, close=close)

@@ -87,6 +99,9 @@ class TestTakeProfitBucket:
        assert b["hit_rate"] == pytest.approx(33.3, abs=0.1)
        assert b["total_r"] == pytest.approx(0.8, abs=0.01)
        assert b["avg_r"] == pytest.approx(0.267, abs=0.01)
+        # net: minus a 0.04R round trip per candidate (risk_pct 0.05)
+        assert b["net_total_r"] == pytest.approx(0.8 - 3 * _COST_R_005, abs=0.01)
+        assert b["net_avg_r"] == pytest.approx((0.8 - 3 * _COST_R_005) / 3, abs=0.01)

    def test_zero_risk_skipped(self):
        cands = [{"risk_pct": 0.0, "mfe_pct": 0.2, "tp_stopped": False, "tp_close_pct": 0.1}]
@@ -120,9 +135,9 @@ class TestTrailingExits:
 class TestTrailingBucket:
    def test_bucket(self):
        cands = [
-            {"trail_r": {5: 1.4, 10: 0.8}},
-            {"trail_r": {5: -1.0, 10: -1.0}},
-            {"trail_r": {5: 0.5, 10: 0.5}},
+            {"trail_r": {5: 1.4, 10: 0.8}, "risk_pct": 0.10},
+            {"trail_r": {5: -1.0, 10: -1.0}, "risk_pct": 0.10},
+            {"trail_r": {5: 0.5, 10: 0.5}, "risk_pct": 0.10},
        ]
        b = bt._trailing_bucket(cands, 5)
        assert b["total"] == 3
@@ -130,6 +145,116 @@ class TestTrailingBucket:
        assert b["win_rate"] == pytest.approx(66.7, abs=0.1)
        assert b["total_r"] == pytest.approx(0.9, abs=0.01)
        assert b["avg_r"] == pytest.approx(0.3, abs=0.01)
+        # net: 0.02R round trip per candidate (risk_pct 0.10)
+        assert b["net_total_r"] == pytest.approx(0.9 - 3 * 0.02, abs=0.01)
+        assert b["net_avg_r"] == pytest.approx(0.28, abs=0.01)
+
+
+class TestTimeExits:
+    def test_long_exits_at_horizon_close(self):
+        bars = [_bar(103, 99, 102), _bar(105, 101, 104), _bar(107, 103, 106)]
+        res = bt._time_exits("long", 100.0, 95.0, bars, (2, 5))
+        assert res[2] == pytest.approx(0.8)   # close 104 → +4% / 5% risk
+        assert res[5] == pytest.approx(1.2)   # only 3 bars → last close 106
+
+    def test_stop_on_first_bar_loses_everywhere(self):
+        res = bt._time_exits("long", 100.0, 95.0, [_bar(101, 94, 96), _bar(105, 101, 104)], (1, 5))
+        assert res[1] == pytest.approx(-1.0)
+        assert res[5] == pytest.approx(-1.0)
+
+    def test_stop_after_short_horizon_only_hits_long_hold(self):
+        # Day-2 close banked by the 2-day hold; the stop on day 3 only hits n=5.
+        bars = [_bar(103, 99, 102), _bar(104, 100, 103), _bar(101, 94, 95)]
+        res = bt._time_exits("long", 100.0, 95.0, bars, (2, 5))
+        assert res[2] == pytest.approx(0.6)   # close 103 → +3% / 5% risk
+        assert res[5] == pytest.approx(-1.0)
+
+    def test_short_direction(self):
+        res = bt._time_exits("short", 100.0, 105.0, [_bar(101, 95, 96)], (1,))
+        assert res[1] == pytest.approx(0.8)   # close 96 → +4% / 5% risk
+
+    def test_zero_risk_returns_zero(self):
+        res = bt._time_exits("long", 100.0, 100.0, [_bar(103, 99, 102)], (5,))
+        assert res[5] == 0.0
+
+
+class TestTimeExitBucket:
+    def test_bucket(self):
+        cands = [
+            {"time_r": {5: 1.4, 21: 0.8}, "risk_pct": 0.10},
+            {"time_r": {5: -1.0, 21: -1.0}, "risk_pct": 0.10},
+            {"time_r": {5: 0.5, 21: 0.5}, "risk_pct": 0.10},
+        ]
+        b = bt._time_exit_bucket(cands, 5)
+        assert b["hold_days"] == 5
+        assert b["total"] == 3
+        assert b["wins"] == 2
+        assert b["win_rate"] == pytest.approx(66.7, abs=0.1)
+        assert b["avg_r"] == pytest.approx(0.3, abs=0.01)
+        assert b["net_avg_r"] == pytest.approx(0.28, abs=0.01)
+
+    def test_missing_hold_skipped(self):
+        b = bt._time_exit_bucket([{"time_r": {5: 1.0}}], 21)
+        assert b["total"] == 0
+        assert b["avg_r"] is None
+
+
+def _acand(
+    rr: float = 2.0,
+    conf: float = 60.0,
+    action: str = "LONG_MODERATE",
+    mp: float | None = 90.0,
+    direction: str = "long",
+) -> dict:
+    """Ablation candidate: meets_core mirrors the default floors (min_rr 1.2,
+    min_confidence 55, exclude_neutral on)."""
+    meets = rr >= 1.2 and conf >= 55.0 and action != "NEUTRAL"
+    return {
+        "rr": rr,
+        "confidence": conf,
+        "action": action,
+        "momentum_percentile": mp,
+        "direction": direction,
+        "meets_core": meets,
+        "risk_level": "Low",
+        "target_hit": True,
+        "outcome": OUTCOME_TARGET_HIT,
+        "realized_r": rr,
+        "risk_pct": 0.05,
+    }
+
+
+class TestGateAblation:
+    ACTIVATION = {
+        "min_rr": 1.2,
+        "min_confidence": 55.0,
+        "exclude_neutral": True,
+        "require_high_conviction": False,
+        "exclude_conflicts": False,
+    }
+
+    def test_variant_counts(self):
+        cands = [
+            _acand(),                          # clears everything
+            _acand(conf=40.0),                 # fails confidence floor
+            _acand(rr=1.0),                    # fails R:R floor
+            _acand(action="NEUTRAL"),          # fails NEUTRAL exclusion
+            _acand(mp=50.0),                   # fails the momentum cutoff
+            _acand(direction="short", mp=95.0),  # short — gated out
+        ]
+        rows = {r["variant"]: r for r in bt._gate_ablation(cands, self.ACTIVATION, 80.0)}
+        assert rows["all_floors"]["total"] == 1
+        assert rows["no_confidence_floor"]["total"] == 2
+        assert rows["no_rr_floor"]["total"] == 2
+        assert rows["no_neutral_exclusion"]["total"] == 2
+        assert rows["momentum_only"]["total"] == 4
+        assert rows["all_floors"]["net_avg_r"] is not None
+
+    def test_threshold_zero_disables_momentum_gate(self):
+        # Floors only: the short and the low-momentum long both pass all_floors.
+        cands = [_acand(mp=50.0), _acand(direction="short", mp=None)]
+        rows = {r["variant"]: r for r in bt._gate_ablation(cands, self.ACTIVATION, 0.0)}
+        assert rows["all_floors"]["total"] == 2


 def test_bucket_stats_counts_and_expectancy():
@@ -149,6 +274,9 @@ def test_bucket_stats_counts_and_expectancy():
    # avg R = (3 + 2 - 1 + 0) / 4 = 1.0
    assert s["avg_r"] == 1.0
    assert s["total_r"] == 4.0
+    # net = gross minus a 0.04R round trip per candidate (risk_pct 0.05)
+    assert s["net_avg_r"] == pytest.approx(1.0 - _COST_R_005, abs=0.001)
+    assert s["net_total_r"] == pytest.approx(4.0 - 4 * _COST_R_005, abs=0.01)


 def test_bucket_stats_empty():
@@ -156,6 +284,15 @@ def test_bucket_stats_empty():
    assert s["total"] == 0
    assert s["hit_rate"] is None
    assert s["avg_r"] is None
+    assert s["net_avg_r"] is None
+
+
+def test_bucket_stats_no_risk_pct_means_no_cost():
+    c = _cand(50, OUTCOME_TARGET_HIT, 2.0)
+    del c["risk_pct"]
+    s = bt._bucket_stats([c])
+    assert s["net_avg_r"] == s["avg_r"]
+    assert s["net_total_r"] == s["total_r"]


 def test_calibration_buckets():
@@ -202,11 +339,25 @@ async def test_run_backtest_smoke(session):
    # well-formed report
    assert report["tickers"] == 1
    assert isinstance(report["candidates"], int)
-    for key in ("overall_qualified", "overall_all", "by_direction", "calibration", "sweep"):
+    for key in (
+        "overall_qualified", "overall_all", "by_direction", "calibration", "sweep",
+        "gate_ablation", "time_exit_sweep",
+    ):
        assert key in report
    # the oscillating series should yield at least some resolved setups
    assert report["candidates"] >= 1

+    # cost assumption is reported, and every bucket carries net numbers
+    assert report["params"]["cost_per_side_pct"] == pytest.approx(bt.COST_PER_SIDE * 100)
+    assert "net_avg_r" in report["overall_all"]
+
+    # ablation baseline reproduces the qualified set exactly
+    ablation = {r["variant"]: r for r in report["gate_ablation"]}
+    assert ablation["all_floors"]["total"] == report["overall_qualified"]["total"]
+
+    # time-exit sweep covers the configured hold lengths
+    assert [r["hold_days"] for r in report["time_exit_sweep"]] == list(bt.TIME_EXIT_DAYS)
+
    # sweep: lowering the momentum-percentile cutoff can only add qualifiers
    sweep = sorted(report["sweep"], key=lambda r: r["min_momentum_percentile"], reverse=True)
    counts = [r["total"] for r in sweep]