feat: take-profit exit sweep in the backtest (alongside target-vs-stop)

The target-vs-stop model counts a near-miss of a far S/R target as a full loss and ignores the partial gains you actually bank — so it measures a different strategy than "scalp the early pop, take +8%". Add a realistic take-profit exit model next to it (original untouched). Per setup the replay now also records risk%, whether the stop was hit, the favourable excursion reachable before the stop (MFE), and the horizon-close move. From those a fixed-take-profit sweep (4/6/8/10/12/15%) is scored in R: bank +X% if reached before the stop, else -1R, else the horizon close. Hit rate = how often +X% was banked (the MFE CDF), so you can pick the EV-optimal TP without top-ticking fantasy. Shown as a new table in the Backtest panel; the IC, calibration and momentum sweep are unchanged. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-30 16:56:32 +02:00
parent 6511a1020b
commit c63951ca02
4 changed files with 199 additions and 0 deletions
@@ -215,6 +215,42 @@ def _window_setups(
    return out
 def _tp_primitives(
    direction: str, entry: float, stop: float, forward: list, horizon: int
 ) -> tuple[float, bool, float, float]:
    """Primitives for the take-profit exit model, from the bars after detection.
    Returns ``(risk_pct, stopped, mfe_pct, close_pct)``:
      - ``risk_pct``  fraction from entry to stop (the 1R distance)
      - ``stopped``   whether the stop was hit within the horizon
      - ``mfe_pct``   best favourable excursion (fraction) reachable *before* the
                      stop — strictly before the stop bar, so a same-bar tp+stop
                      counts as a loss (matching the conservative target model);
                      over the whole horizon if the stop is never hit
      - ``close_pct`` directional return at the horizon-end close (the timeout exit)
    From these any fixed take-profit level can be scored without re-walking bars:
    tp reached before stop (``mfe_pct >= tp``) → +tp; else stop → −1R; else the
    horizon-close move.
    """
    long = direction == "long"
    risk_pct = abs(entry - stop) / entry if entry else 0.0
    bars = forward[:horizon]
    if not bars:
        return risk_pct, False, 0.0, 0.0
    mfe = 0.0
    stopped = False
    for r in bars:
        if (r.low <= stop) if long else (r.high >= stop):
            stopped = True
            break
        fav = (r.high - entry) / entry if long else (entry - r.low) / entry
        if fav > mfe:
            mfe = fav
    close_pct = ((bars[-1].close - entry) / entry) * (1.0 if long else -1.0)
    return risk_pct, stopped, mfe, close_pct
 def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) -> list[dict]:
    """Walk one ticker's history weekly, building setups and their realized outcomes."""
    candidates: list[dict] = []
@@ -240,6 +276,11 @@ def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) -
                realized_r = -1.0
            else:  # expired
                realized_r = 0.0
            # Take-profit exit primitives (parallel to the target-vs-stop outcome
            # above; aggregated separately into the take-profit sweep).
            risk_pct, tp_stopped, mfe_pct, tp_close_pct = _tp_primitives(
                s["direction"], s["entry"], s["stop"], forward, HORIZON
            )
            iso = records[i].date.isocalendar()
            candidates.append({
                "symbol": symbol,
@@ -255,6 +296,10 @@ def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) -
                "outcome": outcome,
                "target_hit": target_hit,
                "realized_r": realized_r,
                "risk_pct": risk_pct,
                "tp_stopped": tp_stopped,
                "mfe_pct": mfe_pct,
                "tp_close_pct": tp_close_pct,
            })
    return candidates
@@ -276,6 +321,39 @@ def _bucket_stats(cands: list[dict]) -> dict:
    }
 # Fixed take-profit levels swept for the take-profit exit model, expressed as
 # fractions (0.04 means +4%); each level produces one row of the sweep.
 TP_LEVELS = (0.04, 0.06, 0.08, 0.10, 0.12, 0.15)
 def _take_profit_bucket(cands: list[dict], tp: float) -> dict:
    """Stats for a fixed take-profit exit at +``tp`` (fraction): bank +tp if it's
    reached before the stop, else −1R on a stop, else exit at the horizon close.
    Results are in R (gain% / risk%) so they're comparable to the target model.
    ``hit_rate`` here = share that reached +tp before the stop (the MFE CDF)."""
    rs: list[float] = []
    wins = 0
    for c in cands:
        risk = c.get("risk_pct") or 0.0
        if risk <= 0:
            continue
        if c.get("mfe_pct", 0.0) >= tp:
            rs.append(tp / risk)
            wins += 1
        elif c.get("tp_stopped"):
            rs.append(-1.0)
        else:
            rs.append((c.get("tp_close_pct", 0.0)) / risk)
    total = len(rs)
    return {
        "tp_pct": round(tp * 100, 1),
        "total": total,
        "wins": wins,
        "hit_rate": round(wins / total * 100, 1) if total else None,
        "avg_r": round(sum(rs) / total, 3) if total else None,
        "total_r": round(sum(rs), 2) if total else None,
    }
 def _calibration(cands: list[dict]) -> list[dict]:
    """Predicted target probability vs realized hit rate, per probability bucket."""
    rows: list[dict] = []
@@ -710,6 +788,7 @@ async def run_backtest(
        },
        "min_momentum_percentile": current_min_pct,
        "sweep": sweep,
        "take_profit_sweep": [_take_profit_bucket(qualified, tp) for tp in TP_LEVELS],
        "calibration": _calibration(candidates),
        "signal_eval": _signal_evaluation(collected),
        "signal_eval_note": (
@@ -85,6 +85,11 @@ export function BacktestPanel() {
  const queryClient = useQueryClient();
  const toast = useToast();
  const bestTpAvgR =
    report?.take_profit_sweep && report.take_profit_sweep.length > 0
      ? Math.max(...report.take_profit_sweep.map((r) => r.avg_r ?? -Infinity))
      : null;
  const run = useMutation({
    mutationFn: () => triggerJob('backtest'),
    onSuccess: (res) => {
@@ -232,6 +237,54 @@ export function BacktestPanel() {
              </div>
            )}
            {report.take_profit_sweep && report.take_profit_sweep.length > 0 && (
              <div>
                <p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
                  Take-profit exit (alternative to the target above)
                </p>
                <p className="mb-2 text-[11px] text-gray-500">
                  Models a realistic exit instead of waiting for the far S/R target: bank{' '}
                  <span className="text-gray-300">+X%</span> if price reaches it before the stop, else −1R on
                  the stop, else exit at the {report.params.horizon_days}-day close. In R, so it compares to the
                  target model above. <span className="text-gray-300">Hit Rate = how often you'd have banked
                  +X%</span> (how far winners actually run) — no top-ticking, it's the level you'd really set.
                  ★ = best avg R.
                </p>
                <div className="glass overflow-x-auto">
                  <table className="w-full text-sm">
                    <thead>
                      <tr className="border-b border-white/[0.06] text-left text-xs uppercase tracking-wider text-gray-500">
                        <th className="px-4 py-2.5">Take-profit</th>
                        <th className="px-4 py-2.5 text-right">Setups</th>
                        <th className="px-4 py-2.5 text-right">Hit (banked)</th>
                        <th className="px-4 py-2.5 text-right">Hit Rate</th>
                        <th className="px-4 py-2.5 text-right">Avg R</th>
                        <th className="px-4 py-2.5 text-right">Total R</th>
                      </tr>
                    </thead>
                    <tbody>
                      {report.take_profit_sweep.map((row) => {
                        const best = row.avg_r != null && row.avg_r === bestTpAvgR;
                        return (
                          <tr key={row.tp_pct} className={`border-b border-white/[0.04] ${best ? 'bg-emerald-400/[0.06]' : ''}`}>
                            <td className="num px-4 py-2.5 text-gray-200">
                              {best && <span className="mr-1 text-emerald-300">★</span>}
                              +{row.tp_pct}%
                            </td>
                            <td className="num px-4 py-2.5 text-right text-gray-200">{row.total}</td>
                            <td className="num px-4 py-2.5 text-right text-emerald-400">{row.wins}</td>
                            <td className="num px-4 py-2.5 text-right text-gray-200">{fmtPct(row.hit_rate)}</td>
                            <td className={`num px-4 py-2.5 text-right font-semibold ${rColor(row.avg_r)}`}>{fmtR(row.avg_r)}</td>
                            <td className={`num px-4 py-2.5 text-right ${rColor(row.total_r)}`}>{fmtR(row.total_r)}</td>
                          </tr>
                        );
                      })}
                    </tbody>
                  </table>
                </div>
              </div>
            )}
            <div>
              <p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
                Probability calibration
@@ -230,6 +230,15 @@ export interface BacktestSweepRow extends BacktestBucket {
  min_momentum_percentile: number;
 }
 /** One row of the backtest's fixed take-profit sweep (a single take-profit level). */
 export interface BacktestTakeProfitRow {
  /** Take-profit level in percent (e.g. 8 for +8%). */
  tp_pct: number;
  /** Number of setups scored at this level (setups without a positive risk are excluded). */
  total: number;
  /** Setups that reached the take-profit level before the stop. */
  wins: number;
  /** `wins` as a percentage of `total`; null when no setups were scored. */
  hit_rate: number | null;
  /** Mean result per scored setup, in R; null when no setups were scored. */
  avg_r: number | null;
  /** Sum of results over all scored setups, in R; null when no setups were scored. */
  total_r: number | null;
 }
 export interface BacktestSignalEvalRow {
  signal: string;
  weeks: number;
@@ -252,6 +261,7 @@ export interface BacktestReport {
  by_direction: Record<string, BacktestBucket>;
  min_momentum_percentile: number;
  sweep: BacktestSweepRow[];
  take_profit_sweep?: BacktestTakeProfitRow[];
  calibration: BacktestCalibrationRow[];
  signal_eval?: BacktestSignalEvalRow[];
  signal_eval_note?: string;
@@ -4,6 +4,7 @@ from __future__ import annotations
 import math
 from datetime import date, timedelta
 from types import SimpleNamespace
 import pytest
@@ -38,6 +39,62 @@ def _cand(prob: float, outcome: str, rr: float, qualified: bool = True, directio
    }
 def _bar(high: float, low: float, close: float) -> SimpleNamespace:
    return SimpleNamespace(high=high, low=low, close=close)
 class TestTakeProfitPrimitives:
    """Checks the (risk, stopped, mfe, close) tuple produced by ``bt._tp_primitives``."""

    def test_long_tp_reachable_before_stop(self):
        # Single bar that rallies to 109 without touching the 95 stop.
        forward = [_bar(109, 101, 108)]
        risk, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, forward, 30)
        assert stopped is False
        assert (risk, mfe, close_pct) == pytest.approx((0.05, 0.09, 0.08))

    def test_long_stop_zeroes_mfe(self):
        # The only bar's low pierces the stop, so it is a loss and nothing was
        # bankable before it.
        forward = [_bar(101, 94, 96)]
        _risk, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, forward, 30)
        assert stopped is True
        assert (mfe, close_pct) == pytest.approx((0.0, -0.04))

    def test_long_drift_no_trigger(self):
        # Two quiet bars: neither the stop nor any large excursion is reached.
        forward = [_bar(102, 99, 101), _bar(103, 100, 102)]
        _risk, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, forward, 30)
        assert stopped is False
        assert (mfe, close_pct) == pytest.approx((0.03, 0.02))

    def test_short_direction(self):
        # Short from 100 with the stop at 105; a falling price is favourable,
        # measured as (entry - low) / entry.
        forward = [_bar(101, 92, 93)]
        risk, stopped, mfe, close_pct = bt._tp_primitives("short", 100.0, 105.0, forward, 30)
        assert stopped is False
        assert (risk, mfe, close_pct) == pytest.approx((0.05, 0.08, 0.07))
 class TestTakeProfitBucket:
    """Checks the aggregate statistics produced by ``bt._take_profit_bucket``."""

    def test_bucket_mix(self):
        # (mfe_pct, tp_stopped, tp_close_pct) for three setups sharing a 5% risk.
        rows = [
            (0.09, False, 0.08),   # +1.6R win
            (0.02, True, -0.04),   # -1R stop
            (0.03, False, 0.01),   # +0.2R timeout
        ]
        cands = [
            {"risk_pct": 0.05, "mfe_pct": mfe, "tp_stopped": stopped, "tp_close_pct": close}
            for mfe, stopped, close in rows
        ]
        stats = bt._take_profit_bucket(cands, 0.08)
        assert (stats["total"], stats["wins"]) == (3, 1)
        assert stats["hit_rate"] == pytest.approx(33.3, abs=0.1)
        assert stats["total_r"] == pytest.approx(0.8, abs=0.01)
        assert stats["avg_r"] == pytest.approx(0.267, abs=0.01)

    def test_zero_risk_skipped(self):
        # A setup without a measurable 1R distance must not be scored at all.
        riskless = {"risk_pct": 0.0, "mfe_pct": 0.2, "tp_stopped": False, "tp_close_pct": 0.1}
        stats = bt._take_profit_bucket([riskless], 0.08)
        assert stats["total"] == 0
        assert stats["avg_r"] is None
 def test_bucket_stats_counts_and_expectancy():
    cands = [
        _cand(70, OUTCOME_TARGET_HIT, 3.0),   # +3R win