backtest: add min target-probability sweep

Re-applies the activation gate at several min_target_probability thresholds (60→30 in steps of 5, with the other gate conditions held fixed) over the already-replayed candidates, so the trade-off between how many setups qualify and their expectancy is visible in one table — the cheap "optimize" half of Phase 2. Candidates now carry meets_core + best_prob so the sweep needs no re-replay. New sweep table in BacktestPanel with the current threshold starred.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 06:13:30 +02:00
parent 9d2e1e74bf
commit 050abc6f71
4 changed files with 81 additions and 3 deletions
@@ -160,6 +160,12 @@ def _window_setups(
            stop_loss=stop,
            entry_price=entry,
        )
        # meets_core = clears every gate EXCEPT target probability, so the report
        # can sweep the min_target_probability threshold without re-replaying.
        core_config = {**activation, "min_target_probability": 0.0}
        meets_core = setup_qualifies(setup_ns, core_config)
        best_prob = best_target_probability(setup_ns)
        min_tp = float(activation.get("min_target_probability", 0.0))
        out.append({
            "direction": direction,
            "entry": entry,
@@ -168,10 +174,11 @@ def _window_setups(
            "rr": rr,
            "confidence": confidences[direction],
            "primary_prob": float(primary["probability"]),
-            "best_prob": best_target_probability(setup_ns),
+            "best_prob": best_prob,
            "meets_core": meets_core,
            "action": action,
            "risk_level": risk_level,
-            "qualified": setup_qualifies(setup_ns, activation),
+            "qualified": meets_core and best_prob >= min_tp,
        })
    return out
@@ -208,6 +215,8 @@ def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) -
                "rr": s["rr"],
                "confidence": s["confidence"],
                "primary_prob": s["primary_prob"],
                "best_prob": s["best_prob"],
                "meets_core": s["meets_core"],
                "qualified": s["qualified"],
                "outcome": outcome,
                "target_hit": target_hit,
@@ -279,6 +288,15 @@ async def run_backtest(
    longs = [c for c in qualified if c["direction"] == "long"]
    shorts = [c for c in qualified if c["direction"] == "short"]
    # Threshold sweep: re-apply the gate at several min_target_probability values
    # (holding the other conditions fixed) so the trade-off between how many
    # setups qualify and their expectancy is visible without re-replaying.
    current_min_tp = float(activation.get("min_target_probability", 60.0))
    sweep = []
    for threshold in (60, 55, 50, 45, 40, 35, 30):
        cands = [c for c in candidates if c["meets_core"] and c["best_prob"] >= threshold]
        sweep.append({"min_target_probability": threshold, **_bucket_stats(cands)})
    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "tickers": total,
@@ -292,6 +310,8 @@ async def run_backtest(
            "long": _bucket_stats(longs),
            "short": _bucket_stats(shorts),
        },
        "min_target_probability": current_min_tp,
        "sweep": sweep,
        "calibration": _calibration(candidates),
        "note": (
            "Sentiment & fundamentals held neutral (no point-in-time history). "
@@ -158,6 +158,53 @@ export function BacktestPanel() {
              </table>
            </div>
            {report.sweep && report.sweep.length > 0 && (
              <div>
                <p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
                  Min target-probability sweep
                </p>
                <p className="mb-2 text-[11px] text-gray-500">
                  How many setups qualify — and how they perform — at each gate threshold (other
                  gate conditions held fixed). Lower = more trades, watch that expectancy holds.
                  Your current setting is highlighted; set it in Admin → Settings → Activation.
                </p>
                <div className="glass overflow-x-auto">
                  <table className="w-full text-sm">
                    <thead>
                      <tr className="border-b border-white/[0.06] text-left text-xs uppercase tracking-wider text-gray-500">
                        <th className="px-4 py-2.5">Min Target Prob</th>
                        <th className="px-4 py-2.5 text-right">Qualified</th>
                        <th className="px-4 py-2.5 text-right">Wins</th>
                        <th className="px-4 py-2.5 text-right">Losses</th>
                        <th className="px-4 py-2.5 text-right">Hit Rate</th>
                        <th className="px-4 py-2.5 text-right">Avg R</th>
                        <th className="px-4 py-2.5 text-right">Total R</th>
                      </tr>
                    </thead>
                    <tbody>
                      {report.sweep.map((row) => {
                        const current = Math.abs(row.min_target_probability - report.min_target_probability) < 0.5;
                        return (
                          <tr key={row.min_target_probability} className={`border-b border-white/[0.04] ${current ? 'bg-blue-400/10' : ''}`}>
                            <td className="num px-4 py-2.5 text-gray-200">
                              {current && <span className="mr-1 text-blue-300">★</span>}
                              {row.min_target_probability}%
                            </td>
                            <td className="num px-4 py-2.5 text-right text-gray-200">{row.total}</td>
                            <td className="num px-4 py-2.5 text-right text-emerald-400">{row.wins}</td>
                            <td className="num px-4 py-2.5 text-right text-red-400">{row.losses}</td>
                            <td className="num px-4 py-2.5 text-right text-gray-200">{fmtPct(row.hit_rate)}</td>
                            <td className={`num px-4 py-2.5 text-right font-semibold ${rColor(row.avg_r)}`}>{fmtR(row.avg_r)}</td>
                            <td className={`num px-4 py-2.5 text-right ${rColor(row.total_r)}`}>{fmtR(row.total_r)}</td>
                          </tr>
                        );
                      })}
                    </tbody>
                  </table>
                </div>
              </div>
            )}
            <div>
              <p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
                Probability calibration
@@ -196,6 +196,10 @@ export interface BacktestCalibrationRow {
  realized_hit_rate: number;
 }
 /**
  * One row of the min target-probability sweep in a backtest report.
  *
  * Carries the bucket statistics inherited from BacktestBucket, computed over
  * the candidates that pass the gate when it is re-applied at this row's
  * threshold.
  */
 export interface BacktestSweepRow extends BacktestBucket {
  // Threshold this row was computed at; rendered with a "%" suffix in the sweep table.
  min_target_probability: number;
 }
 export interface BacktestReport {
  generated_at: string;
  tickers: number;
@@ -205,6 +209,8 @@ export interface BacktestReport {
  overall_qualified: BacktestBucket;
  overall_all: BacktestBucket;
  by_direction: Record<string, BacktestBucket>;
  min_target_probability: number;
  sweep: BacktestSweepRow[];
  calibration: BacktestCalibrationRow[];
  note: string;
 }
@@ -108,10 +108,15 @@ async def test_run_backtest_smoke(session):
    # well-formed report
    assert report["tickers"] == 1
    assert isinstance(report["candidates"], int)
-    for key in ("overall_qualified", "overall_all", "by_direction", "calibration"):
+    for key in ("overall_qualified", "overall_all", "by_direction", "calibration", "sweep"):
        assert key in report
    # the oscillating series should yield at least some resolved setups
    assert report["candidates"] >= 1
    # sweep: lowering the threshold can only add qualifiers, never remove them
    sweep = sorted(report["sweep"], key=lambda r: r["min_target_probability"], reverse=True)
    counts = [r["total"] for r in sweep]
    assert counts == sorted(counts)  # ascending as threshold descends
    # every calibration row is internally consistent
    for row in report["calibration"]:
        assert 0 <= row["realized_hit_rate"] <= 100