backtest: add min target-probability sweep
Deploy / lint (push) Successful in 7s
Deploy / test (push) Successful in 40s
Deploy / deploy (push) Successful in 26s

Re-applies the activation gate at several min_target_probability thresholds
(60 → 30 in steps of 5, with all other gate conditions held fixed) over the
already-replayed candidates, so the trade-off between how many setups qualify
and their expectancy is visible in one table. This is the cheap "optimize" half
of Phase 2. Candidates now carry meets_core (passes every gate except target
probability) and best_prob, so the sweep needs no re-replay. Adds a sweep table
to BacktestPanel with the current threshold starred.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-16 06:13:30 +02:00
parent 9d2e1e74bf
commit 050abc6f71
4 changed files with 81 additions and 3 deletions
+22 -2
View File
@@ -160,6 +160,12 @@ def _window_setups(
stop_loss=stop,
entry_price=entry,
)
# meets_core = clears every gate EXCEPT target probability, so the report
# can sweep the min_target_probability threshold without re-replaying.
core_config = {**activation, "min_target_probability": 0.0}
meets_core = setup_qualifies(setup_ns, core_config)
best_prob = best_target_probability(setup_ns)
min_tp = float(activation.get("min_target_probability", 0.0))
out.append({
"direction": direction,
"entry": entry,
@@ -168,10 +174,11 @@ def _window_setups(
"rr": rr,
"confidence": confidences[direction],
"primary_prob": float(primary["probability"]),
"best_prob": best_target_probability(setup_ns),
"best_prob": best_prob,
"meets_core": meets_core,
"action": action,
"risk_level": risk_level,
"qualified": setup_qualifies(setup_ns, activation),
"qualified": meets_core and best_prob >= min_tp,
})
return out
@@ -208,6 +215,8 @@ def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) -
"rr": s["rr"],
"confidence": s["confidence"],
"primary_prob": s["primary_prob"],
"best_prob": s["best_prob"],
"meets_core": s["meets_core"],
"qualified": s["qualified"],
"outcome": outcome,
"target_hit": target_hit,
@@ -279,6 +288,15 @@ async def run_backtest(
longs = [c for c in qualified if c["direction"] == "long"]
shorts = [c for c in qualified if c["direction"] == "short"]
# Threshold sweep: re-apply the gate at several min_target_probability values
# (holding the other conditions fixed) so the trade-off between how many
# setups qualify and their expectancy is visible without re-replaying.
current_min_tp = float(activation.get("min_target_probability", 60.0))
sweep = []
for threshold in (60, 55, 50, 45, 40, 35, 30):
cands = [c for c in candidates if c["meets_core"] and c["best_prob"] >= threshold]
sweep.append({"min_target_probability": threshold, **_bucket_stats(cands)})
return {
"generated_at": datetime.now(timezone.utc).isoformat(),
"tickers": total,
@@ -292,6 +310,8 @@ async def run_backtest(
"long": _bucket_stats(longs),
"short": _bucket_stats(shorts),
},
"min_target_probability": current_min_tp,
"sweep": sweep,
"calibration": _calibration(candidates),
"note": (
"Sentiment & fundamentals held neutral (no point-in-time history). "
@@ -158,6 +158,53 @@ export function BacktestPanel() {
</table>
</div>
{report.sweep && report.sweep.length > 0 && (
<div>
<p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
Min target-probability sweep
</p>
<p className="mb-2 text-[11px] text-gray-500">
How many setups qualify and how they perform at each gate threshold (other
gate conditions held fixed). Lower = more trades, watch that expectancy holds.
Your current setting is highlighted; set it in Admin Settings Activation.
</p>
<div className="glass overflow-x-auto">
<table className="w-full text-sm">
<thead>
<tr className="border-b border-white/[0.06] text-left text-xs uppercase tracking-wider text-gray-500">
<th className="px-4 py-2.5">Min Target Prob</th>
<th className="px-4 py-2.5 text-right">Qualified</th>
<th className="px-4 py-2.5 text-right">Wins</th>
<th className="px-4 py-2.5 text-right">Losses</th>
<th className="px-4 py-2.5 text-right">Hit Rate</th>
<th className="px-4 py-2.5 text-right">Avg R</th>
<th className="px-4 py-2.5 text-right">Total R</th>
</tr>
</thead>
<tbody>
{report.sweep.map((row) => {
const current = Math.abs(row.min_target_probability - report.min_target_probability) < 0.5;
return (
<tr key={row.min_target_probability} className={`border-b border-white/[0.04] ${current ? 'bg-blue-400/10' : ''}`}>
<td className="num px-4 py-2.5 text-gray-200">
{current && <span className="mr-1 text-blue-300"></span>}
{row.min_target_probability}%
</td>
<td className="num px-4 py-2.5 text-right text-gray-200">{row.total}</td>
<td className="num px-4 py-2.5 text-right text-emerald-400">{row.wins}</td>
<td className="num px-4 py-2.5 text-right text-red-400">{row.losses}</td>
<td className="num px-4 py-2.5 text-right text-gray-200">{fmtPct(row.hit_rate)}</td>
<td className={`num px-4 py-2.5 text-right font-semibold ${rColor(row.avg_r)}`}>{fmtR(row.avg_r)}</td>
<td className={`num px-4 py-2.5 text-right ${rColor(row.total_r)}`}>{fmtR(row.total_r)}</td>
</tr>
);
})}
</tbody>
</table>
</div>
</div>
)}
<div>
<p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
Probability calibration
+6
View File
@@ -196,6 +196,10 @@ export interface BacktestCalibrationRow {
realized_hit_rate: number;
}
export interface BacktestSweepRow extends BacktestBucket {
min_target_probability: number;
}
export interface BacktestReport {
generated_at: string;
tickers: number;
@@ -205,6 +209,8 @@ export interface BacktestReport {
overall_qualified: BacktestBucket;
overall_all: BacktestBucket;
by_direction: Record<string, BacktestBucket>;
min_target_probability: number;
sweep: BacktestSweepRow[];
calibration: BacktestCalibrationRow[];
note: string;
}
+6 -1
View File
@@ -108,10 +108,15 @@ async def test_run_backtest_smoke(session):
# well-formed report
assert report["tickers"] == 1
assert isinstance(report["candidates"], int)
for key in ("overall_qualified", "overall_all", "by_direction", "calibration"):
for key in ("overall_qualified", "overall_all", "by_direction", "calibration", "sweep"):
assert key in report
# the oscillating series should yield at least some resolved setups
assert report["candidates"] >= 1
# sweep: lowering the threshold can only add qualifiers, never remove them
sweep = sorted(report["sweep"], key=lambda r: r["min_target_probability"], reverse=True)
counts = [r["total"] for r in sweep]
assert counts == sorted(counts) # ascending as threshold descends
# every calibration row is internally consistent
for row in report["calibration"]:
assert 0 <= row["realized_hit_rate"] <= 100