backtest: add min target-probability sweep
Re-applies the activation gate at several min_target_probability thresholds (60→30 in steps of 5, with the other gate conditions held fixed) over the already-replayed candidates, so the trade-off between how many setups qualify and their expectancy is visible in one table — the cheap "optimize" half of Phase 2. Candidates now carry meets_core + best_prob, so the sweep needs no re-replay. Adds a new sweep table to BacktestPanel with the current threshold starred. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -160,6 +160,12 @@ def _window_setups(
|
|||||||
stop_loss=stop,
|
stop_loss=stop,
|
||||||
entry_price=entry,
|
entry_price=entry,
|
||||||
)
|
)
|
||||||
|
# meets_core = clears every gate EXCEPT target probability, so the report
|
||||||
|
# can sweep the min_target_probability threshold without re-replaying.
|
||||||
|
core_config = {**activation, "min_target_probability": 0.0}
|
||||||
|
meets_core = setup_qualifies(setup_ns, core_config)
|
||||||
|
best_prob = best_target_probability(setup_ns)
|
||||||
|
min_tp = float(activation.get("min_target_probability", 0.0))
|
||||||
out.append({
|
out.append({
|
||||||
"direction": direction,
|
"direction": direction,
|
||||||
"entry": entry,
|
"entry": entry,
|
||||||
@@ -168,10 +174,11 @@ def _window_setups(
|
|||||||
"rr": rr,
|
"rr": rr,
|
||||||
"confidence": confidences[direction],
|
"confidence": confidences[direction],
|
||||||
"primary_prob": float(primary["probability"]),
|
"primary_prob": float(primary["probability"]),
|
||||||
"best_prob": best_target_probability(setup_ns),
|
"best_prob": best_prob,
|
||||||
|
"meets_core": meets_core,
|
||||||
"action": action,
|
"action": action,
|
||||||
"risk_level": risk_level,
|
"risk_level": risk_level,
|
||||||
"qualified": setup_qualifies(setup_ns, activation),
|
"qualified": meets_core and best_prob >= min_tp,
|
||||||
})
|
})
|
||||||
return out
|
return out
|
||||||
|
|
||||||
@@ -208,6 +215,8 @@ def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) -
|
|||||||
"rr": s["rr"],
|
"rr": s["rr"],
|
||||||
"confidence": s["confidence"],
|
"confidence": s["confidence"],
|
||||||
"primary_prob": s["primary_prob"],
|
"primary_prob": s["primary_prob"],
|
||||||
|
"best_prob": s["best_prob"],
|
||||||
|
"meets_core": s["meets_core"],
|
||||||
"qualified": s["qualified"],
|
"qualified": s["qualified"],
|
||||||
"outcome": outcome,
|
"outcome": outcome,
|
||||||
"target_hit": target_hit,
|
"target_hit": target_hit,
|
||||||
@@ -279,6 +288,15 @@ async def run_backtest(
|
|||||||
longs = [c for c in qualified if c["direction"] == "long"]
|
longs = [c for c in qualified if c["direction"] == "long"]
|
||||||
shorts = [c for c in qualified if c["direction"] == "short"]
|
shorts = [c for c in qualified if c["direction"] == "short"]
|
||||||
|
|
||||||
|
# Threshold sweep: re-apply the gate at several min_target_probability values
|
||||||
|
# (holding the other conditions fixed) so the trade-off between how many
|
||||||
|
# setups qualify and their expectancy is visible without re-replaying.
|
||||||
|
current_min_tp = float(activation.get("min_target_probability", 60.0))
|
||||||
|
sweep = []
|
||||||
|
for threshold in (60, 55, 50, 45, 40, 35, 30):
|
||||||
|
cands = [c for c in candidates if c["meets_core"] and c["best_prob"] >= threshold]
|
||||||
|
sweep.append({"min_target_probability": threshold, **_bucket_stats(cands)})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"generated_at": datetime.now(timezone.utc).isoformat(),
|
"generated_at": datetime.now(timezone.utc).isoformat(),
|
||||||
"tickers": total,
|
"tickers": total,
|
||||||
@@ -292,6 +310,8 @@ async def run_backtest(
|
|||||||
"long": _bucket_stats(longs),
|
"long": _bucket_stats(longs),
|
||||||
"short": _bucket_stats(shorts),
|
"short": _bucket_stats(shorts),
|
||||||
},
|
},
|
||||||
|
"min_target_probability": current_min_tp,
|
||||||
|
"sweep": sweep,
|
||||||
"calibration": _calibration(candidates),
|
"calibration": _calibration(candidates),
|
||||||
"note": (
|
"note": (
|
||||||
"Sentiment & fundamentals held neutral (no point-in-time history). "
|
"Sentiment & fundamentals held neutral (no point-in-time history). "
|
||||||
|
|||||||
@@ -158,6 +158,53 @@ export function BacktestPanel() {
|
|||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{report.sweep && report.sweep.length > 0 && (
|
||||||
|
<div>
|
||||||
|
<p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
|
||||||
|
Min target-probability sweep
|
||||||
|
</p>
|
||||||
|
<p className="mb-2 text-[11px] text-gray-500">
|
||||||
|
How many setups qualify — and how they perform — at each gate threshold (other
|
||||||
|
gate conditions held fixed). Lower = more trades, watch that expectancy holds.
|
||||||
|
Your current setting is highlighted; set it in Admin → Settings → Activation.
|
||||||
|
</p>
|
||||||
|
<div className="glass overflow-x-auto">
|
||||||
|
<table className="w-full text-sm">
|
||||||
|
<thead>
|
||||||
|
<tr className="border-b border-white/[0.06] text-left text-xs uppercase tracking-wider text-gray-500">
|
||||||
|
<th className="px-4 py-2.5">Min Target Prob</th>
|
||||||
|
<th className="px-4 py-2.5 text-right">Qualified</th>
|
||||||
|
<th className="px-4 py-2.5 text-right">Wins</th>
|
||||||
|
<th className="px-4 py-2.5 text-right">Losses</th>
|
||||||
|
<th className="px-4 py-2.5 text-right">Hit Rate</th>
|
||||||
|
<th className="px-4 py-2.5 text-right">Avg R</th>
|
||||||
|
<th className="px-4 py-2.5 text-right">Total R</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{report.sweep.map((row) => {
|
||||||
|
const current = Math.abs(row.min_target_probability - report.min_target_probability) < 0.5;
|
||||||
|
return (
|
||||||
|
<tr key={row.min_target_probability} className={`border-b border-white/[0.04] ${current ? 'bg-blue-400/10' : ''}`}>
|
||||||
|
<td className="num px-4 py-2.5 text-gray-200">
|
||||||
|
{current && <span className="mr-1 text-blue-300">★</span>}
|
||||||
|
{row.min_target_probability}%
|
||||||
|
</td>
|
||||||
|
<td className="num px-4 py-2.5 text-right text-gray-200">{row.total}</td>
|
||||||
|
<td className="num px-4 py-2.5 text-right text-emerald-400">{row.wins}</td>
|
||||||
|
<td className="num px-4 py-2.5 text-right text-red-400">{row.losses}</td>
|
||||||
|
<td className="num px-4 py-2.5 text-right text-gray-200">{fmtPct(row.hit_rate)}</td>
|
||||||
|
<td className={`num px-4 py-2.5 text-right font-semibold ${rColor(row.avg_r)}`}>{fmtR(row.avg_r)}</td>
|
||||||
|
<td className={`num px-4 py-2.5 text-right ${rColor(row.total_r)}`}>{fmtR(row.total_r)}</td>
|
||||||
|
</tr>
|
||||||
|
);
|
||||||
|
})}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
<div>
|
<div>
|
||||||
<p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
|
<p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
|
||||||
Probability calibration
|
Probability calibration
|
||||||
|
|||||||
@@ -196,6 +196,10 @@ export interface BacktestCalibrationRow {
|
|||||||
realized_hit_rate: number;
|
realized_hit_rate: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface BacktestSweepRow extends BacktestBucket {
|
||||||
|
min_target_probability: number;
|
||||||
|
}
|
||||||
|
|
||||||
export interface BacktestReport {
|
export interface BacktestReport {
|
||||||
generated_at: string;
|
generated_at: string;
|
||||||
tickers: number;
|
tickers: number;
|
||||||
@@ -205,6 +209,8 @@ export interface BacktestReport {
|
|||||||
overall_qualified: BacktestBucket;
|
overall_qualified: BacktestBucket;
|
||||||
overall_all: BacktestBucket;
|
overall_all: BacktestBucket;
|
||||||
by_direction: Record<string, BacktestBucket>;
|
by_direction: Record<string, BacktestBucket>;
|
||||||
|
min_target_probability: number;
|
||||||
|
sweep: BacktestSweepRow[];
|
||||||
calibration: BacktestCalibrationRow[];
|
calibration: BacktestCalibrationRow[];
|
||||||
note: string;
|
note: string;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -108,10 +108,15 @@ async def test_run_backtest_smoke(session):
|
|||||||
# well-formed report
|
# well-formed report
|
||||||
assert report["tickers"] == 1
|
assert report["tickers"] == 1
|
||||||
assert isinstance(report["candidates"], int)
|
assert isinstance(report["candidates"], int)
|
||||||
for key in ("overall_qualified", "overall_all", "by_direction", "calibration"):
|
for key in ("overall_qualified", "overall_all", "by_direction", "calibration", "sweep"):
|
||||||
assert key in report
|
assert key in report
|
||||||
# the oscillating series should yield at least some resolved setups
|
# the oscillating series should yield at least some resolved setups
|
||||||
assert report["candidates"] >= 1
|
assert report["candidates"] >= 1
|
||||||
|
|
||||||
|
# sweep: lowering the threshold can only add qualifiers, never remove them
|
||||||
|
sweep = sorted(report["sweep"], key=lambda r: r["min_target_probability"], reverse=True)
|
||||||
|
counts = [r["total"] for r in sweep]
|
||||||
|
assert counts == sorted(counts) # ascending as threshold descends
|
||||||
# every calibration row is internally consistent
|
# every calibration row is internally consistent
|
||||||
for row in report["calibration"]:
|
for row in report["calibration"]:
|
||||||
assert 0 <= row["realized_hit_rate"] <= 100
|
assert 0 <= row["realized_hit_rate"] <= 100
|
||||||
|
|||||||
Reference in New Issue
Block a user