fix: judge robustness under the recommended exit, not the abandoned one
The robustness warning was computed on the target-model distribution even though the same panel recommends the hold exit, which is internally inconsistent. _robustness_stats (median, profit factor, ex-top-5% expectancy) is now shared by _bucket_stats and _time_exit_bucket; the time-exit table shows Median Net R and Ex-Top-5% per hold length; and _build_recommendation reads the trimmed expectancy from the recommended exit's bucket (falling back to the target model when no hold is recommended).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -375,14 +375,6 @@ def _bucket_stats(cands: list[dict]) -> dict:
|
|||||||
holds = [c["hold_days"] for c in cands if c.get("hold_days")]
|
holds = [c["hold_days"] for c in cands if c.get("hold_days")]
|
||||||
avg_hold = sum(holds) / len(holds) if holds else None
|
avg_hold = sum(holds) / len(holds) if holds else None
|
||||||
net_avg = sum(net_rs) / len(net_rs) if net_rs else None
|
net_avg = sum(net_rs) / len(net_rs) if net_rs else None
|
||||||
# Robustness: does the edge depend on a handful of outliers? Median and
|
|
||||||
# profit factor describe the distribution; ex-top-5% is the expectancy with
|
|
||||||
# the biggest winners removed — if it stays positive, the edge isn't a
|
|
||||||
# lottery ticket.
|
|
||||||
gains = sum(r for r in net_rs if r > 0)
|
|
||||||
losses_abs = -sum(r for r in net_rs if r < 0)
|
|
||||||
trim_n = math.ceil(len(net_rs) * 0.05) if net_rs else 0
|
|
||||||
trimmed = sorted(net_rs, reverse=True)[trim_n:] if net_rs else []
|
|
||||||
return {
|
return {
|
||||||
"total": len(cands),
|
"total": len(cands),
|
||||||
"wins": wins,
|
"wins": wins,
|
||||||
@@ -400,7 +392,21 @@ def _bucket_stats(cands: list[dict]) -> dict:
|
|||||||
"net_r_per_day": (
|
"net_r_per_day": (
|
||||||
round(net_avg / avg_hold, 4) if net_avg is not None and avg_hold else None
|
round(net_avg / avg_hold, 4) if net_avg is not None and avg_hold else None
|
||||||
),
|
),
|
||||||
"median_net_r": round(statistics.median(net_rs), 3) if net_rs else None,
|
**_robustness_stats(net_rs),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _robustness_stats(net_rs: list[float]) -> dict:
|
||||||
|
"""Distribution-shape stats: the median (typical) trade, gross wins vs
|
||||||
|
losses, and the expectancy with the top 5% of winners removed — the direct
|
||||||
|
test of whether the edge depends on a handful of outliers."""
|
||||||
|
if not net_rs:
|
||||||
|
return {"median_net_r": None, "profit_factor": None, "net_avg_r_ex_top5": None}
|
||||||
|
gains = sum(r for r in net_rs if r > 0)
|
||||||
|
losses_abs = -sum(r for r in net_rs if r < 0)
|
||||||
|
trimmed = sorted(net_rs, reverse=True)[math.ceil(len(net_rs) * 0.05):]
|
||||||
|
return {
|
||||||
|
"median_net_r": round(statistics.median(net_rs), 3),
|
||||||
"profit_factor": round(gains / losses_abs, 2) if losses_abs > 0 else None,
|
"profit_factor": round(gains / losses_abs, 2) if losses_abs > 0 else None,
|
||||||
"net_avg_r_ex_top5": (
|
"net_avg_r_ex_top5": (
|
||||||
round(sum(trimmed) / len(trimmed), 3) if trimmed else None
|
round(sum(trimmed) / len(trimmed), 3) if trimmed else None
|
||||||
@@ -466,6 +472,7 @@ def _time_exit_bucket(cands: list[dict], hold_days: int) -> dict:
|
|||||||
"net_r_per_day": (
|
"net_r_per_day": (
|
||||||
round(net_avg / avg_hold, 4) if net_avg is not None and avg_hold else None
|
round(net_avg / avg_hold, 4) if net_avg is not None and avg_hold else None
|
||||||
),
|
),
|
||||||
|
**_robustness_stats(net_rs),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1190,15 +1197,27 @@ def _build_recommendation(report: dict) -> dict:
|
|||||||
),
|
),
|
||||||
})
|
})
|
||||||
|
|
||||||
# Robustness: does the edge survive without the biggest winners?
|
# Robustness: does the edge survive without the biggest winners? Judged on
|
||||||
trimmed = q.get("net_avg_r_ex_top5")
|
# the RECOMMENDED exit — outlier dependence under an exit we'd abandon
|
||||||
|
# would be the wrong warning.
|
||||||
|
hold_recommended = (
|
||||||
|
best_hold is not None and target_net is not None
|
||||||
|
and best_hold["net_avg_r"] > target_net + _EXIT_SWITCH_THRESHOLD
|
||||||
|
)
|
||||||
|
if hold_recommended and best_hold.get("net_avg_r_ex_top5") is not None:
|
||||||
|
trimmed = best_hold["net_avg_r_ex_top5"]
|
||||||
|
basis = f"under the recommended {best_hold['hold_days']}d hold"
|
||||||
|
else:
|
||||||
|
trimmed = q.get("net_avg_r_ex_top5")
|
||||||
|
basis = "under the S/R target exit"
|
||||||
if trimmed is not None:
|
if trimmed is not None:
|
||||||
if trimmed > 0:
|
if trimmed > 0:
|
||||||
items.append({
|
items.append({
|
||||||
"topic": "robustness",
|
"topic": "robustness",
|
||||||
"text": (
|
"text": (
|
||||||
f"Robustness: expectancy survives removing the top 5% of winners "
|
f"Robustness: expectancy survives removing the top 5% of winners "
|
||||||
f"({trimmed:+.2f}R net/trade) — the edge is not a handful of outliers."
|
f"({trimmed:+.2f}R net/trade {basis}) — the edge is not a handful "
|
||||||
|
"of outliers."
|
||||||
),
|
),
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
@@ -1206,13 +1225,13 @@ def _build_recommendation(report: dict) -> dict:
|
|||||||
"topic": "robustness",
|
"topic": "robustness",
|
||||||
"text": (
|
"text": (
|
||||||
f"Robustness WARNING: without the top 5% of winners the edge disappears "
|
f"Robustness WARNING: without the top 5% of winners the edge disappears "
|
||||||
f"({trimmed:+.2f}R net/trade) — outlier-dependent, treat the headline "
|
f"({trimmed:+.2f}R net/trade {basis}) — outlier-dependent, treat the "
|
||||||
"expectancy with caution."
|
"headline expectancy with caution."
|
||||||
),
|
),
|
||||||
})
|
})
|
||||||
|
|
||||||
headline = None
|
headline = None
|
||||||
if best_hold is not None and target_net is not None and best_hold["net_avg_r"] > target_net + _EXIT_SWITCH_THRESHOLD:
|
if hold_recommended:
|
||||||
cagr_note = (
|
cagr_note = (
|
||||||
f" (~{hold_sim['cagr_pct']:.0f}% CAGR simulated)"
|
f" (~{hold_sim['cagr_pct']:.0f}% CAGR simulated)"
|
||||||
if hold_sim is not None and hold_sim.get("cagr_pct") is not None
|
if hold_sim is not None and hold_sim.get("cagr_pct") is not None
|
||||||
|
|||||||
@@ -414,6 +414,8 @@ export function BacktestPanel() {
|
|||||||
<th className="px-4 py-2.5 text-right">Worst R</th>
|
<th className="px-4 py-2.5 text-right">Worst R</th>
|
||||||
<th className="px-4 py-2.5 text-right">Avg Hold</th>
|
<th className="px-4 py-2.5 text-right">Avg Hold</th>
|
||||||
<th className="px-4 py-2.5 text-right">Net R/d</th>
|
<th className="px-4 py-2.5 text-right">Net R/d</th>
|
||||||
|
<th className="px-4 py-2.5 text-right">Median Net R</th>
|
||||||
|
<th className="px-4 py-2.5 text-right">Ex-Top-5%</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
@@ -435,6 +437,8 @@ export function BacktestPanel() {
|
|||||||
<td className="num px-4 py-2.5 text-right text-red-400">{fmtR(row.worst_r)}</td>
|
<td className="num px-4 py-2.5 text-right text-red-400">{fmtR(row.worst_r)}</td>
|
||||||
<td className="num px-4 py-2.5 text-right text-gray-400">{fmtDays(row.avg_hold_days)}</td>
|
<td className="num px-4 py-2.5 text-right text-gray-400">{fmtDays(row.avg_hold_days)}</td>
|
||||||
<td className={`num px-4 py-2.5 text-right ${rColor(row.net_r_per_day ?? null)}`}>{fmtRPerDay(row.net_r_per_day)}</td>
|
<td className={`num px-4 py-2.5 text-right ${rColor(row.net_r_per_day ?? null)}`}>{fmtRPerDay(row.net_r_per_day)}</td>
|
||||||
|
<td className={`num px-4 py-2.5 text-right ${rColor(row.median_net_r ?? null)}`}>{fmtR(row.median_net_r)}</td>
|
||||||
|
<td className={`num px-4 py-2.5 text-right ${rColor(row.net_avg_r_ex_top5 ?? null)}`}>{fmtR(row.net_avg_r_ex_top5)}</td>
|
||||||
</tr>
|
</tr>
|
||||||
);
|
);
|
||||||
})}
|
})}
|
||||||
|
|||||||
@@ -259,6 +259,9 @@ export interface BacktestTimeExitRow {
|
|||||||
worst_r?: number | null;
|
worst_r?: number | null;
|
||||||
avg_hold_days?: number | null;
|
avg_hold_days?: number | null;
|
||||||
net_r_per_day?: number | null;
|
net_r_per_day?: number | null;
|
||||||
|
median_net_r?: number | null;
|
||||||
|
profit_factor?: number | null;
|
||||||
|
net_avg_r_ex_top5?: number | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface BacktestPortfolioPolicy {
|
export interface BacktestPortfolioPolicy {
|
||||||
|
|||||||
@@ -143,6 +143,10 @@ class TestTimeExitBucket:
|
|||||||
# No stop_day on any candidate → every hold runs the full 5 days.
|
# No stop_day on any candidate → every hold runs the full 5 days.
|
||||||
assert b["avg_hold_days"] == 5.0
|
assert b["avg_hold_days"] == 5.0
|
||||||
assert b["net_r_per_day"] == pytest.approx(0.28 / 5.0, abs=0.001)
|
assert b["net_r_per_day"] == pytest.approx(0.28 / 5.0, abs=0.001)
|
||||||
|
# robustness on net rs [1.38, -1.02, 0.48]
|
||||||
|
assert b["median_net_r"] == pytest.approx(0.48, abs=0.001)
|
||||||
|
assert b["profit_factor"] == pytest.approx(1.86 / 1.02, abs=0.01)
|
||||||
|
assert b["net_avg_r_ex_top5"] == pytest.approx((0.48 - 1.02) / 2, abs=0.001)
|
||||||
|
|
||||||
def test_missing_hold_skipped(self):
|
def test_missing_hold_skipped(self):
|
||||||
b = bt._time_exit_bucket([{"time_r": {5: 1.0}}], 21)
|
b = bt._time_exit_bucket([{"time_r": {5: 1.0}}], 21)
|
||||||
@@ -341,7 +345,7 @@ def test_build_recommendation_reads_the_report():
|
|||||||
"overall_qualified": {"net_avg_r": 0.13, "net_avg_r_ex_top5": 0.05},
|
"overall_qualified": {"net_avg_r": 0.13, "net_avg_r_ex_top5": 0.05},
|
||||||
"time_exit_sweep": [
|
"time_exit_sweep": [
|
||||||
{"hold_days": 21, "net_avg_r": 0.38},
|
{"hold_days": 21, "net_avg_r": 0.38},
|
||||||
{"hold_days": 30, "net_avg_r": 0.50},
|
{"hold_days": 30, "net_avg_r": 0.50, "net_avg_r_ex_top5": 0.21},
|
||||||
],
|
],
|
||||||
"gate_ablation": [
|
"gate_ablation": [
|
||||||
{"variant": "all_floors", "total": 100, "hold_net_avg_r": 0.50},
|
{"variant": "all_floors", "total": 100, "hold_net_avg_r": 0.50},
|
||||||
@@ -374,7 +378,12 @@ def test_build_recommendation_reads_the_report():
|
|||||||
assert "keep the NEUTRAL exclusion" in gate_texts
|
assert "keep the NEUTRAL exclusion" in gate_texts
|
||||||
assert "80" in by_topic["cutoff"][0]
|
assert "80" in by_topic["cutoff"][0]
|
||||||
assert "beats" in by_topic["benchmark"][0]
|
assert "beats" in by_topic["benchmark"][0]
|
||||||
assert any("not a handful of outliers" in t for t in by_topic["robustness"])
|
# robustness is judged under the RECOMMENDED exit (the 30d hold), not the
|
||||||
|
# target model the recommendation advises abandoning
|
||||||
|
assert any(
|
||||||
|
"not a handful of outliers" in t and "under the recommended 30d hold" in t
|
||||||
|
for t in by_topic["robustness"]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_build_recommendation_flags_outlier_dependence():
|
def test_build_recommendation_flags_outlier_dependence():
|
||||||
|
|||||||
Reference in New Issue
Block a user