feat: grade gate-ablation variants under the hold-to-horizon exit too
The ablation judged floors under the target/stop model, but the exit sweeps point at replacing that exit with a fixed hold — under which the R:R floor's rationale (bigger payoff at the target) may not apply. Each ablation row now also carries hold_days / hold_avg_r / hold_net_avg_r / hold_total_r (30d hold, initial stop only), so the Phase 3 gate decision can be read under the exit policy that would actually be used.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -911,16 +911,27 @@ def _gate_ablation(candidates: list[dict], activation: dict, threshold: float) -
|
|||||||
("no_neutral_exclusion", [rr_ok, conf_ok, tighteners_ok]),
|
("no_neutral_exclusion", [rr_ok, conf_ok, tighteners_ok]),
|
||||||
("momentum_only", []),
|
("momentum_only", []),
|
||||||
]
|
]
|
||||||
return [
|
# Grade each variant under BOTH exit models: the target/stop outcome
|
||||||
{
|
# (_bucket_stats) and the hold-to-horizon time exit. A floor that pays under
|
||||||
|
# the target model may be meaningless once the exit is a fixed hold — the
|
||||||
|
# hold_* columns are what a time-exit gate decision should read.
|
||||||
|
hold_days = max(TIME_EXIT_DAYS)
|
||||||
|
rows: list[dict] = []
|
||||||
|
for name, checks in variants:
|
||||||
|
matching = [
|
||||||
|
c for c in candidates
|
||||||
|
if momentum_ok(c) and all(check(c) for check in checks)
|
||||||
|
]
|
||||||
|
hold = _time_exit_bucket(matching, hold_days)
|
||||||
|
rows.append({
|
||||||
"variant": name,
|
"variant": name,
|
||||||
**_bucket_stats([
|
**_bucket_stats(matching),
|
||||||
c for c in candidates
|
"hold_days": hold_days,
|
||||||
if momentum_ok(c) and all(check(c) for check in checks)
|
"hold_avg_r": hold["avg_r"],
|
||||||
]),
|
"hold_net_avg_r": hold["net_avg_r"],
|
||||||
}
|
"hold_total_r": hold["total_r"],
|
||||||
for name, checks in variants
|
})
|
||||||
]
|
return rows
|
||||||
|
|
||||||
|
|
||||||
async def run_backtest(
|
async def run_backtest(
|
||||||
@@ -1051,7 +1062,10 @@ async def run_backtest(
|
|||||||
"Each row re-qualifies the same candidates at the current momentum "
|
"Each row re-qualifies the same candidates at the current momentum "
|
||||||
f"cutoff ({current_min_pct:.0f}) with one floor removed (long-only "
|
f"cutoff ({current_min_pct:.0f}) with one floor removed (long-only "
|
||||||
"while the momentum gate is active). If dropping a floor doesn't "
|
"while the momentum gate is active). If dropping a floor doesn't "
|
||||||
"hurt net expectancy, that floor isn't pulling its weight."
|
"hurt net expectancy, that floor isn't pulling its weight. The Hold "
|
||||||
|
"columns grade the same variants under the hold-to-horizon time exit "
|
||||||
|
"instead of the S/R target — the view that matters if the exit "
|
||||||
|
"policy moves to a fixed hold."
|
||||||
),
|
),
|
||||||
"take_profit_sweep": [_take_profit_bucket(qualified, tp) for tp in TP_LEVELS],
|
"take_profit_sweep": [_take_profit_bucket(qualified, tp) for tp in TP_LEVELS],
|
||||||
"trailing_sweep": [_trailing_bucket(qualified, round(f * 100)) for f in TRAIL_LEVELS],
|
"trailing_sweep": [_trailing_bucket(qualified, round(f * 100)) for f in TRAIL_LEVELS],
|
||||||
|
|||||||
@@ -285,6 +285,8 @@ export function BacktestPanel() {
|
|||||||
<th className="px-4 py-2.5 text-right">Avg R</th>
|
<th className="px-4 py-2.5 text-right">Avg R</th>
|
||||||
<th className="px-4 py-2.5 text-right">Net Avg R</th>
|
<th className="px-4 py-2.5 text-right">Net Avg R</th>
|
||||||
<th className="px-4 py-2.5 text-right">Total R</th>
|
<th className="px-4 py-2.5 text-right">Total R</th>
|
||||||
|
<th className="px-4 py-2.5 text-right">Hold Net Avg R</th>
|
||||||
|
<th className="px-4 py-2.5 text-right">Hold Total R</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
@@ -303,6 +305,12 @@ export function BacktestPanel() {
|
|||||||
{fmtR(row.net_avg_r ?? null)}
|
{fmtR(row.net_avg_r ?? null)}
|
||||||
</td>
|
</td>
|
||||||
<td className={`num px-4 py-2.5 text-right ${rColor(row.total_r)}`}>{fmtR(row.total_r)}</td>
|
<td className={`num px-4 py-2.5 text-right ${rColor(row.total_r)}`}>{fmtR(row.total_r)}</td>
|
||||||
|
<td className={`num px-4 py-2.5 text-right font-semibold ${rColor(row.hold_net_avg_r ?? null)}`}>
|
||||||
|
{fmtR(row.hold_net_avg_r ?? null)}
|
||||||
|
</td>
|
||||||
|
<td className={`num px-4 py-2.5 text-right ${rColor(row.hold_total_r ?? null)}`}>
|
||||||
|
{fmtR(row.hold_total_r ?? null)}
|
||||||
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
))}
|
))}
|
||||||
</tbody>
|
</tbody>
|
||||||
|
|||||||
@@ -280,6 +280,11 @@ export interface BacktestTimeExitRow {
|
|||||||
|
|
||||||
export interface BacktestGateAblationRow extends BacktestBucket {
|
export interface BacktestGateAblationRow extends BacktestBucket {
|
||||||
variant: string;
|
variant: string;
|
||||||
|
// The same variant graded under the hold-to-horizon time exit.
|
||||||
|
hold_days?: number;
|
||||||
|
hold_avg_r?: number | null;
|
||||||
|
hold_net_avg_r?: number | null;
|
||||||
|
hold_total_r?: number | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface BacktestSignalEvalRow {
|
export interface BacktestSignalEvalRow {
|
||||||
|
|||||||
@@ -221,6 +221,7 @@ def _acand(
|
|||||||
"outcome": OUTCOME_TARGET_HIT,
|
"outcome": OUTCOME_TARGET_HIT,
|
||||||
"realized_r": rr,
|
"realized_r": rr,
|
||||||
"risk_pct": 0.05,
|
"risk_pct": 0.05,
|
||||||
|
"time_r": {d: 0.5 for d in bt.TIME_EXIT_DAYS},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -249,6 +250,11 @@ class TestGateAblation:
|
|||||||
assert rows["no_neutral_exclusion"]["total"] == 2
|
assert rows["no_neutral_exclusion"]["total"] == 2
|
||||||
assert rows["momentum_only"]["total"] == 4
|
assert rows["momentum_only"]["total"] == 4
|
||||||
assert rows["all_floors"]["net_avg_r"] is not None
|
assert rows["all_floors"]["net_avg_r"] is not None
|
||||||
|
# Every variant is also graded under the hold-to-horizon exit.
|
||||||
|
assert rows["all_floors"]["hold_days"] == max(bt.TIME_EXIT_DAYS)
|
||||||
|
assert rows["all_floors"]["hold_avg_r"] == pytest.approx(0.5)
|
||||||
|
assert rows["all_floors"]["hold_net_avg_r"] is not None
|
||||||
|
assert rows["momentum_only"]["hold_total_r"] == pytest.approx(4 * 0.5, abs=0.01)
|
||||||
|
|
||||||
def test_threshold_zero_disables_momentum_gate(self):
|
def test_threshold_zero_disables_momentum_gate(self):
|
||||||
# Floors only: the short and the low-momentum long both pass all_floors.
|
# Floors only: the short and the low-momentum long both pass all_floors.
|
||||||
@@ -378,9 +384,12 @@ async def test_run_backtest_smoke(session):
|
|||||||
assert report["params"]["cost_per_side_pct"] == pytest.approx(bt.COST_PER_SIDE * 100)
|
assert report["params"]["cost_per_side_pct"] == pytest.approx(bt.COST_PER_SIDE * 100)
|
||||||
assert "net_avg_r" in report["overall_all"]
|
assert "net_avg_r" in report["overall_all"]
|
||||||
|
|
||||||
# ablation baseline reproduces the qualified set exactly
|
# ablation baseline reproduces the qualified set exactly, and every row
|
||||||
|
# carries the hold-to-horizon grading alongside the target model
|
||||||
ablation = {r["variant"]: r for r in report["gate_ablation"]}
|
ablation = {r["variant"]: r for r in report["gate_ablation"]}
|
||||||
assert ablation["all_floors"]["total"] == report["overall_qualified"]["total"]
|
assert ablation["all_floors"]["total"] == report["overall_qualified"]["total"]
|
||||||
|
for row in report["gate_ablation"]:
|
||||||
|
assert "hold_net_avg_r" in row
|
||||||
|
|
||||||
# time-exit sweep covers the configured hold lengths
|
# time-exit sweep covers the configured hold lengths
|
||||||
assert [r["hold_days"] for r in report["time_exit_sweep"]] == list(bt.TIME_EXIT_DAYS)
|
assert [r["hold_days"] for r in report["time_exit_sweep"]] == list(bt.TIME_EXIT_DAYS)
|
||||||
|
|||||||
Reference in New Issue
Block a user