feat: robustness stats + dynamic recommendation; retire settled report sections

Robustness (answers 'is the edge just outliers?'):
- _bucket_stats gains median_net_r, profit_factor, and net_avg_r_ex_top5 (expectancy with the top 5% of winners removed); shown as stat tiles.
- Portfolio sim gains per-calendar-year returns, shown in the sim table.

Dynamic recommendation ('What this backtest recommends' panel):
- _build_recommendation derives advice from the report's own numbers on every run — exit policy (target vs best hold, with sim CAGRs), which gate floors earn their keep (ablation Hold column), best momentum cutoff, book-vs-SPY verdict, and an outlier-dependence warning when the trimmed expectancy goes non-positive.

Retired (conclusions reached, tables removed from report + UI):
- Take-profit sweep (no interior optimum — fixed TP is the wrong tool for momentum), trailing sweep (converged to the hold-to-horizon exit), probability calibration (model is display-only by decision).
- _tp_primitives slimmed to _risk_and_stop_day; trailing machinery gone.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-07-02 12:33:22 +02:00
parent 0f43e755f4
commit 243e369e9a
4 changed files with 359 additions and 503 deletions
@@ -124,14 +124,6 @@ export function BacktestPanel() {
  const queryClient = useQueryClient();
  const toast = useToast();

-  const bestTpAvgR =
-    report?.take_profit_sweep && report.take_profit_sweep.length > 0
-      ? Math.max(...report.take_profit_sweep.map((r) => netOrGross(r) ?? -Infinity))
-      : null;
-  const bestTrailAvgR =
-    report?.trailing_sweep && report.trailing_sweep.length > 0
-      ? Math.max(...report.trailing_sweep.map((r) => netOrGross(r) ?? -Infinity))
-      : null;
  const bestTimeAvgR =
    report?.time_exit_sweep && report.time_exit_sweep.length > 0
      ? Math.max(...report.time_exit_sweep.map((r) => netOrGross(r) ?? -Infinity))
@@ -189,6 +181,30 @@ export function BacktestPanel() {
              )}
            </p>

+            {report.recommendation && report.recommendation.items.length > 0 && (
+              <div className="glass border border-blue-400/20 p-4">
+                <p className="section-index">What this backtest recommends</p>
+                {report.recommendation.headline && (
+                  <p className="mt-1.5 text-sm font-semibold text-gray-100">
+                    {report.recommendation.headline}
+                  </p>
+                )}
+                <ul className="mt-2 space-y-1">
+                  {report.recommendation.items.map((item) => (
+                    <li
+                      key={item.topic + item.text}
+                      className={`text-xs ${item.text.includes('WARNING') || item.text.includes('LAGS') ? 'text-amber-400' : 'text-gray-400'}`}
+                    >
+                      {item.text}
+                    </li>
+                  ))}
+                </ul>
+                {report.recommendation.note && (
+                  <p className="mt-2 text-[11px] text-gray-600">{report.recommendation.note}</p>
+                )}
+              </div>
+            )}
+
            <div className="grid gap-3 sm:grid-cols-2 lg:grid-cols-4">
              <Stat
                label="Qualified Hit Rate"
@@ -213,6 +229,30 @@ export function BacktestPanel() {
                valueClass={rColor(report.overall_qualified.total_r)}
                sub="cumulative, risk-adjusted"
              />
+              {report.overall_qualified.median_net_r != null && (
+                <Stat
+                  label="Median Net R"
+                  value={fmtR(report.overall_qualified.median_net_r)}
+                  valueClass={rColor(report.overall_qualified.median_net_r)}
+                  sub="qualified · the typical trade"
+                />
+              )}
+              {report.overall_qualified.profit_factor != null && (
+                <Stat
+                  label="Profit Factor"
+                  value={report.overall_qualified.profit_factor.toFixed(2)}
+                  valueClass={report.overall_qualified.profit_factor > 1 ? 'text-emerald-400' : 'text-red-400'}
+                  sub="qualified · net wins / net losses"
+                />
+              )}
+              {report.overall_qualified.net_avg_r_ex_top5 != null && (
+                <Stat
+                  label="Ex-Top-5% Net R"
+                  value={fmtR(report.overall_qualified.net_avg_r_ex_top5)}
+                  valueClass={rColor(report.overall_qualified.net_avg_r_ex_top5)}
+                  sub="expectancy without the biggest winners"
+                />
+              )}
            </div>

            <div className="glass overflow-x-auto">
@@ -348,106 +388,6 @@ export function BacktestPanel() {
              </div>
            )}

-            {report.take_profit_sweep && report.take_profit_sweep.length > 0 && (
-              <div>
-                <p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
-                  Take-profit exit (alternative to the target above)
-                </p>
-                <p className="mb-2 text-[11px] text-gray-500">
-                  Models a realistic exit instead of waiting for the far S/R target: bank{' '}
-                  <span className="text-gray-300">+X%</span> if price reaches it before the stop, else the
-                  stop-fill loss (a gap through the stop fills at the open, so it can exceed −1R), else exit
-                  at the {report.params.horizon_days}-day close. In R, so it compares to the
-                  target model above. <span className="text-gray-300">Hit Rate = how often you'd have banked
-                  +X%</span> (how far winners actually run) — no top-ticking, it's the level you'd really set.
-                  The setup's own S/R target is <em>not</em> used here (exiting at that target is the model
-                  above); this is a pure fixed-% exit. ★ = best net avg R.
-                </p>
-                <div className="glass overflow-x-auto">
-                  <table className="w-full text-sm">
-                    <thead>
-                      <tr className="border-b border-white/[0.06] text-left text-xs uppercase tracking-wider text-gray-500">
-                        <th className="px-4 py-2.5">Take-profit</th>
-                        <th className="px-4 py-2.5 text-right">Setups</th>
-                        <th className="px-4 py-2.5 text-right">Hit (banked)</th>
-                        <th className="px-4 py-2.5 text-right">Hit Rate</th>
-                        <th className="px-4 py-2.5 text-right">Avg R</th>
-                        <th className="px-4 py-2.5 text-right">Net Avg R</th>
-                        <th className="px-4 py-2.5 text-right">Total R</th>
-                      </tr>
-                    </thead>
-                    <tbody>
-                      {report.take_profit_sweep.map((row) => {
-                        const best = netOrGross(row) != null && netOrGross(row) === bestTpAvgR;
-                        return (
-                          <tr key={row.tp_pct} className={`border-b border-white/[0.04] ${best ? 'bg-emerald-400/[0.06]' : ''}`}>
-                            <td className="num px-4 py-2.5 text-gray-200">
-                              {best && <span className="mr-1 text-emerald-300">★</span>}
-                              +{row.tp_pct}%
-                            </td>
-                            <td className="num px-4 py-2.5 text-right text-gray-200">{row.total}</td>
-                            <td className="num px-4 py-2.5 text-right text-emerald-400">{row.wins}</td>
-                            <td className="num px-4 py-2.5 text-right text-gray-200">{fmtPct(row.hit_rate)}</td>
-                            <td className={`num px-4 py-2.5 text-right ${rColor(row.avg_r)}`}>{fmtR(row.avg_r)}</td>
-                            <td className={`num px-4 py-2.5 text-right font-semibold ${rColor(row.net_avg_r ?? null)}`}>{fmtR(row.net_avg_r ?? null)}</td>
-                            <td className={`num px-4 py-2.5 text-right ${rColor(row.total_r)}`}>{fmtR(row.total_r)}</td>
-                          </tr>
-                        );
-                      })}
-                    </tbody>
-                  </table>
-                </div>
-              </div>
-            )}
-
-            {report.trailing_sweep && report.trailing_sweep.length > 0 && (
-              <div>
-                <p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
-                  Trailing-stop exit
-                </p>
-                <p className="mb-2 text-[11px] text-gray-500">
-                  Let it run, but exit when price gives back <span className="text-gray-300">X% from its
-                  peak</span> (the stop only ratchets up, never below the initial stop). Captures the tail
-                  without the fixed take-profit's all-or-nothing miss, and protects gains. In R vs the initial
-                  risk. <span className="text-gray-300">Win Rate = share closed in profit.</span> ★ = best net avg R.
-                </p>
-                <div className="glass overflow-x-auto">
-                  <table className="w-full text-sm">
-                    <thead>
-                      <tr className="border-b border-white/[0.06] text-left text-xs uppercase tracking-wider text-gray-500">
-                        <th className="px-4 py-2.5">Trail</th>
-                        <th className="px-4 py-2.5 text-right">Setups</th>
-                        <th className="px-4 py-2.5 text-right">Profitable</th>
-                        <th className="px-4 py-2.5 text-right">Win Rate</th>
-                        <th className="px-4 py-2.5 text-right">Avg R</th>
-                        <th className="px-4 py-2.5 text-right">Net Avg R</th>
-                        <th className="px-4 py-2.5 text-right">Total R</th>
-                      </tr>
-                    </thead>
-                    <tbody>
-                      {report.trailing_sweep.map((row) => {
-                        const best = netOrGross(row) != null && netOrGross(row) === bestTrailAvgR;
-                        return (
-                          <tr key={row.trail_pct} className={`border-b border-white/[0.04] ${best ? 'bg-emerald-400/[0.06]' : ''}`}>
-                            <td className="num px-4 py-2.5 text-gray-200">
-                              {best && <span className="mr-1 text-emerald-300">★</span>}
-                              {row.trail_pct}%
-                            </td>
-                            <td className="num px-4 py-2.5 text-right text-gray-200">{row.total}</td>
-                            <td className="num px-4 py-2.5 text-right text-emerald-400">{row.wins}</td>
-                            <td className="num px-4 py-2.5 text-right text-gray-200">{fmtPct(row.win_rate)}</td>
-                            <td className={`num px-4 py-2.5 text-right ${rColor(row.avg_r)}`}>{fmtR(row.avg_r)}</td>
-                            <td className={`num px-4 py-2.5 text-right font-semibold ${rColor(row.net_avg_r ?? null)}`}>{fmtR(row.net_avg_r ?? null)}</td>
-                            <td className={`num px-4 py-2.5 text-right ${rColor(row.total_r)}`}>{fmtR(row.total_r)}</td>
-                          </tr>
-                        );
-                      })}
-                    </tbody>
-                  </table>
-                </div>
-              </div>
-            )}
-
            {report.time_exit_sweep && report.time_exit_sweep.length > 0 && (
              <div>
                <p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
@@ -543,6 +483,16 @@ export function BacktestPanel() {
                          ['Avg P&L / trade', (p) => fmtMoney(p.avg_trade_pnl), (p) => rColor(p.avg_trade_pnl)],
                          ['Best / worst trade', (p) => `${fmtR(p.best_trade_r)} / ${fmtR(p.worst_trade_r)}`, () => 'text-gray-300'],
                          ['Avg holding time', (p) => fmtDays(p.avg_hold_days), () => 'text-gray-300'],
+                          [
+                            'Per-year returns',
+                            (p) =>
+                              p.yearly_returns && p.yearly_returns.length > 0
+                                ? p.yearly_returns
+                                    .map((y) => `${y.year} ${fmtSignedPct(y.return_pct)}`)
+                                    .join(' · ')
+                                : '—',
+                            () => 'text-gray-300',
+                          ],
                          ['Entries skipped (book full)', (p) => String(p.skipped_book_full), () => 'text-gray-500'],
                        ] as [string, (p: BacktestPortfolioPolicy) => string, (p: BacktestPortfolioPolicy) => string][]
                      ).map(([label, fmt, color]) => (
@@ -561,47 +511,6 @@ export function BacktestPanel() {
              </div>
            )}

-            <div>
-              <p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
-                Probability calibration
-              </p>
-              <p className="mb-2 text-[11px] text-gray-500">
-                Do targets we call “X% likely” actually hit that often? Realized below predicted =
-                the model is over-confident.
-              </p>
-              {report.calibration.length === 0 ? (
-                <Callout variant="empty">Not enough resolved setups to calibrate.</Callout>
-              ) : (
-                <div className="glass overflow-x-auto">
-                  <table className="w-full text-sm">
-                    <thead>
-                      <tr className="border-b border-white/[0.06] text-left text-xs uppercase tracking-wider text-gray-500">
-                        <th className="px-4 py-2.5">Predicted Bucket</th>
-                        <th className="px-4 py-2.5 text-right">Setups</th>
-                        <th className="px-4 py-2.5 text-right">Avg Predicted</th>
-                        <th className="px-4 py-2.5 text-right">Realized Hit Rate</th>
-                      </tr>
-                    </thead>
-                    <tbody>
-                      {report.calibration.map((row) => {
-                        const over = row.realized_hit_rate < row.predicted_avg;
-                        return (
-                          <tr key={row.bucket} className="border-b border-white/[0.04]">
-                            <td className="px-4 py-2.5 text-gray-200">{row.bucket}</td>
-                            <td className="num px-4 py-2.5 text-right text-gray-300">{row.n}</td>
-                            <td className="num px-4 py-2.5 text-right text-gray-400">{row.predicted_avg.toFixed(0)}%</td>
-                            <td className={`num px-4 py-2.5 text-right font-semibold ${over ? 'text-amber-400' : 'text-emerald-400'}`}>
-                              {row.realized_hit_rate.toFixed(0)}%
-                            </td>
-                          </tr>
-                        );
-                      })}
-                    </tbody>
-                  </table>
-                </div>
-              )}
-            </div>
-
            {report.signal_eval && report.signal_eval.length > 0 && (
              <div>
                <p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">