feat: collapse track record into a live-vs-backtest check

The outcome section measures the same thing as the backtest with the same code and data — its only unique value is catching when the live system drifts from the backtest (a bug, config/data drift, or look-ahead). So reframe it as exactly that: a one-line "Live X R vs Backtest Y R · n matured · tracking ✓ / drift ⚠" indicator (like-for-like with the qualified toggle), with the stat cards and By-Action/By-Confidence tables moved into a collapsed "Outcome details" disclosure. Drop the always-empty By-Direction table.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-28 13:58:15 +02:00
parent 7e9a6cd7ec
commit 6c2e45377c
1 changed files with 152 additions and 105 deletions
@@ -3,6 +3,7 @@ import { useMutation, useQueryClient } from '@tanstack/react-query';
 import { useActivation } from '../../hooks/useActivation';
 import { activationSummary } from '../../lib/qualification';
 import { usePerformance } from '../../hooks/usePerformance';
+import { useBacktestReport } from '../../hooks/useMarketRegime';
 import { triggerJob, resetTrackRecord } from '../../api/admin';
 import { Button } from '../ui/Button';
 import { Callout } from '../ui/Callout';
@@ -15,6 +16,14 @@ import { BacktestPanel } from './BacktestPanel';
 import { MyTradesPanel } from './MyTradesPanel';
 import type { OutcomeBucketStats } from '../../lib/types';

+// Need at least this many matured setups before a live-vs-backtest verdict means
+// anything; below it the live sample is too noisy to compare.
+const MIN_MATURED = 20;
+// Live expectancy this far (in R) below the backtest counts as drift, not noise.
+const DRIFT_TOLERANCE_R = 0.2;
+
+type TrackingStatus = 'building' | 'tracking' | 'drift' | 'no-backtest';
+
 function fmtR(value: number | null): string {
  if (value === null) return '—';
  return `${value > 0 ? '+' : ''}${value.toFixed(2)}R`;
@@ -31,6 +40,17 @@ function rColor(value: number | null): string {
  return 'text-gray-300';
 }

+function VerdictChip({ status }: { status: TrackingStatus }) {
+  const styles: Record<TrackingStatus, { cls: string; label: string }> = {
+    tracking: { cls: 'border-emerald-500/30 bg-emerald-500/15 text-emerald-300', label: '✓ tracking' },
+    drift: { cls: 'border-amber-500/30 bg-amber-500/15 text-amber-300', label: '⚠ drift' },
+    building: { cls: 'border-white/10 bg-white/[0.05] text-gray-400', label: 'building' },
+    'no-backtest': { cls: 'border-white/10 bg-white/[0.05] text-gray-400', label: 'no backtest' },
+  };
+  const s = styles[status];
+  return <span className={`shrink-0 rounded-full border px-2.5 py-1 text-xs font-medium ${s.cls}`}>{s.label}</span>;
+}
+
 function StatCard({ label, value, valueClass = 'text-gray-100', sub }: {
  label: string;
  value: string;
@@ -57,7 +77,7 @@ function BreakdownTable({ rows, labelHeader, mapLabel }: {
 }) {
  const entries = Object.entries(rows);
  if (entries.length === 0) {
-    return <Callout variant="empty">No evaluated setups in this breakdown yet.</Callout>;
+    return <Callout variant="empty">No matured setups in this breakdown yet.</Callout>;
  }
  return (
    <div className="glass overflow-x-auto">
@@ -100,6 +120,7 @@ export function TrackRecordPanel() {
  const { data, isLoading, isError, error } = usePerformance(
    qualifiedOnly ? { qualified_only: true } : undefined,
  );
+  const backtest = useBacktestReport();
  const queryClient = useQueryClient();
  const toast = useToast();

@@ -137,119 +158,145 @@ export function TrackRecordPanel() {
    }
  };

+  // Live (matured cohort) vs the backtest, like-for-like with the qualified toggle.
+  const live = data?.overall ?? null;
+  const btBucket = qualifiedOnly ? backtest.data?.overall_qualified : backtest.data?.overall_all;
+  const liveAvgR = live?.avg_r ?? null;
+  const liveN = live?.total ?? 0;
+  const btAvgR = btBucket?.avg_r ?? null;
+
+  let status: TrackingStatus = 'building';
+  if (liveAvgR != null && liveN >= MIN_MATURED) {
+    status = btAvgR == null ? 'no-backtest' : liveAvgR >= btAvgR - DRIFT_TOLERANCE_R ? 'tracking' : 'drift';
+  }
+
+  const verdictNote: Record<TrackingStatus, string> = {
+    building: `Not enough matured setups yet (need ~${MIN_MATURED}). Only setups whose full ~30-day window has elapsed are counted — the rest are still maturing. Until then, the backtest is your edge estimate; this becomes a live check as setups age past ~6 weeks.`,
+    'no-backtest': 'Run the backtest below to get a baseline to compare the live record against.',
+    tracking: 'Live setups are resolving in line with the backtest — the running system is faithfully implementing it (no look-ahead, config or data drift).',
+    drift: 'Live expectancy is running materially below the backtest. Could be small-sample noise, a regime shift, or a config/data/look-ahead gap between live and the backtest — worth a look.',
+  };
+
  return (
    <div className="space-y-6">
-      {/* Your real, realized results come first; the signal/theoretical record follows. */}
+      {/* Your real, realized results come first; the live-vs-backtest check follows. */}
      <MyTradesPanel />
      <div className="border-t border-white/[0.06]" />

-      <div className="glass-sm flex flex-wrap items-center justify-between gap-3 px-4 py-3">
-        <label className="flex cursor-pointer items-center gap-2.5 text-sm text-gray-300">
-          <input
-            type="checkbox"
-            checked={qualifiedOnly}
-            onChange={(e) => setQualifiedOnly(e.target.checked)}
-            className="h-4 w-4 cursor-pointer accent-blue-400"
-          />
-          <span>
-            Qualified signals only
-            {activation.data && (
-              <span className="num ml-2 text-xs text-gray-500">{activationSummary(activation.data)}</span>
-            )}
-          </span>
-        </label>
-        <p className="text-xs text-gray-500">Confidence breakdown always covers all setups.</p>
-      </div>
-
-      <div className="flex items-start justify-between gap-4">
-        <Disclosure summary="How outcomes are measured">
-          <p className="text-xs text-gray-400">
-            Each setup is replayed against the daily bars after its detection: a{' '}
-            <span className="text-emerald-400">win</span> means the target was reached before the
-            stop, a <span className="text-red-400">loss</span> means the stop was hit first (bars
-            where both levels fall inside the same day count conservatively as losses). Setups with
-            neither level hit within 30 trading days <span className="text-gray-300">expire</span> at
-            0R. Avg R is the expectancy per trade: wins earn their R:R ratio, losses cost −1R — a
-            positive value means the signals have been profitable on a risk-adjusted basis. The
-            evaluator runs nightly after OHLCV collection. Only setups whose full 30-day window has
-            elapsed are counted — younger ones show as <span className="text-gray-300">maturing</span>,
-            since near stops resolve in days while far targets need time, so early numbers would skew
-            negative.
-          </p>
-        </Disclosure>
-        <div className="flex shrink-0 items-center gap-2">
-          <Button onClick={() => evaluateMutation.mutate()} loading={evaluateMutation.isPending}>
-            {evaluateMutation.isPending ? 'Evaluating…' : 'Evaluate Now'}
-          </Button>
-          <Button variant="danger" onClick={onReset} loading={resetMutation.isPending}>
-            {resetMutation.isPending ? 'Resetting…' : 'Reset'}
-          </Button>
-        </div>
-      </div>
-
-      {isLoading && (
-        <div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-4">
-          <SkeletonCard /><SkeletonCard /><SkeletonCard /><SkeletonCard />
-        </div>
-      )}
-
-      {isError && (
-        <Callout variant="error">
-          {error instanceof Error ? error.message : 'Failed to load performance stats'}
-        </Callout>
-      )}
-
-      {data && data.overall.total === 0 && (
-        <Callout variant="empty">
-          {data.maturing > 0
-            ? `No setups have completed their ~30-day evaluation window yet — ${data.maturing} still maturing. ` +
-              'Stats appear once a setup’s full window has elapsed; counting them earlier would skew toward quick stop-outs.'
-            : qualifiedOnly
-              ? 'No matured setups meet the activation thresholds yet. Untick "Qualified signals only" to see all, or wait for more outcomes.'
-              : 'No matured setups yet. Outcomes appear once setups complete their evaluation window — the evaluator runs nightly, or click Evaluate Now.'}
-        </Callout>
-      )}
-
-      {data && data.overall.total > 0 && (
-        <>
-          <div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-4">
-            <StatCard
-              label="Hit Rate"
-              value={fmtPct(data.overall.hit_rate)}
-              sub={`${data.overall.wins} wins / ${data.overall.losses} losses`}
-            />
-            <StatCard
-              label="Expectancy"
-              value={fmtR(data.overall.avg_r)}
-              valueClass={rColor(data.overall.avg_r)}
-              sub="average R per trade"
-            />
-            <StatCard
-              label="Total R"
-              value={fmtR(data.overall.total_r)}
-              valueClass={rColor(data.overall.total_r)}
-              sub="cumulative risk-adjusted result"
-            />
-            <StatCard
-              label="Matured"
-              value={String(data.overall.total)}
-              sub={`${data.maturing} maturing · ${data.overall.expired} expired`}
-            />
+      <Section title="Live vs Backtest" hint="is the live system tracking the backtest?">
+        {isError ? (
+          <Callout variant="error">
+            {error instanceof Error ? error.message : 'Failed to load performance stats'}
+          </Callout>
+        ) : (
+          <div className="glass-sm space-y-2.5 p-4">
+            <div className="flex flex-wrap items-center justify-between gap-x-6 gap-y-2">
+              <div className="flex flex-wrap items-baseline gap-x-5 gap-y-1">
+                <span className="text-sm text-gray-400">
+                  Live <span className={`num font-semibold ${rColor(liveAvgR)}`}>{fmtR(liveAvgR)}</span>
+                </span>
+                <span className="text-sm text-gray-400">
+                  Backtest <span className={`num font-semibold ${rColor(btAvgR)}`}>{fmtR(btAvgR)}</span>
+                </span>
+                <span className="text-xs text-gray-500">
+                  {liveN} matured{data ? ` · ${data.maturing} maturing` : ''} · {qualifiedOnly ? 'qualified' : 'all setups'}
+                </span>
+              </div>
+              <VerdictChip status={status} />
+            </div>
+            <p className="text-[11px] leading-relaxed text-gray-500">{verdictNote[status]}</p>
          </div>
+        )}
+      </Section>

-          <Section title="By Direction">
-            <BreakdownTable rows={data.by_direction} labelHeader="Direction" />
-          </Section>
+      <Disclosure summary="Outcome details (matured cohort)">
+        <div className="space-y-4 pt-1">
+          <label className="flex w-fit cursor-pointer items-center gap-2.5 text-sm text-gray-300">
+            <input
+              type="checkbox"
+              checked={qualifiedOnly}
+              onChange={(e) => setQualifiedOnly(e.target.checked)}
+              className="h-4 w-4 cursor-pointer accent-blue-400"
+            />
+            <span>
+              Qualified signals only
+              {activation.data && (
+                <span className="num ml-2 text-xs text-gray-500">{activationSummary(activation.data)}</span>
+              )}
+            </span>
+          </label>

-          <Section title="By Recommended Action">
-            <BreakdownTable rows={data.by_action} labelHeader="Action" mapLabel={actionLabel} />
-          </Section>
+          {isLoading && (
+            <div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-4">
+              <SkeletonCard /><SkeletonCard /><SkeletonCard /><SkeletonCard />
+            </div>
+          )}

-          <Section title="By Confidence" hint="at detection time">
-            <BreakdownTable rows={data.by_confidence} labelHeader="Confidence" />
-          </Section>
-        </>
-      )}
+          {data && data.overall.total === 0 && (
+            <Callout variant="empty">
+              {data.maturing > 0
+                ? `No setups have completed their ~30-day window yet — ${data.maturing} still maturing. ` +
+                  'Counting them earlier would skew toward quick stop-outs.'
+                : 'No matured setups yet. Outcomes appear once setups complete their evaluation window — the evaluator runs nightly, or click Evaluate Now.'}
+            </Callout>
+          )}
+
+          {data && data.overall.total > 0 && (
+            <>
+              <div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-4">
+                <StatCard
+                  label="Hit Rate"
+                  value={fmtPct(data.overall.hit_rate)}
+                  sub={`${data.overall.wins} wins / ${data.overall.losses} losses`}
+                />
+                <StatCard
+                  label="Expectancy"
+                  value={fmtR(data.overall.avg_r)}
+                  valueClass={rColor(data.overall.avg_r)}
+                  sub="average R per trade"
+                />
+                <StatCard
+                  label="Total R"
+                  value={fmtR(data.overall.total_r)}
+                  valueClass={rColor(data.overall.total_r)}
+                  sub="cumulative risk-adjusted result"
+                />
+                <StatCard
+                  label="Matured"
+                  value={String(data.overall.total)}
+                  sub={`${data.maturing} maturing · ${data.overall.expired} expired`}
+                />
+              </div>
+
+              <Section title="By Recommended Action">
+                <BreakdownTable rows={data.by_action} labelHeader="Action" mapLabel={actionLabel} />
+              </Section>
+
+              <Section title="By Confidence" hint="at detection time · all setups">
+                <BreakdownTable rows={data.by_confidence} labelHeader="Confidence" />
+              </Section>
+            </>
+          )}
+
+          <div className="flex flex-wrap items-center justify-between gap-3 border-t border-white/[0.06] pt-3">
+            <p className="max-w-2xl text-xs text-gray-500">
+              Each setup is replayed against the daily bars after detection: target before stop = win,
+              stop first = loss (both in one bar counts conservatively as a loss), neither within 30
+              trading days = expired at 0R. Only setups whose full window has elapsed are counted; younger
+              ones are still <span className="text-gray-300">maturing</span> (near stops resolve fast, far
+              targets need time, so early numbers would skew negative). The evaluator runs nightly.
+            </p>
+            <div className="flex shrink-0 items-center gap-2">
+              <Button onClick={() => evaluateMutation.mutate()} loading={evaluateMutation.isPending}>
+                {evaluateMutation.isPending ? 'Evaluating…' : 'Evaluate Now'}
+              </Button>
+              <Button variant="danger" onClick={onReset} loading={resetMutation.isPending}>
+                {resetMutation.isPending ? 'Resetting…' : 'Reset'}
+              </Button>
+            </div>
+          </div>
+        </div>
+      </Disclosure>

      <div className="border-t border-white/[0.06] pt-2" />
      <BacktestPanel />