From 6c2e45377c0ca1fc875317f012185c8283c34f81 Mon Sep 17 00:00:00 2001 From: Dennis Thiessen Date: Sun, 28 Jun 2026 13:58:15 +0200 Subject: [PATCH] feat: collapse track record into a live-vs-backtest check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The outcome section measures the same thing as the backtest with the same code and data — its only unique value is catching when the live system drifts from the backtest (a bug, config/data drift, or look-ahead). So reframe it as exactly that: a one-line "Live X R vs Backtest Y R · n matured · tracking ✓ / drift ⚠" indicator (like-for-like with the qualified toggle), with the stat cards and By-Action/By-Confidence tables moved into a collapsed "Outcome details" disclosure. Drop the always-empty By-Direction table. Co-Authored-By: Claude Opus 4.8 --- .../components/signals/TrackRecordPanel.tsx | 257 +++++++++++------- 1 file changed, 152 insertions(+), 105 deletions(-) diff --git a/frontend/src/components/signals/TrackRecordPanel.tsx b/frontend/src/components/signals/TrackRecordPanel.tsx index 719a8ec..78dc811 100644 --- a/frontend/src/components/signals/TrackRecordPanel.tsx +++ b/frontend/src/components/signals/TrackRecordPanel.tsx @@ -3,6 +3,7 @@ import { useMutation, useQueryClient } from '@tanstack/react-query'; import { useActivation } from '../../hooks/useActivation'; import { activationSummary } from '../../lib/qualification'; import { usePerformance } from '../../hooks/usePerformance'; +import { useBacktestReport } from '../../hooks/useMarketRegime'; import { triggerJob, resetTrackRecord } from '../../api/admin'; import { Button } from '../ui/Button'; import { Callout } from '../ui/Callout'; @@ -15,6 +16,14 @@ import { BacktestPanel } from './BacktestPanel'; import { MyTradesPanel } from './MyTradesPanel'; import type { OutcomeBucketStats } from '../../lib/types'; +// Need at least this many matured setups before a live-vs-backtest verdict means +// anything; below it the live sample is too noisy to compare. +const MIN_MATURED = 20; +// Live expectancy this far (in R) below the backtest counts as drift, not noise. +const DRIFT_TOLERANCE_R = 0.2; + +type TrackingStatus = 'building' | 'tracking' | 'drift' | 'no-backtest'; + function fmtR(value: number | null): string { if (value === null) return '—'; return `${value > 0 ? '+' : ''}${value.toFixed(2)}R`; @@ -31,6 +40,17 @@ function rColor(value: number | null): string { return 'text-gray-300'; } +function VerdictChip({ status }: { status: TrackingStatus }) { + const styles: Record = { + tracking: { cls: 'border-emerald-500/30 bg-emerald-500/15 text-emerald-300', label: '✓ tracking' }, + drift: { cls: 'border-amber-500/30 bg-amber-500/15 text-amber-300', label: '⚠ drift' }, + building: { cls: 'border-white/10 bg-white/[0.05] text-gray-400', label: 'building' }, + 'no-backtest': { cls: 'border-white/10 bg-white/[0.05] text-gray-400', label: 'no backtest' }, + }; + const s = styles[status]; + return {s.label}; +} + function StatCard({ label, value, valueClass = 'text-gray-100', sub }: { label: string; value: string; @@ -57,7 +77,7 @@ function BreakdownTable({ rows, labelHeader, mapLabel }: { }) { const entries = Object.entries(rows); if (entries.length === 0) { - return No evaluated setups in this breakdown yet.; + return No matured setups in this breakdown yet.; } return (
@@ -100,6 +120,7 @@ export function TrackRecordPanel() { const { data, isLoading, isError, error } = usePerformance( qualifiedOnly ? { qualified_only: true } : undefined, ); + const backtest = useBacktestReport(); const queryClient = useQueryClient(); const toast = useToast(); @@ -137,119 +158,145 @@ export function TrackRecordPanel() { } }; + // Live (matured cohort) vs the backtest, like-for-like with the qualified toggle. + const live = data?.overall ?? null; + const btBucket = qualifiedOnly ? backtest.data?.overall_qualified : backtest.data?.overall_all; + const liveAvgR = live?.avg_r ?? null; + const liveN = live?.total ?? 0; + const btAvgR = btBucket?.avg_r ?? null; + + let status: TrackingStatus = 'building'; + if (liveAvgR != null && liveN >= MIN_MATURED) { + status = btAvgR == null ? 'no-backtest' : liveAvgR >= btAvgR - DRIFT_TOLERANCE_R ? 'tracking' : 'drift'; + } + + const verdictNote: Record = { + building: `Not enough matured setups yet (need ~${MIN_MATURED}). Only setups whose full ~30-day window has elapsed are counted — the rest are still maturing. Until then, the backtest is your edge estimate; this becomes a live check as setups age past ~6 weeks.`, + 'no-backtest': 'Run the backtest below to get a baseline to compare the live record against.', + tracking: 'Live setups are resolving in line with the backtest — the running system is faithfully implementing it (no look-ahead, config or data drift).', + drift: 'Live expectancy is running materially below the backtest. Could be small-sample noise, a regime shift, or a config/data/look-ahead gap between live and the backtest — worth a look.', + }; + return (
- {/* Your real, realized results come first; the signal/theoretical record follows. */} + {/* Your real, realized results come first; the live-vs-backtest check follows. */}
-
- -

Confidence breakdown always covers all setups.

-
- -
- -

- Each setup is replayed against the daily bars after its detection: a{' '} - win means the target was reached before the - stop, a loss means the stop was hit first (bars - where both levels fall inside the same day count conservatively as losses). Setups with - neither level hit within 30 trading days expire at - 0R. Avg R is the expectancy per trade: wins earn their R:R ratio, losses cost −1R — a - positive value means the signals have been profitable on a risk-adjusted basis. The - evaluator runs nightly after OHLCV collection. Only setups whose full 30-day window has - elapsed are counted — younger ones show as maturing, - since near stops resolve in days while far targets need time, so early numbers would skew - negative. -

-
-
- - -
-
- - {isLoading && ( -
- -
- )} - - {isError && ( - - {error instanceof Error ? error.message : 'Failed to load performance stats'} - - )} - - {data && data.overall.total === 0 && ( - - {data.maturing > 0 - ? `No setups have completed their ~30-day evaluation window yet — ${data.maturing} still maturing. ` + - 'Stats appear once a setup’s full window has elapsed; counting them earlier would skew toward quick stop-outs.' - : qualifiedOnly - ? 'No matured setups meet the activation thresholds yet. Untick "Qualified signals only" to see all, or wait for more outcomes.' - : 'No matured setups yet. Outcomes appear once setups complete their evaluation window — the evaluator runs nightly, or click Evaluate Now.'} - - )} - - {data && data.overall.total > 0 && ( - <> -
- - - - +
+ {isError ? ( + + {error instanceof Error ? error.message : 'Failed to load performance stats'} + + ) : ( +
+
+
+ + Live {fmtR(liveAvgR)} + + + Backtest {fmtR(btAvgR)} + + + {liveN} matured{data ? ` · ${data.maturing} maturing` : ''} · {qualifiedOnly ? 'qualified' : 'all setups'} + +
+ +
+

{verdictNote[status]}

+ )} +
-
- -
+ +
+ -
- -
+ {isLoading && ( +
+ +
+ )} -
- -
- - )} + {data && data.overall.total === 0 && ( + + {data.maturing > 0 + ? `No setups have completed their ~30-day window yet — ${data.maturing} still maturing. ` + + 'Counting them earlier would skew toward quick stop-outs.' + : 'No matured setups yet. Outcomes appear once setups complete their evaluation window — the evaluator runs nightly, or click Evaluate Now.'} + + )} + + {data && data.overall.total > 0 && ( + <> +
+ + + + +
+ +
+ +
+ +
+ +
+ + )} + +
+

+ Each setup is replayed against the daily bars after detection: target before stop = win, + stop first = loss (both in one bar counts conservatively as a loss), neither within 30 + trading days = expired at 0R. Only setups whose full window has elapsed are counted; younger + ones are still maturing (near stops resolve fast, far + targets need time, so early numbers would skew negative). The evaluator runs nightly. +

+
+ + +
+
+
+