feat: collapse track record into a live-vs-backtest check
The outcome section measures the same thing as the backtest with the same code and data — its only unique value is catching when the live system drifts from the backtest (a bug, config/data drift, or look-ahead). So reframe it as exactly that: a one-line "Live X R vs Backtest Y R · n matured · tracking ✓ / drift ⚠" indicator (like-for-like with the qualified toggle), with the stat cards and By-Action/By-Confidence tables moved into a collapsed "Outcome details" disclosure. Drop the always-empty By-Direction table.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -3,6 +3,7 @@ import { useMutation, useQueryClient } from '@tanstack/react-query';
|
||||
import { useActivation } from '../../hooks/useActivation';
|
||||
import { activationSummary } from '../../lib/qualification';
|
||||
import { usePerformance } from '../../hooks/usePerformance';
|
||||
import { useBacktestReport } from '../../hooks/useMarketRegime';
|
||||
import { triggerJob, resetTrackRecord } from '../../api/admin';
|
||||
import { Button } from '../ui/Button';
|
||||
import { Callout } from '../ui/Callout';
|
||||
@@ -15,6 +16,14 @@ import { BacktestPanel } from './BacktestPanel';
|
||||
import { MyTradesPanel } from './MyTradesPanel';
|
||||
import type { OutcomeBucketStats } from '../../lib/types';
|
||||
|
||||
// Need at least this many matured setups before a live-vs-backtest verdict means
|
||||
// anything; below it the live sample is too noisy to compare.
// Compared with `>=` against the live record's matured count (`overall.total`).
|
||||
const MIN_MATURED = 20;
|
||||
// Live expectancy this far (in R) below the backtest counts as drift, not noise.
// The check is one-sided: live is "tracking" whenever
// liveAvgR >= backtestAvgR - DRIFT_TOLERANCE_R, so live running above the
// backtest is never flagged.
|
||||
const DRIFT_TOLERANCE_R = 0.2;
|
||||
|
||||
// Verdict of the live-vs-backtest comparison:
//   'building'    - default; live avg R is missing or fewer than MIN_MATURED matured setups
//   'no-backtest' - enough live data, but the backtest bucket has no avg R to compare with
//   'tracking'    - live avg R is within DRIFT_TOLERANCE_R of (or above) the backtest
//   'drift'       - live avg R is more than DRIFT_TOLERANCE_R below the backtest
type TrackingStatus = 'building' | 'tracking' | 'drift' | 'no-backtest';
|
||||
|
||||
function fmtR(value: number | null): string {
|
||||
if (value === null) return '—';
|
||||
return `${value > 0 ? '+' : ''}${value.toFixed(2)}R`;
|
||||
@@ -31,6 +40,17 @@ function rColor(value: number | null): string {
|
||||
return 'text-gray-300';
|
||||
}
|
||||
|
||||
// Small rounded pill that shows the live-vs-backtest verdict.
// Each TrackingStatus maps to a colour treatment and a short label; the two
// "nothing to compare yet" states ('building', 'no-backtest') share the same
// neutral grey treatment and differ only in their label.
function VerdictChip({ status }: { status: TrackingStatus }) {
  const neutralTone = 'border-white/10 bg-white/[0.05] text-gray-400';
  const appearance: Record<TrackingStatus, { tone: string; text: string }> = {
    building: { tone: neutralTone, text: 'building' },
    'no-backtest': { tone: neutralTone, text: 'no backtest' },
    tracking: { tone: 'border-emerald-500/30 bg-emerald-500/15 text-emerald-300', text: '✓ tracking' },
    drift: { tone: 'border-amber-500/30 bg-amber-500/15 text-amber-300', text: '⚠ drift' },
  };
  const { tone, text } = appearance[status];
  // Layout classes are fixed; only the colour treatment varies with status.
  const chipClass = `shrink-0 rounded-full border px-2.5 py-1 text-xs font-medium ${tone}`;
  return <span className={chipClass}>{text}</span>;
}
|
||||
|
||||
function StatCard({ label, value, valueClass = 'text-gray-100', sub }: {
|
||||
label: string;
|
||||
value: string;
|
||||
@@ -57,7 +77,7 @@ function BreakdownTable({ rows, labelHeader, mapLabel }: {
|
||||
}) {
|
||||
const entries = Object.entries(rows);
|
||||
if (entries.length === 0) {
|
||||
return <Callout variant="empty">No evaluated setups in this breakdown yet.</Callout>;
|
||||
return <Callout variant="empty">No matured setups in this breakdown yet.</Callout>;
|
||||
}
|
||||
return (
|
||||
<div className="glass overflow-x-auto">
|
||||
@@ -100,6 +120,7 @@ export function TrackRecordPanel() {
|
||||
const { data, isLoading, isError, error } = usePerformance(
|
||||
qualifiedOnly ? { qualified_only: true } : undefined,
|
||||
);
|
||||
const backtest = useBacktestReport();
|
||||
const queryClient = useQueryClient();
|
||||
const toast = useToast();
|
||||
|
||||
@@ -137,119 +158,145 @@ export function TrackRecordPanel() {
|
||||
}
|
||||
};
|
||||
|
||||
// Live (matured cohort) vs the backtest, like-for-like with the qualified toggle.
|
||||
const live = data?.overall ?? null;
|
||||
const btBucket = qualifiedOnly ? backtest.data?.overall_qualified : backtest.data?.overall_all;
|
||||
const liveAvgR = live?.avg_r ?? null;
|
||||
const liveN = live?.total ?? 0;
|
||||
const btAvgR = btBucket?.avg_r ?? null;
|
||||
|
||||
let status: TrackingStatus = 'building';
|
||||
if (liveAvgR != null && liveN >= MIN_MATURED) {
|
||||
status = btAvgR == null ? 'no-backtest' : liveAvgR >= btAvgR - DRIFT_TOLERANCE_R ? 'tracking' : 'drift';
|
||||
}
|
||||
|
||||
const verdictNote: Record<TrackingStatus, string> = {
|
||||
building: `Not enough matured setups yet (need ~${MIN_MATURED}). Only setups whose full ~30-day window has elapsed are counted — the rest are still maturing. Until then, the backtest is your edge estimate; this becomes a live check as setups age past ~6 weeks.`,
|
||||
'no-backtest': 'Run the backtest below to get a baseline to compare the live record against.',
|
||||
tracking: 'Live setups are resolving in line with the backtest — the running system is faithfully implementing it (no look-ahead, config or data drift).',
|
||||
drift: 'Live expectancy is running materially below the backtest. Could be small-sample noise, a regime shift, or a config/data/look-ahead gap between live and the backtest — worth a look.',
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
{/* Your real, realized results come first; the signal/theoretical record follows. */}
|
||||
{/* Your real, realized results come first; the live-vs-backtest check follows. */}
|
||||
<MyTradesPanel />
|
||||
<div className="border-t border-white/[0.06]" />
|
||||
|
||||
<div className="glass-sm flex flex-wrap items-center justify-between gap-3 px-4 py-3">
|
||||
<label className="flex cursor-pointer items-center gap-2.5 text-sm text-gray-300">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={qualifiedOnly}
|
||||
onChange={(e) => setQualifiedOnly(e.target.checked)}
|
||||
className="h-4 w-4 cursor-pointer accent-blue-400"
|
||||
/>
|
||||
<span>
|
||||
Qualified signals only
|
||||
{activation.data && (
|
||||
<span className="num ml-2 text-xs text-gray-500">{activationSummary(activation.data)}</span>
|
||||
)}
|
||||
</span>
|
||||
</label>
|
||||
<p className="text-xs text-gray-500">Confidence breakdown always covers all setups.</p>
|
||||
</div>
|
||||
|
||||
<div className="flex items-start justify-between gap-4">
|
||||
<Disclosure summary="How outcomes are measured">
|
||||
<p className="text-xs text-gray-400">
|
||||
Each setup is replayed against the daily bars after its detection: a{' '}
|
||||
<span className="text-emerald-400">win</span> means the target was reached before the
|
||||
stop, a <span className="text-red-400">loss</span> means the stop was hit first (bars
|
||||
where both levels fall inside the same day count conservatively as losses). Setups with
|
||||
neither level hit within 30 trading days <span className="text-gray-300">expire</span> at
|
||||
0R. Avg R is the expectancy per trade: wins earn their R:R ratio, losses cost −1R — a
|
||||
positive value means the signals have been profitable on a risk-adjusted basis. The
|
||||
evaluator runs nightly after OHLCV collection. Only setups whose full 30-day window has
|
||||
elapsed are counted — younger ones show as <span className="text-gray-300">maturing</span>,
|
||||
since near stops resolve in days while far targets need time, so early numbers would skew
|
||||
negative.
|
||||
</p>
|
||||
</Disclosure>
|
||||
<div className="flex shrink-0 items-center gap-2">
|
||||
<Button onClick={() => evaluateMutation.mutate()} loading={evaluateMutation.isPending}>
|
||||
{evaluateMutation.isPending ? 'Evaluating…' : 'Evaluate Now'}
|
||||
</Button>
|
||||
<Button variant="danger" onClick={onReset} loading={resetMutation.isPending}>
|
||||
{resetMutation.isPending ? 'Resetting…' : 'Reset'}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{isLoading && (
|
||||
<div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-4">
|
||||
<SkeletonCard /><SkeletonCard /><SkeletonCard /><SkeletonCard />
|
||||
</div>
|
||||
)}
|
||||
|
||||
{isError && (
|
||||
<Callout variant="error">
|
||||
{error instanceof Error ? error.message : 'Failed to load performance stats'}
|
||||
</Callout>
|
||||
)}
|
||||
|
||||
{data && data.overall.total === 0 && (
|
||||
<Callout variant="empty">
|
||||
{data.maturing > 0
|
||||
? `No setups have completed their ~30-day evaluation window yet — ${data.maturing} still maturing. ` +
|
||||
'Stats appear once a setup’s full window has elapsed; counting them earlier would skew toward quick stop-outs.'
|
||||
: qualifiedOnly
|
||||
? 'No matured setups meet the activation thresholds yet. Untick "Qualified signals only" to see all, or wait for more outcomes.'
|
||||
: 'No matured setups yet. Outcomes appear once setups complete their evaluation window — the evaluator runs nightly, or click Evaluate Now.'}
|
||||
</Callout>
|
||||
)}
|
||||
|
||||
{data && data.overall.total > 0 && (
|
||||
<>
|
||||
<div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-4">
|
||||
<StatCard
|
||||
label="Hit Rate"
|
||||
value={fmtPct(data.overall.hit_rate)}
|
||||
sub={`${data.overall.wins} wins / ${data.overall.losses} losses`}
|
||||
/>
|
||||
<StatCard
|
||||
label="Expectancy"
|
||||
value={fmtR(data.overall.avg_r)}
|
||||
valueClass={rColor(data.overall.avg_r)}
|
||||
sub="average R per trade"
|
||||
/>
|
||||
<StatCard
|
||||
label="Total R"
|
||||
value={fmtR(data.overall.total_r)}
|
||||
valueClass={rColor(data.overall.total_r)}
|
||||
sub="cumulative risk-adjusted result"
|
||||
/>
|
||||
<StatCard
|
||||
label="Matured"
|
||||
value={String(data.overall.total)}
|
||||
sub={`${data.maturing} maturing · ${data.overall.expired} expired`}
|
||||
/>
|
||||
<Section title="Live vs Backtest" hint="is the live system tracking the backtest?">
|
||||
{isError ? (
|
||||
<Callout variant="error">
|
||||
{error instanceof Error ? error.message : 'Failed to load performance stats'}
|
||||
</Callout>
|
||||
) : (
|
||||
<div className="glass-sm space-y-2.5 p-4">
|
||||
<div className="flex flex-wrap items-center justify-between gap-x-6 gap-y-2">
|
||||
<div className="flex flex-wrap items-baseline gap-x-5 gap-y-1">
|
||||
<span className="text-sm text-gray-400">
|
||||
Live <span className={`num font-semibold ${rColor(liveAvgR)}`}>{fmtR(liveAvgR)}</span>
|
||||
</span>
|
||||
<span className="text-sm text-gray-400">
|
||||
Backtest <span className={`num font-semibold ${rColor(btAvgR)}`}>{fmtR(btAvgR)}</span>
|
||||
</span>
|
||||
<span className="text-xs text-gray-500">
|
||||
{liveN} matured{data ? ` · ${data.maturing} maturing` : ''} · {qualifiedOnly ? 'qualified' : 'all setups'}
|
||||
</span>
|
||||
</div>
|
||||
<VerdictChip status={status} />
|
||||
</div>
|
||||
<p className="text-[11px] leading-relaxed text-gray-500">{verdictNote[status]}</p>
|
||||
</div>
|
||||
)}
|
||||
</Section>
|
||||
|
||||
<Section title="By Direction">
|
||||
<BreakdownTable rows={data.by_direction} labelHeader="Direction" />
|
||||
</Section>
|
||||
<Disclosure summary="Outcome details (matured cohort)">
|
||||
<div className="space-y-4 pt-1">
|
||||
<label className="flex w-fit cursor-pointer items-center gap-2.5 text-sm text-gray-300">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={qualifiedOnly}
|
||||
onChange={(e) => setQualifiedOnly(e.target.checked)}
|
||||
className="h-4 w-4 cursor-pointer accent-blue-400"
|
||||
/>
|
||||
<span>
|
||||
Qualified signals only
|
||||
{activation.data && (
|
||||
<span className="num ml-2 text-xs text-gray-500">{activationSummary(activation.data)}</span>
|
||||
)}
|
||||
</span>
|
||||
</label>
|
||||
|
||||
<Section title="By Recommended Action">
|
||||
<BreakdownTable rows={data.by_action} labelHeader="Action" mapLabel={actionLabel} />
|
||||
</Section>
|
||||
{isLoading && (
|
||||
<div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-4">
|
||||
<SkeletonCard /><SkeletonCard /><SkeletonCard /><SkeletonCard />
|
||||
</div>
|
||||
)}
|
||||
|
||||
<Section title="By Confidence" hint="at detection time">
|
||||
<BreakdownTable rows={data.by_confidence} labelHeader="Confidence" />
|
||||
</Section>
|
||||
</>
|
||||
)}
|
||||
{data && data.overall.total === 0 && (
|
||||
<Callout variant="empty">
|
||||
{data.maturing > 0
|
||||
? `No setups have completed their ~30-day window yet — ${data.maturing} still maturing. ` +
|
||||
'Counting them earlier would skew toward quick stop-outs.'
|
||||
: 'No matured setups yet. Outcomes appear once setups complete their evaluation window — the evaluator runs nightly, or click Evaluate Now.'}
|
||||
</Callout>
|
||||
)}
|
||||
|
||||
{data && data.overall.total > 0 && (
|
||||
<>
|
||||
<div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-4">
|
||||
<StatCard
|
||||
label="Hit Rate"
|
||||
value={fmtPct(data.overall.hit_rate)}
|
||||
sub={`${data.overall.wins} wins / ${data.overall.losses} losses`}
|
||||
/>
|
||||
<StatCard
|
||||
label="Expectancy"
|
||||
value={fmtR(data.overall.avg_r)}
|
||||
valueClass={rColor(data.overall.avg_r)}
|
||||
sub="average R per trade"
|
||||
/>
|
||||
<StatCard
|
||||
label="Total R"
|
||||
value={fmtR(data.overall.total_r)}
|
||||
valueClass={rColor(data.overall.total_r)}
|
||||
sub="cumulative risk-adjusted result"
|
||||
/>
|
||||
<StatCard
|
||||
label="Matured"
|
||||
value={String(data.overall.total)}
|
||||
sub={`${data.maturing} maturing · ${data.overall.expired} expired`}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<Section title="By Recommended Action">
|
||||
<BreakdownTable rows={data.by_action} labelHeader="Action" mapLabel={actionLabel} />
|
||||
</Section>
|
||||
|
||||
<Section title="By Confidence" hint="at detection time · all setups">
|
||||
<BreakdownTable rows={data.by_confidence} labelHeader="Confidence" />
|
||||
</Section>
|
||||
</>
|
||||
)}
|
||||
|
||||
<div className="flex flex-wrap items-center justify-between gap-3 border-t border-white/[0.06] pt-3">
|
||||
<p className="max-w-2xl text-xs text-gray-500">
|
||||
Each setup is replayed against the daily bars after detection: target before stop = win,
|
||||
stop first = loss (both in one bar counts conservatively as a loss), neither within 30
|
||||
trading days = expired at 0R. Only setups whose full window has elapsed are counted; younger
|
||||
ones are still <span className="text-gray-300">maturing</span> (near stops resolve fast, far
|
||||
targets need time, so early numbers would skew negative). The evaluator runs nightly.
|
||||
</p>
|
||||
<div className="flex shrink-0 items-center gap-2">
|
||||
<Button onClick={() => evaluateMutation.mutate()} loading={evaluateMutation.isPending}>
|
||||
{evaluateMutation.isPending ? 'Evaluating…' : 'Evaluate Now'}
|
||||
</Button>
|
||||
<Button variant="danger" onClick={onReset} loading={resetMutation.isPending}>
|
||||
{resetMutation.isPending ? 'Resetting…' : 'Reset'}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</Disclosure>
|
||||
|
||||
<div className="border-t border-white/[0.06] pt-2" />
|
||||
<BacktestPanel />
|
||||
|
||||
Reference in New Issue
Block a user