// signal-platform/frontend/src/components/signals/BacktestPanel.tsx

import { useMutation, useQueryClient } from '@tanstack/react-query';
import { useBacktestReport } from '../../hooks/useMarketRegime';
import { triggerJob } from '../../api/admin';
import { Button } from '../ui/Button';
import { Callout } from '../ui/Callout';
import { Disclosure } from '../ui/Disclosure';
import { Section } from '../ui/Section';
import { useToast } from '../ui/Toast';
import type { BacktestBucket } from '../../lib/types';

/**
 * Formats an R-multiple with two decimals and an explicit `+` for gains,
 * e.g. `+1.25R`, `-0.40R`, `0.00R`.
 *
 * @param v R-multiple, or null/undefined when the value is unavailable.
 * @returns The formatted string, or an em dash when `v` is missing or not a
 *          finite number.
 */
function fmtR(v: number | null | undefined): string {
  // `== null` also covers `undefined`, so a report that lacks the field
  // renders a dash instead of throwing on `.toFixed`.
  if (v == null || !Number.isFinite(v)) return '—';
  const text = v.toFixed(2);
  // Decide the sign from the ROUNDED value: otherwise 0.001 prints "+0.00R"
  // and -0.001 prints "-0.00R".
  const rounded = Number(text);
  if (rounded === 0) return '0.00R';
  return `${rounded > 0 ? '+' : ''}${text}R`;
}
/**
 * Formats a value that is already expressed in percent with one decimal,
 * e.g. `55.5%`.
 *
 * @param v Percentage value, or null/undefined when unavailable.
 * @returns The formatted string, or an em dash when `v` is missing or not a
 *          finite number.
 */
function fmtPct(v: number | null | undefined): string {
  // `== null` also covers `undefined`; non-finite values would print "NaN%".
  if (v == null || !Number.isFinite(v)) return '—';
  const text = v.toFixed(1);
  // Tiny negatives round to "-0.0"; show them as plain zero.
  return Number(text) === 0 ? '0.0%' : `${text}%`;
}
/**
 * Picks the text colour class for a signed metric: muted grey when the value
 * is absent, emerald for gains, red for losses, light grey for exactly zero.
 */
function rColor(v: number | null): string {
  if (v === null) return 'text-gray-400';
  switch (Math.sign(v)) {
    case 1:
      return 'text-emerald-400';
    case -1:
      return 'text-red-400';
    default:
      // Zero (and anything that is neither positive nor negative).
      return 'text-gray-300';
  }
}

// Human-readable names for the signal identifiers shown in the signal-edge
// table; identifiers without an entry are displayed as-is by the caller.
const SIGNAL_LABEL_ENTRIES: ReadonlyArray<readonly [string, string]> = [
  ['mom_12_1', '12–1 month momentum'],
  ['mom_6_1', '6–1 month momentum'],
  ['mom_3_1', '3–1 month momentum'],
  ['reversal_1m', '1-month reversal'],
  ['trend_200', 'Price vs 200-day SMA'],
  ['high_52w', 'Proximity to 52-week high'],
  ['vol_6m', '6-month realized volatility'],
];
const SIGNAL_LABELS: Record<string, string> = Object.fromEntries(SIGNAL_LABEL_ENTRIES);

// Minimum |mean IC| for a signal row to be starred as an "edge" in the
// signal-edge table (the row must also be flagged reliable). The same value is
// interpolated into the table's explanatory text.
// An |IC| this large, with a consistent sign, is a real (if small) edge worth
// building on; below it, ranking on the signal sorts essentially nothing.
const IC_EDGE_THRESHOLD = 0.03;

/**
 * Colour class for a mean-IC cell: grey while the magnitude is inside the
 * neutral band, otherwise emerald for positive and red for everything else.
 */
function icColor(v: number): string {
  const neutralBand = 0.02;
  const magnitude = Math.abs(v);
  if (magnitude < neutralBand) {
    return 'text-gray-400';
  }
  if (v > 0) {
    return 'text-emerald-400';
  }
  return 'text-red-400';
}
/**
 * Formats a fractional return spread (0.0123 → `+1.23%`) with two decimals
 * and an explicit `+` for positive values.
 *
 * @param v Spread as a fraction, or null/undefined when unavailable.
 * @returns The formatted percentage, or an em dash when `v` is missing or not
 *          a finite number.
 */
function fmtSpread(v: number | null | undefined): string {
  // `== null` also covers `undefined`; non-finite values would print "NaN%".
  if (v == null || !Number.isFinite(v)) return '—';
  const text = (v * 100).toFixed(2);
  // Sign is taken from the rounded value so near-zero spreads print "0.00%"
  // rather than "+0.00%" / "-0.00%".
  const rounded = Number(text);
  if (rounded === 0) return '0.00%';
  return `${rounded > 0 ? '+' : ''}${text}%`;
}

/**
 * Renders how long ago a timestamp was, at minute / hour / day granularity
 * (`just now`, `5m ago`, `3h ago`, `2d ago`).
 *
 * @param iso Timestamp string parseable by `Date`.
 * @param now Reference time in epoch milliseconds; defaults to the current
 *            time. Exposed so the result can be computed deterministically.
 * @returns The relative-time phrase, or `at an unknown time` when `iso`
 *          cannot be parsed (previously this produced "NaNd ago").
 */
function timeAgo(iso: string, now: number = Date.now()): string {
  const then = new Date(iso).getTime();
  // Invalid Date -> NaN; every comparison below would be false and the
  // fall-through branch would print "NaNd ago".
  if (Number.isNaN(then)) return 'at an unknown time';
  const mins = Math.floor((now - then) / 60_000);
  // Timestamps slightly in the future (clock skew) also land here.
  if (mins < 1) return 'just now';
  if (mins < 60) return `${mins}m ago`;
  const hrs = Math.floor(mins / 60);
  if (hrs < 24) return `${hrs}h ago`;
  return `${Math.floor(hrs / 24)}d ago`;
}

/** Props accepted by the `Stat` tile. */
type StatProps = {
  label: string;
  value: string;
  valueClass?: string;
  sub?: string;
};

/**
 * Summary tile: a small label, a large value, and an optional sub-caption.
 * `valueClass` tints the value and falls back to `text-gray-100` when omitted.
 */
function Stat(props: StatProps) {
  const { label, value, valueClass = 'text-gray-100', sub } = props;
  const valueClasses = `num mt-1.5 text-2xl font-semibold ${valueClass}`;
  // Only rendered when a non-empty caption is supplied.
  const caption = sub && <p className="mt-1 text-xs text-gray-500">{sub}</p>;

  return (
    <div className="glass p-4">
      <p className="section-index">{label}</p>
      <p className={valueClasses}>{value}</p>
      {caption}
    </div>
  );
}

/**
 * One table row summarising a backtest bucket: counts of setups, wins, losses
 * and expired, followed by the hit rate and the colour-coded average R.
 */
function BucketRow({ label, b }: { label: string; b: BacktestBucket }) {
  // Shared classes for every right-aligned numeric cell.
  const numCell = 'num px-4 py-2.5 text-right';
  const { total, wins, losses, expired, hit_rate: hitRate, avg_r: avgR } = b;

  return (
    <tr className="border-b border-white/[0.04]">
      <td className="px-4 py-2.5 font-medium text-gray-200">{label}</td>
      <td className={`${numCell} text-gray-300`}>{total}</td>
      <td className={`${numCell} text-emerald-400`}>{wins}</td>
      <td className={`${numCell} text-red-400`}>{losses}</td>
      <td className={`${numCell} text-gray-400`}>{expired}</td>
      <td className={`${numCell} text-gray-200`}>{fmtPct(hitRate)}</td>
      <td className={`${numCell} ${rColor(avgR)}`}>{fmtR(avgR)}</td>
    </tr>
  );
}

/**
 * "Backtest" section of the signals page.
 *
 * Renders an explainer disclosure and a run/re-run button, a loading or empty
 * callout while no report is available, and — once a report is loaded — the
 * run summary line, four headline stat tiles, the bucket table, the
 * momentum-percentile sweep (only when the report carries that field), the
 * probability-calibration table, the cross-sectional signal-edge table (only
 * when present), and the report's note.
 */
export function BacktestPanel() {
  // Latest backtest report plus its loading flag; `report` is falsy until one exists.
  const { data: report, isLoading } = useBacktestReport();
  const queryClient = useQueryClient();
  const toast = useToast();

  // Mutation that asks the backend to start the 'backtest' job.
  const run = useMutation({
    mutationFn: () => triggerJob('backtest'),
    onSuccess: (res) => {
      if (res.status === 'triggered') {
        toast.addToast('success', 'Backtest started — results appear when it finishes (a minute or two).');
        // Schedules a single invalidation of the report query 8 s after the trigger.
        // NOTE(review): the toast says the job takes a minute or two, so this one
        // refresh will usually be too early — presumably the report query is
        // refreshed by other means as well; confirm against useBacktestReport.
        setTimeout(() => queryClient.invalidateQueries({ queryKey: ['backtest-report'] }), 8000);
      } else {
        // Any other status is surfaced as an informational toast, using the
        // backend message when it is non-empty.
        toast.addToast('info', res.message || 'Could not start backtest');
      }
    },
    onError: () => toast.addToast('error', 'Failed to start backtest'),
  });

  return (
    <Section title="Backtest" hint="historical replay of the current config">
      <div className="space-y-4">
        <div className="flex flex-wrap items-start justify-between gap-3">
          <Disclosure summary="How the backtest works">
            <p className="text-xs text-gray-400">
              At each weekly point in history, the setup is rebuilt using only data up to that day
              (no lookahead), then the actual following ~30 trading days decide its outcome. This
              shows how the <em>current</em> settings would have performed. Sentiment and
              fundamentals are held neutral (no point-in-time history), so this calibrates the
              price / support-resistance / probability machinery. ~6 months of data is roughly one
              market regime — read it as directional, not a guarantee.
            </p>
          </Disclosure>
          <Button onClick={() => run.mutate()} loading={run.isPending} className="shrink-0">
            {run.isPending ? 'Starting…' : report ? 'Re-run backtest' : 'Run backtest'}
          </Button>
        </div>

        {isLoading && <Callout variant="empty">Loading…</Callout>}

        {!isLoading && !report && (
          <Callout variant="empty">
            No backtest yet. Click “Run backtest” (or trigger it in Admin → Jobs) — it replays every
            ticker over history and takes a minute or two.
          </Callout>
        )}

        {report && (
          <>
            <p className="text-[11px] text-gray-500">
              Ran {timeAgo(report.generated_at)} · {report.tickers} tickers · {report.candidates} setups
              ({report.qualified} qualified) · weekly cadence, {report.params.horizon_days}-day horizon
            </p>

            <div className="grid gap-3 sm:grid-cols-2 lg:grid-cols-4">
              <Stat
                label="Qualified Hit Rate"
                value={fmtPct(report.overall_qualified.hit_rate)}
                sub={`${report.overall_qualified.wins}W / ${report.overall_qualified.losses}L`}
              />
              <Stat
                label="Qualified Expectancy"
                value={fmtR(report.overall_qualified.avg_r)}
                valueClass={rColor(report.overall_qualified.avg_r)}
                sub="avg R per qualified setup"
              />
              <Stat
                label="All Setups Expectancy"
                value={fmtR(report.overall_all.avg_r)}
                valueClass={rColor(report.overall_all.avg_r)}
                sub={`${report.overall_all.total} setups · baseline`}
              />
              <Stat
                label="Qualified Total R"
                value={fmtR(report.overall_qualified.total_r)}
                valueClass={rColor(report.overall_qualified.total_r)}
                sub="cumulative, risk-adjusted"
              />
            </div>

            <div className="glass overflow-x-auto">
              <table className="w-full text-sm">
                <thead>
                  <tr className="border-b border-white/[0.06] text-left text-xs uppercase tracking-wider text-gray-500">
                    <th className="px-4 py-2.5">Set</th>
                    <th className="px-4 py-2.5 text-right">Setups</th>
                    <th className="px-4 py-2.5 text-right">Wins</th>
                    <th className="px-4 py-2.5 text-right">Losses</th>
                    <th className="px-4 py-2.5 text-right">Expired</th>
                    <th className="px-4 py-2.5 text-right">Hit Rate</th>
                    <th className="px-4 py-2.5 text-right">Avg R</th>
                  </tr>
                </thead>
                <tbody>
                  <BucketRow label="Qualified" b={report.overall_qualified} />
                  <BucketRow label="All" b={report.overall_all} />
                  {report.by_direction.long && <BucketRow label="Long (qual.)" b={report.by_direction.long} />}
                  {report.by_direction.short && <BucketRow label="Short (qual.)" b={report.by_direction.short} />}
                </tbody>
              </table>
            </div>

            {/* Guard on the new field so a stale cached report (pre-momentum,
                with min_expected_value rows) hides the sweep instead of crashing
                the whole page. Re-running the backtest repopulates it. */}
            {report.sweep && report.sweep.length > 0 && report.sweep[0].min_momentum_percentile != null && (
              <div>
                <p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
                  Momentum-percentile sweep
                </p>
                <p className="mb-2 text-[11px] text-gray-500">
                  How many setups qualify — and how they perform — at each momentum-rank cutoff (floors
                  held fixed). 80 = only the top 20% of the universe by 12-1 momentum each week; 0 =
                  floors only. Lower = more trades, watch that expectancy holds. Your current setting is
                  highlighted; set it in Admin → Settings → Activation.
                </p>
                <div className="glass overflow-x-auto">
                  <table className="w-full text-sm">
                    <thead>
                      <tr className="border-b border-white/[0.06] text-left text-xs uppercase tracking-wider text-gray-500">
                        <th className="px-4 py-2.5">Min momentum %ile</th>
                        <th className="px-4 py-2.5 text-right">Qualified</th>
                        <th className="px-4 py-2.5 text-right">Wins</th>
                        <th className="px-4 py-2.5 text-right">Losses</th>
                        <th className="px-4 py-2.5 text-right">Hit Rate</th>
                        <th className="px-4 py-2.5 text-right">Avg R</th>
                        <th className="px-4 py-2.5 text-right">Total R</th>
                      </tr>
                    </thead>
                    <tbody>
                      {report.sweep.map((row) => {
                        // Highlight the row whose cutoff equals the report's own
                        // min_momentum_percentile; compared with a small tolerance
                        // rather than strict equality.
                        const current = Math.abs(row.min_momentum_percentile - report.min_momentum_percentile) < 0.001;
                        return (
                          <tr key={row.min_momentum_percentile} className={`border-b border-white/[0.04] ${current ? 'bg-blue-400/10' : ''}`}>
                            <td className="num px-4 py-2.5 text-gray-200">
                              {current && <span className="mr-1 text-blue-300">★</span>}
                              {row.min_momentum_percentile.toFixed(0)}
                            </td>
                            <td className="num px-4 py-2.5 text-right text-gray-200">{row.total}</td>
                            <td className="num px-4 py-2.5 text-right text-emerald-400">{row.wins}</td>
                            <td className="num px-4 py-2.5 text-right text-red-400">{row.losses}</td>
                            <td className="num px-4 py-2.5 text-right text-gray-200">{fmtPct(row.hit_rate)}</td>
                            <td className={`num px-4 py-2.5 text-right font-semibold ${rColor(row.avg_r)}`}>{fmtR(row.avg_r)}</td>
                            <td className={`num px-4 py-2.5 text-right ${rColor(row.total_r)}`}>{fmtR(row.total_r)}</td>
                          </tr>
                        );
                      })}
                    </tbody>
                  </table>
                </div>
              </div>
            )}

            <div>
              <p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
                Probability calibration
              </p>
              <p className="mb-2 text-[11px] text-gray-500">
                Do targets we call “X% likely” actually hit that often? Realized below predicted =
                the model is over-confident.
              </p>
              {report.calibration.length === 0 ? (
                <Callout variant="empty">Not enough resolved setups to calibrate.</Callout>
              ) : (
                <div className="glass overflow-x-auto">
                  <table className="w-full text-sm">
                    <thead>
                      <tr className="border-b border-white/[0.06] text-left text-xs uppercase tracking-wider text-gray-500">
                        <th className="px-4 py-2.5">Predicted Bucket</th>
                        <th className="px-4 py-2.5 text-right">Setups</th>
                        <th className="px-4 py-2.5 text-right">Avg Predicted</th>
                        <th className="px-4 py-2.5 text-right">Realized Hit Rate</th>
                      </tr>
                    </thead>
                    <tbody>
                      {report.calibration.map((row) => {
                        // Over-confident bucket: realized hit rate is below the
                        // average predicted probability, so the cell is shown in
                        // amber instead of emerald.
                        const over = row.realized_hit_rate < row.predicted_avg;
                        return (
                          <tr key={row.bucket} className="border-b border-white/[0.04]">
                            <td className="px-4 py-2.5 text-gray-200">{row.bucket}</td>
                            <td className="num px-4 py-2.5 text-right text-gray-300">{row.n}</td>
                            <td className="num px-4 py-2.5 text-right text-gray-400">{row.predicted_avg.toFixed(0)}%</td>
                            <td className={`num px-4 py-2.5 text-right font-semibold ${over ? 'text-amber-400' : 'text-emerald-400'}`}>
                              {row.realized_hit_rate.toFixed(0)}%
                            </td>
                          </tr>
                        );
                      })}
                    </tbody>
                  </table>
                </div>
              )}
            </div>

            {report.signal_eval && report.signal_eval.length > 0 && (
              <div>
                <p className="mb-2 text-xs font-medium uppercase tracking-widest text-gray-500">
                  Signal edge (cross-sectional)
                </p>
                <p className="mb-2 text-[11px] text-gray-500">
                  Does ranking the universe by a signal predict the forward {report.params.horizon_days}-day
                  return? Mean IC is the rank correlation between signal and return, averaged over
                  non-overlapping windows. <span className="text-emerald-400">|IC| ≳ {IC_EDGE_THRESHOLD}</span> with a
                  consistent sign (high IC&gt;0 %) is a real, if small, edge; near 0 means it sorts nothing.
                  Momentum skips the last month; <em>reversal_1m is expected negative</em> if the universe
                  mean-reverts. Q5−Q1 is the top-minus-bottom-quintile forward return. <span className="text-gray-600">Greyed
                  rows have too few independent windows to trust — deepen history via the Data Backfill job.</span>
                </p>
                <div className="glass overflow-x-auto">
                  <table className="w-full text-sm">
                    <thead>
                      <tr className="border-b border-white/[0.06] text-left text-xs uppercase tracking-wider text-gray-500">
                        <th className="px-4 py-2.5">Signal</th>
                        <th className="px-4 py-2.5 text-right">Weeks</th>
                        <th className="px-4 py-2.5 text-right">Avg N</th>
                        <th className="px-4 py-2.5 text-right">Mean IC</th>
                        <th className="px-4 py-2.5 text-right">t-stat</th>
                        <th className="px-4 py-2.5 text-right">IC&gt;0 %</th>
                        <th className="px-4 py-2.5 text-right">Q5−Q1 fwd</th>
                      </tr>
                    </thead>
                    <tbody>
                      {report.signal_eval.map((row) => {
                        // Only trust the edge highlight when the IC rests on enough
                        // independent windows; thin signals are dimmed, not starred.
                        const edge = row.reliable && Math.abs(row.mean_ic) >= IC_EDGE_THRESHOLD;
                        return (
                          <tr
                            key={row.signal}
                            className={`border-b border-white/[0.04] ${edge ? 'bg-emerald-400/[0.06]' : ''} ${row.reliable ? '' : 'opacity-40'}`}
                            title={row.reliable ? undefined : `Only ${row.weeks} independent window(s) — not enough to trust`}
                          >
                            <td className="px-4 py-2.5 font-medium text-gray-200">
                              {edge && <span className="mr-1 text-emerald-300">★</span>}
                              {SIGNAL_LABELS[row.signal] ?? row.signal}
                            </td>
                            <td className="num px-4 py-2.5 text-right text-gray-400">{row.weeks}</td>
                            <td className="num px-4 py-2.5 text-right text-gray-400">{row.avg_cross_section ?? '—'}</td>
                            <td className={`num px-4 py-2.5 text-right font-semibold ${icColor(row.mean_ic)}`}>
                              {row.mean_ic.toFixed(3)}
                            </td>
                            <td className="num px-4 py-2.5 text-right text-gray-300">
                              {row.ic_t_stat === null ? '—' : row.ic_t_stat.toFixed(2)}
                            </td>
                            <td className="num px-4 py-2.5 text-right text-gray-300">{fmtPct(row.ic_positive_pct)}</td>
                            <td className={`num px-4 py-2.5 text-right ${rColor(row.mean_quintile_spread)}`}>
                              {fmtSpread(row.mean_quintile_spread)}
                            </td>
                          </tr>
                        );
                      })}
                    </tbody>
                  </table>
                </div>
                {report.signal_eval_note && (
                  <p className="mt-2 text-[11px] text-gray-600">{report.signal_eval_note}</p>
                )}
              </div>
            )}

            <p className="text-[11px] text-gray-600">{report.note}</p>
          </>
        )}
      </div>
    </Section>
  );
}