feat: sharpen the event study — more events, fair baseline, per-event view

The first run gave only 2 events (N=2 is anecdote, not evidence) and an unfairly weak coincident baseline, so the +42d lead couldn't be trusted. This makes the measurement meaningful:

- More, cleaner events: default drawdown threshold 15%→10%, and dedup switched from "recover to the high" to a rising-edge + cooldown (40d), so distinct drawdowns each register instead of merging.
- Fair comparison: each indicator now warns at its OWN 80th percentile instead of a shared absolute 60, removing the artifact that muted the coincident baseline.
- Per-event breakdown (date · depth · breadth lead · coincident lead) so a median over a tiny sample can't hide an apples-to-oranges comparison — you see whether both warned on the same drawdown.
- Surface precision/recall (best row) + base rate per indicator — the honest edge read, not just lead time.

Re-run the Event Study job to regenerate the cached report in the new shape.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-26 14:54:29 +02:00
parent f8d62e4074
commit 7c5fb1138d
4 changed files with 173 additions and 35 deletions
@@ -34,12 +34,15 @@ logger = logging.getLogger(__name__)

 KEY_REPORT = "regime_event_study"

-# Defaults — admin-tunable later if needed.
-EVENT_THRESHOLD_PCT = 15.0   # drawdown from the 52w high that counts as a "break"
-RECOVER_PCT = 5.0            # must recover to within this of the high before a new event
+# Defaults. The 15% threshold gave only 2 events in 5y (statistically useless),
+# so the default is lower with a cooldown-based dedup to surface more, cleaner
+# events. Each indicator "warns" at its OWN 80th percentile rather than a shared
+# absolute level, so the leading vs. coincident comparison is fair across scales.
+EVENT_THRESHOLD_PCT = 10.0   # drawdown from the 52w high that counts as a "break"
+COOLDOWN_DAYS = 40          # min trading days between event onsets (dedup)
 DRAWDOWN_LOOKBACK = 252      # 52-week trailing high
 HORIZON_DAYS = 20           # signal-centered prediction horizon
-WARN_THRESHOLD = 60.0       # indicator level treated as "warning on"
+WARN_PERCENTILE = 80.0      # each indicator warns at its own Nth percentile
 PRE, POST = 60, 20          # event-centered window (trading days)


@@ -52,6 +55,17 @@ def _median(values: list[float]) -> float | None:
    return float(s[mid]) if n % 2 else (s[mid - 1] + s[mid]) / 2.0


+def _percentile(values: list[float], pct: float) -> float | None:
+    """Linear-interpolated percentile of the non-None values."""
+    vals = sorted(v for v in values if v is not None)
+    if not vals:
+        return None
+    k = (len(vals) - 1) * (pct / 100.0)
+    lo = int(k)
+    hi = min(lo + 1, len(vals) - 1)
+    return vals[lo] + (vals[hi] - vals[lo]) * (k - lo)
+
+
 # ---------------------------------------------------------------------------
 # Event detection
 # ---------------------------------------------------------------------------
@@ -61,22 +75,23 @@ def detect_events(
    dates: list[date],
    threshold_pct: float = EVENT_THRESHOLD_PCT,
    lookback: int = DRAWDOWN_LOOKBACK,
-    recover_pct: float = RECOVER_PCT,
+    cooldown: int = COOLDOWN_DAYS,
 ) -> list[dict]:
-    """Drawdown events: ``t0`` = first day the drawdown from the trailing 52w high
-    crosses ``threshold_pct``. De-duplicated — a new event needs a recovery back to
-    within ``recover_pct`` of the high first (so one decline = one event)."""
+    """Drawdown events: ``t0`` = a day the drawdown from the trailing 52w high
+    crosses up through ``threshold_pct`` (rising edge). De-duplicated by a
+    ``cooldown`` of trading days, so a continuous decline counts once but distinct
+    drawdowns separated by a recovery each register."""
    events: list[dict] = []
-    in_event = False
+    prev_dd = 0.0
+    last_event = -10**9
    for i in range(len(closes)):
        window = closes[max(0, i - lookback + 1): i + 1]
        hi = max(window)
        dd = (hi - closes[i]) / hi * 100.0 if hi > 0 else 0.0
-        if not in_event and dd >= threshold_pct:
+        if dd >= threshold_pct and prev_dd < threshold_pct and (i - last_event) >= cooldown:
            events.append({"date": dates[i].isoformat(), "index": i, "depth_pct": round(dd, 1)})
-            in_event = True
-        elif in_event and dd <= recover_pct:
-            in_event = False
+            last_event = i
+        prev_dd = dd
    return events


@@ -84,31 +99,38 @@ def detect_events(
 # Event-centered: lead time + mean path
 # ---------------------------------------------------------------------------

+def _lead(indicator: dict[date, float], t0: int, dates: list[date], pre: int, threshold: float) -> int | None:
+    """Earliest day within ``[t0-pre, t0]`` at which the indicator crosses
+    ``threshold`` — i.e. how many days of warning before the event, or None."""
+    lead: int | None = None
+    for k in range(0, pre + 1):
+        idx = t0 - k
+        if idx < 0:
+            break
+        v = indicator.get(dates[idx])
+        if v is not None and v >= threshold:
+            lead = k  # keep going: the largest k = earliest warning in the window
+    return lead
+
+
 def event_centered(
    indicator: dict[date, float],
    events_idx: list[int],
    dates: list[date],
    pre: int = PRE,
    post: int = POST,
-    threshold: float = WARN_THRESHOLD,
+    threshold: float = 60.0,
 ) -> dict:
    """Align the indicator at each event's ``t0`` and measure how early it warned.

-    Lead = the earliest day within ``[t0-pre, t0]`` at which the indicator first
-    crosses ``threshold``. Also returns the cross-event mean path.
+    Lead time is measured against ``threshold`` (each indicator gets its own,
+    derived from its distribution). Also returns the cross-event mean path.
    """
    leads: list[float] = []
    sums: dict[int, float] = {}
    counts: dict[int, int] = {}
    for t0 in events_idx:
-        lead: int | None = None
-        for k in range(0, pre + 1):
-            idx = t0 - k
-            if idx < 0:
-                break
-            v = indicator.get(dates[idx])
-            if v is not None and v >= threshold:
-                lead = k  # keep going: the largest k = earliest warning in the window
+        lead = _lead(indicator, t0, dates, pre, threshold)
        if lead is not None:
            leads.append(lead)
        for rel in range(-pre, post + 1):
@@ -125,6 +147,7 @@ def event_centered(
        "median_lead_days": _median(leads),
        "events_with_signal": len(leads),
        "events_total": len(events_idx),
+        "warn_threshold": round(threshold, 1),
        "mean_path": mean_path,
    }

@@ -211,7 +234,8 @@ async def run_event_study(
    db: AsyncSession,
    threshold_pct: float = EVENT_THRESHOLD_PCT,
    horizon: int = HORIZON_DAYS,
-    warn_threshold: float = WARN_THRESHOLD,
+    cooldown: int = COOLDOWN_DAYS,
+    warn_percentile: float = WARN_PERCENTILE,
 ) -> dict:
    """Run the study: detect events on the benchmark, then measure breadth-divergence
    vs. the coincident price composite. Best-effort; returns available=False on no data."""
@@ -227,23 +251,40 @@ async def run_event_study(

    dates = [d for d, _ in bench]
    closes = [c for _, c in bench]
-    events = detect_events(closes, dates, threshold_pct)
+    events = detect_events(closes, dates, threshold_pct, cooldown=cooldown)
    events_idx = [e["index"] for e in events]

    breadth = await breadth_service.compute_breadth_series(db)
    divergence = breadth_service.compute_divergence_series(breadth, bench)
    coincident = _coincident_series(prices, dates, config)

-    def _evaluate(series: dict[date, float]) -> dict:
+    # Each indicator warns at its OWN distribution's percentile, so a leading
+    # indicator isn't penalised for living on a different scale than the baseline.
+    warn = {
+        "breadth_divergence": _percentile(list(divergence.values()), warn_percentile) or 60.0,
+        "coincident_price": _percentile(list(coincident.values()), warn_percentile) or 60.0,
+    }
+    series_by_key = {"breadth_divergence": divergence, "coincident_price": coincident}
+
+    def _evaluate(series: dict[date, float], threshold: float) -> dict:
        return {
-            **event_centered(series, events_idx, dates, threshold=warn_threshold),
+            **event_centered(series, events_idx, dates, threshold=threshold),
            "signal": signal_centered(series, events_idx, dates, horizon),
        }

-    indicators = {
-        "breadth_divergence": _evaluate(divergence),
-        "coincident_price": _evaluate(coincident),
-    }
+    indicators = {key: _evaluate(series_by_key[key], warn[key]) for key in series_by_key}
+
+    # Per-event comparison: which event, and each indicator's lead on THAT event —
+    # so a median over a tiny sample can't hide an apples-to-oranges comparison.
+    per_event = [
+        {
+            "date": e["date"],
+            "depth_pct": e["depth_pct"],
+            "breadth_lead": _lead(divergence, e["index"], dates, PRE, warn["breadth_divergence"]),
+            "coincident_lead": _lead(coincident, e["index"], dates, PRE, warn["coincident_price"]),
+        }
+        for e in events
+    ]

    bd = indicators["breadth_divergence"]["median_lead_days"]
    cd = indicators["coincident_price"]["median_lead_days"]
@@ -261,11 +302,13 @@ async def run_event_study(
        "params": {
            "benchmark": leader,
            "event_threshold_pct": threshold_pct,
+            "cooldown_days": cooldown,
            "horizon_days": horizon,
-            "warn_threshold": warn_threshold,
+            "warn_percentile": warn_percentile,
        },
        "events": events,
        "indicators": indicators,
+        "per_event": per_event,
        "lead_delta_days": lead_delta,
        "recent_breadth": recent_breadth,
    }
@@ -316,6 +316,7 @@ export interface EventStudyLeadStats {
  median_lead_days: number | null;
  events_with_signal: number;
  events_total: number;
+  warn_threshold: number;
  mean_path: { rel_day: number; value: number }[];
  signal: {
    base_rate: number;
@@ -324,6 +325,13 @@ export interface EventStudyLeadStats {
  };
 }

+export interface EventStudyPerEvent {
+  date: string;
+  depth_pct: number;
+  breadth_lead: number | null;
+  coincident_lead: number | null;
+}
+
 export interface EventStudyReport {
  available: boolean;
  reason?: string;
@@ -331,14 +339,16 @@ export interface EventStudyReport {
  params?: {
    benchmark: string;
    event_threshold_pct: number;
+    cooldown_days: number;
    horizon_days: number;
-    warn_threshold: number;
+    warn_percentile: number;
  };
  events?: { date: string; index: number; depth_pct: number }[];
  indicators?: {
    breadth_divergence: EventStudyLeadStats;
    coincident_price: EventStudyLeadStats;
  };
+  per_event?: EventStudyPerEvent[];
  lead_delta_days?: number | null;
  recent_breadth?: { date: string; breadth: number; divergence: number | null }[];
 }
@@ -23,6 +23,7 @@ import type {
  RegimeFundamentals,
  EventStudyReport,
  EventStudyLeadStats,
+  EventStudyPerEvent,
 } from '../lib/types';

 const BAND_STYLES: Record<RegimeBand, { text: string; bar: string; ring: string; label: string }> = {
@@ -285,7 +286,22 @@ function Sparkline({ values, color = '#60a5fa', height = 28 }: { values: number[
  );
 }

+function pctLabel(v: number | null): string {
+  return v == null ? '—' : `${Math.round(v * 100)}%`;
+}
+
+function leadLabel(v: number | null): string {
+  return v == null ? 'missed' : `${v}d`;
+}
+
+function bestPr(stats: EventStudyLeadStats) {
+  const rows = stats.signal.rows.filter((r) => r.precision != null && r.recall != null && r.recall > 0);
+  if (!rows.length) return null;
+  return rows.reduce((a, b) => ((b.precision ?? 0) > (a.precision ?? 0) ? b : a));
+}
+
 function LeadStat({ label, stats, highlight }: { label: string; stats: EventStudyLeadStats; highlight?: boolean }) {
+  const pr = bestPr(stats);
  return (
    <div className={`rounded-lg border px-3 py-2 ${highlight ? 'border-blue-400/30 bg-blue-400/[0.06]' : 'border-white/[0.06] bg-white/[0.02]'}`}>
      <div className="text-xs text-gray-500">{label}</div>
@@ -293,8 +309,46 @@ function LeadStat({ label, stats, highlight }: { label: string; stats: EventStud
        {stats.median_lead_days != null ? `${stats.median_lead_days}d lead` : 'no signal'}
      </div>
      <div className="text-[11px] text-gray-600">
-        {stats.events_with_signal}/{stats.events_total} events warned
+        {stats.events_with_signal}/{stats.events_total} warned
+        {stats.warn_threshold != null ? ` · warn ≥ ${Math.round(stats.warn_threshold)}` : ''}
      </div>
+      {pr && (
+        <div className="text-[11px] text-gray-600">
+          best P {pctLabel(pr.precision)} · R {pctLabel(pr.recall)} @ {pr.threshold}
+        </div>
+      )}
+    </div>
+  );
+}
+
+function PerEventTable({ rows }: { rows: EventStudyPerEvent[] }) {
+  return (
+    <div className="overflow-x-auto rounded-lg border border-white/[0.06]">
+      <table className="w-full text-xs">
+        <thead>
+          <tr className="border-b border-white/[0.06] text-left uppercase tracking-wider text-gray-500">
+            <th className="px-3 py-2 font-medium">Drawdown</th>
+            <th className="px-3 py-2 text-right font-medium">Depth</th>
+            <th className="px-3 py-2 text-right font-medium">Breadth lead</th>
+            <th className="px-3 py-2 text-right font-medium">Coincident lead</th>
+          </tr>
+        </thead>
+        <tbody>
+          {rows.map((e) => {
+            const earlier = e.breadth_lead != null && (e.coincident_lead == null || e.breadth_lead > e.coincident_lead);
+            return (
+              <tr key={e.date} className="border-b border-white/[0.03] last:border-0">
+                <td className="px-3 py-2 num text-gray-300">{e.date}</td>
+                <td className="px-3 py-2 text-right num text-gray-400">{e.depth_pct}%</td>
+                <td className={`px-3 py-2 text-right num ${earlier ? 'text-emerald-400' : 'text-gray-300'}`}>
+                  {leadLabel(e.breadth_lead)}
+                </td>
+                <td className="px-3 py-2 text-right num text-gray-300">{leadLabel(e.coincident_lead)}</td>
+              </tr>
+            );
+          })}
+        </tbody>
+      </table>
    </div>
  );
 }
@@ -325,6 +379,12 @@ function EventStudyBody({ report }: { report: EventStudyReport }) {
          {lead >= 0 ? 'earlier' : 'later'} than the coincident baseline.
        </p>
      )}
+      {report.per_event && report.per_event.length > 0 && (
+        <div className="space-y-1.5">
+          <div className="text-[11px] uppercase tracking-wider text-gray-500">Per drawdown (same events, both indicators)</div>
+          <PerEventTable rows={report.per_event} />
+        </div>
+      )}
      {recent.length > 1 && (
        <div className="flex flex-wrap items-end gap-6">
          <div>
@@ -6,6 +6,8 @@ from datetime import date, timedelta

 from app.services.breadth_service import _breadth_from_closes, compute_divergence_series
 from app.services.event_study_service import (
+    _lead,
+    _percentile,
    detect_events,
    event_centered,
    signal_centered,
@@ -40,6 +42,29 @@ def test_detect_events_two_after_recovery():
    assert len(events) == 2


+def test_detect_events_cooldown_suppresses_close_recross():
+    # Dips below threshold then re-crosses only a few bars later.
+    closes = [100.0] * 300 + [85.0] * 3 + [100.0] * 3 + [85.0] * 3
+    dates = _days(len(closes))
+    assert len(detect_events(closes, dates, threshold_pct=15.0, cooldown=40)) == 1
+    assert len(detect_events(closes, dates, threshold_pct=15.0, cooldown=3)) == 2
+
+
+def test_percentile_interpolation():
+    vals = [float(v) for v in range(0, 101, 10)]  # 0,10,...,100
+    assert _percentile(vals, 50) == 50.0
+    assert _percentile(vals, 80) == 80.0
+    assert _percentile([], 50) is None
+
+
+def test_lead_earliest_crossing():
+    dates = _days(200)
+    t0 = 120
+    indicator = {dates[i]: (70.0 if t0 - 30 <= i <= t0 else 10.0) for i in range(len(dates))}
+    assert _lead(indicator, t0, dates, pre=60, threshold=60.0) == 30
+    assert _lead(indicator, t0, dates, pre=60, threshold=80.0) is None
+
+
 # ---------------------------------------------------------------------------
 # Event-centered lead time
 # ---------------------------------------------------------------------------