Add trade setup outcome tracking and performance stats

Closes the feedback loop on R:R scanner signals: - Nightly outcome_evaluator job replays unresolved setups against daily OHLCV bars: target_hit / stop_hit / ambiguous (same-bar, counted as loss) / expired after OUTCOME_EVALUATION_MAX_BARS (default 30) - Migration 004: evaluated_at + outcome_date on trade_setups - GET /trades/performance: hit rate, expectancy (avg R), total R with breakdowns by direction, recommended action, and confidence bucket - New Performance page (stat cards, breakdown tables, Evaluate Now, methodology disclosure) wired into sidebar and mobile nav - 17 new unit tests for evaluation logic and stats aggregation Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 19:23:57 +02:00
parent d69df5df27
commit 21ed83c56c
20 changed files with 859 additions and 5 deletions
@@ -400,6 +400,7 @@ VALID_JOB_NAMES = {
    "fundamental_collector",
    "rr_scanner",
    "ticker_universe_sync",
+    "outcome_evaluator",
 }

 JOB_LABELS = {
@@ -408,6 +409,7 @@ JOB_LABELS = {
    "fundamental_collector": "Fundamental Collector",
    "rr_scanner": "R:R Scanner",
    "ticker_universe_sync": "Ticker Universe Sync",
+    "outcome_evaluator": "Outcome Evaluator",
 }


@@ -0,0 +1,222 @@
+"""Trade setup outcome evaluation service.
+
+Closes the feedback loop on R:R scanner setups: walks daily OHLCV bars
+after detection and records whether the stop or the target was hit first.
+
+Outcome semantics (entry is the close at detection time, i.e. market entry):
+  - target_hit: target reached before the stop
+  - stop_hit:   stop reached before the target
+  - ambiguous:  stop AND target both within the same daily bar — with daily
+                granularity the order is unknowable, counted as a loss in stats
+  - expired:    neither level hit within ``max_bars`` trading days
+  - (NULL):     not enough bars yet to decide — re-evaluated on the next run
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from datetime import date, datetime, timezone
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.models.ohlcv import OHLCVRecord
+from app.models.trade_setup import TradeSetup
+
+logger = logging.getLogger(__name__)
+
+OUTCOME_TARGET_HIT = "target_hit"
+OUTCOME_STOP_HIT = "stop_hit"
+OUTCOME_AMBIGUOUS = "ambiguous"
+OUTCOME_EXPIRED = "expired"
+
+DEFAULT_MAX_BARS = 30
+
+# Confidence buckets for the performance breakdown
+_CONFIDENCE_BUCKETS = [
+    ("<50%", 0.0, 50.0),
+    ("50-70%", 50.0, 70.0),
+    ("≥70%", 70.0, 100.01),
+]
+
+
+@dataclass(frozen=True)
+class Bar:
+    date: date
+    high: float
+    low: float
+
+
+def evaluate_setup_against_bars(
+    direction: str,
+    stop_loss: float,
+    target: float,
+    bars: list[Bar],
+    max_bars: int = DEFAULT_MAX_BARS,
+) -> tuple[str | None, date | None]:
+    """Determine a setup's outcome from daily bars strictly after detection.
+
+    Returns (outcome, outcome_date); (None, None) while still undecided.
+    """
+    for i, bar in enumerate(bars):
+        if i >= max_bars:
+            break
+        if direction == "long":
+            stop_hit = bar.low <= stop_loss
+            target_hit = bar.high >= target
+        else:
+            stop_hit = bar.high >= stop_loss
+            target_hit = bar.low <= target
+
+        if stop_hit and target_hit:
+            return OUTCOME_AMBIGUOUS, bar.date
+        if stop_hit:
+            return OUTCOME_STOP_HIT, bar.date
+        if target_hit:
+            return OUTCOME_TARGET_HIT, bar.date
+
+    if len(bars) >= max_bars:
+        return OUTCOME_EXPIRED, bars[max_bars - 1].date
+
+    return None, None
+
+
+async def evaluate_pending_setups(
+    db: AsyncSession,
+    max_bars: int = DEFAULT_MAX_BARS,
+) -> dict[str, int]:
+    """Evaluate all unevaluated trade setups against stored OHLCV data.
+
+    Bars are fetched once per ticker. Setups that cannot be decided yet
+    remain NULL and are picked up on the next run.
+    """
+    result = await db.execute(
+        select(TradeSetup).where(TradeSetup.actual_outcome.is_(None))
+    )
+    pending = list(result.scalars().all())
+
+    summary = {"evaluated": 0, "still_pending": 0, "by_outcome": {}}
+    if not pending:
+        return summary
+
+    by_ticker: dict[int, list[TradeSetup]] = {}
+    for setup in pending:
+        by_ticker.setdefault(setup.ticker_id, []).append(setup)
+
+    now = datetime.now(timezone.utc)
+
+    for ticker_id, setups in by_ticker.items():
+        earliest = min(s.detected_at for s in setups).date()
+        bars_result = await db.execute(
+            select(OHLCVRecord)
+            .where(
+                OHLCVRecord.ticker_id == ticker_id,
+                OHLCVRecord.date > earliest,
+            )
+            .order_by(OHLCVRecord.date.asc())
+        )
+        records = list(bars_result.scalars().all())
+        all_bars = [Bar(date=r.date, high=r.high, low=r.low) for r in records]
+
+        for setup in setups:
+            detected_date = setup.detected_at.date()
+            bars = [b for b in all_bars if b.date > detected_date]
+            outcome, outcome_date = evaluate_setup_against_bars(
+                setup.direction, setup.stop_loss, setup.target, bars, max_bars
+            )
+            if outcome is None:
+                summary["still_pending"] += 1
+                continue
+            setup.actual_outcome = outcome
+            setup.outcome_date = outcome_date
+            setup.evaluated_at = now
+            summary["evaluated"] += 1
+            summary["by_outcome"][outcome] = summary["by_outcome"].get(outcome, 0) + 1
+
+    await db.commit()
+    return summary
+
+
+def _realized_r(setup: TradeSetup) -> float | None:
+    """Realized result in R-multiples: win = +rr_ratio, loss = -1R, expired = 0R."""
+    if setup.actual_outcome == OUTCOME_TARGET_HIT:
+        return setup.rr_ratio
+    if setup.actual_outcome in (OUTCOME_STOP_HIT, OUTCOME_AMBIGUOUS):
+        return -1.0
+    if setup.actual_outcome == OUTCOME_EXPIRED:
+        return 0.0
+    return None
+
+
+def _bucket_stats(setups: list[TradeSetup]) -> dict:
+    wins = sum(1 for s in setups if s.actual_outcome == OUTCOME_TARGET_HIT)
+    losses = sum(
+        1 for s in setups if s.actual_outcome in (OUTCOME_STOP_HIT, OUTCOME_AMBIGUOUS)
+    )
+    expired = sum(1 for s in setups if s.actual_outcome == OUTCOME_EXPIRED)
+    decided = wins + losses
+    realized = [r for s in setups if (r := _realized_r(s)) is not None]
+
+    return {
+        "total": len(setups),
+        "wins": wins,
+        "losses": losses,
+        "expired": expired,
+        "hit_rate": round(wins / decided * 100, 1) if decided else None,
+        "avg_r": round(sum(realized) / len(realized), 3) if realized else None,
+        "total_r": round(sum(realized), 2) if realized else None,
+    }
+
+
+def _confidence_bucket(score: float | None) -> str | None:
+    if score is None:
+        return None
+    for label, lo, hi in _CONFIDENCE_BUCKETS:
+        if lo <= score < hi:
+            return label
+    return None
+
+
+async def get_performance_stats(db: AsyncSession) -> dict:
+    """Aggregate outcome statistics over all evaluated trade setups.
+
+    avg_r is the expectancy per trade in R-multiples (win = +rr_ratio,
+    loss = -1R, expired = 0R). A positive avg_r means the signals have
+    been profitable on a risk-adjusted basis.
+    """
+    result = await db.execute(
+        select(TradeSetup).where(TradeSetup.actual_outcome.is_not(None))
+    )
+    evaluated = list(result.scalars().all())
+
+    pending_result = await db.execute(
+        select(TradeSetup.id).where(TradeSetup.actual_outcome.is_(None))
+    )
+    pending_count = len(pending_result.scalars().all())
+
+    by_direction: dict[str, list[TradeSetup]] = {}
+    by_action: dict[str, list[TradeSetup]] = {}
+    by_confidence: dict[str, list[TradeSetup]] = {}
+
+    for setup in evaluated:
+        by_direction.setdefault(setup.direction, []).append(setup)
+        action = setup.recommended_action or "NONE"
+        by_action.setdefault(action, []).append(setup)
+        bucket = _confidence_bucket(setup.confidence_score)
+        if bucket is not None:
+            by_confidence.setdefault(bucket, []).append(setup)
+
+    bucket_order = [label for label, _, _ in _CONFIDENCE_BUCKETS]
+
+    return {
+        "overall": _bucket_stats(evaluated),
+        "pending": pending_count,
+        "by_direction": {k: _bucket_stats(v) for k, v in sorted(by_direction.items())},
+        "by_action": {k: _bucket_stats(v) for k, v in sorted(by_action.items())},
+        "by_confidence": {
+            label: _bucket_stats(by_confidence[label])
+            for label in bucket_order
+            if label in by_confidence
+        },
+    }
@@ -351,4 +351,6 @@ def _trade_setup_to_dict(setup: TradeSetup, symbol: str) -> dict:
        "reasoning": setup.reasoning,
        "risk_level": setup.risk_level,
        "actual_outcome": setup.actual_outcome,
+        "outcome_date": setup.outcome_date,
+        "evaluated_at": setup.evaluated_at,
    }