Add trade setup outcome tracking and performance stats
Closes the feedback loop on R:R scanner signals: - Nightly outcome_evaluator job replays unresolved setups against daily OHLCV bars: target_hit / stop_hit / ambiguous (same-bar, counted as loss) / expired after OUTCOME_EVALUATION_MAX_BARS (default 30) - Migration 004: evaluated_at + outcome_date on trade_setups - GET /trades/performance: hit rate, expectancy (avg R), total R with breakdowns by direction, recommended action, and confidence bucket - New Performance page (stat cards, breakdown tables, Evaluate Now, methodology disclosure) wired into sidebar and mobile nav - 17 new unit tests for evaluation logic and stats aggregation Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -45,6 +45,9 @@ class Settings(BaseSettings):
|
||||
default_watchlist_auto_size: int = 10
|
||||
default_rr_threshold: float = 1.5
|
||||
|
||||
# Outcome evaluation: trading days before an undecided setup expires
|
||||
outcome_evaluation_max_bars: int = 30
|
||||
|
||||
# Database Pool
|
||||
db_pool_size: int = 5
|
||||
db_pool_timeout: int = 30
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from datetime import datetime
|
||||
from datetime import date, datetime
|
||||
|
||||
import json
|
||||
|
||||
from sqlalchemy import DateTime, Float, ForeignKey, String, Text
|
||||
from sqlalchemy import Date, DateTime, Float, ForeignKey, String, Text
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.database import Base
|
||||
@@ -32,6 +32,10 @@ class TradeSetup(Base):
|
||||
reasoning: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
risk_level: Mapped[str | None] = mapped_column(String(10), nullable=True)
|
||||
actual_outcome: Mapped[str | None] = mapped_column(String(20), nullable=True)
|
||||
evaluated_at: Mapped[datetime | None] = mapped_column(
|
||||
DateTime(timezone=True), nullable=True
|
||||
)
|
||||
outcome_date: Mapped[date | None] = mapped_column(Date, nullable=True)
|
||||
|
||||
ticker = relationship("Ticker", back_populates="trade_setups")
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from app.dependencies import get_db, require_access
|
||||
from app.schemas.common import APIEnvelope
|
||||
from app.schemas.trade_setup import RecommendationSummaryResponse, TradeSetupResponse
|
||||
from app.services.outcome_service import get_performance_stats
|
||||
from app.services.rr_scanner_service import get_trade_setup_history, get_trade_setups
|
||||
|
||||
router = APIRouter(tags=["trades"])
|
||||
@@ -48,6 +49,20 @@ async def list_trade_setups(
|
||||
return APIEnvelope(status="success", data=data)
|
||||
|
||||
|
||||
@router.get("/trades/performance", response_model=APIEnvelope)
|
||||
async def get_trade_performance(
|
||||
_user=Depends(require_access),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
) -> APIEnvelope:
|
||||
"""Aggregate outcome statistics over evaluated trade setups.
|
||||
|
||||
Outcomes are written by the nightly outcome_evaluator job (win = target
|
||||
hit first, loss = stop hit first, expired = neither within the window).
|
||||
"""
|
||||
stats = await get_performance_stats(db)
|
||||
return APIEnvelope(status="success", data=stats)
|
||||
|
||||
|
||||
@router.get("/trades/{symbol}", response_model=APIEnvelope)
|
||||
async def get_ticker_trade_setups(
|
||||
symbol: str,
|
||||
|
||||
@@ -34,6 +34,7 @@ from app.providers.fundamentals_chain import build_fundamental_provider_chain
|
||||
from app.providers.openai_sentiment import OpenAISentimentProvider
|
||||
from app.providers.protocol import SentimentData
|
||||
from app.services import fundamental_service, ingestion_service, sentiment_service
|
||||
from app.services.outcome_service import evaluate_pending_setups
|
||||
from app.services.rr_scanner_service import scan_all_tickers
|
||||
from app.services.ticker_universe_service import bootstrap_universe
|
||||
|
||||
@@ -676,6 +677,52 @@ async def scan_rr() -> None:
|
||||
_runtime_finish(job_name, "error", processed=processed, total=total, message=str(exc))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Job: Outcome Evaluator
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def evaluate_outcomes() -> None:
|
||||
"""Evaluate unresolved trade setups against OHLCV data collected since.
|
||||
|
||||
Writes actual_outcome / outcome_date / evaluated_at on each decided setup.
|
||||
Undecided setups stay pending and are re-checked on the next run.
|
||||
"""
|
||||
job_name = "outcome_evaluator"
|
||||
logger.info(json.dumps({"event": "job_start", "job": job_name}))
|
||||
_runtime_start(job_name, total=1)
|
||||
|
||||
try:
|
||||
async with async_session_factory() as db:
|
||||
if not await _is_job_enabled(db, job_name):
|
||||
logger.info(json.dumps({"event": "job_skipped", "job": job_name, "reason": "disabled"}))
|
||||
_runtime_finish(job_name, "skipped", processed=0, total=1, message="Disabled")
|
||||
return
|
||||
|
||||
summary = await evaluate_pending_setups(
|
||||
db, max_bars=settings.outcome_evaluation_max_bars
|
||||
)
|
||||
|
||||
_runtime_progress(job_name, processed=1, total=1)
|
||||
_runtime_finish(
|
||||
job_name, "completed", processed=1, total=1,
|
||||
message=f"Evaluated {summary['evaluated']}, pending {summary['still_pending']}",
|
||||
)
|
||||
logger.info(json.dumps({
|
||||
"event": "job_complete",
|
||||
"job": job_name,
|
||||
"summary": summary,
|
||||
}))
|
||||
except Exception as exc:
|
||||
_runtime_finish(job_name, "error", processed=0, total=1, message=str(exc))
|
||||
logger.error(json.dumps({
|
||||
"event": "job_error",
|
||||
"job": job_name,
|
||||
"error_type": type(exc).__name__,
|
||||
"message": str(exc),
|
||||
}))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Job: Ticker Universe Sync
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -804,6 +851,16 @@ def configure_scheduler() -> None:
|
||||
replace_existing=True,
|
||||
)
|
||||
|
||||
# Outcome Evaluator — nightly, after fresh OHLCV has been collected
|
||||
scheduler.add_job(
|
||||
evaluate_outcomes,
|
||||
"interval",
|
||||
hours=24,
|
||||
id="outcome_evaluator",
|
||||
name="Outcome Evaluator",
|
||||
replace_existing=True,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
json.dumps({
|
||||
"event": "scheduler_configured",
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from datetime import date, datetime
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
@@ -44,4 +44,6 @@ class TradeSetupResponse(BaseModel):
|
||||
reasoning: str | None = None
|
||||
risk_level: str | None = None
|
||||
actual_outcome: str | None = None
|
||||
outcome_date: date | None = None
|
||||
evaluated_at: datetime | None = None
|
||||
recommendation_summary: RecommendationSummaryResponse | None = None
|
||||
|
||||
@@ -400,6 +400,7 @@ VALID_JOB_NAMES = {
|
||||
"fundamental_collector",
|
||||
"rr_scanner",
|
||||
"ticker_universe_sync",
|
||||
"outcome_evaluator",
|
||||
}
|
||||
|
||||
JOB_LABELS = {
|
||||
@@ -408,6 +409,7 @@ JOB_LABELS = {
|
||||
"fundamental_collector": "Fundamental Collector",
|
||||
"rr_scanner": "R:R Scanner",
|
||||
"ticker_universe_sync": "Ticker Universe Sync",
|
||||
"outcome_evaluator": "Outcome Evaluator",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,222 @@
|
||||
"""Trade setup outcome evaluation service.
|
||||
|
||||
Closes the feedback loop on R:R scanner setups: walks daily OHLCV bars
|
||||
after detection and records whether the stop or the target was hit first.
|
||||
|
||||
Outcome semantics (entry is the close at detection time, i.e. market entry):
|
||||
- target_hit: target reached before the stop
|
||||
- stop_hit: stop reached before the target
|
||||
- ambiguous: stop AND target both within the same daily bar — with daily
|
||||
granularity the order is unknowable, counted as a loss in stats
|
||||
- expired: neither level hit within ``max_bars`` trading days
|
||||
- (NULL): not enough bars yet to decide — re-evaluated on the next run
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from datetime import date, datetime, timezone
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.models.ohlcv import OHLCVRecord
|
||||
from app.models.trade_setup import TradeSetup
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
OUTCOME_TARGET_HIT = "target_hit"
|
||||
OUTCOME_STOP_HIT = "stop_hit"
|
||||
OUTCOME_AMBIGUOUS = "ambiguous"
|
||||
OUTCOME_EXPIRED = "expired"
|
||||
|
||||
DEFAULT_MAX_BARS = 30
|
||||
|
||||
# Confidence buckets for the performance breakdown
|
||||
_CONFIDENCE_BUCKETS = [
|
||||
("<50%", 0.0, 50.0),
|
||||
("50-70%", 50.0, 70.0),
|
||||
("≥70%", 70.0, 100.01),
|
||||
]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Bar:
|
||||
date: date
|
||||
high: float
|
||||
low: float
|
||||
|
||||
|
||||
def evaluate_setup_against_bars(
|
||||
direction: str,
|
||||
stop_loss: float,
|
||||
target: float,
|
||||
bars: list[Bar],
|
||||
max_bars: int = DEFAULT_MAX_BARS,
|
||||
) -> tuple[str | None, date | None]:
|
||||
"""Determine a setup's outcome from daily bars strictly after detection.
|
||||
|
||||
Returns (outcome, outcome_date); (None, None) while still undecided.
|
||||
"""
|
||||
for i, bar in enumerate(bars):
|
||||
if i >= max_bars:
|
||||
break
|
||||
if direction == "long":
|
||||
stop_hit = bar.low <= stop_loss
|
||||
target_hit = bar.high >= target
|
||||
else:
|
||||
stop_hit = bar.high >= stop_loss
|
||||
target_hit = bar.low <= target
|
||||
|
||||
if stop_hit and target_hit:
|
||||
return OUTCOME_AMBIGUOUS, bar.date
|
||||
if stop_hit:
|
||||
return OUTCOME_STOP_HIT, bar.date
|
||||
if target_hit:
|
||||
return OUTCOME_TARGET_HIT, bar.date
|
||||
|
||||
if len(bars) >= max_bars:
|
||||
return OUTCOME_EXPIRED, bars[max_bars - 1].date
|
||||
|
||||
return None, None
|
||||
|
||||
|
||||
async def evaluate_pending_setups(
|
||||
db: AsyncSession,
|
||||
max_bars: int = DEFAULT_MAX_BARS,
|
||||
) -> dict[str, int]:
|
||||
"""Evaluate all unevaluated trade setups against stored OHLCV data.
|
||||
|
||||
Bars are fetched once per ticker. Setups that cannot be decided yet
|
||||
remain NULL and are picked up on the next run.
|
||||
"""
|
||||
result = await db.execute(
|
||||
select(TradeSetup).where(TradeSetup.actual_outcome.is_(None))
|
||||
)
|
||||
pending = list(result.scalars().all())
|
||||
|
||||
summary = {"evaluated": 0, "still_pending": 0, "by_outcome": {}}
|
||||
if not pending:
|
||||
return summary
|
||||
|
||||
by_ticker: dict[int, list[TradeSetup]] = {}
|
||||
for setup in pending:
|
||||
by_ticker.setdefault(setup.ticker_id, []).append(setup)
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
for ticker_id, setups in by_ticker.items():
|
||||
earliest = min(s.detected_at for s in setups).date()
|
||||
bars_result = await db.execute(
|
||||
select(OHLCVRecord)
|
||||
.where(
|
||||
OHLCVRecord.ticker_id == ticker_id,
|
||||
OHLCVRecord.date > earliest,
|
||||
)
|
||||
.order_by(OHLCVRecord.date.asc())
|
||||
)
|
||||
records = list(bars_result.scalars().all())
|
||||
all_bars = [Bar(date=r.date, high=r.high, low=r.low) for r in records]
|
||||
|
||||
for setup in setups:
|
||||
detected_date = setup.detected_at.date()
|
||||
bars = [b for b in all_bars if b.date > detected_date]
|
||||
outcome, outcome_date = evaluate_setup_against_bars(
|
||||
setup.direction, setup.stop_loss, setup.target, bars, max_bars
|
||||
)
|
||||
if outcome is None:
|
||||
summary["still_pending"] += 1
|
||||
continue
|
||||
setup.actual_outcome = outcome
|
||||
setup.outcome_date = outcome_date
|
||||
setup.evaluated_at = now
|
||||
summary["evaluated"] += 1
|
||||
summary["by_outcome"][outcome] = summary["by_outcome"].get(outcome, 0) + 1
|
||||
|
||||
await db.commit()
|
||||
return summary
|
||||
|
||||
|
||||
def _realized_r(setup: TradeSetup) -> float | None:
|
||||
"""Realized result in R-multiples: win = +rr_ratio, loss = -1R, expired = 0R."""
|
||||
if setup.actual_outcome == OUTCOME_TARGET_HIT:
|
||||
return setup.rr_ratio
|
||||
if setup.actual_outcome in (OUTCOME_STOP_HIT, OUTCOME_AMBIGUOUS):
|
||||
return -1.0
|
||||
if setup.actual_outcome == OUTCOME_EXPIRED:
|
||||
return 0.0
|
||||
return None
|
||||
|
||||
|
||||
def _bucket_stats(setups: list[TradeSetup]) -> dict:
|
||||
wins = sum(1 for s in setups if s.actual_outcome == OUTCOME_TARGET_HIT)
|
||||
losses = sum(
|
||||
1 for s in setups if s.actual_outcome in (OUTCOME_STOP_HIT, OUTCOME_AMBIGUOUS)
|
||||
)
|
||||
expired = sum(1 for s in setups if s.actual_outcome == OUTCOME_EXPIRED)
|
||||
decided = wins + losses
|
||||
realized = [r for s in setups if (r := _realized_r(s)) is not None]
|
||||
|
||||
return {
|
||||
"total": len(setups),
|
||||
"wins": wins,
|
||||
"losses": losses,
|
||||
"expired": expired,
|
||||
"hit_rate": round(wins / decided * 100, 1) if decided else None,
|
||||
"avg_r": round(sum(realized) / len(realized), 3) if realized else None,
|
||||
"total_r": round(sum(realized), 2) if realized else None,
|
||||
}
|
||||
|
||||
|
||||
def _confidence_bucket(score: float | None) -> str | None:
|
||||
if score is None:
|
||||
return None
|
||||
for label, lo, hi in _CONFIDENCE_BUCKETS:
|
||||
if lo <= score < hi:
|
||||
return label
|
||||
return None
|
||||
|
||||
|
||||
async def get_performance_stats(db: AsyncSession) -> dict:
|
||||
"""Aggregate outcome statistics over all evaluated trade setups.
|
||||
|
||||
avg_r is the expectancy per trade in R-multiples (win = +rr_ratio,
|
||||
loss = -1R, expired = 0R). A positive avg_r means the signals have
|
||||
been profitable on a risk-adjusted basis.
|
||||
"""
|
||||
result = await db.execute(
|
||||
select(TradeSetup).where(TradeSetup.actual_outcome.is_not(None))
|
||||
)
|
||||
evaluated = list(result.scalars().all())
|
||||
|
||||
pending_result = await db.execute(
|
||||
select(TradeSetup.id).where(TradeSetup.actual_outcome.is_(None))
|
||||
)
|
||||
pending_count = len(pending_result.scalars().all())
|
||||
|
||||
by_direction: dict[str, list[TradeSetup]] = {}
|
||||
by_action: dict[str, list[TradeSetup]] = {}
|
||||
by_confidence: dict[str, list[TradeSetup]] = {}
|
||||
|
||||
for setup in evaluated:
|
||||
by_direction.setdefault(setup.direction, []).append(setup)
|
||||
action = setup.recommended_action or "NONE"
|
||||
by_action.setdefault(action, []).append(setup)
|
||||
bucket = _confidence_bucket(setup.confidence_score)
|
||||
if bucket is not None:
|
||||
by_confidence.setdefault(bucket, []).append(setup)
|
||||
|
||||
bucket_order = [label for label, _, _ in _CONFIDENCE_BUCKETS]
|
||||
|
||||
return {
|
||||
"overall": _bucket_stats(evaluated),
|
||||
"pending": pending_count,
|
||||
"by_direction": {k: _bucket_stats(v) for k, v in sorted(by_direction.items())},
|
||||
"by_action": {k: _bucket_stats(v) for k, v in sorted(by_action.items())},
|
||||
"by_confidence": {
|
||||
label: _bucket_stats(by_confidence[label])
|
||||
for label in bucket_order
|
||||
if label in by_confidence
|
||||
},
|
||||
}
|
||||
@@ -351,4 +351,6 @@ def _trade_setup_to_dict(setup: TradeSetup, symbol: str) -> dict:
|
||||
"reasoning": setup.reasoning,
|
||||
"risk_level": setup.risk_level,
|
||||
"actual_outcome": setup.actual_outcome,
|
||||
"outcome_date": setup.outcome_date,
|
||||
"evaluated_at": setup.evaluated_at,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user