Add trade setup outcome tracking and performance stats
Deploy / lint (push) Successful in 25s
Deploy / test (push) Successful in 1m7s
Deploy / deploy (push) Successful in 25s

Closes the feedback loop on R:R scanner signals:

- Nightly outcome_evaluator job replays unresolved setups against daily
  OHLCV bars: target_hit / stop_hit / ambiguous (same-bar, counted as
  loss) / expired after OUTCOME_EVALUATION_MAX_BARS (default 30)
- Migration 004: evaluated_at + outcome_date on trade_setups
- GET /trades/performance: hit rate, expectancy (avg R), total R with
  breakdowns by direction, recommended action, and confidence bucket
- New Performance page (stat cards, breakdown tables, Evaluate Now,
  methodology disclosure) wired into sidebar and mobile nav
- 17 new unit tests for evaluation logic and stats aggregation

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 19:23:57 +02:00
parent d69df5df27
commit 21ed83c56c
20 changed files with 859 additions and 5 deletions
+4
View File
@@ -39,6 +39,10 @@ FUNDAMENTAL_RATE_LIMIT_BACKOFF_SECONDS=15
DEFAULT_WATCHLIST_AUTO_SIZE=10
DEFAULT_RR_THRESHOLD=3.0
# Outcome Evaluation
# Trading days before an undecided setup expires at 0R
OUTCOME_EVALUATION_MAX_BARS=30
# Database Pool
DB_POOL_SIZE=5
DB_POOL_TIMEOUT=30
@@ -0,0 +1,34 @@
"""add outcome evaluation fields to trade_setups
Revision ID: 004
Revises: 003
Create Date: 2026-06-10 00:00:00.000000
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "004"
down_revision: Union[str, None] = "003"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add the nullable outcome-evaluation columns to ``trade_setups``."""
    new_columns = (
        sa.Column("evaluated_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("outcome_date", sa.Date(), nullable=True),
    )
    # Same order as before: evaluated_at first, then outcome_date.
    for column in new_columns:
        op.add_column("trade_setups", column)
def downgrade() -> None:
    """Drop the outcome-evaluation columns, newest first."""
    for column_name in ("outcome_date", "evaluated_at"):
        op.drop_column("trade_setups", column_name)
+3
View File
@@ -45,6 +45,9 @@ class Settings(BaseSettings):
default_watchlist_auto_size: int = 10
default_rr_threshold: float = 1.5
# Outcome evaluation: trading days before an undecided setup expires
outcome_evaluation_max_bars: int = 30
# Database Pool
db_pool_size: int = 5
db_pool_timeout: int = 30
+6 -2
View File
@@ -1,8 +1,8 @@
from datetime import datetime
from datetime import date, datetime
import json
from sqlalchemy import DateTime, Float, ForeignKey, String, Text
from sqlalchemy import Date, DateTime, Float, ForeignKey, String, Text
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.database import Base
@@ -32,6 +32,10 @@ class TradeSetup(Base):
reasoning: Mapped[str | None] = mapped_column(Text, nullable=True)
risk_level: Mapped[str | None] = mapped_column(String(10), nullable=True)
actual_outcome: Mapped[str | None] = mapped_column(String(20), nullable=True)
evaluated_at: Mapped[datetime | None] = mapped_column(
DateTime(timezone=True), nullable=True
)
outcome_date: Mapped[date | None] = mapped_column(Date, nullable=True)
ticker = relationship("Ticker", back_populates="trade_setups")
+15
View File
@@ -6,6 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.dependencies import get_db, require_access
from app.schemas.common import APIEnvelope
from app.schemas.trade_setup import RecommendationSummaryResponse, TradeSetupResponse
from app.services.outcome_service import get_performance_stats
from app.services.rr_scanner_service import get_trade_setup_history, get_trade_setups
router = APIRouter(tags=["trades"])
@@ -48,6 +49,20 @@ async def list_trade_setups(
return APIEnvelope(status="success", data=data)
@router.get("/trades/performance", response_model=APIEnvelope)
async def get_trade_performance(
    _user=Depends(require_access),
    db: AsyncSession = Depends(get_db),
) -> APIEnvelope:
    """Return aggregated outcome statistics for evaluated trade setups.

    The figures are computed by ``get_performance_stats``. The outcomes they
    summarise are written by the nightly outcome_evaluator job (win = target
    hit first, loss = stop hit first, expired = neither within the window).
    """
    performance = await get_performance_stats(db)
    envelope = APIEnvelope(status="success", data=performance)
    return envelope
@router.get("/trades/{symbol}", response_model=APIEnvelope)
async def get_ticker_trade_setups(
symbol: str,
+57
View File
@@ -34,6 +34,7 @@ from app.providers.fundamentals_chain import build_fundamental_provider_chain
from app.providers.openai_sentiment import OpenAISentimentProvider
from app.providers.protocol import SentimentData
from app.services import fundamental_service, ingestion_service, sentiment_service
from app.services.outcome_service import evaluate_pending_setups
from app.services.rr_scanner_service import scan_all_tickers
from app.services.ticker_universe_service import bootstrap_universe
@@ -676,6 +677,52 @@ async def scan_rr() -> None:
_runtime_finish(job_name, "error", processed=processed, total=total, message=str(exc))
# ---------------------------------------------------------------------------
# Job: Outcome Evaluator
# ---------------------------------------------------------------------------
async def evaluate_outcomes() -> None:
    """Run the outcome evaluator job over all unresolved trade setups.

    Delegates to ``evaluate_pending_setups`` with the configured bar window,
    which writes actual_outcome / outcome_date / evaluated_at on each decided
    setup; undecided setups stay pending for the next run. A disabled job is
    recorded as skipped. Any exception is recorded as an error and logged
    instead of being propagated to the caller.
    """
    job_name = "outcome_evaluator"
    logger.info(json.dumps({"event": "job_start", "job": job_name}))
    _runtime_start(job_name, total=1)

    try:
        async with async_session_factory() as db:
            enabled = await _is_job_enabled(db, job_name)
            if not enabled:
                skip_event = {"event": "job_skipped", "job": job_name, "reason": "disabled"}
                logger.info(json.dumps(skip_event))
                _runtime_finish(job_name, "skipped", processed=0, total=1, message="Disabled")
                return

            summary = await evaluate_pending_setups(
                db, max_bars=settings.outcome_evaluation_max_bars
            )

            # The job is a single unit of work, so progress goes 0 -> 1 in one step.
            _runtime_progress(job_name, processed=1, total=1)
            status_message = (
                f"Evaluated {summary['evaluated']}, pending {summary['still_pending']}"
            )
            _runtime_finish(
                job_name, "completed", processed=1, total=1, message=status_message
            )

            complete_event = {
                "event": "job_complete",
                "job": job_name,
                "summary": summary,
            }
            logger.info(json.dumps(complete_event))
    except Exception as exc:
        error_text = str(exc)
        _runtime_finish(job_name, "error", processed=0, total=1, message=error_text)
        error_event = {
            "event": "job_error",
            "job": job_name,
            "error_type": type(exc).__name__,
            "message": error_text,
        }
        logger.error(json.dumps(error_event))
# ---------------------------------------------------------------------------
# Job: Ticker Universe Sync
# ---------------------------------------------------------------------------
@@ -804,6 +851,16 @@ def configure_scheduler() -> None:
replace_existing=True,
)
# Outcome Evaluator — runs every 24 hours (interval trigger); intended to follow OHLCV collection
scheduler.add_job(
evaluate_outcomes,
"interval",
hours=24,
id="outcome_evaluator",
name="Outcome Evaluator",
replace_existing=True,
)
logger.info(
json.dumps({
"event": "scheduler_configured",
+3 -1
View File
@@ -2,7 +2,7 @@
from __future__ import annotations
from datetime import datetime
from datetime import date, datetime
from pydantic import BaseModel, Field
@@ -44,4 +44,6 @@ class TradeSetupResponse(BaseModel):
reasoning: str | None = None
risk_level: str | None = None
actual_outcome: str | None = None
outcome_date: date | None = None
evaluated_at: datetime | None = None
recommendation_summary: RecommendationSummaryResponse | None = None
+2
View File
@@ -400,6 +400,7 @@ VALID_JOB_NAMES = {
"fundamental_collector",
"rr_scanner",
"ticker_universe_sync",
"outcome_evaluator",
}
JOB_LABELS = {
@@ -408,6 +409,7 @@ JOB_LABELS = {
"fundamental_collector": "Fundamental Collector",
"rr_scanner": "R:R Scanner",
"ticker_universe_sync": "Ticker Universe Sync",
"outcome_evaluator": "Outcome Evaluator",
}
+222
View File
@@ -0,0 +1,222 @@
"""Trade setup outcome evaluation service.
Closes the feedback loop on R:R scanner setups: walks daily OHLCV bars
after detection and records whether the stop or the target was hit first.
Outcome semantics (entry is the close at detection time, i.e. market entry):
- target_hit: target reached before the stop
- stop_hit: stop reached before the target
- ambiguous: stop AND target both within the same daily bar — with daily
granularity the order is unknowable, counted as a loss in stats
- expired: neither level hit within ``max_bars`` trading days
- (NULL): not enough bars yet to decide — re-evaluated on the next run
"""
from __future__ import annotations
import logging
from dataclasses import dataclass
from datetime import date, datetime, timezone

from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession

from app.models.ohlcv import OHLCVRecord
from app.models.trade_setup import TradeSetup
logger = logging.getLogger(__name__)
# Outcome labels written to TradeSetup.actual_outcome once a setup is decided.
OUTCOME_TARGET_HIT = "target_hit"
OUTCOME_STOP_HIT = "stop_hit"
OUTCOME_AMBIGUOUS = "ambiguous"
OUTCOME_EXPIRED = "expired"
# Default evaluation window (number of daily bars) used when no max_bars is passed.
DEFAULT_MAX_BARS = 30
# Confidence buckets for the performance breakdown, as (label, lower, upper).
# _confidence_bucket matches lower <= score < upper; the last upper bound sits
# just above 100, presumably so a score of exactly 100 still lands in the top bucket.
_CONFIDENCE_BUCKETS = [
    ("<50%", 0.0, 50.0),
    ("50-70%", 50.0, 70.0),
    ("≥70%", 70.0, 100.01),
]
@dataclass(frozen=True)
class Bar:
    """Immutable daily price bar holding only the fields outcome evaluation reads."""

    # Calendar date of the bar.
    date: date
    # Highest price of the bar.
    high: float
    # Lowest price of the bar.
    low: float
def evaluate_setup_against_bars(
    direction: str,
    stop_loss: float,
    target: float,
    bars: list[Bar],
    max_bars: int = DEFAULT_MAX_BARS,
) -> tuple[str | None, date | None]:
    """Replay a setup against the daily bars that follow its detection.

    Bars are walked in order and the first bar touching a level decides the
    outcome. ``direction == "long"`` checks the low against the stop and the
    high against the target; any other direction is treated as a short and
    checks the opposite sides. A bar touching both levels is ambiguous.

    Returns ``(outcome, outcome_date)``. If no level is touched within the
    first ``max_bars`` bars the setup expires on the last bar of that window;
    with fewer than ``max_bars`` bars available the result is ``(None, None)``.
    """
    is_long = direction == "long"
    # Only the first ``max_bars`` bars belong to the evaluation window.
    window = bars[: max(max_bars, 0)]

    for candle in window:
        if is_long:
            stopped = candle.low <= stop_loss
            reached = candle.high >= target
        else:
            stopped = candle.high >= stop_loss
            reached = candle.low <= target

        if stopped and reached:
            # Daily data cannot tell which level traded first.
            return OUTCOME_AMBIGUOUS, candle.date
        if stopped:
            return OUTCOME_STOP_HIT, candle.date
        if reached:
            return OUTCOME_TARGET_HIT, candle.date

    if len(bars) < max_bars:
        # Window not yet complete: leave the setup undecided.
        return None, None
    return OUTCOME_EXPIRED, bars[max_bars - 1].date
async def evaluate_pending_setups(
    db: AsyncSession,
    max_bars: int = DEFAULT_MAX_BARS,
) -> dict[str, int | dict[str, int]]:
    """Evaluate all unevaluated trade setups against stored OHLCV data.

    Setups whose ``actual_outcome`` is NULL are grouped by ticker so each
    ticker's bars are fetched once. Decided setups get ``actual_outcome``,
    ``outcome_date`` and ``evaluated_at`` written; undecided ones stay NULL
    and are picked up on the next run. Changes are committed once at the end.

    Args:
        db: Session used for the queries and the final commit.
        max_bars: Evaluation window passed to ``evaluate_setup_against_bars``.

    Returns:
        A summary with ``evaluated`` and ``still_pending`` counts plus
        ``by_outcome``, a mapping of outcome label to count.
    """
    result = await db.execute(
        select(TradeSetup).where(TradeSetup.actual_outcome.is_(None))
    )
    pending = list(result.scalars().all())

    by_outcome: dict[str, int] = {}
    summary: dict[str, int | dict[str, int]] = {
        "evaluated": 0,
        "still_pending": 0,
        "by_outcome": by_outcome,
    }
    if not pending:
        # Nothing to do, and nothing to commit.
        return summary

    by_ticker: dict[int, list[TradeSetup]] = {}
    for setup in pending:
        by_ticker.setdefault(setup.ticker_id, []).append(setup)

    evaluated = 0
    still_pending = 0
    # One timestamp for the whole run so every row of a run shares evaluated_at.
    now = datetime.now(timezone.utc)

    for ticker_id, setups in by_ticker.items():
        # Fetch from the earliest detection date so one query serves all setups.
        earliest = min(s.detected_at for s in setups).date()
        bars_result = await db.execute(
            select(OHLCVRecord)
            .where(
                OHLCVRecord.ticker_id == ticker_id,
                OHLCVRecord.date > earliest,
            )
            .order_by(OHLCVRecord.date.asc())
        )
        records = list(bars_result.scalars().all())
        all_bars = [Bar(date=r.date, high=r.high, low=r.low) for r in records]

        for setup in setups:
            # Only bars strictly after the detection date count for this setup.
            detected_date = setup.detected_at.date()
            bars = [b for b in all_bars if b.date > detected_date]

            outcome, outcome_date = evaluate_setup_against_bars(
                setup.direction, setup.stop_loss, setup.target, bars, max_bars
            )
            if outcome is None:
                still_pending += 1
                continue

            setup.actual_outcome = outcome
            setup.outcome_date = outcome_date
            setup.evaluated_at = now
            evaluated += 1
            by_outcome[outcome] = by_outcome.get(outcome, 0) + 1

    await db.commit()

    summary["evaluated"] = evaluated
    summary["still_pending"] = still_pending
    return summary
def _realized_r(setup: TradeSetup) -> float | None:
    """Return the setup's result in R-multiples, or None when it has no outcome.

    A target hit earns the setup's ``rr_ratio``, a stop hit or ambiguous bar
    costs 1R, and an expired setup counts as 0R.
    """
    outcome = setup.actual_outcome
    if outcome == OUTCOME_EXPIRED:
        return 0.0
    if outcome == OUTCOME_TARGET_HIT:
        return setup.rr_ratio
    if outcome == OUTCOME_STOP_HIT or outcome == OUTCOME_AMBIGUOUS:
        return -1.0
    return None
def _bucket_stats(setups: list[TradeSetup]) -> dict:
    """Summarise a group of setups into counts, hit rate and R-multiple totals.

    ``hit_rate`` is the percentage of wins among decided setups (wins plus
    losses, with ambiguous counted as a loss); expired setups are excluded
    from it. ``avg_r`` and ``total_r`` cover every setup with a realized R
    value. Each ratio is None when it has nothing to divide by.
    """
    wins = losses = expired = 0
    realized: list[float] = []

    for setup in setups:
        outcome = setup.actual_outcome
        if outcome == OUTCOME_TARGET_HIT:
            wins += 1
        elif outcome in (OUTCOME_STOP_HIT, OUTCOME_AMBIGUOUS):
            losses += 1
        elif outcome == OUTCOME_EXPIRED:
            expired += 1

        r_multiple = _realized_r(setup)
        if r_multiple is not None:
            realized.append(r_multiple)

    decided = wins + losses
    r_sum = sum(realized)

    hit_rate = round(wins / decided * 100, 1) if decided else None
    avg_r = round(r_sum / len(realized), 3) if realized else None
    total_r = round(r_sum, 2) if realized else None

    return {
        "total": len(setups),
        "wins": wins,
        "losses": losses,
        "expired": expired,
        "hit_rate": hit_rate,
        "avg_r": avg_r,
        "total_r": total_r,
    }
def _confidence_bucket(score: float | None) -> str | None:
    """Return the label of the bucket containing ``score``, or None.

    None is returned for a missing score and for one outside every bucket.
    Lower bounds are inclusive and upper bounds exclusive.
    """
    if score is None:
        return None
    matching_labels = (
        label
        for label, lower, upper in _CONFIDENCE_BUCKETS
        if lower <= score < upper
    )
    return next(matching_labels, None)
async def get_performance_stats(db: AsyncSession) -> dict:
    """Aggregate outcome statistics over all evaluated trade setups.

    avg_r is the expectancy per trade in R-multiples (win = +rr_ratio,
    loss = -1R, expired = 0R). A positive avg_r means the signals have
    been profitable on a risk-adjusted basis.

    Returns:
        A dict with ``overall`` stats, the ``pending`` count of setups that
        have no outcome yet, and breakdowns ``by_direction``, ``by_action``
        (missing actions are grouped under "NONE") and ``by_confidence``
        (setups without a matching bucket are left out). Every stats entry
        has the shape produced by ``_bucket_stats``.
    """
    evaluated_result = await db.execute(
        select(TradeSetup).where(TradeSetup.actual_outcome.is_not(None))
    )
    evaluated = list(evaluated_result.scalars().all())

    # Count pending rows in SQL rather than loading every id just to len() it.
    pending_result = await db.execute(
        select(func.count())
        .select_from(TradeSetup)
        .where(TradeSetup.actual_outcome.is_(None))
    )
    pending_count = pending_result.scalar_one()

    by_direction: dict[str, list[TradeSetup]] = {}
    by_action: dict[str, list[TradeSetup]] = {}
    by_confidence: dict[str, list[TradeSetup]] = {}

    for setup in evaluated:
        by_direction.setdefault(setup.direction, []).append(setup)
        action = setup.recommended_action or "NONE"
        by_action.setdefault(action, []).append(setup)
        bucket = _confidence_bucket(setup.confidence_score)
        if bucket is not None:
            by_confidence.setdefault(bucket, []).append(setup)

    # Emit confidence buckets in their declared order, not insertion order.
    bucket_order = [label for label, _, _ in _CONFIDENCE_BUCKETS]

    return {
        "overall": _bucket_stats(evaluated),
        "pending": pending_count,
        "by_direction": {k: _bucket_stats(v) for k, v in sorted(by_direction.items())},
        "by_action": {k: _bucket_stats(v) for k, v in sorted(by_action.items())},
        "by_confidence": {
            label: _bucket_stats(by_confidence[label])
            for label in bucket_order
            if label in by_confidence
        },
    }
+2
View File
@@ -351,4 +351,6 @@ def _trade_setup_to_dict(setup: TradeSetup, symbol: str) -> dict:
"reasoning": setup.reasoning,
"risk_level": setup.risk_level,
"actual_outcome": setup.actual_outcome,
"outcome_date": setup.outcome_date,
"evaluated_at": setup.evaluated_at,
}
+2
View File
@@ -7,6 +7,7 @@ import WatchlistPage from './pages/WatchlistPage';
import TickerDetailPage from './pages/TickerDetailPage';
import ScannerPage from './pages/ScannerPage';
import RankingsPage from './pages/RankingsPage';
import PerformancePage from './pages/PerformancePage';
import AdminPage from './pages/AdminPage';
export default function App() {
@@ -21,6 +22,7 @@ export default function App() {
<Route path="/ticker/:symbol" element={<TickerDetailPage />} />
<Route path="/scanner" element={<ScannerPage />} />
<Route path="/rankings" element={<RankingsPage />} />
<Route path="/performance" element={<PerformancePage />} />
<Route element={<ProtectedRoute requireAdmin />}>
<Route path="/admin" element={<AdminPage />} />
</Route>
+6
View File
@@ -0,0 +1,6 @@
import apiClient from './client';
import type { PerformanceStats } from '../lib/types';
/** Fetch the aggregated outcome statistics from the `trades/performance` endpoint. */
export async function getPerformance() {
  const response = await apiClient.get<PerformanceStats>('trades/performance');
  return response.data;
}
@@ -6,6 +6,7 @@ const navItems = [
{ to: '/watchlist', label: 'Watchlist' },
{ to: '/scanner', label: 'Scanner' },
{ to: '/rankings', label: 'Rankings' },
{ to: '/performance', label: 'Performance' },
];
export default function MobileNav() {
@@ -7,6 +7,7 @@ const navItems = [
{ to: '/watchlist', label: 'Watchlist', icon: '◈' },
{ to: '/scanner', label: 'Scanner', icon: '⬡' },
{ to: '/rankings', label: 'Rankings', icon: '△' },
{ to: '/performance', label: 'Performance', icon: '◎' },
];
export default function Sidebar() {
+9
View File
@@ -0,0 +1,9 @@
import { useQuery } from '@tanstack/react-query';
import { getPerformance } from '../api/performance';
/** React Query hook that loads performance stats under the `performance` query key. */
export function usePerformance() {
  const performanceQuery = useQuery({
    queryKey: ['performance'],
    queryFn: getPerformance,
  });
  return performanceQuery;
}
+21
View File
@@ -128,9 +128,30 @@ export interface TradeSetup {
reasoning: string | null;
risk_level: 'Low' | 'Medium' | 'High' | null;
actual_outcome: string | null;
outcome_date: string | null;
evaluated_at: string | null;
recommendation_summary?: RecommendationSummary;
}
// Performance / outcome statistics
/** Outcome statistics for one group of evaluated setups. */
export interface OutcomeBucketStats {
  /** Number of setups in the group. */
  total: number;
  /** Setups whose target was hit first. */
  wins: number;
  /** Setups whose stop was hit first, including ambiguous same-bar cases. */
  losses: number;
  /** Setups where neither level was hit within the evaluation window. */
  expired: number;
  /** Wins as a percentage of wins + losses; null when nothing is decided. */
  hit_rate: number | null;
  /** Average realized R per setup; null when there is no realized result. */
  avg_r: number | null;
  /** Sum of realized R; null when there is no realized result. */
  total_r: number | null;
}
/** Payload of the performance endpoint: overall stats plus breakdowns. */
export interface PerformanceStats {
  /** Stats across every evaluated setup. */
  overall: OutcomeBucketStats;
  /** Number of setups that have no outcome yet. */
  pending: number;
  /** Stats keyed by trade direction. */
  by_direction: Record<string, OutcomeBucketStats>;
  /** Stats keyed by recommended action ("NONE" when a setup has none). */
  by_action: Record<string, OutcomeBucketStats>;
  /** Stats keyed by confidence bucket label. */
  by_confidence: Record<string, OutcomeBucketStats>;
}
export interface TradeTarget {
price: number;
distance_from_entry: number;
+195
View File
@@ -0,0 +1,195 @@
import { useMutation, useQueryClient } from '@tanstack/react-query';
import { usePerformance } from '../hooks/usePerformance';
import { triggerJob } from '../api/admin';
import { Button } from '../components/ui/Button';
import { Callout } from '../components/ui/Callout';
import { Disclosure } from '../components/ui/Disclosure';
import { PageHeader } from '../components/ui/PageHeader';
import { Section } from '../components/ui/Section';
import { SkeletonCard } from '../components/ui/Skeleton';
import { useToast } from '../components/ui/Toast';
import { RECOMMENDATION_ACTION_LABELS } from '../lib/recommendation';
import type { OutcomeBucketStats } from '../lib/types';
/** Format an R-multiple with two decimals and an explicit plus sign; null renders as a dash. */
function fmtR(value: number | null): string {
  if (value === null) {
    return '—';
  }
  const sign = value > 0 ? '+' : '';
  return sign + value.toFixed(2) + 'R';
}
/** Format a percentage with one decimal; null renders as a dash. */
function fmtPct(value: number | null): string {
  if (value === null) {
    return '—';
  }
  return value.toFixed(1) + '%';
}
/** Pick the text colour class for an R value: green above zero, red below, grey otherwise. */
function rColor(value: number | null): string {
  if (value === null) {
    return 'text-gray-400';
  }
  const sign = Math.sign(value);
  if (sign === 1) {
    return 'text-emerald-400';
  }
  if (sign === -1) {
    return 'text-red-400';
  }
  return 'text-gray-300';
}
type StatCardProps = {
  label: string;
  value: string;
  valueClass?: string;
  sub?: string;
};

/** Card showing one headline figure with an optional sub-line beneath it. */
function StatCard(props: StatCardProps) {
  const { label, value, valueClass = 'text-gray-100', sub } = props;
  const valueClassName = `mt-2 text-2xl font-semibold ${valueClass}`;

  return (
    <div className="glass p-5">
      <p className="text-xs uppercase tracking-widest text-gray-500">{label}</p>
      <p className={valueClassName}>{value}</p>
      {sub && <p className="mt-1 text-xs text-gray-500">{sub}</p>}
    </div>
  );
}
/** Map a recommended-action key to its display label, falling back to the key itself. */
function actionLabel(key: string): string {
  const labels = RECOMMENDATION_ACTION_LABELS;
  const known = labels[key as keyof typeof labels];
  return known ?? key;
}
/**
 * Table with one row per breakdown bucket (direction, action or confidence).
 * Shows an empty-state callout when there are no buckets.
 */
function BreakdownTable({ rows, labelHeader, mapLabel }: {
  rows: Record<string, OutcomeBucketStats>;
  labelHeader: string;
  mapLabel?: (key: string) => string;
}) {
  const buckets = Object.entries(rows);
  if (buckets.length === 0) {
    return <Callout variant="empty">No evaluated setups in this breakdown yet.</Callout>;
  }

  // Right-aligned numeric columns, in display order.
  const numericHeaders = ['Setups', 'Wins', 'Losses', 'Expired', 'Hit Rate', 'Avg R', 'Total R'];
  const displayName = (bucketKey: string) => (mapLabel ? mapLabel(bucketKey) : bucketKey);

  return (
    <div className="glass overflow-x-auto">
      <table className="w-full text-sm">
        <thead>
          <tr className="border-b border-white/[0.06] text-left text-xs uppercase tracking-wider text-gray-500">
            <th className="px-4 py-3">{labelHeader}</th>
            {numericHeaders.map((title) => (
              <th key={title} className="px-4 py-3 text-right">{title}</th>
            ))}
          </tr>
        </thead>
        <tbody>
          {buckets.map(([bucketKey, bucket]) => {
            const avgClass = `px-4 py-3 text-right font-mono ${rColor(bucket.avg_r)}`;
            const totalClass = `px-4 py-3 text-right font-mono ${rColor(bucket.total_r)}`;
            return (
              <tr key={bucketKey} className="border-b border-white/[0.04] transition-colors duration-150 hover:bg-white/[0.03]">
                <td className="px-4 py-3 font-medium text-gray-200">{displayName(bucketKey)}</td>
                <td className="px-4 py-3 text-right text-gray-300">{bucket.total}</td>
                <td className="px-4 py-3 text-right text-emerald-400">{bucket.wins}</td>
                <td className="px-4 py-3 text-right text-red-400">{bucket.losses}</td>
                <td className="px-4 py-3 text-right text-gray-400">{bucket.expired}</td>
                <td className="px-4 py-3 text-right text-gray-200">{fmtPct(bucket.hit_rate)}</td>
                <td className={avgClass}>{fmtR(bucket.avg_r)}</td>
                <td className={totalClass}>{fmtR(bucket.total_r)}</td>
              </tr>
            );
          })}
        </tbody>
      </table>
    </div>
  );
}
/**
 * Performance page: headline outcome statistics, breakdown tables, a
 * methodology disclosure, and a button that triggers the outcome evaluator job.
 */
export default function PerformancePage() {
  const { data, isLoading, isError, error } = usePerformance();
  const queryClient = useQueryClient();
  const toast = useToast();

  const evaluateMutation = useMutation({
    mutationFn: () => triggerJob('outcome_evaluator'),
    onSuccess: () => {
      toast.addToast('success', 'Outcome evaluation triggered. Stats will refresh shortly.');
      // Refetch after a short delay (3 s) instead of immediately.
      setTimeout(() => queryClient.invalidateQueries({ queryKey: ['performance'] }), 3000);
    },
    onError: () => {
      toast.addToast('error', 'Failed to trigger outcome evaluation');
    },
  });

  return (
    <div className="space-y-6 animate-slide-up">
      <PageHeader
        title="Performance"
        subtitle="Do the signals actually win? Outcomes of past trade setups"
        actions={
          <Button onClick={() => evaluateMutation.mutate()} loading={evaluateMutation.isPending}>
            {evaluateMutation.isPending ? 'Evaluating…' : 'Evaluate Now'}
          </Button>
        }
      />

      <Disclosure summary="How outcomes are measured">
        <p className="text-xs text-gray-400">
          Each setup is replayed against the daily bars after its detection: a{' '}
          <span className="text-emerald-400">win</span> means the target was reached before the
          stop, a <span className="text-red-400">loss</span> means the stop was hit first (bars
          where both levels fall inside the same day count conservatively as losses). Setups with
          neither level hit within 30 trading days <span className="text-gray-300">expire</span> at
          0R. Avg R is the expectancy per trade: wins earn their R:R ratio, losses cost 1R — a
          positive value means the signals have been profitable on a risk-adjusted basis. The
          evaluator runs nightly after OHLCV collection.
        </p>
      </Disclosure>

      {isLoading && (
        <div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-4">
          <SkeletonCard /><SkeletonCard /><SkeletonCard /><SkeletonCard />
        </div>
      )}

      {isError && (
        <Callout variant="error">
          {error instanceof Error ? error.message : 'Failed to load performance stats'}
        </Callout>
      )}

      {data && data.overall.total === 0 && (
        <Callout variant="empty">
          No evaluated setups yet. Outcomes appear once setups are old enough for their stop or
          target to be hit — the evaluator runs nightly, or click Evaluate Now.
          {data.pending > 0 && ` ${data.pending} setup${data.pending === 1 ? '' : 's'} pending evaluation.`}
        </Callout>
      )}

      {data && data.overall.total > 0 && (
        <>
          <div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-4">
            <StatCard
              label="Hit Rate"
              value={fmtPct(data.overall.hit_rate)}
              sub={`${data.overall.wins} wins / ${data.overall.losses} losses`}
            />
            <StatCard
              label="Expectancy"
              value={fmtR(data.overall.avg_r)}
              valueClass={rColor(data.overall.avg_r)}
              sub="average R per trade"
            />
            <StatCard
              label="Total R"
              value={fmtR(data.overall.total_r)}
              valueClass={rColor(data.overall.total_r)}
              sub="cumulative risk-adjusted result"
            />
            <StatCard
              label="Evaluated"
              value={String(data.overall.total)}
              sub={`${data.pending} pending · ${data.overall.expired} expired`}
            />
          </div>

          <Section title="By Direction">
            <BreakdownTable rows={data.by_direction} labelHeader="Direction" />
          </Section>

          <Section title="By Recommended Action">
            <BreakdownTable rows={data.by_action} labelHeader="Action" mapLabel={actionLabel} />
          </Section>

          <Section title="By Confidence" hint="at detection time">
            <BreakdownTable rows={data.by_confidence} labelHeader="Confidence" />
          </Section>
        </>
      )}
    </div>
  );
}
+1 -1
View File
@@ -1 +1 @@
{"root":["./src/app.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/api/admin.ts","./src/api/auth.ts","./src/api/client.ts","./src/api/fundamentals.ts","./src/api/health.ts","./src/api/indicators.ts","./src/api/ingestion.ts","./src/api/ohlcv.ts","./src/api/scores.ts","./src/api/sentiment.ts","./src/api/sr-levels.ts","./src/api/tickers.ts","./src/api/trades.ts","./src/api/watchlist.ts","./src/components/admin/datacleanup.tsx","./src/components/admin/jobcontrols.tsx","./src/components/admin/pipelinereadinesspanel.tsx","./src/components/admin/recommendationsettings.tsx","./src/components/admin/settingsform.tsx","./src/components/admin/tickermanagement.tsx","./src/components/admin/tickeruniversebootstrap.tsx","./src/components/admin/usertable.tsx","./src/components/auth/protectedroute.tsx","./src/components/charts/candlestickchart.tsx","./src/components/layout/appshell.tsx","./src/components/layout/mobilenav.tsx","./src/components/layout/sidebar.tsx","./src/components/rankings/rankingstable.tsx","./src/components/rankings/weightsform.tsx","./src/components/scanner/tradetable.tsx","./src/components/ticker/dimensionbreakdownpanel.tsx","./src/components/ticker/fundamentalspanel.tsx","./src/components/ticker/indicatorselector.tsx","./src/components/ticker/recommendationpanel.tsx","./src/components/ticker/sroverlay.tsx","./src/components/ticker/sentimentpanel.tsx","./src/components/ui/badge.tsx","./src/components/ui/button.tsx","./src/components/ui/callout.tsx","./src/components/ui/confirmdialog.tsx","./src/components/ui/disclosure.tsx","./src/components/ui/field.tsx","./src/components/ui/pageheader.tsx","./src/components/ui/scorecard.tsx","./src/components/ui/section.tsx","./src/components/ui/skeleton.tsx","./src/components/ui/tabs.tsx","./src/components/ui/toast.tsx","./src/components/watchlist/addtickerform.tsx","./src/components/watchlist/watchlisttable.tsx","./src/hooks/useadmin.ts","./src/hooks/useauth.ts","./src/hooks/usefetchsymboldata.ts","./src/hooks/usescores.t
s","./src/hooks/usetickerdetail.ts","./src/hooks/usetickers.ts","./src/hooks/usetrades.ts","./src/hooks/usewatchlist.ts","./src/lib/format.ts","./src/lib/ingestionstatus.ts","./src/lib/recommendation.ts","./src/lib/types.ts","./src/pages/adminpage.tsx","./src/pages/loginpage.tsx","./src/pages/rankingspage.tsx","./src/pages/registerpage.tsx","./src/pages/scannerpage.tsx","./src/pages/tickerdetailpage.tsx","./src/pages/watchlistpage.tsx","./src/stores/authstore.ts"],"version":"5.6.3"}
{"root":["./src/app.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/api/admin.ts","./src/api/auth.ts","./src/api/client.ts","./src/api/fundamentals.ts","./src/api/health.ts","./src/api/indicators.ts","./src/api/ingestion.ts","./src/api/ohlcv.ts","./src/api/performance.ts","./src/api/scores.ts","./src/api/sentiment.ts","./src/api/sr-levels.ts","./src/api/tickers.ts","./src/api/trades.ts","./src/api/watchlist.ts","./src/components/admin/datacleanup.tsx","./src/components/admin/jobcontrols.tsx","./src/components/admin/pipelinereadinesspanel.tsx","./src/components/admin/recommendationsettings.tsx","./src/components/admin/settingsform.tsx","./src/components/admin/tickermanagement.tsx","./src/components/admin/tickeruniversebootstrap.tsx","./src/components/admin/usertable.tsx","./src/components/auth/protectedroute.tsx","./src/components/charts/candlestickchart.tsx","./src/components/layout/appshell.tsx","./src/components/layout/mobilenav.tsx","./src/components/layout/sidebar.tsx","./src/components/rankings/rankingstable.tsx","./src/components/rankings/weightsform.tsx","./src/components/scanner/tradetable.tsx","./src/components/ticker/dimensionbreakdownpanel.tsx","./src/components/ticker/fundamentalspanel.tsx","./src/components/ticker/indicatorselector.tsx","./src/components/ticker/recommendationpanel.tsx","./src/components/ticker/sroverlay.tsx","./src/components/ticker/sentimentpanel.tsx","./src/components/ui/badge.tsx","./src/components/ui/button.tsx","./src/components/ui/callout.tsx","./src/components/ui/confirmdialog.tsx","./src/components/ui/disclosure.tsx","./src/components/ui/field.tsx","./src/components/ui/pageheader.tsx","./src/components/ui/scorecard.tsx","./src/components/ui/section.tsx","./src/components/ui/skeleton.tsx","./src/components/ui/tabs.tsx","./src/components/ui/toast.tsx","./src/components/watchlist/addtickerform.tsx","./src/components/watchlist/watchlisttable.tsx","./src/hooks/useadmin.ts","./src/hooks/useauth.ts","./src/hooks/usefetchsymboldata.t
s","./src/hooks/useperformance.ts","./src/hooks/usescores.ts","./src/hooks/usetickerdetail.ts","./src/hooks/usetickers.ts","./src/hooks/usetrades.ts","./src/hooks/usewatchlist.ts","./src/lib/format.ts","./src/lib/ingestionstatus.ts","./src/lib/recommendation.ts","./src/lib/types.ts","./src/pages/adminpage.tsx","./src/pages/loginpage.tsx","./src/pages/performancepage.tsx","./src/pages/rankingspage.tsx","./src/pages/registerpage.tsx","./src/pages/scannerpage.tsx","./src/pages/tickerdetailpage.tsx","./src/pages/watchlistpage.tsx","./src/stores/authstore.ts"],"version":"5.6.3"}
+272
View File
@@ -0,0 +1,272 @@
"""Unit tests for the trade setup outcome evaluation service."""
from __future__ import annotations
from datetime import date, datetime, timedelta, timezone
import pytest
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.ohlcv import OHLCVRecord
from app.models.ticker import Ticker
from app.models.trade_setup import TradeSetup
from app.services.outcome_service import (
OUTCOME_AMBIGUOUS,
OUTCOME_EXPIRED,
OUTCOME_STOP_HIT,
OUTCOME_TARGET_HIT,
Bar,
evaluate_pending_setups,
evaluate_setup_against_bars,
get_performance_stats,
)
@pytest.fixture
async def outcome_session() -> AsyncSession:
    """Yield a session from the test factory, usable with evaluate_pending_setups (which commits)."""
    from tests.conftest import _test_session_factory

    async with _test_session_factory() as db_session:
        yield db_session
def _bars(*hl: tuple[float, float], start: date = date(2026, 1, 5)) -> list[Bar]:
    """Build one bar per (high, low) pair on consecutive days beginning at ``start``."""
    series: list[Bar] = []
    for offset, (high, low) in enumerate(hl):
        bar_date = start + timedelta(days=offset)
        series.append(Bar(date=bar_date, high=high, low=low))
    return series
# ---------------------------------------------------------------------------
# evaluate_setup_against_bars — pure logic
# ---------------------------------------------------------------------------
class TestEvaluateSetupAgainstBars:
def test_long_target_hit(self):
# entry ~100, stop 95, target 110
bars = _bars((105, 99), (111, 104))
outcome, outcome_date = evaluate_setup_against_bars("long", 95, 110, bars)
assert outcome == OUTCOME_TARGET_HIT
assert outcome_date == bars[1].date
def test_long_stop_hit(self):
bars = _bars((105, 99), (103, 94))
outcome, outcome_date = evaluate_setup_against_bars("long", 95, 110, bars)
assert outcome == OUTCOME_STOP_HIT
assert outcome_date == bars[1].date
def test_long_stop_before_target_across_bars(self):
# Stop hit on bar 0, target would hit on bar 1 — stop wins (first bar decides)
bars = _bars((100, 94), (112, 100))
outcome, _ = evaluate_setup_against_bars("long", 95, 110, bars)
assert outcome == OUTCOME_STOP_HIT
def test_short_target_hit(self):
# short: entry ~100, stop 105, target 90
bars = _bars((102, 96), (98, 89))
outcome, outcome_date = evaluate_setup_against_bars("short", 105, 90, bars)
assert outcome == OUTCOME_TARGET_HIT
assert outcome_date == bars[1].date
def test_short_stop_hit(self):
bars = _bars((102, 96), (106, 98))
outcome, _ = evaluate_setup_against_bars("short", 105, 90, bars)
assert outcome == OUTCOME_STOP_HIT
def test_ambiguous_when_both_levels_in_same_bar(self):
# one giant bar spans both stop (95) and target (110)
bars = _bars((112, 94))
outcome, _ = evaluate_setup_against_bars("long", 95, 110, bars)
assert outcome == OUTCOME_AMBIGUOUS
def test_pending_when_not_enough_bars(self):
bars = _bars((105, 99), (104, 98))
outcome, outcome_date = evaluate_setup_against_bars("long", 95, 110, bars, max_bars=30)
assert outcome is None
assert outcome_date is None
def test_expired_after_max_bars(self):
bars = _bars(*[(105, 99)] * 10)
outcome, outcome_date = evaluate_setup_against_bars("long", 95, 110, bars, max_bars=10)
assert outcome == OUTCOME_EXPIRED
assert outcome_date == bars[9].date
def test_hit_beyond_max_bars_is_ignored(self):
# target hit on bar 11 but window is 10 — expired
bars = _bars(*[(105, 99)] * 10, (115, 105))
outcome, _ = evaluate_setup_against_bars("long", 95, 110, bars, max_bars=10)
assert outcome == OUTCOME_EXPIRED
def test_no_bars_is_pending(self):
    """An empty bar list leaves the setup undecided (outcome ``None``)."""
    no_bars = []

    verdict, _ignored_date = evaluate_setup_against_bars("long", 95, 110, no_bars)

    assert verdict is None
# ---------------------------------------------------------------------------
# evaluate_pending_setups — DB integration
# ---------------------------------------------------------------------------
async def _make_ticker(db: AsyncSession, symbol: str = "AAPL") -> Ticker:
    """Create a ``Ticker`` for *symbol*, add it to *db*, flush, and return it.

    The flush happens before returning; presumably this is what makes
    ``ticker.id`` usable by callers that build related rows — confirm
    against the model's primary-key configuration.
    """
    new_ticker = Ticker(symbol=symbol)
    db.add(new_ticker)
    await db.flush()
    return new_ticker
def _make_setup(
    ticker: Ticker,
    direction: str = "long",
    entry: float = 100.0,
    stop: float = 95.0,
    target: float = 110.0,
    rr: float = 2.0,
    detected: datetime | None = None,
    **kwargs,
) -> TradeSetup:
    """Build (but do not persist) a ``TradeSetup`` with test-friendly defaults.

    Args:
        ticker: Ticker whose ``id`` the setup is attached to.
        direction: Stored as the setup's ``direction``.
        entry: Stored as ``entry_price``.
        stop: Stored as ``stop_loss``.
        target: Stored as ``target``.
        rr: Stored as ``rr_ratio``.
        detected: Detection timestamp; a falsy value falls back to
            2026-01-02 21:00 UTC.
        **kwargs: Extra ``TradeSetup`` fields, forwarded unchanged.

    Returns:
        The new ``TradeSetup``; the caller adds it to a session.
    """
    detected_at = (
        detected if detected else datetime(2026, 1, 2, 21, 0, tzinfo=timezone.utc)
    )
    base_fields = dict(
        ticker_id=ticker.id,
        direction=direction,
        entry_price=entry,
        stop_loss=stop,
        target=target,
        rr_ratio=rr,
        composite_score=50.0,
        detected_at=detected_at,
    )
    # Both mappings are unpacked into one call, so a kwarg that repeats a
    # base field is still rejected as a duplicate keyword argument.
    return TradeSetup(**base_fields, **kwargs)
def _add_bars(
    db: AsyncSession,
    ticker: Ticker,
    *hl: tuple[float, float],
    start: date = date(2026, 1, 5),
):
    """Add one ``OHLCVRecord`` per ``(high, low)`` pair to *db* for *ticker*.

    Bars are dated on consecutive calendar days beginning at *start*.
    Open and close are both set to the midpoint of high and low, and
    volume is fixed at 1,000,000. The session is not flushed here.
    """
    for day_offset, bar in enumerate(hl):
        high, low = bar
        midpoint = (high + low) / 2
        record = OHLCVRecord(
            ticker_id=ticker.id,
            date=start + timedelta(days=day_offset),
            open=midpoint,
            high=high,
            low=low,
            close=midpoint,
            volume=1_000_000,
        )
        db.add(record)
class TestEvaluatePendingSetups:
    """Database-backed tests for ``evaluate_pending_setups``."""

    async def test_writes_outcome_and_metadata(self, outcome_session: AsyncSession):
        """A decided setup has outcome, outcome date and evaluation time stored."""
        session = outcome_session
        ticker = await _make_ticker(session)
        pending_setup = _make_setup(ticker)
        session.add(pending_setup)
        # Second bar (dated 2026-01-06) reaches the default 110 target.
        _add_bars(session, ticker, (105, 99), (111, 104))
        await session.flush()

        report = await evaluate_pending_setups(session)

        assert report["evaluated"] == 1
        assert report["by_outcome"] == {OUTCOME_TARGET_HIT: 1}

        row = (await session.execute(select(TradeSetup))).scalar_one()
        assert row.actual_outcome == OUTCOME_TARGET_HIT
        assert row.outcome_date == date(2026, 1, 6)
        assert row.evaluated_at is not None

    async def test_undecided_setup_stays_pending(self, outcome_session: AsyncSession):
        """A setup with no level reached and too few bars is left unresolved."""
        session = outcome_session
        ticker = await _make_ticker(session)
        session.add(_make_setup(ticker))
        # One quiet bar: neither level reached and well short of max_bars.
        _add_bars(session, ticker, (105, 99))
        await session.flush()

        report = await evaluate_pending_setups(session)

        assert report["evaluated"] == 0
        assert report["still_pending"] == 1

        row = (await session.execute(select(TradeSetup))).scalar_one()
        assert row.actual_outcome is None

    async def test_only_bars_after_detection_are_used(self, outcome_session: AsyncSession):
        """Bars dated on the detection day itself must not decide the outcome."""
        session = outcome_session
        ticker = await _make_ticker(session)
        # The 2026-01-02 bar shares the setup's default detection date and
        # would reach the stop; only the 2026-01-05 bar should be considered.
        _add_bars(session, ticker, (100, 90), start=date(2026, 1, 2))
        _add_bars(session, ticker, (111, 104), start=date(2026, 1, 5))
        session.add(_make_setup(ticker))
        await session.flush()

        await evaluate_pending_setups(session)

        row = (await session.execute(select(TradeSetup))).scalar_one()
        assert row.actual_outcome == OUTCOME_TARGET_HIT

    async def test_already_evaluated_setups_are_skipped(self, outcome_session: AsyncSession):
        """Setups that already carry an outcome are neither evaluated nor counted as pending."""
        session = outcome_session
        ticker = await _make_ticker(session)
        resolved_setup = _make_setup(ticker, actual_outcome=OUTCOME_STOP_HIT)
        session.add(resolved_setup)
        await session.flush()

        report = await evaluate_pending_setups(session)

        assert report["evaluated"] == 0
        assert report["still_pending"] == 0
# ---------------------------------------------------------------------------
# get_performance_stats
# ---------------------------------------------------------------------------
class TestGetPerformanceStats:
    """Tests for the aggregation returned by ``get_performance_stats``.

    Float-valued statistics (``hit_rate``, ``avg_r``, ``total_r``) are
    compared with ``pytest.approx`` throughout, so the assertions do not
    depend on the exact floating-point rounding of the aggregation.
    """

    async def test_empty_database(self, db_session: AsyncSession):
        """With no setups stored, totals are zero and the hit rate is undefined."""
        stats = await get_performance_stats(db_session)

        assert stats["overall"]["total"] == 0
        assert stats["overall"]["hit_rate"] is None
        assert stats["pending"] == 0

    async def test_aggregation(self, db_session: AsyncSession):
        """One win, one loss, one expiry and one pending setup aggregate correctly."""
        ticker = await _make_ticker(db_session)
        db_session.add(_make_setup(
            ticker, direction="long", rr=3.0,
            actual_outcome=OUTCOME_TARGET_HIT, confidence_score=80.0,
            recommended_action="LONG_HIGH",
        ))
        db_session.add(_make_setup(
            ticker, direction="long", rr=2.0,
            actual_outcome=OUTCOME_STOP_HIT, confidence_score=55.0,
            recommended_action="LONG_MODERATE",
        ))
        db_session.add(_make_setup(
            ticker, direction="short", rr=2.5,
            actual_outcome=OUTCOME_EXPIRED, confidence_score=40.0,
            recommended_action="NEUTRAL",
        ))
        # No actual_outcome: this one must only show up in the pending count.
        db_session.add(_make_setup(ticker, direction="short"))
        await db_session.flush()

        stats = await get_performance_stats(db_session)

        overall = stats["overall"]
        assert overall["total"] == 3
        assert overall["wins"] == 1
        assert overall["losses"] == 1
        assert overall["expired"] == 1
        assert overall["hit_rate"] == pytest.approx(50.0)
        # Realized R: +3.0 (win), -1.0 (loss), 0.0 (expired) → avg 0.667
        assert overall["avg_r"] == pytest.approx(0.667, abs=0.001)
        assert overall["total_r"] == pytest.approx(2.0)
        assert stats["pending"] == 1

        assert stats["by_direction"]["long"]["total"] == 2
        assert stats["by_direction"]["short"]["total"] == 1
        assert stats["by_action"]["LONG_HIGH"]["wins"] == 1
        assert stats["by_confidence"]["≥70%"]["wins"] == 1
        assert stats["by_confidence"]["50-70%"]["losses"] == 1
        assert stats["by_confidence"]["<50%"]["expired"] == 1

    async def test_ambiguous_counts_as_loss(self, db_session: AsyncSession):
        """An ambiguous outcome is scored like a loss: 0% hit rate and -1R."""
        ticker = await _make_ticker(db_session)
        db_session.add(_make_setup(ticker, actual_outcome=OUTCOME_AMBIGUOUS))
        await db_session.flush()

        stats = await get_performance_stats(db_session)

        overall = stats["overall"]
        assert overall["losses"] == 1
        assert overall["hit_rate"] == pytest.approx(0.0)
        assert overall["avg_r"] == pytest.approx(-1.0)
+3 -1
View File
@@ -68,7 +68,7 @@ class TestResumeTickers:
class TestConfigureScheduler:
def test_configure_adds_five_jobs(self):
def test_configure_adds_six_jobs(self):
# Remove any existing jobs first
scheduler.remove_all_jobs()
configure_scheduler()
@@ -80,6 +80,7 @@ class TestConfigureScheduler:
"fundamental_collector",
"rr_scanner",
"ticker_universe_sync",
"outcome_evaluator",
}
def test_configure_is_idempotent(self):
@@ -91,6 +92,7 @@ class TestConfigureScheduler:
assert sorted(job_ids) == sorted([
"data_collector",
"fundamental_collector",
"outcome_evaluator",
"rr_scanner",
"sentiment_collector",
"ticker_universe_sync",