add backtest harness (Phase 1): historical replay + hit-rate & calibration reports
Replays the price-derived engine over stored OHLCV: at each weekly as-of date, rebuild the setup from bars <= D (no lookahead) and walk the actual forward bars for the realized outcome. Reports realized hit-rate/expectancy of qualified setups (and all setups, by direction) plus a probability calibration curve (predicted target prob vs realized hit rate). Reuses pure functions throughout; extracted compute_technical_from_arrays / compute_momentum_from_closes from scoring_service so live and backtest stay in sync. Runs as a weekly/triggerable 'backtest' job caching the report in a SystemSetting; GET /backtest/report serves it. Sentiment/fundamentals held neutral (no point-in-time history) — calibrates the price/S-R/probability machinery. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,118 @@
|
||||
"""Tests for the historical backtest harness."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from datetime import date, timedelta
|
||||
|
||||
import pytest
|
||||
|
||||
from app.models.ohlcv import OHLCVRecord
|
||||
from app.models.ticker import Ticker
|
||||
from app.services import backtest_service as bt
|
||||
from app.services.outcome_service import (
|
||||
OUTCOME_EXPIRED,
|
||||
OUTCOME_STOP_HIT,
|
||||
OUTCOME_TARGET_HIT,
|
||||
)
|
||||
from tests.conftest import _test_session_factory # type: ignore
|
||||
|
||||
|
||||
@pytest.fixture
async def session():
    """Provide a session opened from the shared test session factory.

    The session is yielded to the test from inside the ``async with`` block,
    so the factory's context manager is exited once the test is done.
    """
    async with _test_session_factory() as db_session:
        yield db_session
|
||||
|
||||
|
||||
def _cand(
    prob: float,
    outcome: str,
    rr: float,
    qualified: bool = True,
    direction: str = "long",
) -> dict:
    """Build a synthetic candidate dict for the stats and calibration tests.

    ``target_hit`` is true only when ``outcome`` is ``OUTCOME_TARGET_HIT``.
    ``realized_r`` is ``rr`` for a target hit, ``0.0`` for an expired setup
    and ``-1.0`` for every other outcome.
    """
    hit = outcome == OUTCOME_TARGET_HIT

    if hit:
        realized_r = rr
    elif outcome == OUTCOME_EXPIRED:
        realized_r = 0.0
    else:
        realized_r = -1.0

    candidate = {
        "primary_prob": prob,
        "outcome": outcome,
        "target_hit": hit,
        "rr": rr,
        "realized_r": realized_r,
        "qualified": qualified,
        "direction": direction,
    }
    return candidate
|
||||
|
||||
|
||||
def test_bucket_stats_counts_and_expectancy():
    """Check outcome tallies, hit rate and R totals for a mixed bucket."""
    # (predicted prob, outcome, reward:risk) for each synthetic candidate.
    specs = [
        (70, OUTCOME_TARGET_HIT, 3.0),  # +3R win
        (60, OUTCOME_TARGET_HIT, 2.0),  # +2R win
        (40, OUTCOME_STOP_HIT, 3.0),  # -1R loss
        (30, OUTCOME_EXPIRED, 3.0),  # 0R expired
    ]
    stats = bt._bucket_stats([_cand(*spec) for spec in specs])

    expected_counts = {"total": 4, "wins": 2, "losses": 1, "expired": 1}
    for field, count in expected_counts.items():
        assert stats[field] == count

    # Expired setups are excluded: hit rate is wins / (wins + losses).
    decided_hit_rate = round(2 / 3 * 100, 1)
    assert stats["hit_rate"] == decided_hit_rate

    # Average R spans all four candidates: (3 + 2 - 1 + 0) / 4 = 1.0.
    assert stats["avg_r"] == 1.0
    assert stats["total_r"] == 4.0
|
||||
|
||||
|
||||
def test_bucket_stats_empty():
    """An empty candidate list gives a zero total and ``None`` rate fields."""
    stats = bt._bucket_stats([])

    assert stats["total"] == 0
    for field in ("hit_rate", "avg_r"):
        assert stats[field] is None
|
||||
|
||||
|
||||
def test_calibration_buckets():
    """Check per-bucket counts and realized hit rates in the calibration rows."""
    # Two candidates with probabilities in the 60s (one hit, one stop)
    # and one at 15 (stop).
    prob_and_outcome = [
        (65, OUTCOME_TARGET_HIT),
        (62, OUTCOME_STOP_HIT),
        (15, OUTCOME_STOP_HIT),
    ]
    candidates = [_cand(prob, outcome, 2.0) for prob, outcome in prob_and_outcome]

    rows_by_label = {}
    for row in bt._calibration(candidates):
        rows_by_label[row["bucket"]] = row

    upper = rows_by_label["60-80%"]
    assert upper["n"] == 2
    assert upper["realized_hit_rate"] == 50.0  # 1 of 2 hit

    lower = rows_by_label["0-20%"]
    assert lower["n"] == 1
    assert lower["realized_hit_rate"] == 0.0
|
||||
|
||||
|
||||
def test_window_setups_too_short_returns_empty():
    """An empty first argument with empty dicts produces no setups."""
    setups = bt._window_setups([], {}, {})
    assert setups == []
|
||||
|
||||
|
||||
async def _seed_oscillating_ticker(session, symbol: str, n: int = 160) -> None:
    """Add a ticker and ``n`` consecutive daily OHLCV rows, then commit.

    Closes follow ``100 + 8 * sin(i / 6)`` starting on 2025-01-01. Each bar
    opens at its close, with high and low 1.5 above and below it.
    """
    ticker = Ticker(symbol=symbol)
    session.add(ticker)
    # Flushed before the loop reads ticker.id (presumably to populate it).
    await session.flush()

    first_day = date(2025, 1, 1)
    for offset in range(n):
        price = 100.0 + 8.0 * math.sin(offset / 6.0)
        bar = OHLCVRecord(
            ticker_id=ticker.id,
            date=first_day + timedelta(days=offset),
            open=price,
            high=price + 1.5,
            low=price - 1.5,
            close=price,
            volume=1_000_000 + (offset % 5) * 1000,
        )
        session.add(bar)

    await session.commit()
|
||||
|
||||
|
||||
async def test_run_backtest_smoke(session):
    """Run the backtest on one seeded ticker and check the report's shape."""
    await _seed_oscillating_ticker(session, "OSC")
    report = await bt.run_backtest(session)

    # Well-formed report: one ticker, an integer candidate count, all sections.
    assert report["tickers"] == 1
    candidate_count = report["candidates"]
    assert isinstance(candidate_count, int)
    required_sections = ("overall_qualified", "overall_all", "by_direction", "calibration")
    assert all(section in report for section in required_sections)

    # The oscillating series should yield at least some resolved setups.
    assert candidate_count >= 1

    # Every calibration row is internally consistent.
    for row in report["calibration"]:
        hit_rate = row["realized_hit_rate"]
        assert hit_rate >= 0 and hit_rate <= 100
        assert row["n"] >= 1
|
||||
@@ -83,6 +83,7 @@ class TestConfigureScheduler:
|
||||
"outcome_evaluator",
|
||||
"alerts",
|
||||
"market_regime",
|
||||
"backtest",
|
||||
}
|
||||
|
||||
def test_configure_is_idempotent(self):
|
||||
@@ -93,6 +94,7 @@ class TestConfigureScheduler:
|
||||
# Each ID should appear exactly once
|
||||
assert sorted(job_ids) == sorted([
|
||||
"alerts",
|
||||
"backtest",
|
||||
"data_collector",
|
||||
"fundamental_collector",
|
||||
"market_regime",
|
||||
|
||||
Reference in New Issue
Block a user