c63951ca02
The target-vs-stop model counts a near-miss of a far S/R target as a full loss and ignores the partial gains you actually bank — so it measures a different strategy than "scalp the early pop, take +8%". Add a realistic take-profit exit model next to it (the original model is untouched). Per setup, the replay now also records risk%, whether the stop was hit, the favourable excursion reachable before the stop (MFE), and the horizon-close move. From those, a fixed-take-profit sweep (4/6/8/10/12/15%) is scored in R: bank +X% if it is reached before the stop; otherwise take -1R if the stop was hit; otherwise take the horizon-close move. Hit rate = how often +X% was banked (the MFE CDF), so you can pick the EV-optimal TP without top-ticking fantasy. The sweep is shown as a new table in the Backtest panel; the IC, calibration and momentum sweep are unchanged. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
181 lines
6.2 KiB
Python
181 lines
6.2 KiB
Python
"""Tests for the historical backtest harness."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import math
|
|
from datetime import date, timedelta
|
|
from types import SimpleNamespace
|
|
|
|
import pytest
|
|
|
|
from app.models.ohlcv import OHLCVRecord
|
|
from app.models.ticker import Ticker
|
|
from app.services import backtest_service as bt
|
|
from app.services.outcome_service import (
|
|
OUTCOME_EXPIRED,
|
|
OUTCOME_STOP_HIT,
|
|
OUTCOME_TARGET_HIT,
|
|
)
|
|
from tests.conftest import _test_session_factory # type: ignore
|
|
|
|
|
|
@pytest.fixture
async def session():
    """Open a session from the test session factory and yield it to the test."""
    async with _test_session_factory() as db_session:
        yield db_session
|
|
|
|
|
|
def _cand(
    prob: float,
    outcome: str,
    rr: float,
    qualified: bool = True,
    direction: str = "long",
) -> dict:
    """Build a resolved-candidate dict for the bucket and calibration tests.

    ``realized_r`` is derived from ``outcome``: ``rr`` when the target was
    hit, ``0.0`` when the setup expired, and ``-1.0`` for any other outcome.
    """
    hit = outcome == OUTCOME_TARGET_HIT
    if hit:
        realized_r = rr
    elif outcome == OUTCOME_EXPIRED:
        realized_r = 0.0
    else:
        realized_r = -1.0

    candidate = {
        "primary_prob": prob,
        "outcome": outcome,
        "target_hit": hit,
        "rr": rr,
        "realized_r": realized_r,
        "qualified": qualified,
        "direction": direction,
    }
    return candidate
|
|
|
|
|
|
def _bar(high: float, low: float, close: float) -> SimpleNamespace:
    """Return a lightweight bar stand-in exposing ``high``, ``low`` and ``close``."""
    bar = SimpleNamespace()
    bar.high = high
    bar.low = low
    bar.close = close
    return bar
|
|
|
|
|
|
class TestTakeProfitPrimitives:
    """Checks of ``bt._tp_primitives`` against hand-built bars.

    Every call unpacks the result as ``(risk, stopped, mfe, close_pct)``;
    values a test does not assert on are discarded with ``_``.
    """

    def test_long_tp_reachable_before_stop(self):
        """Long at 100 with stop 95: the bar's low (101) stays above the stop."""
        risk, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, [_bar(109, 101, 108)], 30)
        assert risk == pytest.approx(0.05)
        assert stopped is False
        assert mfe == pytest.approx(0.09)
        assert close_pct == pytest.approx(0.08)

    def test_long_stop_zeroes_mfe(self):
        """Long at 100 with stop 95: a low of 94 on the only bar is a stop-out."""
        # Low pierces the stop on the only bar → loss, nothing banked before it.
        _, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, [_bar(101, 94, 96)], 30)
        assert stopped is True
        assert mfe == pytest.approx(0.0)
        assert close_pct == pytest.approx(-0.04)

    def test_long_drift_no_trigger(self):
        """Two quiet bars: no stop, MFE from the highest high, close from the last bar."""
        bars = [_bar(102, 99, 101), _bar(103, 100, 102)]
        _, stopped, mfe, close_pct = bt._tp_primitives("long", 100.0, 95.0, bars, 30)
        assert stopped is False
        assert mfe == pytest.approx(0.03)
        assert close_pct == pytest.approx(0.02)

    def test_short_direction(self):
        """Short at 100 with stop 105: gains are measured on the way down."""
        # short entry 100, stop 105; price falls → favourable = (entry - low)/entry
        risk, stopped, mfe, close_pct = bt._tp_primitives("short", 100.0, 105.0, [_bar(101, 92, 93)], 30)
        assert risk == pytest.approx(0.05)
        assert stopped is False
        assert mfe == pytest.approx(0.08)
        assert close_pct == pytest.approx(0.07)
|
|
|
|
|
|
class TestTakeProfitBucket:
    """Checks of ``bt._take_profit_bucket`` at an 8% take-profit level."""

    def test_bucket_mix(self):
        """One banked win, one stop-out and one timeout are aggregated in R."""
        banked = {"risk_pct": 0.05, "mfe_pct": 0.09, "tp_stopped": False, "tp_close_pct": 0.08}  # +1.6R win
        stopped_out = {"risk_pct": 0.05, "mfe_pct": 0.02, "tp_stopped": True, "tp_close_pct": -0.04}  # -1R stop
        timed_out = {"risk_pct": 0.05, "mfe_pct": 0.03, "tp_stopped": False, "tp_close_pct": 0.01}  # +0.2R timeout

        bucket = bt._take_profit_bucket([banked, stopped_out, timed_out], 0.08)

        assert bucket["total"] == 3
        assert bucket["wins"] == 1
        assert bucket["hit_rate"] == pytest.approx(33.3, abs=0.1)
        # 1.6R - 1R + 0.2R = 0.8R in total, i.e. about 0.267R per setup.
        assert bucket["total_r"] == pytest.approx(0.8, abs=0.01)
        assert bucket["avg_r"] == pytest.approx(0.267, abs=0.01)

    def test_zero_risk_skipped(self):
        """A candidate with ``risk_pct`` of zero is left out of the bucket."""
        zero_risk = {"risk_pct": 0.0, "mfe_pct": 0.2, "tp_stopped": False, "tp_close_pct": 0.1}

        bucket = bt._take_profit_bucket([zero_risk], 0.08)

        assert bucket["total"] == 0
        assert bucket["avg_r"] is None
|
|
|
|
|
|
def test_bucket_stats_counts_and_expectancy():
    """``_bucket_stats`` tallies outcomes and averages realized R over all setups."""
    stats = bt._bucket_stats([
        _cand(70, OUTCOME_TARGET_HIT, 3.0),  # +3R win
        _cand(60, OUTCOME_TARGET_HIT, 2.0),  # +2R win
        _cand(40, OUTCOME_STOP_HIT, 3.0),  # -1R loss
        _cand(30, OUTCOME_EXPIRED, 3.0),  # 0R expired
    ])

    expected_counts = {"total": 4, "wins": 2, "losses": 1, "expired": 1}
    for key, expected in expected_counts.items():
        assert stats[key] == expected

    # hit rate is over decided (wins+losses) only
    assert stats["hit_rate"] == round(2 / 3 * 100, 1)
    # avg R = (3 + 2 - 1 + 0) / 4 = 1.0
    assert stats["avg_r"] == 1.0
    assert stats["total_r"] == 4.0
|
|
|
|
|
|
def test_bucket_stats_empty():
    """With no candidates the total is zero and both rates are ``None``."""
    stats = bt._bucket_stats([])

    assert stats["total"] == 0
    for rate_key in ("hit_rate", "avg_r"):
        assert stats[rate_key] is None
|
|
|
|
|
|
def test_calibration_buckets():
    """Candidates are grouped by probability bucket with a realized hit rate each."""
    candidates = [
        _cand(65, OUTCOME_TARGET_HIT, 2.0),
        _cand(62, OUTCOME_STOP_HIT, 2.0),
        _cand(15, OUTCOME_STOP_HIT, 2.0),
    ]

    by_bucket = {}
    for row in bt._calibration(candidates):
        by_bucket[row["bucket"]] = row

    upper = by_bucket["60-80%"]
    assert upper["n"] == 2
    assert upper["realized_hit_rate"] == 50.0  # 1 of 2 hit

    lower = by_bucket["0-20%"]
    assert lower["n"] == 1
    assert lower["realized_hit_rate"] == 0.0
|
|
|
|
|
|
def test_window_setups_too_short_returns_empty():
    """An empty window with empty lookups produces no setups."""
    setups = bt._window_setups([], {}, {})
    assert setups == []
|
|
|
|
|
|
async def _seed_oscillating_ticker(session, symbol: str, n: int = 160) -> None:
    """Insert one ticker and ``n`` daily bars whose close follows a sine wave.

    Bar ``i`` is dated 2025-01-01 plus ``i`` days and closes at
    ``100 + 8 * sin(i / 6)``. Open equals close, high and low sit 1.5 above
    and below it, and volume is ``1_000_000 + (i % 5) * 1000``.
    """
    ticker = Ticker(symbol=symbol)
    session.add(ticker)
    # Presumably flushed so that ticker.id is assigned before the bars use it.
    await session.flush()

    first_day = date(2025, 1, 1)
    for offset in range(n):
        price = 100.0 + 8.0 * math.sin(offset / 6.0)
        record = OHLCVRecord(
            ticker_id=ticker.id,
            date=first_day + timedelta(days=offset),
            open=price,
            high=price + 1.5,
            low=price - 1.5,
            close=price,
            volume=1_000_000 + (offset % 5) * 1000,
        )
        session.add(record)

    await session.commit()
|
|
|
|
|
|
async def test_run_backtest_smoke(session):
    """End-to-end run over one seeded ticker yields a well-formed report."""
    await _seed_oscillating_ticker(session, "OSC")
    report = await bt.run_backtest(session)

    # well-formed report
    assert report["tickers"] == 1
    assert isinstance(report["candidates"], int)
    required = ("overall_qualified", "overall_all", "by_direction", "calibration", "sweep")
    missing = [key for key in required if key not in report]
    assert missing == []
    # the oscillating series should yield at least some resolved setups
    assert report["candidates"] >= 1

    # sweep: lowering the momentum-percentile cutoff can only add qualifiers
    strictest_first = sorted(
        report["sweep"],
        key=lambda row: row["min_momentum_percentile"],
        reverse=True,
    )
    totals = [row["total"] for row in strictest_first]
    # ascending as threshold descends
    assert all(earlier <= later for earlier, later in zip(totals, totals[1:]))

    # every calibration row is internally consistent
    for row in report["calibration"]:
        assert 0 <= row["realized_hit_rate"] <= 100
        assert row["n"] >= 1
|