641 lines
26 KiB
Python
641 lines
26 KiB
Python
"""Tests for the historical backtest harness."""
|
||
|
||
from __future__ import annotations
|
||
|
||
import math
|
||
from datetime import date, timedelta
|
||
from types import SimpleNamespace
|
||
|
||
import pytest
|
||
|
||
from app.models.ohlcv import OHLCVRecord
|
||
from app.models.ticker import Ticker
|
||
from app.services import backtest_service as bt
|
||
from app.services.outcome_service import (
|
||
OUTCOME_EXPIRED,
|
||
OUTCOME_STOP_HIT,
|
||
OUTCOME_TARGET_HIT,
|
||
)
|
||
from tests.conftest import _test_session_factory # type: ignore
|
||
|
||
|
||
@pytest.fixture
async def session():
    """Yield a session opened from the shared test session factory.

    The session comes from ``_test_session_factory`` (imported from
    ``tests.conftest``); the ``async with`` block is exited once the
    consuming test has finished.
    """
    # NOTE(review): an async generator under plain ``@pytest.fixture`` relies
    # on the project's pytest async plugin configuration — confirm.
    async with _test_session_factory() as s:
        yield s
|
||
|
||
|
||
def _cand(
    prob: float,
    outcome: str,
    rr: float,
    qualified: bool = True,
    direction: str = "long",
    risk_pct: float = 0.05,
    hold_days: int = 10,
) -> dict:
    """Build a synthetic resolved candidate dict for bucket-stat tests.

    ``target_hit`` is true only when ``outcome`` equals ``OUTCOME_TARGET_HIT``.
    ``realized_r`` is derived from ``outcome``: ``rr`` on a target hit, ``0.0``
    when the outcome is ``OUTCOME_EXPIRED`` and ``-1.0`` for anything else.
    All remaining arguments are copied into the dict unchanged.
    """
    target_hit = outcome == OUTCOME_TARGET_HIT
    realized = rr if target_hit else (0.0 if outcome == OUTCOME_EXPIRED else -1.0)
    return {
        "primary_prob": prob,
        "outcome": outcome,
        "target_hit": target_hit,
        "rr": rr,
        "realized_r": realized,
        "qualified": qualified,
        "direction": direction,
        "risk_pct": risk_pct,
        "hold_days": hold_days,
    }
|
||
|
||
|
||
# Round-trip cost in R for the default _cand risk_pct: 2 * 0.001 / 0.05 = 0.04R.
_COST_R_005 = 2 * bt.COST_PER_SIDE / 0.05
|
||
|
||
|
||
def _bar(high: float, low: float, close: float, open_: float | None = None) -> SimpleNamespace:
    """Synthetic daily bar. ``open`` defaults to the high so a stop is pierced
    intraday (fill at the stop level); pass an explicit open beyond the stop to
    model a gap through it.

    Returns a ``SimpleNamespace`` with ``high``, ``low``, ``close`` and
    ``open`` attributes. Only ``open_=None`` triggers the default, so an
    explicit ``0.0`` open is kept as given.
    """
    return SimpleNamespace(
        high=high, low=low, close=close, open=open_ if open_ is not None else high
    )
|
||
|
||
|
||
def _signal_test_series(
    extra_return: float = 0.0,
) -> tuple[list[date], list[float], list[float], dict[date, float]]:
    """Build 280 consecutive daily points for a stock and its benchmark.

    The benchmark starts at 100 and compounds a small sinusoidal daily return.
    The stock also starts at 100 and compounds ``1.4`` times that market
    return plus ``extra_return`` per day.

    Returns:
        ``(dates, closes, highs, benchmark_closes)`` where ``highs`` are the
        closes scaled by 1.01 and ``benchmark_closes`` maps each date to the
        benchmark level.
    """
    base = date(2024, 1, 1)
    dates = [base + timedelta(days=i) for i in range(280)]
    benchmark = [100.0]
    closes = [100.0]
    for i in range(1, len(dates)):
        market_ret = 0.0004 + 0.002 * math.sin(i / 9.0)
        benchmark.append(benchmark[-1] * (1.0 + market_ret))
        # Same market beta for both test stocks; only ``extra_return`` is
        # idiosyncratic drift, which residual momentum should keep.
        stock_ret = 1.4 * market_ret + extra_return
        closes.append(closes[-1] * (1.0 + stock_ret))
    highs = [c * 1.01 for c in closes]
    benchmark_closes = dict(zip(dates, benchmark))
    return dates, closes, highs, benchmark_closes
|
||
|
||
|
||
def test_signal_values_emit_residual_momentum_only_with_benchmark():
    """``mom_12_1_resid`` appears only when a benchmark series is passed."""
    dates, closes, highs, benchmark = _signal_test_series(extra_return=0.0008)
    no_benchmark = bt._signal_values(dates, closes, highs, 260)
    with_benchmark = bt._signal_values(dates, closes, highs, 260, benchmark)

    assert "mom_12_1" in no_benchmark
    assert "mom_12_1_resid" not in no_benchmark
    assert "mom_12_1_resid" in with_benchmark
|
||
|
||
|
||
def test_residual_momentum_removes_market_beta_but_keeps_specific_drift():
    """A pure-beta stock scores ~0 residual momentum; added drift scores higher.

    Both series share the same benchmark and market multiple; they differ only
    in the ``extra_return`` passed to ``_signal_test_series``.
    """
    dates, pure_beta, highs, benchmark = _signal_test_series(extra_return=0.0)
    _, drift_stock, drift_highs, _ = _signal_test_series(extra_return=0.0008)

    pure = bt._signal_values(dates, pure_beta, highs, 260, benchmark)
    drift = bt._signal_values(dates, drift_stock, drift_highs, 260, benchmark)

    assert pure["mom_12_1_resid"] == pytest.approx(0.0, abs=0.03)
    assert drift["mom_12_1_resid"] > pure["mom_12_1_resid"] + 0.12
|
||
|
||
|
||
def test_assigns_raw_and_residual_percentiles_independently():
    """Raw and residual percentiles are ranked from their own fields.

    The three same-week candidates have opposite raw and residual orderings,
    so each percentile must follow its own input value.
    """
    cands = [
        {"iso_week": (2026, 1), "momentum": 0.10, "residual_momentum": 0.30},
        {"iso_week": (2026, 1), "momentum": 0.30, "residual_momentum": 0.10},
        {"iso_week": (2026, 1), "momentum": 0.20, "residual_momentum": 0.20},
    ]

    bt._assign_momentum_percentiles(cands)
    bt._assign_residual_momentum_percentiles(cands)

    by_raw = {c["momentum"]: c["momentum_percentile"] for c in cands}
    by_resid = {c["residual_momentum"]: c["residual_momentum_percentile"] for c in cands}
    assert by_raw[0.30] == 100.0
    assert by_raw[0.10] == 0.0
    assert by_resid[0.30] == 100.0
    assert by_resid[0.10] == 0.0
|
||
|
||
|
||
def test_activation_percentile_prefers_residual_with_raw_fallback():
    """The activation percentile uses the residual value, else the raw one.

    The second candidate has ``residual_momentum_percentile`` set to ``None``
    and is expected to fall back to its raw ``momentum_percentile``.
    """
    cands = [
        {"momentum_percentile": 80.0, "residual_momentum_percentile": 95.0},
        {"momentum_percentile": 70.0, "residual_momentum_percentile": None},
    ]

    bt._assign_activation_momentum_percentiles(cands)

    assert cands[0][bt.PRODUCTION_PERCENTILE_KEY] == 95.0
    assert cands[1][bt.PRODUCTION_PERCENTILE_KEY] == 70.0
|
||
|
||
|
||
def test_strategy_variants_keep_only_current_research_candidates():
    """``bt.STRATEGY_VARIANTS`` lists the expected variants and no others named here.

    Checks that six named variants are absent, that the production and legacy
    variants point at the expected percentile keys, that the 15-position
    variant carries ``max_positions == 15`` and that no variant sets
    ``risk_scale``.
    """
    variants = {cfg["variant"]: cfg for cfg in bt.STRATEGY_VARIANTS}

    assert "production_raw_80_fixed10" not in variants
    assert "raw_80_regime_scaled" not in variants
    assert "residual_80_regime_scaled" not in variants
    assert "residual_90_fixed10" not in variants
    assert "raw_90_fixed15" not in variants
    assert "residual_80_fixed20" not in variants
    assert variants["production_residual_80_fixed10"]["percentile_key"] == bt.PRODUCTION_PERCENTILE_KEY
    assert variants["legacy_raw_80_fixed10"]["percentile_key"] == bt.RAW_PERCENTILE_KEY
    assert variants["residual_80_fixed15"]["max_positions"] == 15
    assert all(cfg["risk_scale"] is None for cfg in bt.STRATEGY_VARIANTS)
|
||
|
||
|
||
def test_strategy_variant_sims_emit_fixed_variants_without_mutating_qualified(monkeypatch):
    """``_strategy_variant_sims`` returns one row per variant and leaves input intact.

    ``bt._simulate_portfolio`` is replaced by a stub that records its
    arguments and returns a fixed result, so only the per-variant call wiring
    is checked. The candidate's ``qualified`` flag must still be ``False``
    after the call.
    """
    cands = [{
        "qualified": False,
        "meets_core": True,
        "direction": "long",
        "momentum_percentile": 90.0,
        "residual_momentum_percentile": 91.0,
        "activation_momentum_percentile": 91.0,
    }]
    calls = []

    def fake_sim(candidates, prices, spy_closes, exit_policy, hold_days, **kwargs):
        # Record how each variant invokes the simulator, then hand back a
        # canned simulation result.
        calls.append({"exit_policy": exit_policy, "hold_days": hold_days, **kwargs})
        return {
            "starting_capital": bt.SIM_STARTING_CAPITAL,
            "final_equity": 11_000.0,
            "total_return_pct": 10.0,
            "cagr_pct": 9.0,
            "max_drawdown_pct": 5.0,
            "sharpe": 1.1,
            "trades": 1,
            "win_rate": 100.0,
            "avg_trade_pnl": 100.0,
            "best_trade_r": 1.0,
            "worst_trade_r": 1.0,
            "best_trade_pnl": 100.0,
            "worst_trade_pnl": 100.0,
            "avg_hold_days": 30.0,
            "skipped_book_full": 0,
            "spy_return_pct": 1.0,
            "yearly_returns": [],
            "start_date": "2026-01-01",
            "end_date": "2026-02-01",
        }

    monkeypatch.setattr(bt, "_simulate_portfolio", fake_sim)
    rows = bt._strategy_variant_sims(cands, {}, {}, 30)

    assert [r["variant"] for r in rows] == [cfg["variant"] for cfg in bt.STRATEGY_VARIANTS]
    assert all(call["exit_policy"] == "hold" for call in calls)
    assert any(call["ranking_key"] == bt.PRODUCTION_PERCENTILE_KEY for call in calls)
    assert any(call["ranking_key"] == bt.RAW_PERCENTILE_KEY for call in calls)
    assert any(call["max_positions"] == 15 for call in calls)
    assert cands[0]["qualified"] is False
|
||
|
||
|
||
def test_build_research_recommendation_applies_promotion_rules():
    """Neither the 15-slot nor the 90-cutoff variant is flagged as a candidate.

    The report holds three variant rows; the capacity and cutoff variants both
    have a lower Sharpe than the base row, and the test expects
    ``candidate is False`` for both topics plus specific wording in the text.
    """
    report = {
        "strategy_variants": {"variants": [
            {"variant": "production_residual_80_fixed10", "label": "Base", "sharpe": 1.40,
             "max_drawdown_pct": 20.0, "cagr_pct": 32.0, "skipped_book_full": 7},
            {"variant": "residual_80_fixed15", "label": "Capacity", "sharpe": 1.39,
             "max_drawdown_pct": 20.0, "cagr_pct": 32.0, "skipped_book_full": 0},
            {"variant": "raw_90_fixed10", "label": "Cutoff 90", "sharpe": 1.25,
             "max_drawdown_pct": 19.0, "cagr_pct": 28.0},
        ]},
    }

    rec = bt._build_research_recommendation(report)
    by_topic = {item["topic"]: item for item in rec["items"]}

    assert by_topic["capacity_15"]["candidate"] is False
    assert "not needed yet" in by_topic["capacity_15"]["text"]
    assert by_topic["cutoff_90"]["candidate"] is False
    assert "Cutoff 90" in by_topic["cutoff_90"]["text"]
|
||
|
||
|
||
class TestStopFillR:
    """Realized R returned by ``bt._stop_fill_r`` for intraday and gap stops."""

    def test_intraday_fill_at_stop(self):
        """Bar opens at its high (101) and trades through the 95 stop → −1R."""
        assert bt._stop_fill_r("long", 100.0, 95.0, _bar(101, 94, 96)) == pytest.approx(-1.0)

    def test_gap_fill_at_open(self):
        # Opens at 92, below the 95 stop → filled at the open, worse than −1R.
        assert bt._stop_fill_r("long", 100.0, 95.0, _bar(93, 90, 91, open_=92)) == pytest.approx(-1.6)

    def test_short_gap_fill_at_open(self):
        # Short stop 105; opens at 107 above it → fill 107.
        assert bt._stop_fill_r("short", 100.0, 105.0, _bar(110, 104, 108, open_=107)) == pytest.approx(-1.4)
|
||
|
||
|
||
class TestRiskAndStopDay:
    """``bt._risk_and_stop_day`` returns ``(risk, stop_day)`` for a bar list."""

    def test_no_stop(self):
        """Low of 101 never reaches the 95 stop, so ``stop_day`` is ``None``."""
        risk, stop_day = bt._risk_and_stop_day("long", 100.0, 95.0, [_bar(109, 101, 108)], 30)
        assert risk == pytest.approx(0.05)
        assert stop_day is None

    def test_stop_day_is_one_based(self):
        """Stop pierced on the second bar is reported as day 2."""
        bars = [_bar(102, 99, 101), _bar(101, 94, 96)]
        risk, stop_day = bt._risk_and_stop_day("long", 100.0, 95.0, bars, 30)
        assert risk == pytest.approx(0.05)
        assert stop_day == 2

    def test_short_direction(self):
        """Short with stop 105: a 106 high on the first bar stops it on day 1."""
        _, stop_day = bt._risk_and_stop_day("short", 100.0, 105.0, [_bar(106, 101, 104)], 30)
        assert stop_day == 1
|
||
|
||
|
||
class TestTimeExits:
    """``bt._time_exits`` maps each requested hold length to a realized R."""

    def test_long_exits_at_horizon_close(self):
        """Each hold exits at its horizon close, or the last close if bars run out."""
        bars = [_bar(103, 99, 102), _bar(105, 101, 104), _bar(107, 103, 106)]
        res = bt._time_exits("long", 100.0, 95.0, bars, (2, 5))
        assert res[2] == pytest.approx(0.8)  # close 104 → +4% / 5% risk
        assert res[5] == pytest.approx(1.2)  # only 3 bars → last close 106

    def test_stop_on_first_bar_loses_everywhere(self):
        """A stop on the first bar yields −1R for every hold length."""
        res = bt._time_exits("long", 100.0, 95.0, [_bar(101, 94, 96), _bar(105, 101, 104)], (1, 5))
        assert res[1] == pytest.approx(-1.0)
        assert res[5] == pytest.approx(-1.0)

    def test_stop_after_short_horizon_only_hits_long_hold(self):
        # Day-2 close banked by the 2-day hold; the stop on day 3 only hits n=5.
        bars = [_bar(103, 99, 102), _bar(104, 100, 103), _bar(101, 94, 95)]
        res = bt._time_exits("long", 100.0, 95.0, bars, (2, 5))
        assert res[2] == pytest.approx(0.6)  # close 103 → +3% / 5% risk
        assert res[5] == pytest.approx(-1.0)

    def test_short_direction(self):
        """A short gains when the close falls below the entry."""
        res = bt._time_exits("short", 100.0, 105.0, [_bar(101, 95, 96)], (1,))
        assert res[1] == pytest.approx(0.8)  # close 96 → +4% / 5% risk

    def test_zero_risk_returns_zero(self):
        """Entry equal to stop (zero risk distance) is expected to give 0.0."""
        res = bt._time_exits("long", 100.0, 100.0, [_bar(103, 99, 102)], (5,))
        assert res[5] == 0.0

    def test_gap_through_stop_fills_at_open(self):
        """An open at 92 below the 95 stop is filled at the open → −1.6R."""
        res = bt._time_exits("long", 100.0, 95.0, [_bar(93, 90, 91, open_=92)], (5,))
        assert res[5] == pytest.approx(-1.6)
|
||
|
||
|
||
class TestTimeExitBucket:
    """Aggregate statistics from ``bt._time_exit_bucket`` for one hold length."""

    def test_bucket(self):
        """Three candidates at the 5-day hold: counts, averages and robustness."""
        cands = [
            {"time_r": {5: 1.4, 21: 0.8}, "risk_pct": 0.10},
            {"time_r": {5: -1.0, 21: -1.0}, "risk_pct": 0.10},
            {"time_r": {5: 0.5, 21: 0.5}, "risk_pct": 0.10},
        ]
        b = bt._time_exit_bucket(cands, 5)
        assert b["hold_days"] == 5
        assert b["total"] == 3
        assert b["wins"] == 2
        assert b["win_rate"] == pytest.approx(66.7, abs=0.1)
        assert b["avg_r"] == pytest.approx(0.3, abs=0.01)
        assert b["net_avg_r"] == pytest.approx(0.28, abs=0.01)
        assert b["best_r"] == pytest.approx(1.4)
        assert b["worst_r"] == pytest.approx(-1.0)
        # No stop_day on any candidate → every hold runs the full 5 days.
        assert b["avg_hold_days"] == 5.0
        assert b["net_r_per_day"] == pytest.approx(0.28 / 5.0, abs=0.001)
        # robustness on net rs [1.38, -1.02, 0.48]
        assert b["median_net_r"] == pytest.approx(0.48, abs=0.001)
        assert b["profit_factor"] == pytest.approx(1.86 / 1.02, abs=0.01)
        assert b["net_avg_r_ex_top5"] == pytest.approx((0.48 - 1.02) / 2, abs=0.001)

    def test_missing_hold_skipped(self):
        """A candidate with no entry for the requested hold is not counted."""
        b = bt._time_exit_bucket([{"time_r": {5: 1.0}}], 21)
        assert b["total"] == 0
        assert b["avg_r"] is None
|
||
|
||
|
||
def _acand(
    rr: float = 2.0,
    conf: float = 60.0,
    action: str = "LONG_MODERATE",
    mp: float | None = 90.0,
    direction: str = "long",
) -> dict:
    """Ablation candidate: meets_core mirrors the default floors (min_rr 1.2,
    min_confidence 55, exclude_neutral on).

    The action's direction is taken from its ``LONG``/``SHORT`` prefix (any
    other action counts as neutral) and must equal ``direction`` for
    ``meets_core`` to be true. ``mp`` fills both ``momentum_percentile`` and
    ``activation_momentum_percentile``. Every candidate is a target hit with
    ``realized_r == rr`` and a ``time_r`` of 0.5 for each day in
    ``bt.TIME_EXIT_DAYS``.
    """
    action_dir = "long" if action.startswith("LONG") else "short" if action.startswith("SHORT") else "neutral"
    meets = rr >= 1.2 and conf >= 55.0 and action_dir != "neutral" and action_dir == direction
    return {
        "rr": rr,
        "confidence": conf,
        "action": action,
        "momentum_percentile": mp,
        "activation_momentum_percentile": mp,
        "direction": direction,
        "meets_core": meets,
        "risk_level": "Low",
        "target_hit": True,
        "outcome": OUTCOME_TARGET_HIT,
        "realized_r": rr,
        "risk_pct": 0.05,
        "time_r": {d: 0.5 for d in bt.TIME_EXIT_DAYS},
    }
|
||
|
||
|
||
class TestGateAblation:
    """Per-variant row counts and hold grading from ``bt._gate_ablation``."""

    # Activation settings passed to ``_gate_ablation``; the floors match the
    # ones ``_acand`` uses to compute ``meets_core``.
    ACTIVATION = {
        "min_rr": 1.2,
        "min_confidence": 55.0,
        "exclude_neutral": True,
        "require_high_conviction": False,
        "exclude_conflicts": False,
    }

    def test_variant_counts(self):
        """Each ablation variant admits exactly the candidates its name implies."""
        cands = [
            _acand(),  # clears everything
            _acand(conf=40.0),  # fails confidence floor
            _acand(rr=1.0),  # fails R:R floor
            _acand(action="NEUTRAL"),  # fails NEUTRAL exclusion
            _acand(mp=50.0),  # fails the momentum cutoff
            _acand(direction="short", action="SHORT_MODERATE", mp=95.0),  # short — gated out
        ]
        rows = {r["variant"]: r for r in bt._gate_ablation(cands, self.ACTIVATION, 80.0)}
        assert rows["all_floors"]["total"] == 1
        assert rows["no_confidence_floor"]["total"] == 2
        assert rows["no_rr_floor"]["total"] == 2
        assert rows["no_neutral_exclusion"]["total"] == 2
        assert rows["momentum_only"]["total"] == 4
        assert rows["all_floors"]["net_avg_r"] is not None
        # Every variant is also graded under the hold-to-horizon exit.
        assert rows["all_floors"]["hold_days"] == max(bt.TIME_EXIT_DAYS)
        assert rows["all_floors"]["hold_avg_r"] == pytest.approx(0.5)
        assert rows["all_floors"]["hold_net_avg_r"] is not None
        assert rows["momentum_only"]["hold_total_r"] == pytest.approx(4 * 0.5, abs=0.01)

    def test_threshold_zero_disables_momentum_gate(self):
        # Floors only: the short and the low-momentum long both pass all_floors.
        cands = [_acand(mp=50.0), _acand(direction="short", action="SHORT_MODERATE", mp=None)]
        rows = {r["variant"]: r for r in bt._gate_ablation(cands, self.ACTIVATION, 0.0)}
        assert rows["all_floors"]["total"] == 2
|
||
|
||
|
||
def _sim_prices(start_ord: int, closes: list[float]) -> tuple:
    """Column arrays for consecutive daily bars: open = close (no gaps),
    high/low = close ± 1.

    Returns a 6-tuple of equal-length lists: consecutive ordinals starting at
    ``start_ord``, a copy of ``closes``, ``closes + 1``, ``closes - 1``, a
    second copy of ``closes`` and a constant ``1_000_000`` per bar.
    """
    ords = list(range(start_ord, start_ord + len(closes)))
    return (
        ords,
        list(closes),
        [c + 1.0 for c in closes],
        [c - 1.0 for c in closes],
        list(closes),
        [1_000_000] * len(closes),
    )
|
||
|
||
|
||
def _sim_cand(
    sym: str, day_ord: int, entry: float, stop: float, target: float, mp: float = 90.0
) -> dict:
    """Build a qualified long candidate dict for the portfolio simulator tests.

    ``day_ord`` is a proleptic Gregorian ordinal and is stored as an ISO date
    string under ``"date"``. ``mp`` fills both ``momentum_percentile`` and
    ``activation_momentum_percentile``.
    """
    return {
        "qualified": True,
        "direction": "long",
        "symbol": sym,
        "date": date.fromordinal(day_ord).isoformat(),
        "entry": entry,
        "stop": stop,
        "target": target,
        "momentum_percentile": mp,
        "activation_momentum_percentile": mp,
    }
|
||
|
||
|
||
class TestSimulatePortfolio:
    """``bt._simulate_portfolio`` under the ``hold`` and ``target`` exit policies."""

    # Ordinal of the first simulated bar (2025-01-06).
    ORD = date(2025, 1, 6).toordinal()

    def test_hold_policy_accounting(self):
        """One long held for three days: equity, costs and summary fields."""
        closes = [100.0, 102.0, 104.0, 106.0, 108.0, 110.0]
        prices = {"AAA": _sim_prices(self.ORD, closes)}
        cand = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=130.0)
        sim = bt._simulate_portfolio([cand], prices, None, "hold", 3)
        assert sim is not None
        assert sim["trades"] == 1
        # 20 shares (1% risk / $5 stop distance), exit at the day-3 close 106:
        # pnl = 2120 − 2000 − 2.00 entry cost − 2.12 exit cost = 115.88
        assert sim["final_equity"] == pytest.approx(10_115.88, abs=0.01)
        assert sim["win_rate"] == 100.0
        assert sim["best_trade_r"] == pytest.approx(1.2)
        assert sim["avg_hold_days"] == 3.0
        assert sim["max_drawdown_pct"] == 0.0
        assert sim["cagr_pct"] is None  # window far too short to annualize
        assert sim["spy_return_pct"] is None
        assert sim["yearly_returns"] == [
            {"year": 2025, "return_pct": pytest.approx(1.2, abs=0.05)}
        ]

    def test_target_policy_exits_at_target(self):
        """Under the ``target`` policy the trade is closed at the 105 target."""
        closes = [100.0, 102.0, 104.0, 106.0, 108.0, 110.0]
        prices = {"AAA": _sim_prices(self.ORD, closes)}
        cand = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=105.0)
        sim = bt._simulate_portfolio([cand], prices, None, "target", 30)
        assert sim is not None
        assert sim["trades"] == 1
        assert sim["best_trade_r"] == pytest.approx(1.0)  # filled exactly at 105

    def test_stop_gap_fills_at_open(self):
        # Day-1 bar gaps to a 90 open, below the 95 stop → fill at the open.
        ords = list(range(self.ORD, self.ORD + 2))
        prices = {"AAA": (ords, [100.0, 90.0], [101.0, 92.0], [99.0, 88.0], [100.0, 91.0], [1, 1])}
        cand = _sim_cand("AAA", self.ORD, entry=100.0, stop=95.0, target=120.0)
        sim = bt._simulate_portfolio([cand], prices, None, "hold", 30)
        assert sim is not None
        assert sim["trades"] == 1
        assert sim["worst_trade_r"] == pytest.approx(-2.0)  # (90 − 100) / 5

    def test_nothing_qualified_returns_none(self):
        """With no candidates and no prices the simulator returns ``None``."""
        assert bt._simulate_portfolio([], {}, None, "hold", 30) is None
|
||
|
||
|
||
def test_bucket_stats_counts_and_expectancy():
    """``_bucket_stats`` over two wins, one stop and one expiry.

    Covers counts, hit rate, gross and net R, hold-day averages and the
    robustness fields (median, profit factor, ex-top-5% mean).
    """
    cands = [
        _cand(70, OUTCOME_TARGET_HIT, 3.0),  # +3R win
        _cand(60, OUTCOME_TARGET_HIT, 2.0),  # +2R win
        _cand(40, OUTCOME_STOP_HIT, 3.0),  # -1R loss
        _cand(30, OUTCOME_EXPIRED, 3.0),  # 0R expired
    ]
    s = bt._bucket_stats(cands)
    assert s["total"] == 4
    assert s["wins"] == 2
    assert s["losses"] == 1
    assert s["expired"] == 1
    # hit rate is over decided (wins+losses) only
    assert s["hit_rate"] == round(2 / 3 * 100, 1)
    # avg R = (3 + 2 - 1 + 0) / 4 = 1.0
    assert s["avg_r"] == 1.0
    assert s["total_r"] == 4.0
    # net = gross minus a 0.04R round trip per candidate (risk_pct 0.05)
    assert s["net_avg_r"] == pytest.approx(1.0 - _COST_R_005, abs=0.001)
    assert s["net_total_r"] == pytest.approx(4.0 - 4 * _COST_R_005, abs=0.01)
    assert s["best_r"] == 3.0
    assert s["worst_r"] == -1.0
    assert s["avg_hold_days"] == 10.0
    assert s["net_r_per_day"] == pytest.approx((1.0 - _COST_R_005) / 10.0, abs=0.001)
    # robustness: net rs are [2.96, 1.96, -1.04, -0.04]
    assert s["median_net_r"] == pytest.approx(0.96, abs=0.001)
    assert s["profit_factor"] == pytest.approx(4.92 / 1.08, abs=0.01)
    # ex-top-5%: ceil(4 * 0.05) = 1 winner trimmed → mean of the remaining three
    assert s["net_avg_r_ex_top5"] == pytest.approx((1.96 - 1.04 - 0.04) / 3, abs=0.001)
|
||
|
||
|
||
def test_bucket_stats_empty():
    """An empty candidate list gives a zero total and ``None`` rate/averages."""
    s = bt._bucket_stats([])
    assert s["total"] == 0
    assert s["hit_rate"] is None
    assert s["avg_r"] is None
    assert s["net_avg_r"] is None
|
||
|
||
|
||
def test_bucket_stats_no_risk_pct_means_no_cost():
    """Without a ``risk_pct`` key the net figures equal the gross figures."""
    c = _cand(50, OUTCOME_TARGET_HIT, 2.0)
    del c["risk_pct"]
    s = bt._bucket_stats([c])
    assert s["net_avg_r"] == s["avg_r"]
    assert s["net_total_r"] == s["total_r"]
|
||
|
||
|
||
def test_build_recommendation_reads_the_report():
    """``_build_recommendation`` turns a full report into per-topic texts.

    The hand-built report favours the 30-day hold, shows the confidence floor
    adding almost nothing, and shows the R:R floor and NEUTRAL exclusion both
    helping. The test checks the headline and the wording of the exit, gate,
    cutoff, benchmark and robustness items.
    """
    report = {
        "overall_qualified": {"net_avg_r": 0.13, "net_avg_r_ex_top5": 0.05},
        "time_exit_sweep": [
            {"hold_days": 21, "net_avg_r": 0.38},
            {"hold_days": 30, "net_avg_r": 0.50, "net_avg_r_ex_top5": 0.21},
        ],
        "gate_ablation": [
            {"variant": "all_floors", "total": 100, "hold_net_avg_r": 0.50},
            {"variant": "no_confidence_floor", "total": 130, "hold_net_avg_r": 0.49},
            {"variant": "no_rr_floor", "total": 400, "hold_net_avg_r": 0.34},
            {"variant": "no_neutral_exclusion", "total": 120, "hold_net_avg_r": 0.46},
        ],
        "sweep": [
            {"min_momentum_percentile": 80.0, "net_avg_r": 0.13, "total": 100},
            {"min_momentum_percentile": 60.0, "net_avg_r": 0.05, "total": 300},
            {"min_momentum_percentile": 0.0, "net_avg_r": -0.12, "total": 1000},
        ],
        "portfolio_sim": {"policies": [
            {"policy": "target", "cagr_pct": 23.7, "total_return_pct": 134.8,
             "spy_return_pct": 95.9, "max_drawdown_pct": 20.7},
            {"policy": "hold", "cagr_pct": 31.9, "total_return_pct": 203.6,
             "spy_return_pct": 95.9, "max_drawdown_pct": 21.2},
        ]},
    }
    rec = bt._build_recommendation(report)
    by_topic: dict[str, list[str]] = {}
    for item in rec["items"]:
        by_topic.setdefault(item["topic"], []).append(item["text"])

    assert rec["headline"] is not None and "hold 30" in rec["headline"]
    assert any("hold 30 trading days" in t for t in by_topic["exit"])
    gate_texts = " | ".join(by_topic["gate"])
    assert "confidence floor adds nothing" in gate_texts
    assert "keep the R:R floor" in gate_texts
    assert "keep the NEUTRAL exclusion" in gate_texts
    assert "80" in by_topic["cutoff"][0]
    assert "beats" in by_topic["benchmark"][0]
    # robustness is judged under the RECOMMENDED exit (the 30d hold), not the
    # target model the recommendation advises abandoning
    assert any(
        "not a handful of outliers" in t and "under the recommended 30d hold" in t
        for t in by_topic["robustness"]
    )
|
||
|
||
|
||
def test_build_recommendation_flags_outlier_dependence():
    """A negative ex-top-5% net average produces a robustness ``WARNING``."""
    rec = bt._build_recommendation({
        "overall_qualified": {"net_avg_r": 0.13, "net_avg_r_ex_top5": -0.02},
    })
    robustness = [i["text"] for i in rec["items"] if i["topic"] == "robustness"]
    assert robustness and "WARNING" in robustness[0]
|
||
|
||
|
||
def test_window_setups_too_short_returns_empty():
    """``_window_setups`` returns an empty list when given no bars."""
    assert bt._window_setups([], {}, {}) == []
|
||
|
||
|
||
def test_replay_ticker_candidates_carry_gate_fields():
    """The ablation recomputes floors from candidate fields — a candidate missing
    action/risk_level silently zeroes the ablation rows (July 2026 regression)."""
    from app.services.admin_service import ACTIVATION_DEFAULTS
    from app.services.recommendation_service import DEFAULT_RECOMMENDATION_CONFIG

    # 160 daily bars oscillating around 100 on a sine wave, fed to the replay.
    base = date(2025, 1, 1)
    bars = []
    for i in range(160):
        close = 100.0 + 8.0 * math.sin(i / 6.0)
        bars.append(SimpleNamespace(
            date=base + timedelta(days=i),
            open=close,
            high=close + 1.5,
            low=close - 1.5,
            close=close,
            volume=1_000_000 + (i % 5) * 1000,
        ))
    cands = bt._replay_ticker(
        "OSC", bars, dict(DEFAULT_RECOMMENDATION_CONFIG), dict(ACTIVATION_DEFAULTS)
    )
    assert cands, "expected the oscillating series to produce candidates"
    for c in cands:
        assert c.get("action") is not None
        assert "risk_level" in c
|
||
|
||
|
||
async def _seed_oscillating_ticker(session, symbol: str, n: int = 160) -> None:
    """Insert one ticker and ``n`` daily OHLCV rows oscillating around 100.

    The ticker is flushed first so its ``id`` is available for the OHLCV rows.
    Closes follow ``100 + 8 * sin(i / 6)`` starting on 2025-01-01, with
    open = close and high/low = close ± 1.5. Everything is committed at the
    end.
    """
    t = Ticker(symbol=symbol)
    session.add(t)
    await session.flush()
    base = date(2025, 1, 1)
    for i in range(n):
        close = 100.0 + 8.0 * math.sin(i / 6.0)
        session.add(OHLCVRecord(
            ticker_id=t.id,
            date=base + timedelta(days=i),
            open=close,
            high=close + 1.5,
            low=close - 1.5,
            close=close,
            volume=1_000_000 + (i % 5) * 1000,
        ))
    await session.commit()
|
||
|
||
|
||
async def test_run_backtest_smoke(session):
    """End-to-end smoke test of ``bt.run_backtest`` on one seeded ticker.

    Seeds the oscillating "OSC" ticker, runs the backtest and checks the
    report's shape: the expected top-level sections, the reported cost
    assumption, ablation/qualified consistency, the time-exit sweep lengths,
    the portfolio-sim parameters and the monotonic momentum sweep.
    """
    await _seed_oscillating_ticker(session, "OSC")
    report = await bt.run_backtest(session)

    # well-formed report
    assert report["tickers"] == 1
    assert isinstance(report["candidates"], int)
    for key in (
        "overall_qualified", "overall_all", "by_direction", "sweep",
        "gate_ablation", "time_exit_sweep", "portfolio_sim", "strategy_variants",
        "recommendation", "research_recommendation",
    ):
        assert key in report
    # the oscillating series should yield at least some resolved setups
    assert report["candidates"] >= 1

    # cost assumption is reported, and every bucket carries net numbers
    assert report["params"]["cost_per_side_pct"] == pytest.approx(bt.COST_PER_SIDE * 100)
    assert "net_avg_r" in report["overall_all"]

    # ablation baseline reproduces the qualified set exactly, and every row
    # carries the hold-to-horizon grading alongside the target model
    ablation = {r["variant"]: r for r in report["gate_ablation"]}
    assert ablation["all_floors"]["total"] == report["overall_qualified"]["total"]
    for row in report["gate_ablation"]:
        assert "hold_net_avg_r" in row

    # time-exit sweep covers the configured hold lengths
    assert [r["hold_days"] for r in report["time_exit_sweep"]] == list(bt.TIME_EXIT_DAYS)

    # portfolio simulation section is always present (policies may be empty
    # when nothing qualifies)
    assert "portfolio_sim" in report
    assert isinstance(report["portfolio_sim"]["policies"], list)
    assert report["portfolio_sim"]["params"]["max_positions"] == bt.SIM_MAX_POSITIONS
    assert isinstance(report["strategy_variants"]["variants"], list)

    # sweep: lowering the momentum-percentile cutoff can only add qualifiers
    sweep = sorted(report["sweep"], key=lambda r: r["min_momentum_percentile"], reverse=True)
    counts = [r["total"] for r in sweep]
    assert counts == sorted(counts)  # ascending as threshold descends
|