# signal-platform/tests/unit/test_backtest_service.py

"""Tests for the historical backtest harness."""

from __future__ import annotations

import math
from datetime import date, timedelta
from types import SimpleNamespace

import pytest

from app.models.ohlcv import OHLCVRecord
from app.models.ticker import Ticker
from app.services import backtest_service as bt
from app.services.outcome_service import (
    OUTCOME_EXPIRED,
    OUTCOME_STOP_HIT,
    OUTCOME_TARGET_HIT,
)
from tests.conftest import _test_session_factory  # type: ignore


@pytest.fixture
async def session():
    """Yield a session opened from the shared test session factory.

    The factory's async context manager is entered before the test body runs
    and exited once the test using this fixture has finished.
    """
    async with _test_session_factory() as db_session:
        yield db_session


def _cand(prob: float, outcome: str, rr: float, qualified: bool = True, direction: str = "long") -> dict:
    """Build a candidate dict in the shape the bucket/calibration helpers are fed.

    ``realized_r`` is derived from ``outcome``: ``rr`` when the target was
    hit, ``0.0`` when the setup expired, and ``-1.0`` for anything else.
    """
    hit = outcome == OUTCOME_TARGET_HIT
    if hit:
        realized_r = rr
    elif outcome == OUTCOME_EXPIRED:
        realized_r = 0.0
    else:
        realized_r = -1.0

    return dict(
        primary_prob=prob,
        outcome=outcome,
        target_hit=hit,
        rr=rr,
        realized_r=realized_r,
        qualified=qualified,
        direction=direction,
    )


def _bar(high: float, low: float, close: float) -> SimpleNamespace:
    return SimpleNamespace(high=high, low=low, close=close)


class TestTakeProfitPrimitives:
    """Checks on ``bt._tp_primitives`` for long and short setups.

    Every call passes a direction, an entry price, a stop price, a list of
    bars and the value 30 (presumably a horizon — confirm against
    ``_tp_primitives``), and unpacks a 4-tuple of
    ``(risk, stopped, mfe, close_pct)``.
    """

    ENTRY = 100.0
    LONG_STOP = 95.0
    SHORT_STOP = 105.0

    def test_long_tp_reachable_before_stop(self):
        # A single bar that runs up to 109 and closes at 108, never touching the stop.
        bars = [_bar(109, 101, 108)]
        risk_pct, was_stopped, best_move, final_move = bt._tp_primitives(
            "long", self.ENTRY, self.LONG_STOP, bars, 30
        )
        assert risk_pct == pytest.approx(0.05)
        assert was_stopped is False
        assert best_move == pytest.approx(0.09)
        assert final_move == pytest.approx(0.08)

    def test_long_stop_zeroes_mfe(self):
        # The only bar's low (94) goes through the 95 stop, so the trade is a
        # loss and no favourable excursion is credited ahead of it.
        bars = [_bar(101, 94, 96)]
        _risk, was_stopped, best_move, final_move = bt._tp_primitives(
            "long", self.ENTRY, self.LONG_STOP, bars, 30
        )
        assert was_stopped is True
        assert best_move == pytest.approx(0.0)
        assert final_move == pytest.approx(-0.04)

    def test_long_drift_no_trigger(self):
        # Two quiet bars: the stop is never reached, highest high is 103, last close 102.
        drifting = [_bar(102, 99, 101), _bar(103, 100, 102)]
        _risk, was_stopped, best_move, final_move = bt._tp_primitives(
            "long", self.ENTRY, self.LONG_STOP, drifting, 30
        )
        assert was_stopped is False
        assert best_move == pytest.approx(0.03)
        assert final_move == pytest.approx(0.02)

    def test_short_direction(self):
        # Short from 100 with the stop at 105; a falling price is the favourable
        # side, i.e. favourable = (entry - low) / entry.
        bars = [_bar(101, 92, 93)]
        risk_pct, was_stopped, best_move, final_move = bt._tp_primitives(
            "short", self.ENTRY, self.SHORT_STOP, bars, 30
        )
        assert risk_pct == pytest.approx(0.05)
        assert was_stopped is False
        assert best_move == pytest.approx(0.08)
        assert final_move == pytest.approx(0.07)


class TestTakeProfitBucket:
    """Checks on ``bt._take_profit_bucket`` called with 0.08 as its second argument."""

    def test_bucket_mix(self):
        # One candidate of each flavour, all risking 5%.
        winner = dict(risk_pct=0.05, mfe_pct=0.09, tp_stopped=False, tp_close_pct=0.08)  # +1.6R win
        stopped_out = dict(risk_pct=0.05, mfe_pct=0.02, tp_stopped=True, tp_close_pct=-0.04)  # -1R stop
        timed_out = dict(risk_pct=0.05, mfe_pct=0.03, tp_stopped=False, tp_close_pct=0.01)  # +0.2R timeout

        bucket = bt._take_profit_bucket([winner, stopped_out, timed_out], 0.08)

        assert bucket["total"] == 3
        assert bucket["wins"] == 1
        assert bucket["hit_rate"] == pytest.approx(33.3, abs=0.1)
        # 1.6 - 1.0 + 0.2 = 0.8R in total, about 0.267R per candidate.
        assert bucket["total_r"] == pytest.approx(0.8, abs=0.01)
        assert bucket["avg_r"] == pytest.approx(0.267, abs=0.01)

    def test_zero_risk_skipped(self):
        # A candidate with zero risk is expected to be left out of the bucket entirely.
        zero_risk = dict(risk_pct=0.0, mfe_pct=0.2, tp_stopped=False, tp_close_pct=0.1)

        bucket = bt._take_profit_bucket([zero_risk], 0.08)

        assert bucket["total"] == 0
        assert bucket["avg_r"] is None


def test_bucket_stats_counts_and_expectancy():
    """``_bucket_stats`` tallies outcomes and derives hit rate and R figures.

    Float-valued fields are compared with ``pytest.approx`` (default
    tolerance), matching how the other float assertions in this module are
    written, so harmless floating-point noise cannot fail the test while the
    expected one-decimal rounding of the hit rate is still enforced.
    """
    cands = [
        _cand(70, OUTCOME_TARGET_HIT, 3.0),   # +3R win
        _cand(60, OUTCOME_TARGET_HIT, 2.0),   # +2R win
        _cand(40, OUTCOME_STOP_HIT, 3.0),     # -1R loss
        _cand(30, OUTCOME_EXPIRED, 3.0),      # 0R expired
    ]
    s = bt._bucket_stats(cands)

    # Integer tallies are compared exactly.
    assert s["total"] == 4
    assert s["wins"] == 2
    assert s["losses"] == 1
    assert s["expired"] == 1

    # hit rate is over decided (wins+losses) only
    assert s["hit_rate"] == pytest.approx(round(2 / 3 * 100, 1))
    # avg R = (3 + 2 - 1 + 0) / 4 = 1.0
    assert s["avg_r"] == pytest.approx(1.0)
    assert s["total_r"] == pytest.approx(4.0)


def test_bucket_stats_empty():
    """An empty candidate list gives a zero total and ``None`` for both derived rates."""
    stats = bt._bucket_stats([])
    assert stats["total"] == 0
    # With nothing to average over, the derived figures are reported as None.
    assert stats["hit_rate"] is None
    assert stats["avg_r"] is None


def test_calibration_buckets():
    """``_calibration`` groups candidates into probability buckets with realized hit rates.

    Probabilities 65 and 62 are expected in the ``"60-80%"`` bucket and 15 in
    the ``"0-20%"`` bucket. Hit rates are floats, so they are compared with
    ``pytest.approx`` like the other float assertions in this module.
    """
    cands = [
        _cand(65, OUTCOME_TARGET_HIT, 2.0),
        _cand(62, OUTCOME_STOP_HIT, 2.0),
        _cand(15, OUTCOME_STOP_HIT, 2.0),
    ]
    rows = bt._calibration(cands)
    # Index rows by bucket label so each bucket can be checked independently.
    by_bucket = {r["bucket"]: r for r in rows}

    assert by_bucket["60-80%"]["n"] == 2
    assert by_bucket["60-80%"]["realized_hit_rate"] == pytest.approx(50.0)  # 1 of 2 hit
    assert by_bucket["0-20%"]["n"] == 1
    assert by_bucket["0-20%"]["realized_hit_rate"] == pytest.approx(0.0)


def test_window_setups_too_short_returns_empty():
    """Given an empty list and two empty dicts, ``_window_setups`` returns an empty list."""
    setups = bt._window_setups([], {}, {})
    assert setups == []


async def _seed_oscillating_ticker(session, symbol: str, n: int = 160) -> None:
    """Insert one ticker plus ``n`` daily OHLCV rows tracing a sine wave, then commit.

    Row ``i`` is dated ``2025-01-01 + i`` days and has
    ``open == close == 100 + 8 * sin(i / 6)``, with high/low 1.5 either side
    of the close and a volume that cycles through five values.
    """
    ticker = Ticker(symbol=symbol)
    session.add(ticker)
    # Flush first so ``ticker.id`` can be read below — presumably assigned by
    # the database on flush; confirm against the model.
    await session.flush()

    first_day = date(2025, 1, 1)
    for offset in range(n):
        price = 100.0 + 8.0 * math.sin(offset / 6.0)
        row = OHLCVRecord(
            ticker_id=ticker.id,
            date=first_day + timedelta(days=offset),
            open=price,
            high=price + 1.5,
            low=price - 1.5,
            close=price,
            volume=1_000_000 + (offset % 5) * 1000,
        )
        session.add(row)

    await session.commit()


async def test_run_backtest_smoke(session):
    """Smoke test: seed one oscillating ticker, run the backtest, sanity-check the report."""
    await _seed_oscillating_ticker(session, "OSC")
    report = await bt.run_backtest(session)

    # The report must be well-formed: one ticker, an integer candidate count,
    # and every top-level section present.
    assert report["tickers"] == 1
    assert isinstance(report["candidates"], int)
    expected_sections = ("overall_qualified", "overall_all", "by_direction", "calibration", "sweep")
    for section in expected_sections:
        assert section in report
    # The oscillating series is expected to produce at least one resolved setup.
    assert report["candidates"] >= 1

    # Sweep: walking the momentum-percentile cutoff from highest to lowest,
    # the number of qualifiers may only stay flat or grow.
    by_cutoff_desc = sorted(report["sweep"], key=lambda r: r["min_momentum_percentile"], reverse=True)
    totals = [entry["total"] for entry in by_cutoff_desc]
    assert totals == sorted(totals)

    # Each calibration row must hold a percentage and a non-empty sample.
    for row in report["calibration"]:
        assert 0 <= row["realized_hit_rate"] <= 100
        assert row["n"] >= 1