From 80b4113280d82e4446c64830660075f95cc4db5f Mon Sep 17 00:00:00 2001 From: Dennis Thiessen Date: Thu, 2 Jul 2026 16:25:04 +0200 Subject: [PATCH] feat: add strategy variant lab and signal context snapshots Backtest report now includes research-only hold-to-horizon portfolio variants comparing raw vs residual 12-1 momentum, cutoff 80 vs 90, max 10 vs 15 positions, and SPY-200 risk scaling. A dynamic research recommendation panel flags residual momentum, cutoff 90, or regime scaling only when transparent promotion rules pass. Adds signal_context_snapshots with migration 016 and captures one point-in-time context row per newly generated TradeSetup: setup fields, composite/dimensions, latest sentiment, latest fundamentals, and strategy_version=momentum_12_1_rr_time_v1. This is forward-only; no historical sentiment/fundamental backfill is attempted. No live gate, paper-trade exit, or production ranking behavior changes. Verification: 458 backend tests pass, ruff check app/ clean, frontend npm run build clean. 
Co-Authored-By: Claude Fable 5 --- README.md | 19 +- .../016_add_signal_context_snapshots.py | 55 +++ app/models/__init__.py | 2 + app/models/signal_context_snapshot.py | 45 +++ app/services/backtest_service.py | 347 +++++++++++++++++- app/services/rr_scanner_service.py | 139 ++++++- .../src/components/signals/BacktestPanel.tsx | 75 +++- frontend/src/lib/types.ts | 22 ++ tests/unit/test_backtest_service.py | 99 ++++- tests/unit/test_signal_context_snapshot.py | 110 ++++++ 10 files changed, 885 insertions(+), 28 deletions(-) create mode 100644 alembic/versions/016_add_signal_context_snapshots.py create mode 100644 app/models/signal_context_snapshot.py create mode 100644 tests/unit/test_signal_context_snapshot.py diff --git a/README.md b/README.md index 1c3164c..ee3fa12 100644 --- a/README.md +++ b/README.md @@ -42,15 +42,15 @@ Fundamentals (weekly, early Monday) · Alerts (hourly, Telegram) · Backtest (we | Component | Verdict | Evidence | |---|---|---| -| **12-1 cross-sectional momentum** (the activation gate, long-only) | **The only demonstrated edge — in-sample** | Qualified setups ≈ **+0.25R** avg vs ≈ −0.05R all-setups baseline; the percentile sweep is cleanly monotonic (cutoff 50 → +0.14R, 70 → +0.21R, 80 → +0.25R). Rank-IC ≈ 0.05, t ≈ 1.6 — right sign and size for the classic factor, **not yet statistically significant** | -| S/R setup engine (ATR stops, S/R targets, reach-probability) | **No selection edge — execution/timing only** | ≈ breakeven (+0.01R) before the momentum gate. The probability model is honest (calibrated) but does not discriminate winners | +| **12-1 cross-sectional momentum** (the activation gate, long-only) | **The only demonstrated edge — in-sample** | Qualified setups beat the all-setups baseline after costs; rank-IC ≈ 0.05. 
Residual 12-1 momentum is now evaluated as a research signal, but is not production ranking yet | +| S/R setup engine (ATR stops, S/R targets, reach-probability) | **Filter/execution context, not the exit** | R:R/room-to-run still earns its keep as a filter, but S/R targets underperform the time exit. The probability model is display-only | | Composite score + 5 dimensions | **Display/ranking only** | Sub-scores are hand-built heuristics; none has a measured IC. Note: the "momentum" *dimension* is 5/20-day ROC — NOT the validated 12-1 factor (that lives in `momentum_service`) | | LLM sentiment | Display + a bounded composite adjustment (± weight × 100 pts around neutral 50) | Deliberately kept out of the setup engine; no point-in-time history to validate against yet | | Fundamentals | Feeds composite + confidence only | Latest values only, no history — same limitation | | Short setups | **Excluded while the momentum gate is active** | Backtest showed shorts fight the trend and drag expectancy | | Expected-value gate (removed June 2026) | Degenerate — do not resurrect | Structurally favored distant lottery targets; selected *worse*-than-random setups | -Caveats on the momentum result: in-sample, roughly one market regime, no transaction costs or slippage modeled, and the factor is beta-heavy (6-month volatility posted the top IC — that's beta, not alpha). The **out-of-sample proof is the forward paper-trade record**: Signals → Track Record compares live qualified expectancy against the backtest. +Caveats on the momentum result: in-sample, roughly one market regime, costs/slippage approximated at 0.1% per side, and the factor is beta-heavy (6-month volatility often posts the top IC — that's beta, not alpha). The **out-of-sample proof is the forward paper-trade record**: Signals → Track Record compares live qualified expectancy against the backtest. 
### The iron rule for strategy changes @@ -64,16 +64,15 @@ Corollaries: never let an unvalidated score gate setups; the outcome evaluator m ### Highest-value next experiments (in order) -1. **Volatility-scaled momentum** — add `mom_12_1 / vol_6m` to `_signal_values`; risk-adjusted momentum typically beats raw and dampens momentum crashes. -2. **Regime filter on the gate** — momentum crashes cluster in post-bear rebounds; `market_regime_service` already computes the SPY 50/200 trend, so test "qualify only in Risk-On" in the backtest before wiring it live. -3. **Cost haircut in the backtest** — subtract a fixed per-trade cost (e.g. 0.1% per side) in the outcome aggregation so expectancy is net; a thin edge must survive costs. +1. **Residual momentum portfolio variants** — compare raw vs beta-adjusted 12-1 momentum in the strategy-variant simulator before changing production ranking. +2. **Regime/risk scaling** — test whether SPY-200 risk scaling reduces drawdown enough to justify lower exposure. +3. **Signal context snapshots** — accumulate point-in-time composite/sentiment/fundamental context for every new setup so the discretionary overlay can be tested forward-only. 4. **More breadth, not more history** — widening the ranked universe (e.g. `nasdaq_all`) strengthens each week's cross-section and the IC t-stat, even if only the top slice is traded. (Deeper history was considered and declined.) -5. **Exit tuning with the existing sweeps** — the report already sweeps fixed take-profits and trailing stops against the S/R-target model; momentum's edge lives in the right tail, so wide trailing exits (already the paper-trade default) tend to beat nearby S/R targets. Also worth testing: a pure time-based exit (hold ~1 month, re-rank) instead of the 30-day target/stop race. ## Key Use Cases - **Find today's best long setup.** On the **Dashboard**, the *Top Setups* table lists qualified setups ranked by momentum with the #1 flagged "Top pick". 
Each row opens the ticker page for the chart, scores, S/R targets and entry/stop. -- **Track a trade you took.** Mark a setup as a **paper trade**: it's marked-to-market against the latest close, auto-closed on stop/target, and its sentiment stays fresh while open. *Signals → Track Record* shows the realized edge. +- **Track a trade you took.** Mark a setup as a **paper trade**: it's marked-to-market against the latest close, auto-closed by the active exit policy (default: 30 trading days with the initial stop), and its sentiment stays fresh while open. *Signals → Track Record* shows the realized edge. ## Stack @@ -405,6 +404,7 @@ Context for whoever — human or AI — continues this work. The owner pushes st - **The outcome evaluator evaluates ALL setups**, not just qualified ones — unqualified setups are the control group that makes the Track Record meaningful. - **`SystemSetting` access goes through `app/services/settings_store.py`** — don't query the model directly. - **Time-series data gets a real table** (see `benchmark_prices`, `regime_snapshots`); `SystemSetting` JSON is only for config and cached reports. +- **Discretionary overlay data is forward-only.** `signal_context_snapshots` captures composite/dimension/sentiment/fundamental context for new setups. Do not approximate historical sentiment/fundamental snapshots from today's data. - Style: surgical changes, minimal new files; extend existing services rather than adding parallel ones. ### Where the strategy lives @@ -419,7 +419,8 @@ Context for whoever — human or AI — continues this work. 
The owner pushes st | Gate defaults / admin config | `app/services/admin_service.py` (`ACTIVATION_DEFAULTS`) | | Backtest + factor rank-IC harness ("Signal edge") | `app/services/backtest_service.py` | | Outcome resolution (target/stop/expired/ambiguous) | `app/services/outcome_service.py` | -| Paper trades + trailing auto-exit | `app/services/paper_trade_service.py` | +| Paper trades + time/trailing/target auto-exit | `app/services/paper_trade_service.py` | +| Point-in-time setup context snapshots | `app/models/signal_context_snapshot.py` + `app/services/rr_scanner_service.py` | | S/R detection & zone clustering | `app/services/sr_service.py` | | SPY benchmark for paper-trade alpha | `app/services/benchmark_service.py` | | Pipelines & job registration | `app/scheduler.py` | diff --git a/alembic/versions/016_add_signal_context_snapshots.py b/alembic/versions/016_add_signal_context_snapshots.py new file mode 100644 index 0000000..5f28d1e --- /dev/null +++ b/alembic/versions/016_add_signal_context_snapshots.py @@ -0,0 +1,55 @@ +"""add signal context snapshots + +Revision ID: 016 +Revises: 015 +Create Date: 2026-07-02 00:00:00.000000 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +revision: str = "016" +down_revision: Union[str, None] = "015" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "signal_context_snapshots", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("trade_setup_id", sa.Integer(), nullable=False), + sa.Column("ticker_id", sa.Integer(), nullable=False), + sa.Column("detected_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("strategy_version", sa.String(length=80), nullable=False), + sa.Column("direction", sa.String(length=10), nullable=False), + sa.Column("entry_price", sa.Float(), nullable=False), + 
sa.Column("stop_loss", sa.Float(), nullable=False), + sa.Column("target", sa.Float(), nullable=False), + sa.Column("rr_ratio", sa.Float(), nullable=False), + sa.Column("confidence_score", sa.Float(), nullable=True), + sa.Column("recommended_action", sa.String(length=20), nullable=True), + sa.Column("risk_level", sa.String(length=10), nullable=True), + sa.Column("momentum_percentile", sa.Float(), nullable=True), + sa.Column("score_context_json", sa.Text(), nullable=False), + sa.Column("sentiment_context_json", sa.Text(), nullable=False), + sa.Column("fundamental_context_json", sa.Text(), nullable=False), + sa.ForeignKeyConstraint(["ticker_id"], ["tickers.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["trade_setup_id"], ["trade_setups.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("trade_setup_id", name="uq_signal_context_trade_setup"), + ) + op.create_index( + "ix_signal_context_ticker_detected", + "signal_context_snapshots", + ["ticker_id", "detected_at"], + ) + + +def downgrade() -> None: + op.drop_index("ix_signal_context_ticker_detected", table_name="signal_context_snapshots") + op.drop_table("signal_context_snapshots") diff --git a/app/models/__init__.py b/app/models/__init__.py index 84cdaab..cad6683 100644 --- a/app/models/__init__.py +++ b/app/models/__init__.py @@ -12,6 +12,7 @@ from app.models.alert import AlertLog from app.models.paper_trade import PaperTrade from app.models.regime_snapshot import RegimeSnapshot from app.models.benchmark_price import BenchmarkPrice +from app.models.signal_context_snapshot import SignalContextSnapshot __all__ = [ "Ticker", @@ -30,4 +31,5 @@ __all__ = [ "PaperTrade", "RegimeSnapshot", "BenchmarkPrice", + "SignalContextSnapshot", ] diff --git a/app/models/signal_context_snapshot.py b/app/models/signal_context_snapshot.py new file mode 100644 index 0000000..779edd1 --- /dev/null +++ b/app/models/signal_context_snapshot.py @@ -0,0 +1,45 @@ +from datetime import datetime + +from 
sqlalchemy import DateTime, Float, ForeignKey, String, Text +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from app.database import Base + + +class SignalContextSnapshot(Base): + """Point-in-time context captured when a trade setup is generated. + + This stores the discretionary overlay inputs (scores, sentiment, + fundamentals) as they looked at detection time, so future analysis can test + whether human filtering improved or hurt the qualified-list strategy. + """ + + __tablename__ = "signal_context_snapshots" + + id: Mapped[int] = mapped_column(primary_key=True) + trade_setup_id: Mapped[int] = mapped_column( + ForeignKey("trade_setups.id", ondelete="CASCADE"), nullable=False, unique=True + ) + ticker_id: Mapped[int] = mapped_column( + ForeignKey("tickers.id", ondelete="CASCADE"), nullable=False + ) + detected_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False) + strategy_version: Mapped[str] = mapped_column(String(80), nullable=False) + + direction: Mapped[str] = mapped_column(String(10), nullable=False) + entry_price: Mapped[float] = mapped_column(Float, nullable=False) + stop_loss: Mapped[float] = mapped_column(Float, nullable=False) + target: Mapped[float] = mapped_column(Float, nullable=False) + rr_ratio: Mapped[float] = mapped_column(Float, nullable=False) + confidence_score: Mapped[float | None] = mapped_column(Float, nullable=True) + recommended_action: Mapped[str | None] = mapped_column(String(20), nullable=True) + risk_level: Mapped[str | None] = mapped_column(String(10), nullable=True) + momentum_percentile: Mapped[float | None] = mapped_column(Float, nullable=True) + + score_context_json: Mapped[str] = mapped_column(Text, nullable=False, default="{}") + sentiment_context_json: Mapped[str] = mapped_column(Text, nullable=False, default="{}") + fundamental_context_json: Mapped[str] = mapped_column(Text, nullable=False, 
default="{}") + + trade_setup = relationship("TradeSetup") + ticker = relationship("Ticker") diff --git a/app/services/backtest_service.py b/app/services/backtest_service.py index 275dfa9..277413c 100644 --- a/app/services/backtest_service.py +++ b/app/services/backtest_service.py @@ -296,7 +296,13 @@ def _time_exits( return result -def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) -> list[dict]: +def _replay_ticker( + symbol: str, + records: list, + config: dict, + activation: dict, + benchmark_closes: dict[date, float] | None = None, +) -> list[dict]: """Walk one ticker's history weekly, building setups and their realized outcomes.""" candidates: list[dict] = [] n = len(records) @@ -307,6 +313,11 @@ def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) - window = records[: i + 1] forward = records[i + 1 :] forward_bars = [Bar(date=r.date, high=r.high, low=r.low) for r in forward] + closes = [float(r.close) for r in window] + dates = [r.date for r in window] + residual_momentum = _residual_momentum_12_1( + dates, closes, len(window) - 1, benchmark_closes + ) for s in _window_setups(window, config, activation): outcome, outcome_date = evaluate_setup_against_bars( @@ -349,6 +360,7 @@ def _replay_ticker(symbol: str, records: list, config: dict, activation: dict) - "primary_prob": s["primary_prob"], "best_prob": s["best_prob"], "momentum": s["momentum"], + "residual_momentum": residual_momentum, "meets_core": s["meets_core"], # Gate fields the ablation recomputes floors from — without them # every candidate looks NEUTRAL and the ablation rows collapse. 
@@ -759,7 +771,10 @@ def _replay_and_signals( ) for o, op, hi, lo, cl, vo in zip(date_ords, opens, highs, lows, closes, volumes) ] - return _replay_ticker(symbol, bars, config, activation), _signal_series(bars, benchmark_closes) + return ( + _replay_ticker(symbol, bars, config, activation, benchmark_closes), + _signal_series(bars, benchmark_closes), + ) def _backtest_worker_count() -> int: @@ -800,22 +815,40 @@ async def _fetch_columns(db: AsyncSession, symbol: str) -> tuple | None: ) -def _assign_momentum_percentiles(candidates: list[dict]) -> None: - """Per ISO week, rank candidates by their ticker's 12-1 momentum and attach a - 0–100 ``momentum_percentile`` (100 = highest momentum in the universe that - week). Candidates whose momentum is unknown (insufficient lookback) get None - and therefore can't clear a momentum gate. Mutates ``candidates``.""" +def _assign_signal_percentiles( + candidates: list[dict], + value_key: str, + percentile_key: str, +) -> None: + """Per ISO week, rank candidates by ``value_key`` and attach a 0-100 + percentile under ``percentile_key`` (100 = strongest). 
Missing values get + None and therefore cannot clear a gate based on that signal.""" by_week: dict = defaultdict(list) for c in candidates: - if c.get("momentum") is not None: + if c.get(value_key) is not None: by_week[c["iso_week"]].append(c) for group in by_week.values(): - ordered = sorted(group, key=lambda c: c["momentum"]) + ordered = sorted(group, key=lambda c: c[value_key]) n = len(ordered) for rank, c in enumerate(ordered): - c["momentum_percentile"] = (rank / (n - 1) * 100.0) if n > 1 else 100.0 + c[percentile_key] = (rank / (n - 1) * 100.0) if n > 1 else 100.0 for c in candidates: - c.setdefault("momentum_percentile", None) + c.setdefault(percentile_key, None) + + +def _assign_momentum_percentiles(candidates: list[dict]) -> None: + """Per ISO week, rank candidates by their ticker's 12-1 momentum and attach a + 0-100 ``momentum_percentile`` (100 = highest momentum in the universe that + week). Candidates whose momentum is unknown (insufficient lookback) get None + and therefore can't clear a momentum gate. 
Mutates ``candidates``.""" + _assign_signal_percentiles(candidates, "momentum", "momentum_percentile") + + +def _assign_residual_momentum_percentiles(candidates: list[dict]) -> None: + """Research-only residual-momentum percentile used by strategy variants.""" + _assign_signal_percentiles( + candidates, "residual_momentum", "residual_momentum_percentile" + ) def _momentum_qualifies(cand: dict, threshold: float) -> bool: @@ -930,6 +963,12 @@ def _simulate_portfolio( spy_closes: dict | None, exit_policy: str, hold_days: int, + *, + qualified_fn: Callable[[dict], bool] | None = None, + ranking_key: str = "momentum_percentile", + max_positions: int = SIM_MAX_POSITIONS, + risk_per_trade: float = SIM_RISK_PER_TRADE, + risk_scale_by_ord: dict[int, float] | None = None, ) -> dict | None: """Replay the qualified setups as ONE capital-constrained book and report portfolio economics from the daily equity curve (return, CAGR, drawdown, @@ -942,9 +981,15 @@ def _simulate_portfolio( modeled); positions still open at the end are closed at their last mark. Returns None when there is nothing to trade. 
""" + if qualified_fn is None: + def _default_qualified(c: dict) -> bool: + return bool(c.get("qualified")) + + qualified_fn = _default_qualified + entries_by_ord: dict[int, list[dict]] = defaultdict(list) for c in candidates: - if not c.get("qualified") or c.get("direction") != "long": + if not qualified_fn(c) or c.get("direction") != "long": continue if not c.get("entry") or not c.get("stop"): continue @@ -1018,22 +1063,26 @@ def _simulate_portfolio( equity = _marked_equity() todays = sorted( entries_by_ord.get(o, ()), - key=lambda c: c.get("momentum_percentile") or 0.0, + key=lambda c: c.get(ranking_key) or 0.0, reverse=True, ) for c in todays: sym = c["symbol"] if sym in positions: continue - if len(positions) >= SIM_MAX_POSITIONS: + if len(positions) >= max_positions: skipped_full += 1 continue entry, stop = float(c["entry"]), float(c["stop"]) risk_ps = entry - stop if risk_ps <= 0 or entry <= 0: continue + risk_scale = (risk_scale_by_ord or {}).get(o, 1.0) + effective_risk = risk_per_trade * risk_scale + if effective_risk <= 0: + continue shares = min( - (equity * SIM_RISK_PER_TRADE) / risk_ps, + (equity * effective_risk) / risk_ps, (equity * SIM_NOTIONAL_CAP) / entry, max(cash, 0.0) / (entry * (1.0 + COST_PER_SIDE)), ) @@ -1143,6 +1192,247 @@ def _simulate_portfolio( } +STRATEGY_VARIANTS: tuple[dict, ...] 
= ( + { + "variant": "production_raw_80_fixed10", + "label": "Production raw 80 / max 10", + "percentile_key": "momentum_percentile", + "cutoff": 80.0, + "max_positions": 10, + "risk_per_trade": 0.01, + "risk_scale": None, + }, + { + "variant": "raw_90_fixed10", + "label": "Raw 90 / max 10", + "percentile_key": "momentum_percentile", + "cutoff": 90.0, + "max_positions": 10, + "risk_per_trade": 0.01, + "risk_scale": None, + }, + { + "variant": "residual_80_fixed10", + "label": "Residual 80 / max 10", + "percentile_key": "residual_momentum_percentile", + "cutoff": 80.0, + "max_positions": 10, + "risk_per_trade": 0.01, + "risk_scale": None, + }, + { + "variant": "residual_90_fixed10", + "label": "Residual 90 / max 10", + "percentile_key": "residual_momentum_percentile", + "cutoff": 90.0, + "max_positions": 10, + "risk_per_trade": 0.01, + "risk_scale": None, + }, + { + "variant": "raw_80_fixed15", + "label": "Raw 80 / max 15", + "percentile_key": "momentum_percentile", + "cutoff": 80.0, + "max_positions": 15, + "risk_per_trade": 0.01, + "risk_scale": None, + }, + { + "variant": "raw_80_regime_scaled", + "label": "Raw 80 / SPY-200 risk scale", + "percentile_key": "momentum_percentile", + "cutoff": 80.0, + "max_positions": 10, + "risk_per_trade": 0.01, + "risk_scale": "spy_200", + }, + { + "variant": "residual_80_regime_scaled", + "label": "Residual 80 / SPY-200 risk scale", + "percentile_key": "residual_momentum_percentile", + "cutoff": 80.0, + "max_positions": 10, + "risk_per_trade": 0.01, + "risk_scale": "spy_200", + }, +) + + +def _qualifies_by_percentile(cand: dict, percentile_key: str, threshold: float) -> bool: + """Variant qualification: production floors + long-only signal percentile. 
+ This does not mutate or replace the production ``qualified`` field.""" + if not cand.get("meets_core"): + return False + if threshold <= 0: + return True + if cand.get("direction") == "short": + return False + pct = cand.get(percentile_key) + return pct is not None and pct >= threshold + + +def _spy_200_risk_scale(spy_closes: dict[date, float] | None) -> dict[int, float]: + """Entry-date risk scale: 0.5 when SPY closes below its 200-day SMA, else 1.0. + Missing/short benchmark history returns an empty map, which the simulator + treats as unscaled 1.0 risk.""" + if not spy_closes: + return {} + rows = sorted((d, c) for d, c in spy_closes.items() if c and c > 0) + out: dict[int, float] = {} + closes: list[float] = [] + for d, close in rows: + closes.append(float(close)) + if len(closes) < 200: + continue + sma = sum(closes[-200:]) / 200.0 + out[d.toordinal()] = 0.5 if close < sma else 1.0 + return out + + +def _strategy_variant_sims( + candidates: list[dict], + prices: dict[str, tuple], + spy_closes: dict[date, float] | None, + hold_days: int, +) -> list[dict]: + """Research-only portfolio variants for comparing rank signals, cutoff, book + capacity, and simple SPY-200 risk scaling. 
Live qualification is untouched.""" + risk_scales = {"spy_200": _spy_200_risk_scale(spy_closes)} + rows: list[dict] = [] + for cfg in STRATEGY_VARIANTS: + percentile_key = str(cfg["percentile_key"]) + cutoff = float(cfg["cutoff"]) + sim = _simulate_portfolio( + candidates, + prices, + spy_closes, + "hold", + hold_days, + qualified_fn=lambda c, pk=percentile_key, th=cutoff: _qualifies_by_percentile(c, pk, th), + ranking_key=percentile_key, + max_positions=int(cfg["max_positions"]), + risk_per_trade=float(cfg["risk_per_trade"]), + risk_scale_by_ord=risk_scales.get(cfg["risk_scale"]), + ) + if sim is None: + continue + rows.append({ + "variant": cfg["variant"], + "label": cfg["label"], + "ranking": "residual" if "residual" in percentile_key else "raw", + "cutoff": cutoff, + "max_positions": int(cfg["max_positions"]), + "risk_per_trade_pct": round(float(cfg["risk_per_trade"]) * 100, 2), + "risk_scale": cfg["risk_scale"], + **sim, + }) + return rows + + +def _pct_loss(base: float | None, candidate: float | None) -> float | None: + if base is None or candidate is None or base <= 0: + return None + return (base - candidate) / base + + +def _build_research_recommendation(report: dict) -> dict: + """Advisory rules for research variants. 
These are deliberately conservative: + production only changes later if a portfolio variant beats the baseline under + transparent drawdown/Sharpe/CAGR constraints.""" + variants = { + v.get("variant"): v + for v in (report.get("strategy_variants") or {}).get("variants", []) + } + base = variants.get("production_raw_80_fixed10") + items: list[dict] = [] + if base is None: + return { + "items": [], + "note": "Strategy variants unavailable; re-run the backtest after benchmark data is present.", + } + + base_sharpe = base.get("sharpe") + base_dd = base.get("max_drawdown_pct") + base_cagr = base.get("cagr_pct") + + residuals = [ + v for key, v in variants.items() + if key.startswith("residual_") and v.get("risk_scale") is None + ] + residual = max(residuals, key=lambda v: v.get("sharpe") or -999, default=None) + if ( + residual and base_sharpe is not None and residual.get("sharpe") is not None + and base_dd is not None and residual.get("max_drawdown_pct") is not None + ): + sharpe_delta = residual["sharpe"] - base_sharpe + dd_delta = residual["max_drawdown_pct"] - base_dd + candidate = sharpe_delta >= 0.10 and dd_delta <= 2.0 + items.append({ + "topic": "residual_momentum", + "candidate": candidate, + "text": ( + f"Residual momentum {'is a promotion candidate' if candidate else 'stays research-only'}: " + f"{residual['label']} Sharpe {residual['sharpe']:.2f} vs {base_sharpe:.2f}, " + f"drawdown {residual['max_drawdown_pct']:.1f}% vs {base_dd:.1f}%." 
+ ), + }) + + raw_regime = variants.get("raw_80_regime_scaled") + if ( + raw_regime and base_dd is not None and base_cagr is not None + and raw_regime.get("cagr_pct") is not None + and raw_regime.get("max_drawdown_pct") is not None + ): + dd_reduction = (base_dd - raw_regime["max_drawdown_pct"]) / base_dd if base_dd > 0 else None + cagr_loss = _pct_loss(base_cagr, raw_regime.get("cagr_pct")) + candidate = ( + dd_reduction is not None and cagr_loss is not None + and dd_reduction >= 0.20 and cagr_loss <= 0.15 + ) + items.append({ + "topic": "regime_scaling", + "candidate": candidate, + "text": ( + f"SPY-200 risk scaling {'is a promotion candidate' if candidate else 'stays research-only'}: " + f"drawdown {raw_regime['max_drawdown_pct']:.1f}% vs {base_dd:.1f}%, " + f"CAGR {raw_regime.get('cagr_pct'):+.1f}% vs {base_cagr:+.1f}%." + ), + }) + + raw_90 = variants.get("raw_90_fixed10") + if ( + raw_90 and base_sharpe is not None and base_dd is not None and base_cagr is not None + and raw_90.get("sharpe") is not None and raw_90.get("cagr_pct") is not None + ): + cagr_loss = _pct_loss(base_cagr, raw_90.get("cagr_pct")) + raw_90_sharpe = raw_90.get("sharpe") + candidate = ( + raw_90_sharpe is not None + and raw_90_sharpe > base_sharpe + and raw_90["max_drawdown_pct"] < base_dd + and cagr_loss is not None and cagr_loss < 0.10 + ) + items.append({ + "topic": "cutoff_90", + "candidate": candidate, + "text": ( + f"Cutoff 90 {'is a promotion candidate' if candidate else 'stays research-only'}: " + f"Sharpe {raw_90_sharpe:.2f} vs {base_sharpe:.2f}, " + f"drawdown {raw_90['max_drawdown_pct']:.1f}% vs {base_dd:.1f}%, " + f"CAGR {raw_90.get('cagr_pct'):+.1f}% vs {base_cagr:+.1f}%." + ), + }) + + return { + "items": items, + "note": ( + "Advisory only. Production changes require a variant to pass the rule " + "and then be adopted explicitly in a later strategy-version change." 
+ ), + } + + # --------------------------------------------------------------------------- # Data-driven recommendation # --------------------------------------------------------------------------- @@ -1407,6 +1697,7 @@ async def run_backtest( # Cross-sectional momentum: rank every week's universe, then "qualified" means # floors + top ``min_momentum_percentile`` by 12-1 momentum. _assign_momentum_percentiles(candidates) + _assign_residual_momentum_percentiles(candidates) current_min_pct = float(activation.get("min_momentum_percentile", 80.0)) for c in candidates: c["qualified"] = _momentum_qualifies(c, current_min_pct) @@ -1428,8 +1719,19 @@ async def run_backtest( # the book once per exit policy. Best-effort — the report stands without it. hold_horizon = max(TIME_EXIT_DAYS) sim_policies: list[dict] = [] + strategy_variant_rows: list[dict] = [] try: - qual_symbols = sorted({c["symbol"] for c in candidates if c.get("qualified")}) + qual_symbols = sorted({ + c["symbol"] + for c in candidates + if c.get("qualified") + or any( + _qualifies_by_percentile( + c, str(cfg["percentile_key"]), float(cfg["cutoff"]) + ) + for cfg in STRATEGY_VARIANTS + ) + }) price_columns: dict[str, tuple] = {} for sym in qual_symbols: cols = await _fetch_columns(db, sym) @@ -1457,6 +1759,9 @@ async def run_backtest( ) if sim is not None: sim_policies.append({"policy": policy, **sim}) + strategy_variant_rows = _strategy_variant_sims( + candidates, price_columns, spy_closes, hold_horizon + ) except Exception: logger.exception("Portfolio simulation failed") @@ -1513,6 +1818,15 @@ async def run_backtest( "same window. In-sample; no dividends." ), }, + "strategy_variants": { + "variants": strategy_variant_rows, + "note": ( + "Research-only hold-to-horizon portfolio variants. These compare " + "raw vs residual momentum ranking, cutoff 80 vs 90, max 10 vs 15 " + "positions, and SPY-200 risk scaling. They do not change live " + "qualification or paper-trade behavior." 
+ ), + }, "signal_eval": _signal_evaluation(collected), "signal_eval_note": ( "Cross-sectional rank-IC of price-only signals vs the forward " @@ -1533,6 +1847,7 @@ async def run_backtest( ), } report["recommendation"] = _build_recommendation(report) + report["research_recommendation"] = _build_research_recommendation(report) return report
diff --git a/app/services/rr_scanner_service.py b/app/services/rr_scanner_service.py
index d4f0103..c8a0e5f 100644
--- a/app/services/rr_scanner_service.py
+++ b/app/services/rr_scanner_service.py
@@ -11,15 +11,17 @@ from __future__ import annotations
 import json
 import logging
 from collections.abc import Callable
-from datetime import datetime, timezone
+from datetime import date, datetime, timezone
 
 from sqlalchemy import and_, func, select
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.exceptions import NotFoundError
+from app.models.fundamental import FundamentalData
 from app.models.ohlcv import OHLCVRecord
 from app.models.score import CompositeScore, DimensionScore
 from app.models.sentiment import SentimentScore
+from app.models.signal_context_snapshot import SignalContextSnapshot
 from app.models.sr_level import SRLevel
 from app.models.ticker import Ticker
 from app.models.trade_setup import TradeSetup
@@ -29,6 +31,8 @@ from app.services.recommendation_service import enhance_trade_setup
 
 logger = logging.getLogger(__name__)
 
+STRATEGY_VERSION = "momentum_12_1_rr_time_v1"
+
 
 async def _get_ticker(db: AsyncSession, symbol: str) -> Ticker:
     normalised = symbol.strip().upper()
@@ -76,6 +80,141 @@ async def _get_latest_sentiment(db: AsyncSession, ticker_id: int) -> str | None:
     return row.classification if row else None
 
 
+def _json_default(value: object) -> str:
+    """Serialise values ``json.dumps`` cannot encode natively.
+
+    Dates and datetimes become ISO-8601 strings; any other type falls back to
+    ``str(value)``.
+    """
+    if isinstance(value, (datetime, date)):
+        return value.isoformat()
+    return str(value)
+
+
+def _optional_float(value: float | None) -> float | None:
+    """Coerce *value* to ``float``, passing ``None`` through unchanged."""
+    return None if value is None else float(value)
+
+
+async def _latest_row_per_ticker(db: AsyncSession, model, order_column, ticker_ids) -> dict:
+    """Return ``{ticker_id: newest row}`` of *model* for the given tickers.
+
+    Rows come back newest-first per ticker (``order_column`` descending), so
+    the first row seen for each ticker is the one kept.
+    """
+    rows = (
+        await db.execute(
+            select(model)
+            .where(model.ticker_id.in_(ticker_ids))
+            .order_by(model.ticker_id, order_column.desc())
+        )
+    ).scalars().all()
+    latest: dict = {}
+    for row in rows:
+        latest.setdefault(row.ticker_id, row)
+    return latest
+
+
+async def _create_signal_context_snapshots(
+    db: AsyncSession,
+    setups: list[TradeSetup],
+    *,
+    strategy_version: str = STRATEGY_VERSION,
+) -> None:
+    """Capture point-in-time discretionary context for freshly generated setups.
+
+    The scanner stores the setup itself first so each snapshot can be keyed by
+    ``trade_setup_id``. This is intentionally forward-only: old sentiment,
+    fundamentals and composite scores are not reconstructed from today's data.
+
+    One ``SignalContextSnapshot`` is added to the session per setup; nothing is
+    committed here. Optional numeric inputs that are ``None`` are stored as
+    null instead of raising ``TypeError``.
+    """
+    if not setups:
+        return
+
+    ticker_ids = {s.ticker_id for s in setups}
+
+    dims: dict[int, dict[str, dict]] = {}
+    dim_rows = (
+        await db.execute(select(DimensionScore).where(DimensionScore.ticker_id.in_(ticker_ids)))
+    ).scalars().all()
+    for row in dim_rows:
+        dims.setdefault(row.ticker_id, {})[row.dimension] = {
+            "score": float(row.score),
+            "is_stale": bool(row.is_stale),
+            "computed_at": row.computed_at,
+        }
+
+    composites = await _latest_row_per_ticker(
+        db, CompositeScore, CompositeScore.computed_at, ticker_ids
+    )
+    sentiments = await _latest_row_per_ticker(
+        db, SentimentScore, SentimentScore.timestamp, ticker_ids
+    )
+    fundamentals = await _latest_row_per_ticker(
+        db, FundamentalData, FundamentalData.fetched_at, ticker_ids
+    )
+
+    now = datetime.now(timezone.utc)
+    for setup in setups:
+        comp = composites.get(setup.ticker_id)
+        sent = sentiments.get(setup.ticker_id)
+        fund = fundamentals.get(setup.ticker_id)
+        score_context = {
+            # No composite row: fall back to the score stored on the setup.
+            "composite_score": _optional_float(comp.score if comp else setup.composite_score),
+            "composite_is_stale": bool(comp.is_stale) if comp else None,
+            "composite_computed_at": comp.computed_at if comp else None,
+            "dimensions": dims.get(setup.ticker_id, {}),
+        }
+        sentiment_context = (
+            {
+                "classification": sent.classification,
+                "confidence": int(sent.confidence) if sent.confidence is not None else None,
+                "recommendation": sent.recommendation,
+                "timestamp": sent.timestamp,
+                "source": sent.source,
+            }
+            if sent
+            else {}
+        )
+        fundamental_context = (
+            {
+                "pe_ratio": fund.pe_ratio,
+                "revenue_growth": fund.revenue_growth,
+                "earnings_surprise": fund.earnings_surprise,
+                "market_cap": fund.market_cap,
+                "next_earnings_date": fund.next_earnings_date,
+                "fetched_at": fund.fetched_at,
+            }
+            if fund
+            else {}
+        )
+        db.add(
+            SignalContextSnapshot(
+                trade_setup_id=setup.id,
+                ticker_id=setup.ticker_id,
+                detected_at=setup.detected_at,
+                created_at=now,
+                strategy_version=strategy_version,
+                direction=setup.direction,
+                entry_price=float(setup.entry_price),
+                stop_loss=float(setup.stop_loss),
+                target=float(setup.target),
+                rr_ratio=float(setup.rr_ratio),
+                confidence_score=_optional_float(setup.confidence_score),
+                recommended_action=setup.recommended_action,
+                risk_level=setup.risk_level,
+                momentum_percentile=_optional_float(setup.momentum_percentile),
+                score_context_json=json.dumps(score_context, default=_json_default),
+                sentiment_context_json=json.dumps(sentiment_context, default=_json_default),
+                fundamental_context_json=json.dumps(fundamental_context, default=_json_default),
+            )
+        )
+
+
 async def scan_ticker(
     db: AsyncSession,
     symbol: str,
@@ -238,6 +377,9 @@ async def scan_ticker(
     for s in enhanced_setups:
         await db.refresh(s)
 
+    await _create_signal_context_snapshots(db, enhanced_setups)
+    await db.commit()
+
     return enhanced_setups
 
 
diff --git a/frontend/src/components/signals/BacktestPanel.tsx b/frontend/src/components/signals/BacktestPanel.tsx index df029a7..465c7cf 100644 ---
a/frontend/src/components/signals/BacktestPanel.tsx +++ b/frontend/src/components/signals/BacktestPanel.tsx @@ -6,7 +6,7 @@ import { Callout } from '../ui/Callout'; import { Disclosure } from '../ui/Disclosure'; import { Section } from '../ui/Section'; import { useToast } from '../ui/Toast'; -import type { BacktestBucket, BacktestPortfolioPolicy } from '../../lib/types'; +import type { BacktestBucket, BacktestPortfolioPolicy, BacktestStrategyVariant } from '../../lib/types'; function fmtR(v: number | null | undefined): string { if (v === null || v === undefined) return '—'; @@ -206,6 +206,25 @@ export function BacktestPanel() { )} + {report.research_recommendation && report.research_recommendation.items.length > 0 && ( +
+

Research candidates

+
    + {report.research_recommendation.items.map((item) => ( +
  • + {item.text} +
  • + ))} +
+ {report.research_recommendation.note && ( +

{report.research_recommendation.note}

+ )} +
+ )} +
)} + {report.strategy_variants && report.strategy_variants.variants.length > 0 && ( +
+

+ Strategy variants +

+

+ {report.strategy_variants.note ?? 'Research-only portfolio variants.'}{' '} + + Residual momentum stays research-only until a variant beats production under the promotion rules. + +

+
+ + + + + + + + + + + + + + + + + + {report.strategy_variants.variants.map((row: BacktestStrategyVariant) => ( + + + + + + + + + + + + + + ))} + +
VariantRankCutoffMax PosRiskCAGRMax DDSharpeTotal RetTradesSkipped
{row.label}{row.ranking}{row.cutoff.toFixed(0)}{row.max_positions} + {row.risk_scale === 'spy_200' ? '0.5-1.0%' : `${row.risk_per_trade_pct.toFixed(1)}%`} + {fmtSignedPct(row.cagr_pct)}−{row.max_drawdown_pct.toFixed(1)}% + {row.sharpe === null ? '—' : row.sharpe.toFixed(2)} + {fmtSignedPct(row.total_return_pct)}{row.trades}{row.skipped_book_full}
+
+
+ )} + {report.signal_eval && report.signal_eval.length > 0 && (

diff --git a/frontend/src/lib/types.ts b/frontend/src/lib/types.ts index b3170c3..542260b 100644 --- a/frontend/src/lib/types.ts +++ b/frontend/src/lib/types.ts @@ -294,6 +294,11 @@ export interface BacktestRecommendation { note?: string; } +export interface BacktestResearchRecommendation { + items: { topic: string; text: string; candidate?: boolean }[]; + note?: string; +} + export interface BacktestPortfolioSim { params: { starting_capital: number; @@ -307,6 +312,21 @@ export interface BacktestPortfolioSim { note?: string; } +export interface BacktestStrategyVariant extends BacktestPortfolioPolicy { + variant: string; + label: string; + ranking: 'raw' | 'residual' | string; + cutoff: number; + max_positions: number; + risk_per_trade_pct: number; + risk_scale: string | null; +} + +export interface BacktestStrategyVariants { + variants: BacktestStrategyVariant[]; + note?: string; +} + export interface BacktestGateAblationRow extends BacktestBucket { variant: string; // The same variant graded under the hold-to-horizon time exit. 
@@ -347,7 +367,9 @@ export interface BacktestReport { gate_ablation_note?: string; time_exit_sweep?: BacktestTimeExitRow[]; portfolio_sim?: BacktestPortfolioSim; + strategy_variants?: BacktestStrategyVariants; recommendation?: BacktestRecommendation; + research_recommendation?: BacktestResearchRecommendation; signal_eval?: BacktestSignalEvalRow[]; signal_eval_note?: string; note: string; diff --git a/tests/unit/test_backtest_service.py b/tests/unit/test_backtest_service.py index cdb2712..3c48a8d 100644 --- a/tests/unit/test_backtest_service.py +++ b/tests/unit/test_backtest_service.py @@ -100,6 +100,101 @@ def test_residual_momentum_removes_market_beta_but_keeps_specific_drift(): assert drift["mom_12_1_resid"] > pure["mom_12_1_resid"] + 0.12 +def test_assigns_raw_and_residual_percentiles_independently(): + cands = [ + {"iso_week": (2026, 1), "momentum": 0.10, "residual_momentum": 0.30}, + {"iso_week": (2026, 1), "momentum": 0.30, "residual_momentum": 0.10}, + {"iso_week": (2026, 1), "momentum": 0.20, "residual_momentum": 0.20}, + ] + + bt._assign_momentum_percentiles(cands) + bt._assign_residual_momentum_percentiles(cands) + + by_raw = {c["momentum"]: c["momentum_percentile"] for c in cands} + by_resid = {c["residual_momentum"]: c["residual_momentum_percentile"] for c in cands} + assert by_raw[0.30] == 100.0 + assert by_raw[0.10] == 0.0 + assert by_resid[0.30] == 100.0 + assert by_resid[0.10] == 0.0 + + +def test_spy_200_risk_scale_halves_risk_below_sma(): + base = date(2025, 1, 1) + closes = {base + timedelta(days=i): 100.0 for i in range(210)} + closes[base + timedelta(days=210)] = 80.0 + + scale = bt._spy_200_risk_scale(closes) + + assert scale[(base + timedelta(days=199)).toordinal()] == 1.0 + assert scale[(base + timedelta(days=210)).toordinal()] == 0.5 + + +def test_strategy_variant_sims_emit_fixed_variants_without_mutating_qualified(monkeypatch): + cands = [{ + "qualified": False, + "meets_core": True, + "direction": "long", + "momentum_percentile": 90.0, + 
"residual_momentum_percentile": 91.0, + }] + calls = [] + + def fake_sim(candidates, prices, spy_closes, exit_policy, hold_days, **kwargs): + calls.append({"exit_policy": exit_policy, "hold_days": hold_days, **kwargs}) + return { + "starting_capital": bt.SIM_STARTING_CAPITAL, + "final_equity": 11_000.0, + "total_return_pct": 10.0, + "cagr_pct": 9.0, + "max_drawdown_pct": 5.0, + "sharpe": 1.1, + "trades": 1, + "win_rate": 100.0, + "avg_trade_pnl": 100.0, + "best_trade_r": 1.0, + "worst_trade_r": 1.0, + "best_trade_pnl": 100.0, + "worst_trade_pnl": 100.0, + "avg_hold_days": 30.0, + "skipped_book_full": 0, + "spy_return_pct": 1.0, + "yearly_returns": [], + "start_date": "2026-01-01", + "end_date": "2026-02-01", + } + + monkeypatch.setattr(bt, "_simulate_portfolio", fake_sim) + rows = bt._strategy_variant_sims(cands, {}, {}, 30) + + assert [r["variant"] for r in rows] == [cfg["variant"] for cfg in bt.STRATEGY_VARIANTS] + assert all(call["exit_policy"] == "hold" for call in calls) + assert any(call["ranking_key"] == "residual_momentum_percentile" for call in calls) + assert any(call["max_positions"] == 15 for call in calls) + assert cands[0]["qualified"] is False + + +def test_build_research_recommendation_applies_promotion_rules(): + report = { + "strategy_variants": {"variants": [ + {"variant": "production_raw_80_fixed10", "label": "Base", "sharpe": 1.20, + "max_drawdown_pct": 20.0, "cagr_pct": 30.0}, + {"variant": "residual_80_fixed10", "label": "Residual", "sharpe": 1.35, + "max_drawdown_pct": 21.0, "cagr_pct": 31.0, "risk_scale": None}, + {"variant": "raw_80_regime_scaled", "label": "Scaled", "sharpe": 1.1, + "max_drawdown_pct": 15.0, "cagr_pct": 27.0}, + {"variant": "raw_90_fixed10", "label": "Cutoff 90", "sharpe": 1.25, + "max_drawdown_pct": 19.0, "cagr_pct": 28.0}, + ]}, + } + + rec = bt._build_research_recommendation(report) + by_topic = {item["topic"]: item for item in rec["items"]} + + assert by_topic["residual_momentum"]["candidate"] is True + assert 
by_topic["regime_scaling"]["candidate"] is True + assert by_topic["cutoff_90"]["candidate"] is True + + class TestStopFillR: def test_intraday_fill_at_stop(self): assert bt._stop_fill_r("long", 100.0, 95.0, _bar(101, 94, 96)) == pytest.approx(-1.0) @@ -491,7 +586,8 @@ async def test_run_backtest_smoke(session): assert isinstance(report["candidates"], int) for key in ( "overall_qualified", "overall_all", "by_direction", "sweep", - "gate_ablation", "time_exit_sweep", "portfolio_sim", "recommendation", + "gate_ablation", "time_exit_sweep", "portfolio_sim", "strategy_variants", + "recommendation", "research_recommendation", ): assert key in report # the oscillating series should yield at least some resolved setups @@ -516,6 +612,7 @@ async def test_run_backtest_smoke(session): assert "portfolio_sim" in report assert isinstance(report["portfolio_sim"]["policies"], list) assert report["portfolio_sim"]["params"]["max_positions"] == bt.SIM_MAX_POSITIONS + assert isinstance(report["strategy_variants"]["variants"], list) # sweep: lowering the momentum-percentile cutoff can only add qualifiers sweep = sorted(report["sweep"], key=lambda r: r["min_momentum_percentile"], reverse=True)
diff --git a/tests/unit/test_signal_context_snapshot.py b/tests/unit/test_signal_context_snapshot.py
new file mode 100644
index 0000000..e98d565
--- /dev/null
+++ b/tests/unit/test_signal_context_snapshot.py
@@ -0,0 +1,130 @@
+"""Tests for point-in-time signal context snapshots."""
+
+from __future__ import annotations
+
+import json
+from datetime import date, datetime, timedelta, timezone
+
+import pytest
+from sqlalchemy import select
+
+from app.models.fundamental import FundamentalData
+from app.models.score import CompositeScore, DimensionScore
+from app.models.sentiment import SentimentScore
+from app.models.signal_context_snapshot import SignalContextSnapshot
+from app.models.ticker import Ticker
+from app.models.trade_setup import TradeSetup
+from app.services import rr_scanner_service as rr
+from tests.conftest import _test_session_factory  # type: ignore
+
+
+@pytest.fixture
+async def session():
+    async with _test_session_factory() as s:
+        yield s
+
+
+async def test_create_signal_context_snapshot_captures_latest_context(session):
+    """The snapshot stores the newest context rows available at capture time."""
+    now = datetime(2026, 7, 2, 12, tzinfo=timezone.utc)
+    ticker = Ticker(symbol="CTX")
+    session.add(ticker)
+    await session.flush()
+
+    session.add_all([
+        DimensionScore(
+            ticker_id=ticker.id,
+            dimension="technical",
+            score=71.0,
+            is_stale=False,
+            computed_at=now,
+        ),
+        DimensionScore(
+            ticker_id=ticker.id,
+            dimension="momentum",
+            score=82.0,
+            is_stale=False,
+            computed_at=now,
+        ),
+        CompositeScore(
+            ticker_id=ticker.id,
+            score=76.5,
+            is_stale=False,
+            weights_json='{"technical": 0.25}',
+            computed_at=now,
+        ),
+        # Older reading with a lower confidence: the newer row below must win.
+        SentimentScore(
+            ticker_id=ticker.id,
+            classification="BULLISH",
+            confidence=12,
+            source="test",
+            timestamp=now - timedelta(days=1),
+            reasoning="",
+            citations_json="[]",
+            recommendation="BUY",
+        ),
+        SentimentScore(
+            ticker_id=ticker.id,
+            classification="BULLISH",
+            confidence=78,
+            source="test",
+            timestamp=now,
+            reasoning="",
+            citations_json="[]",
+            recommendation="BUY",
+        ),
+        FundamentalData(
+            ticker_id=ticker.id,
+            pe_ratio=25.0,
+            revenue_growth=0.18,
+            earnings_surprise=0.05,
+            market_cap=1_000_000_000.0,
+            next_earnings_date=date(2026, 8, 1),
+            fetched_at=now,
+            unavailable_fields_json="{}",
+        ),
+    ])
+    setup = TradeSetup(
+        ticker_id=ticker.id,
+        direction="long",
+        entry_price=100.0,
+        stop_loss=95.0,
+        target=120.0,
+        rr_ratio=4.0,
+        composite_score=76.5,
+        detected_at=now,
+        confidence_score=64.0,
+        momentum_percentile=88.0,
+        recommended_action="LONG_HIGH",
+        risk_level="Low",
+    )
+    session.add(setup)
+    await session.flush()
+
+    await rr._create_signal_context_snapshots(session, [setup])
+    await session.commit()
+
+    # Look the row up by its setup rather than assuming primary key 1, so the
+    # test does not depend on an empty snapshot table.
+    row = (
+        await session.execute(
+            select(SignalContextSnapshot).where(
+                SignalContextSnapshot.trade_setup_id == setup.id
+            )
+        )
+    ).scalar_one()
+    assert row.ticker_id == ticker.id
+    assert row.strategy_version == rr.STRATEGY_VERSION
+    assert row.momentum_percentile == 88.0
+
+    score = json.loads(row.score_context_json)
+    sentiment = json.loads(row.sentiment_context_json)
+    fundamental = json.loads(row.fundamental_context_json)
+
+    assert score["composite_score"] == 76.5
+    assert score["dimensions"]["technical"]["score"] == 71.0
+    assert sentiment["classification"] == "BULLISH"
+    assert sentiment["confidence"] == 78
+    assert fundamental["pe_ratio"] == 25.0
+    assert fundamental["next_earnings_date"] == "2026-08-01"