From 2f21c685e8bed3e538d02992024dbca3bbd38c99 Mon Sep 17 00:00:00 2001
From: Dennis Thiessen <dennis@thiessen.io>
Date: Sat, 27 Jun 2026 15:59:58 +0200
Subject: [PATCH] feat: always-fresh sentiment for top picks, watchlist & open
 trades
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tiered, uncapped sentiment scope so the names that matter are never shown
without sentiment.

- Priority (always fully refreshed): top-pick feeders — momentum leaders with a
  tradeable long setup over the R:R floor (the tickers that are, or could become
  with positive sentiment, the dashboard top pick) — plus the curated watchlist
  and open paper trades.
- Filler: top-N by composite, a discovery net, fetched after the priority set so
  a mid-run rate limit lands the important names first.
- Removed the per-run cap (sentiment_max_per_run): the relevant set is naturally
  bounded (watchlist <= 20, composite <= top_composite), so a full refresh stays
  inside the free tier. extra="ignore" keeps a stale env var from breaking startup.
- Refresh window 72h -> 120h (5 days): sentiment shifts slowly, score window is 7d.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 app/config.py                         |  14 +-
 app/scheduler.py                      | 165 +++++++++++------
 tests/unit/test_sentiment_priority.py | 246 +++++++++++++++++++++-----
 3 files changed, 329 insertions(+), 96 deletions(-)

diff --git a/app/config.py b/app/config.py
index 0266bef..efae61a 100644
--- a/app/config.py
+++ b/app/config.py
@@ -2,7 +2,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
 
 
 class Settings(BaseSettings):
-    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
+    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")
 
     # Database
     database_url: str = "postgresql+asyncpg://stock_backend:changeme@localhost:5432/stock_data_backend"
@@ -49,10 +49,14 @@ class Settings(BaseSettings):
     data_collector_frequency: str = "daily"
     sentiment_poll_interval_minutes: int = 30
     # Sentiment search-budget controls (Gemini grounding free tier = 5000/month).
-    # Only fetch sentiment for relevant tickers (watchlist + open trades + top-N by
-    # composite), skip ones refreshed within fresh_hours, and cap per run.
-    sentiment_fresh_hours: int = 72
-    sentiment_max_per_run: int = 25
+    # Scope (see _get_sentiment_priority_tickers): everything that matters is always
+    # refreshed in full — open paper trades + the curated watchlist + top-pick
+    # feeders (momentum leaders with a tradeable long setup) — plus a top-N composite
+    # discovery net. No per-run cap: the set is naturally bounded (watchlist <= 20,
+    # composite <= top_composite), so a full refresh stays well inside the free tier.
+    # Skip anything refreshed within fresh_hours (5 days: sentiment shifts slowly and
+    # the score window is 7 days).
+    sentiment_fresh_hours: int = 120
     sentiment_top_composite: int = 30
     fundamental_fetch_frequency: str = "weekly"  # quarterly-ish data; weekly conserves API quota
     rr_scan_frequency: str = "daily"
diff --git a/app/scheduler.py b/app/scheduler.py
index 38b310a..360e8b8 100644
--- a/app/scheduler.py
+++ b/app/scheduler.py
@@ -20,7 +20,7 @@ from datetime import date, datetime, timedelta, timezone
 
 from apscheduler.schedulers.asyncio import AsyncIOScheduler
 from apscheduler.triggers.cron import CronTrigger
-from sqlalchemy import case, func, or_, select
+from sqlalchemy import and_, case, func, or_, select
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.config import settings
@@ -218,78 +218,141 @@ async def _get_ohlcv_priority_tickers(db: AsyncSession) -> list[str]:
     return list(result.scalars().all())
 
 
-async def _get_sentiment_priority_tickers(db: AsyncSession) -> list[str]:
-    """Symbols to fetch sentiment for, budgeted to stay in the free search tier.
+async def _get_top_pick_feeder_ids(db: AsyncSession) -> set[int]:
+    """Ticker ids whose latest LONG setup makes them a top-pick feeder.
 
-    Scope: only tickers that matter — watchlist + open paper trades + top-N by
-    composite score + the momentum leaders the activation gate qualifies on. Skip
-    any refreshed within ``sentiment_fresh_hours``. Cap the run at
-    ``sentiment_max_per_run``, oldest/missing first. Once the relevant set is
-    fresh, runs make zero grounded searches until it ages out.
+    A dashboard 'top pick' is the highest-momentum *qualified* setup. Sentiment
+    can never move a ticker's momentum percentile (the gate's core axis) — only
+    its confidence and EV ranking. So the only tickers that are, or could become
+    with positive sentiment, a top pick are momentum leaders that already have a
+    tradeable long setup clearing the R:R floor. That set is exactly:
+
+        latest long setup with momentum_percentile >= gate AND rr_ratio >= floor.
+
+    It contains both the currently-qualified setups and the near-miss ones held
+    back only by a neutral/missing sentiment — the cases the user saw surface as
+    top picks with no sentiment. Only meaningful with the momentum gate on
+    (min_momentum_percentile > 0); off, there is no leader axis to anchor on, so
+    this returns an empty set. Best-effort: a config failure must not stop collection.
+    """
+    from app.models.trade_setup import TradeSetup
+
+    try:
+        from app.services.admin_service import get_activation_config
+
+        activation = await get_activation_config(db)
+        min_pct = float(activation.get("min_momentum_percentile", 0.0))
+        min_rr = float(activation.get("min_rr", 0.0))
+    except Exception:
+        logger.exception("Sentiment top-pick scoping failed; using filler set only")
+        return set()
+
+    if min_pct <= 0:
+        return set()
+
+    # Latest long setup per ticker, then keep those clearing the gate's momentum
+    # percentile and R:R floor. (Sentiment runs before the day's scan, so this
+    # reads the previous scan's setups — momentum is a slow, cross-sectional signal,
+    # so yesterday's leaders are the right anchor.)
+    latest_long = (
+        select(TradeSetup.ticker_id, func.max(TradeSetup.detected_at).label("md"))
+        .where(TradeSetup.direction == "long")
+        .group_by(TradeSetup.ticker_id)
+        .subquery()
+    )
+    rows = await db.execute(
+        select(TradeSetup.ticker_id)
+        .join(
+            latest_long,
+            and_(
+                TradeSetup.ticker_id == latest_long.c.ticker_id,
+                TradeSetup.detected_at == latest_long.c.md,
+            ),
+        )
+        .where(
+            TradeSetup.direction == "long",
+            TradeSetup.rr_ratio >= min_rr,
+            TradeSetup.momentum_percentile.is_not(None),
+            TradeSetup.momentum_percentile >= min_pct,
+        )
+    )
+    return {r[0] for r in rows.all()}
+
+
+async def _stale_sentiment_symbols(
+    db: AsyncSession, ticker_ids: set[int], cutoff: datetime
+) -> list[str]:
+    """Symbols among ``ticker_ids`` whose newest sentiment is missing or older than
+    ``cutoff``, ordered missing-first → oldest → alphabetical."""
+    if not ticker_ids:
+        return []
+    latest_ts = func.max(SentimentScore.timestamp)
+    missing_first = case((latest_ts.is_(None), 0), else_=1)
+    stmt = (
+        select(Ticker.symbol)
+        .outerjoin(SentimentScore, SentimentScore.ticker_id == Ticker.id)
+        .where(Ticker.id.in_(ticker_ids))
+        .group_by(Ticker.id, Ticker.symbol)
+        .having(or_(latest_ts.is_(None), latest_ts < cutoff))
+        .order_by(missing_first.asc(), latest_ts.asc(), Ticker.symbol.asc())
+    )
+    result = await db.execute(stmt)
+    return list(result.scalars().all())
+
+
+async def _get_sentiment_priority_tickers(db: AsyncSession) -> list[str]:
+    """Symbols to fetch sentiment for, skipping anything refreshed within
+    ``sentiment_fresh_hours``.
+
+    No per-run cap: the relevant set is naturally bounded (curated watchlist <= 20,
+    a handful of open trades and top-pick feeders, top-N composite), so refreshing
+    all of it stays well inside the free search tier — and everything that matters
+    is always fully covered. The two tiers only affect ORDER, so a mid-run provider
+    rate limit still lands the names we care about first:
+
+      Priority: top-pick feeders (momentum leaders with a tradeable long setup, see
+        ``_get_top_pick_feeder_ids``) + the curated watchlist + open paper trades —
+        the set we never want shown without sentiment.
+      Filler: top-N by composite — a cheap discovery net for names not yet covered.
+
+    Once the set is fresh, runs make zero grounded searches until it ages out.
     """
     from app.models.paper_trade import PaperTrade
     from app.models.score import CompositeScore
     from app.models.watchlist import WatchlistEntry
 
-    relevant: set[int] = set()
+    cutoff = datetime.now(timezone.utc) - timedelta(hours=settings.sentiment_fresh_hours)
+
+    # Priority: the set we always want fresh — top-pick feeders, the curated
+    # watchlist, and open positions.
+    priority_ids = await _get_top_pick_feeder_ids(db)
     wl = await db.execute(
         select(WatchlistEntry.ticker_id)
         .where(WatchlistEntry.entry_type != "dismissed")
         .distinct()
     )
-    relevant.update(r[0] for r in wl.all())
+    priority_ids.update(r[0] for r in wl.all())
     pt = await db.execute(
         select(PaperTrade.ticker_id).where(PaperTrade.status == "open").distinct()
     )
-    relevant.update(r[0] for r in pt.all())
+    priority_ids.update(r[0] for r in pt.all())
+
+    # Filler: top-N by composite, a discovery net for names not already covered.
     top = await db.execute(
         select(CompositeScore.ticker_id)
         .order_by(CompositeScore.score.desc())
         .limit(settings.sentiment_top_composite)
     )
-    relevant.update(r[0] for r in top.all())
+    filler_ids = {r[0] for r in top.all()} - priority_ids
 
-    # Momentum leaders: the tickers that can clear the activation gate, which
-    # selects the top ``min_momentum_percentile`` slice by 12-1 momentum — a
-    # different axis than composite score. The gate qualifies setups on this
-    # percentile, so without including them a freshly-qualifying ticker carries no
-    # sentiment and gets enhanced as neutral. Pre-fetching their sentiment here (in
-    # the daily pipeline, sentiment runs right after the OHLCV refresh) means the
-    # following R:R scan reads real sentiment for the setups it qualifies.
-    # Best-effort: a momentum/config failure must not stop sentiment collection.
-    try:
-        from app.services import momentum_service
-        from app.services.admin_service import get_activation_config
-
-        activation = await get_activation_config(db)
-        min_pct = float(activation.get("min_momentum_percentile", 0.0))
-        if min_pct > 0:
-            percentiles = await momentum_service.compute_momentum_percentiles(db)
-            leaders = [sym for sym, pct in percentiles.items() if pct >= min_pct]
-            if leaders:
-                rows = await db.execute(
-                    select(Ticker.id).where(Ticker.symbol.in_(leaders))
-                )
-                relevant.update(r[0] for r in rows.all())
-    except Exception:
-        logger.exception("Sentiment momentum-leader scoping failed; using base relevant set")
-
-    if not relevant:
+    if not priority_ids and not filler_ids:
         return []
 
-    cutoff = datetime.now(timezone.utc) - timedelta(hours=settings.sentiment_fresh_hours)
-    latest_ts = func.max(SentimentScore.timestamp)
-    missing_first = case((latest_ts.is_(None), 0), else_=1)
-    result = await db.execute(
-        select(Ticker.symbol)
-        .outerjoin(SentimentScore, SentimentScore.ticker_id == Ticker.id)
-        .where(Ticker.id.in_(relevant))
-        .group_by(Ticker.id, Ticker.symbol)
-        .having(or_(latest_ts.is_(None), latest_ts < cutoff))
-        .order_by(missing_first.asc(), latest_ts.asc(), Ticker.symbol.asc())
-        .limit(settings.sentiment_max_per_run)
-    )
-    return list(result.scalars().all())
+    # No cap — fetch every stale name. Priority first so a rate limit mid-run still
+    # covers the curated/at-risk set before the discovery net.
+    priority_syms = await _stale_sentiment_symbols(db, priority_ids, cutoff)
+    filler_syms = await _stale_sentiment_symbols(db, filler_ids, cutoff)
+    return priority_syms + filler_syms
 
 
 async def _get_fundamental_priority_tickers(db: AsyncSession) -> list[str]:
diff --git a/tests/unit/test_sentiment_priority.py b/tests/unit/test_sentiment_priority.py
index 376e68d..0dbcf5b 100644
--- a/tests/unit/test_sentiment_priority.py
+++ b/tests/unit/test_sentiment_priority.py
@@ -1,21 +1,27 @@
 """Tests for sentiment-collection scoping (``_get_sentiment_priority_tickers``).
 
-The activation gate qualifies setups on 12-1 momentum percentile, a different
-axis than composite score. These tests pin the fix that adds the gate's momentum
-leaders to the sentiment relevant-set so a freshly-qualifying ticker isn't left
-without sentiment.
+A dashboard 'top pick' is the highest-momentum *qualified* long setup. Sentiment
+can never move a ticker's momentum percentile (the gate's core axis) — only its
+confidence and EV ranking. So the tickers that are, or could become with positive
+sentiment, a top pick are exactly the momentum leaders that already carry a
+tradeable long setup over the R:R floor. These tests pin that priority tier
+(always refreshed in full) and the filler tier ordered behind it, both uncapped.
 """
 
 from __future__ import annotations
 
-from datetime import date, datetime, timedelta, timezone
+from datetime import datetime, timedelta, timezone
 
 import pytest
 
 from app import scheduler
-from app.models.ohlcv import OHLCVRecord
+from app.models.paper_trade import PaperTrade
+from app.models.score import CompositeScore
+from app.models.sentiment import SentimentScore
 from app.models.settings import SystemSetting
 from app.models.ticker import Ticker
+from app.models.trade_setup import TradeSetup
+from app.models.watchlist import WatchlistEntry
 
 
 @pytest.fixture
@@ -26,56 +32,216 @@ async def session():
         yield s
 
 
-async def _seed_history(session, symbol: str, rate: float, n: int = 280) -> Ticker:
-    """Seed a ticker with a full year+ of daily closes growing at ``rate``."""
+async def _add_ticker(session, symbol: str) -> Ticker:
     t = Ticker(symbol=symbol)
     session.add(t)
     await session.flush()
-    base = date(2024, 1, 1)
-    for i in range(n):
-        close = 100.0 * (rate ** i)
-        session.add(OHLCVRecord(
-            ticker_id=t.id,
-            date=base + timedelta(days=i),
-            open=close, high=close, low=close, close=close,
-            volume=1_000_000,
-        ))
-    await session.commit()
     return t
 
 
-async def _set_min_momentum(session, value: str) -> None:
-    session.add(SystemSetting(
-        key="activation_min_momentum_percentile",
-        value=value,
-        updated_at=datetime.now(timezone.utc),
+async def _add_setup(
+    session,
+    ticker: Ticker,
+    *,
+    direction: str = "long",
+    momentum_percentile: float | None = 95.0,
+    rr_ratio: float = 2.0,
+    detected_at: datetime | None = None,
+) -> TradeSetup:
+    session.add(TradeSetup(
+        ticker_id=ticker.id,
+        direction=direction,
+        entry_price=100.0,
+        stop_loss=95.0,
+        target=110.0,
+        rr_ratio=rr_ratio,
+        composite_score=60.0,
+        momentum_percentile=momentum_percentile,
+        detected_at=detected_at or datetime.now(timezone.utc),
     ))
     await session.commit()
 
 
-async def test_momentum_leader_is_included_without_composite_or_watchlist(session):
-    """A top-percentile momentum ticker is fetched even when it has no composite
-    score, no watchlist entry, and no open trade — the case that previously left
-    qualifying setups with no sentiment."""
-    await _seed_history(session, "LEADER", rate=1.010)   # strong uptrend → pct 100
-    await _seed_history(session, "LAGGARD", rate=0.999)   # declining → pct 0
-    await _set_min_momentum(session, "80")
+async def _add_composite(session, ticker: Ticker, score: float) -> None:
+    session.add(CompositeScore(
+        ticker_id=ticker.id,
+        score=score,
+        is_stale=False,
+        weights_json="{}",
+        computed_at=datetime.now(timezone.utc),
+    ))
+    await session.commit()
+
+
+async def _add_watchlist(session, ticker: Ticker) -> None:
+    session.add(WatchlistEntry(
+        user_id=1,
+        ticker_id=ticker.id,
+        entry_type="manual",
+        added_at=datetime.now(timezone.utc),
+    ))
+    await session.commit()
+
+
+async def _add_open_trade(session, ticker: Ticker) -> None:
+    session.add(PaperTrade(
+        user_id=1,
+        ticker_id=ticker.id,
+        direction="long",
+        entry_price=100.0,
+        shares=10.0,
+        stop_loss=95.0,
+        target=110.0,
+        status="open",
+        opened_at=datetime.now(timezone.utc),
+    ))
+    await session.commit()
+
+
+async def _add_sentiment(session, ticker: Ticker, hours_ago: float) -> None:
+    session.add(SentimentScore(
+        ticker_id=ticker.id,
+        classification="bullish",
+        confidence=80,
+        source="test",
+        timestamp=datetime.now(timezone.utc) - timedelta(hours=hours_ago),
+    ))
+    await session.commit()
+
+
+async def _set_setting(session, key: str, value: str) -> None:
+    session.add(SystemSetting(key=key, value=value, updated_at=datetime.now(timezone.utc)))
+    await session.commit()
+
+
+async def test_top_pick_feeder_included_below_cutoff_excluded(session):
+    """A momentum leader with a tradeable long setup over the R:R floor is fetched;
+    one whose setup is below the gate's percentile is not."""
+    feeder = await _add_ticker(session, "FEEDER")
+    await _add_setup(session, feeder, momentum_percentile=95.0)
+    laggard = await _add_ticker(session, "LAGGARD")
+    await _add_setup(session, laggard, momentum_percentile=50.0)  # below the gate
+    await _set_setting(session, "activation_min_momentum_percentile", "80")
 
     symbols = await scheduler._get_sentiment_priority_tickers(session)
 
-    assert "LEADER" in symbols
-    # Below the gate's percentile and not otherwise relevant → not fetched.
+    assert "FEEDER" in symbols
     assert "LAGGARD" not in symbols
 
 
-async def test_momentum_leaders_skipped_when_gate_disabled(session):
-    """With the momentum gate off (min percentile 0), the leader is no longer
-    pulled in solely on momentum — scoping falls back to the base relevant set."""
-    await _seed_history(session, "LEADER", rate=1.010)
-    await _seed_history(session, "LAGGARD", rate=0.999)
-    await _set_min_momentum(session, "0")
+async def test_leader_without_a_setup_excluded(session):
+    """A ticker with no long setup can't be a top pick, so it's no longer pulled in
+    on momentum alone — the budget goes to actual top-pick feeders."""
+    await _add_ticker(session, "NOSETUP")
+    await _set_setting(session, "activation_min_momentum_percentile", "80")
 
     symbols = await scheduler._get_sentiment_priority_tickers(session)
 
-    assert "LEADER" not in symbols
-    assert "LAGGARD" not in symbols
+    assert "NOSETUP" not in symbols
+
+
+async def test_short_only_setup_excluded(session):
+    """The gate is long-only while active; a short setup can never be a top pick,
+    so positive sentiment can't promote it and it stays out of scope."""
+    t = await _add_ticker(session, "SHORTY")
+    await _add_setup(session, t, direction="short", momentum_percentile=95.0)
+    await _set_setting(session, "activation_min_momentum_percentile", "80")
+
+    symbols = await scheduler._get_sentiment_priority_tickers(session)
+
+    assert "SHORTY" not in symbols
+
+
+async def test_long_setup_below_rr_floor_excluded(session):
+    """A long leader whose setup doesn't clear the R:R floor isn't tradeable as a
+    top pick regardless of sentiment."""
+    t = await _add_ticker(session, "THINRR")
+    await _add_setup(session, t, momentum_percentile=95.0, rr_ratio=0.5)
+    await _set_setting(session, "activation_min_momentum_percentile", "80")
+    await _set_setting(session, "activation_min_rr", "1.2")
+
+    symbols = await scheduler._get_sentiment_priority_tickers(session)
+
+    assert "THINRR" not in symbols
+
+
+async def test_gate_disabled_no_priority_tier(session):
+    """With the momentum gate off there is no leader axis to anchor on, so a strong
+    long setup is not pulled in on its own — scope falls back to the filler set."""
+    t = await _add_ticker(session, "FEEDER")
+    await _add_setup(session, t, momentum_percentile=95.0)
+    await _set_setting(session, "activation_min_momentum_percentile", "0")
+
+    symbols = await scheduler._get_sentiment_priority_tickers(session)
+
+    assert "FEEDER" not in symbols
+
+
+async def test_fresh_feeder_skipped_stale_refetched(session):
+    """A feeder refreshed within the fresh window is skipped; one past it is
+    re-fetched."""
+    fresh = await _add_ticker(session, "FRESH")
+    await _add_setup(session, fresh, momentum_percentile=95.0)
+    await _add_sentiment(session, fresh, hours_ago=1.0)
+    stale = await _add_ticker(session, "STALE")
+    await _add_setup(session, stale, momentum_percentile=95.0)
+    await _add_sentiment(session, stale, hours_ago=settings_fresh_hours() + 50)
+    await _set_setting(session, "activation_min_momentum_percentile", "80")
+
+    symbols = await scheduler._get_sentiment_priority_tickers(session)
+
+    assert "FRESH" not in symbols
+    assert "STALE" in symbols
+
+
+async def test_watchlist_and_open_trades_always_included(session):
+    """The curated watchlist and open paper trades are always in scope — they're
+    the set we never want shown without sentiment, independent of any top pick."""
+    await _set_setting(session, "activation_min_momentum_percentile", "80")
+    wl = await _add_ticker(session, "WATCHED")
+    await _add_watchlist(session, wl)
+    held = await _add_ticker(session, "HELD")
+    await _add_open_trade(session, held)
+
+    symbols = await scheduler._get_sentiment_priority_tickers(session)
+
+    assert "WATCHED" in symbols
+    assert "HELD" in symbols
+
+
+async def test_dismissed_watchlist_entry_excluded(session):
+    """A dismissed watchlist entry is not refreshed."""
+    await _set_setting(session, "activation_min_momentum_percentile", "80")
+    t = await _add_ticker(session, "DISMISSED")
+    session.add(WatchlistEntry(
+        user_id=1,
+        ticker_id=t.id,
+        entry_type="dismissed",
+        added_at=datetime.now(timezone.utc),
+    ))
+    await session.commit()
+
+    symbols = await scheduler._get_sentiment_priority_tickers(session)
+
+    assert "DISMISSED" not in symbols
+
+
+async def test_no_per_run_cap_everything_stale_is_fetched(session, monkeypatch):
+    """No truncation: every stale name in the relevant set is returned, however
+    many there are (the cap was removed)."""
+    await _set_setting(session, "activation_min_momentum_percentile", "80")
+    feeders = [f"F{i:02d}" for i in range(30)]  # well past the old cap of 25
+    for sym in feeders:
+        t = await _add_ticker(session, sym)
+        await _add_setup(session, t, momentum_percentile=95.0)
+    filler = await _add_ticker(session, "FILL")
+    await _add_composite(session, filler, score=99.0)
+
+    symbols = await scheduler._get_sentiment_priority_tickers(session)
+
+    assert set(feeders).issubset(set(symbols))  # all feeders, no truncation
+    assert "FILL" in symbols  # filler fetched too — nothing crowded out
+
+
+def settings_fresh_hours() -> float:
+    return float(scheduler.settings.sentiment_fresh_hours)