From 2f21c685e8bed3e538d02992024dbca3bbd38c99 Mon Sep 17 00:00:00 2001 From: Dennis Thiessen Date: Sat, 27 Jun 2026 15:59:58 +0200 Subject: [PATCH] feat: always-fresh sentiment for top picks, watchlist & open trades MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tiered, uncapped sentiment scope so the names that matter are never shown without sentiment. - Priority (always fully refreshed): top-pick feeders — momentum leaders with a tradeable long setup over the R:R floor (the tickers that are, or could become with positive sentiment, the dashboard top pick) — plus the curated watchlist and open paper trades. - Filler: top-N by composite, a discovery net, fetched after the priority set so a mid-run rate limit lands the important names first. - Removed the per-run cap (sentiment_max_per_run): the relevant set is naturally bounded (watchlist <= 20, composite <= top_composite), so a full refresh stays inside the free tier. extra="ignore" keeps a stale env var from breaking startup. - Refresh window 72h -> 120h (5 days): sentiment shifts slowly, score window is 7d. 
Co-Authored-By: Claude Opus 4.8 --- app/config.py | 14 +- app/scheduler.py | 165 +++++++++++------ tests/unit/test_sentiment_priority.py | 246 +++++++++++++++++++++----- 3 files changed, 329 insertions(+), 96 deletions(-) diff --git a/app/config.py b/app/config.py index 0266bef..efae61a 100644 --- a/app/config.py +++ b/app/config.py @@ -2,7 +2,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict class Settings(BaseSettings): - model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8") + model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore") # Database database_url: str = "postgresql+asyncpg://stock_backend:changeme@localhost:5432/stock_data_backend" @@ -49,10 +49,14 @@ class Settings(BaseSettings): data_collector_frequency: str = "daily" sentiment_poll_interval_minutes: int = 30 # Sentiment search-budget controls (Gemini grounding free tier = 5000/month). - # Only fetch sentiment for relevant tickers (watchlist + open trades + top-N by - # composite), skip ones refreshed within fresh_hours, and cap per run. - sentiment_fresh_hours: int = 72 - sentiment_max_per_run: int = 25 + # Scope (see _get_sentiment_priority_tickers): everything that matters is always + # refreshed in full — open paper trades + the curated watchlist + top-pick + # feeders (momentum leaders with a tradeable long setup) — plus a top-N composite + # discovery net. No per-run cap: the set is naturally bounded (watchlist <= 20, + # composite <= top_composite), so a full refresh stays well inside the free tier. + # Skip anything refreshed within fresh_hours (5 days: sentiment shifts slowly and + # the score window is 7 days). 
+ sentiment_fresh_hours: int = 120 sentiment_top_composite: int = 30 fundamental_fetch_frequency: str = "weekly" # quarterly-ish data; weekly conserves API quota rr_scan_frequency: str = "daily" diff --git a/app/scheduler.py b/app/scheduler.py index 38b310a..360e8b8 100644 --- a/app/scheduler.py +++ b/app/scheduler.py @@ -20,7 +20,7 @@ from datetime import date, datetime, timedelta, timezone from apscheduler.schedulers.asyncio import AsyncIOScheduler from apscheduler.triggers.cron import CronTrigger -from sqlalchemy import case, func, or_, select +from sqlalchemy import and_, case, func, or_, select from sqlalchemy.ext.asyncio import AsyncSession from app.config import settings @@ -218,78 +218,141 @@ async def _get_ohlcv_priority_tickers(db: AsyncSession) -> list[str]: return list(result.scalars().all()) -async def _get_sentiment_priority_tickers(db: AsyncSession) -> list[str]: - """Symbols to fetch sentiment for, budgeted to stay in the free search tier. +async def _get_top_pick_feeder_ids(db: AsyncSession) -> set[int]: + """Ticker ids whose latest LONG setup makes them a top-pick feeder. - Scope: only tickers that matter — watchlist + open paper trades + top-N by - composite score + the momentum leaders the activation gate qualifies on. Skip - any refreshed within ``sentiment_fresh_hours``. Cap the run at - ``sentiment_max_per_run``, oldest/missing first. Once the relevant set is - fresh, runs make zero grounded searches until it ages out. + A dashboard 'top pick' is the highest-momentum *qualified* setup. Sentiment + can never move a ticker's momentum percentile (the gate's core axis) — only + its confidence and EV ranking. So the only tickers that are, or could become + with positive sentiment, a top pick are momentum leaders that already have a + tradeable long setup clearing the R:R floor. That set is exactly: + + latest long setup with momentum_percentile >= gate AND rr_ratio >= floor. 
+ + It contains both the currently-qualified setups and the near-miss ones held + back only by a neutral/missing sentiment — the cases the user saw surface as + top picks with no sentiment. Only meaningful with the momentum gate on + (min_momentum_percentile > 0); off, there is no leader axis to anchor on and we + defer to the filler set. Best-effort: a config failure must not stop collection. + """ + from app.models.trade_setup import TradeSetup + + try: + from app.services.admin_service import get_activation_config + + activation = await get_activation_config(db) + min_pct = float(activation.get("min_momentum_percentile", 0.0)) + min_rr = float(activation.get("min_rr", 0.0)) + except Exception: + logger.exception("Sentiment top-pick scoping failed; using filler set only") + return set() + + if min_pct <= 0: + return set() + + # Latest long setup per ticker, then keep those clearing the gate's momentum + # percentile and R:R floor. (Sentiment runs before the day's scan, so this + # reads the previous scan's setups — momentum is a slow, cross-sectional signal, + # so yesterday's leaders are the right anchor.) 
+ latest_long = ( + select(TradeSetup.ticker_id, func.max(TradeSetup.detected_at).label("md")) + .where(TradeSetup.direction == "long") + .group_by(TradeSetup.ticker_id) + .subquery() + ) + rows = await db.execute( + select(TradeSetup.ticker_id) + .join( + latest_long, + and_( + TradeSetup.ticker_id == latest_long.c.ticker_id, + TradeSetup.detected_at == latest_long.c.md, + ), + ) + .where( + TradeSetup.direction == "long", + TradeSetup.rr_ratio >= min_rr, + TradeSetup.momentum_percentile.is_not(None), + TradeSetup.momentum_percentile >= min_pct, + ) + ) + return {r[0] for r in rows.all()} + + +async def _stale_sentiment_symbols( + db: AsyncSession, ticker_ids: set[int], cutoff: datetime +) -> list[str]: + """Symbols among ``ticker_ids`` whose newest sentiment is missing or older than + ``cutoff``, ordered missing-first → oldest → alphabetical.""" + if not ticker_ids: + return [] + latest_ts = func.max(SentimentScore.timestamp) + missing_first = case((latest_ts.is_(None), 0), else_=1) + stmt = ( + select(Ticker.symbol) + .outerjoin(SentimentScore, SentimentScore.ticker_id == Ticker.id) + .where(Ticker.id.in_(ticker_ids)) + .group_by(Ticker.id, Ticker.symbol) + .having(or_(latest_ts.is_(None), latest_ts < cutoff)) + .order_by(missing_first.asc(), latest_ts.asc(), Ticker.symbol.asc()) + ) + result = await db.execute(stmt) + return list(result.scalars().all()) + + +async def _get_sentiment_priority_tickers(db: AsyncSession) -> list[str]: + """Symbols to fetch sentiment for, skipping anything refreshed within + ``sentiment_fresh_hours``. + + No per-run cap: the relevant set is naturally bounded (curated watchlist <= 20, + a handful of open trades and top-pick feeders, top-N composite), so refreshing + all of it stays well inside the free search tier — and everything that matters + is always fully covered. 
The two tiers only affect ORDER, so a mid-run provider + rate limit still lands the names we care about first: + + Priority: top-pick feeders (momentum leaders with a tradeable long setup, see + ``_get_top_pick_feeder_ids``) + the curated watchlist + open paper trades — + the set we never want shown without sentiment. + Filler: top-N by composite — a cheap discovery net for names not yet covered. + + Once the set is fresh, runs make zero grounded searches until it ages out. """ from app.models.paper_trade import PaperTrade from app.models.score import CompositeScore from app.models.watchlist import WatchlistEntry - relevant: set[int] = set() + cutoff = datetime.now(timezone.utc) - timedelta(hours=settings.sentiment_fresh_hours) + + # Priority: the set we always want fresh — top-pick feeders, the curated + # watchlist, and open positions. + priority_ids = await _get_top_pick_feeder_ids(db) wl = await db.execute( select(WatchlistEntry.ticker_id) .where(WatchlistEntry.entry_type != "dismissed") .distinct() ) - relevant.update(r[0] for r in wl.all()) + priority_ids.update(r[0] for r in wl.all()) pt = await db.execute( select(PaperTrade.ticker_id).where(PaperTrade.status == "open").distinct() ) - relevant.update(r[0] for r in pt.all()) + priority_ids.update(r[0] for r in pt.all()) + + # Filler: top-N by composite, a discovery net for names not already covered. top = await db.execute( select(CompositeScore.ticker_id) .order_by(CompositeScore.score.desc()) .limit(settings.sentiment_top_composite) ) - relevant.update(r[0] for r in top.all()) + filler_ids = {r[0] for r in top.all()} - priority_ids - # Momentum leaders: the tickers that can clear the activation gate, which - # selects the top ``min_momentum_percentile`` slice by 12-1 momentum — a - # different axis than composite score. The gate qualifies setups on this - # percentile, so without including them a freshly-qualifying ticker carries no - # sentiment and gets enhanced as neutral. 
Pre-fetching their sentiment here (in - # the daily pipeline, sentiment runs right after the OHLCV refresh) means the - # following R:R scan reads real sentiment for the setups it qualifies. - # Best-effort: a momentum/config failure must not stop sentiment collection. - try: - from app.services import momentum_service - from app.services.admin_service import get_activation_config - - activation = await get_activation_config(db) - min_pct = float(activation.get("min_momentum_percentile", 0.0)) - if min_pct > 0: - percentiles = await momentum_service.compute_momentum_percentiles(db) - leaders = [sym for sym, pct in percentiles.items() if pct >= min_pct] - if leaders: - rows = await db.execute( - select(Ticker.id).where(Ticker.symbol.in_(leaders)) - ) - relevant.update(r[0] for r in rows.all()) - except Exception: - logger.exception("Sentiment momentum-leader scoping failed; using base relevant set") - - if not relevant: + if not priority_ids and not filler_ids: return [] - cutoff = datetime.now(timezone.utc) - timedelta(hours=settings.sentiment_fresh_hours) - latest_ts = func.max(SentimentScore.timestamp) - missing_first = case((latest_ts.is_(None), 0), else_=1) - result = await db.execute( - select(Ticker.symbol) - .outerjoin(SentimentScore, SentimentScore.ticker_id == Ticker.id) - .where(Ticker.id.in_(relevant)) - .group_by(Ticker.id, Ticker.symbol) - .having(or_(latest_ts.is_(None), latest_ts < cutoff)) - .order_by(missing_first.asc(), latest_ts.asc(), Ticker.symbol.asc()) - .limit(settings.sentiment_max_per_run) - ) - return list(result.scalars().all()) + # No cap — fetch every stale name. Priority first so a rate limit mid-run still + # covers the curated/at-risk set before the discovery net. 
+ priority_syms = await _stale_sentiment_symbols(db, priority_ids, cutoff) + filler_syms = await _stale_sentiment_symbols(db, filler_ids, cutoff) + return priority_syms + filler_syms async def _get_fundamental_priority_tickers(db: AsyncSession) -> list[str]: diff --git a/tests/unit/test_sentiment_priority.py b/tests/unit/test_sentiment_priority.py index 376e68d..0dbcf5b 100644 --- a/tests/unit/test_sentiment_priority.py +++ b/tests/unit/test_sentiment_priority.py @@ -1,21 +1,27 @@ """Tests for sentiment-collection scoping (``_get_sentiment_priority_tickers``). -The activation gate qualifies setups on 12-1 momentum percentile, a different -axis than composite score. These tests pin the fix that adds the gate's momentum -leaders to the sentiment relevant-set so a freshly-qualifying ticker isn't left -without sentiment. +A dashboard 'top pick' is the highest-momentum *qualified* long setup. Sentiment +can never move a ticker's momentum percentile (the gate's core axis) — only its +confidence and EV ranking. So the tickers that are, or could become with positive +sentiment, a top pick are exactly the momentum leaders that already carry a +tradeable long setup over the R:R floor. These tests pin that priority tier +(always refreshed, fetched first) and the uncapped filler tier behind it. 
""" from __future__ import annotations -from datetime import date, datetime, timedelta, timezone +from datetime import datetime, timedelta, timezone import pytest from app import scheduler -from app.models.ohlcv import OHLCVRecord +from app.models.paper_trade import PaperTrade +from app.models.score import CompositeScore +from app.models.sentiment import SentimentScore from app.models.settings import SystemSetting from app.models.ticker import Ticker +from app.models.trade_setup import TradeSetup +from app.models.watchlist import WatchlistEntry @pytest.fixture @@ -26,56 +32,216 @@ async def session(): yield s -async def _seed_history(session, symbol: str, rate: float, n: int = 280) -> Ticker: - """Seed a ticker with a full year+ of daily closes growing at ``rate``.""" +async def _add_ticker(session, symbol: str) -> Ticker: t = Ticker(symbol=symbol) session.add(t) await session.flush() - base = date(2024, 1, 1) - for i in range(n): - close = 100.0 * (rate ** i) - session.add(OHLCVRecord( - ticker_id=t.id, - date=base + timedelta(days=i), - open=close, high=close, low=close, close=close, - volume=1_000_000, - )) - await session.commit() return t -async def _set_min_momentum(session, value: str) -> None: - session.add(SystemSetting( - key="activation_min_momentum_percentile", - value=value, - updated_at=datetime.now(timezone.utc), +async def _add_setup( + session, + ticker: Ticker, + *, + direction: str = "long", + momentum_percentile: float | None = 95.0, + rr_ratio: float = 2.0, + detected_at: datetime | None = None, +) -> TradeSetup: + session.add(TradeSetup( + ticker_id=ticker.id, + direction=direction, + entry_price=100.0, + stop_loss=95.0, + target=110.0, + rr_ratio=rr_ratio, + composite_score=60.0, + momentum_percentile=momentum_percentile, + detected_at=detected_at or datetime.now(timezone.utc), )) await session.commit() -async def test_momentum_leader_is_included_without_composite_or_watchlist(session): - """A top-percentile momentum ticker is fetched even 
when it has no composite - score, no watchlist entry, and no open trade — the case that previously left - qualifying setups with no sentiment.""" - await _seed_history(session, "LEADER", rate=1.010) # strong uptrend → pct 100 - await _seed_history(session, "LAGGARD", rate=0.999) # declining → pct 0 - await _set_min_momentum(session, "80") +async def _add_composite(session, ticker: Ticker, score: float) -> None: + session.add(CompositeScore( + ticker_id=ticker.id, + score=score, + is_stale=False, + weights_json="{}", + computed_at=datetime.now(timezone.utc), + )) + await session.commit() + + +async def _add_watchlist(session, ticker: Ticker) -> None: + session.add(WatchlistEntry( + user_id=1, + ticker_id=ticker.id, + entry_type="manual", + added_at=datetime.now(timezone.utc), + )) + await session.commit() + + +async def _add_open_trade(session, ticker: Ticker) -> None: + session.add(PaperTrade( + user_id=1, + ticker_id=ticker.id, + direction="long", + entry_price=100.0, + shares=10.0, + stop_loss=95.0, + target=110.0, + status="open", + opened_at=datetime.now(timezone.utc), + )) + await session.commit() + + +async def _add_sentiment(session, ticker: Ticker, hours_ago: float) -> None: + session.add(SentimentScore( + ticker_id=ticker.id, + classification="bullish", + confidence=80, + source="test", + timestamp=datetime.now(timezone.utc) - timedelta(hours=hours_ago), + )) + await session.commit() + + +async def _set_setting(session, key: str, value: str) -> None: + session.add(SystemSetting(key=key, value=value, updated_at=datetime.now(timezone.utc))) + await session.commit() + + +async def test_top_pick_feeder_included_below_cutoff_excluded(session): + """A momentum leader with a tradeable long setup over the R:R floor is fetched; + one whose setup is below the gate's percentile is not.""" + feeder = await _add_ticker(session, "FEEDER") + await _add_setup(session, feeder, momentum_percentile=95.0) + laggard = await _add_ticker(session, "LAGGARD") + await 
_add_setup(session, laggard, momentum_percentile=50.0) # below the gate + await _set_setting(session, "activation_min_momentum_percentile", "80") symbols = await scheduler._get_sentiment_priority_tickers(session) - assert "LEADER" in symbols - # Below the gate's percentile and not otherwise relevant → not fetched. + assert "FEEDER" in symbols assert "LAGGARD" not in symbols -async def test_momentum_leaders_skipped_when_gate_disabled(session): - """With the momentum gate off (min percentile 0), the leader is no longer - pulled in solely on momentum — scoping falls back to the base relevant set.""" - await _seed_history(session, "LEADER", rate=1.010) - await _seed_history(session, "LAGGARD", rate=0.999) - await _set_min_momentum(session, "0") +async def test_leader_without_a_setup_excluded(session): + """A ticker with no long setup can't be a top pick, so it's no longer pulled in + on momentum alone — the budget goes to actual top-pick feeders.""" + await _add_ticker(session, "NOSETUP") + await _set_setting(session, "activation_min_momentum_percentile", "80") symbols = await scheduler._get_sentiment_priority_tickers(session) - assert "LEADER" not in symbols - assert "LAGGARD" not in symbols + assert "NOSETUP" not in symbols + + +async def test_short_only_setup_excluded(session): + """The gate is long-only while active; a short setup can never be a top pick, + so positive sentiment can't promote it and it stays out of scope.""" + t = await _add_ticker(session, "SHORTY") + await _add_setup(session, t, direction="short", momentum_percentile=95.0) + await _set_setting(session, "activation_min_momentum_percentile", "80") + + symbols = await scheduler._get_sentiment_priority_tickers(session) + + assert "SHORTY" not in symbols + + +async def test_long_setup_below_rr_floor_excluded(session): + """A long leader whose setup doesn't clear the R:R floor isn't tradeable as a + top pick regardless of sentiment.""" + t = await _add_ticker(session, "THINRR") + await 
_add_setup(session, t, momentum_percentile=95.0, rr_ratio=0.5) + await _set_setting(session, "activation_min_momentum_percentile", "80") + await _set_setting(session, "activation_min_rr", "1.2") + + symbols = await scheduler._get_sentiment_priority_tickers(session) + + assert "THINRR" not in symbols + + +async def test_gate_disabled_no_priority_tier(session): + """With the momentum gate off there is no leader axis to anchor on, so a strong + long setup is not pulled in on its own — scope falls back to the filler set.""" + t = await _add_ticker(session, "FEEDER") + await _add_setup(session, t, momentum_percentile=95.0) + await _set_setting(session, "activation_min_momentum_percentile", "0") + + symbols = await scheduler._get_sentiment_priority_tickers(session) + + assert "FEEDER" not in symbols + + +async def test_fresh_feeder_skipped_stale_refetched(session): + """A feeder refreshed within the fresh window is skipped; one past it is + re-fetched.""" + fresh = await _add_ticker(session, "FRESH") + await _add_setup(session, fresh, momentum_percentile=95.0) + await _add_sentiment(session, fresh, hours_ago=1.0) + stale = await _add_ticker(session, "STALE") + await _add_setup(session, stale, momentum_percentile=95.0) + await _add_sentiment(session, stale, hours_ago=settings_fresh_hours() + 50) + await _set_setting(session, "activation_min_momentum_percentile", "80") + + symbols = await scheduler._get_sentiment_priority_tickers(session) + + assert "FRESH" not in symbols + assert "STALE" in symbols + + +async def test_watchlist_and_open_trades_always_included(session): + """The curated watchlist and open paper trades are always in scope — they're + the set we never want shown without sentiment, independent of any top pick.""" + await _set_setting(session, "activation_min_momentum_percentile", "80") + wl = await _add_ticker(session, "WATCHED") + await _add_watchlist(session, wl) + held = await _add_ticker(session, "HELD") + await _add_open_trade(session, held) + + symbols 
= await scheduler._get_sentiment_priority_tickers(session) + + assert "WATCHED" in symbols + assert "HELD" in symbols + + +async def test_dismissed_watchlist_entry_excluded(session): + """A dismissed watchlist entry is not refreshed.""" + await _set_setting(session, "activation_min_momentum_percentile", "80") + t = await _add_ticker(session, "DISMISSED") + session.add(WatchlistEntry( + user_id=1, + ticker_id=t.id, + entry_type="dismissed", + added_at=datetime.now(timezone.utc), + )) + await session.commit() + + symbols = await scheduler._get_sentiment_priority_tickers(session) + + assert "DISMISSED" not in symbols + + +async def test_no_per_run_cap_everything_stale_is_fetched(session, monkeypatch): + """No truncation: every stale name in the relevant set is returned, however + many there are (the cap was removed).""" + await _set_setting(session, "activation_min_momentum_percentile", "80") + feeders = [f"F{i:02d}" for i in range(30)] # well past the old cap of 25 + for sym in feeders: + t = await _add_ticker(session, sym) + await _add_setup(session, t, momentum_percentile=95.0) + filler = await _add_ticker(session, "FILL") + await _add_composite(session, filler, score=99.0) + + symbols = await scheduler._get_sentiment_priority_tickers(session) + + assert set(feeders).issubset(set(symbols)) # all feeders, no truncation + assert "FILL" in symbols # filler fetched too — nothing crowded out + + +def settings_fresh_hours() -> float: + return float(scheduler.settings.sentiment_fresh_hours)