feat: always-fresh sentiment for top picks, watchlist & open trades

Tiered, uncapped sentiment scope so the names that matter are never shown
without sentiment.

- Priority (always fully refreshed): top-pick feeders — momentum leaders with a
  tradeable long setup over the R:R floor (the tickers that are, or could become
  with positive sentiment, the dashboard top pick) — plus the curated watchlist
  and open paper trades.
- Filler: top-N by composite, a discovery net, fetched after the priority set so
  a mid-run rate limit lands the important names first.
- Removed the per-run cap (sentiment_max_per_run): the relevant set is naturally
  bounded (watchlist <= 20, composite <= top_composite), so a full refresh stays
  inside the free tier. extra="ignore" keeps a stale env var from breaking startup.
- Refresh window 72h -> 120h (5 days): sentiment shifts slowly, score window is 7d.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-27 15:59:58 +02:00
parent 65dd53baa3
commit 2f21c685e8
3 changed files with 329 additions and 96 deletions
+9 -5
View File
@@ -2,7 +2,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")
# Database
database_url: str = "postgresql+asyncpg://stock_backend:changeme@localhost:5432/stock_data_backend"
@@ -49,10 +49,14 @@ class Settings(BaseSettings):
data_collector_frequency: str = "daily"
sentiment_poll_interval_minutes: int = 30
# Sentiment search-budget controls (Gemini grounding free tier = 5000/month).
# Only fetch sentiment for relevant tickers (watchlist + open trades + top-N by
# composite), skip ones refreshed within fresh_hours, and cap per run.
sentiment_fresh_hours: int = 72
sentiment_max_per_run: int = 25
# Scope (see _get_sentiment_priority_tickers): everything that matters is always
# refreshed in full — open paper trades + the curated watchlist + top-pick
# feeders (momentum leaders with a tradeable long setup) — plus a top-N composite
# discovery net. No per-run cap: the set is naturally bounded (watchlist <= 20,
# composite <= top_composite), so a full refresh stays well inside the free tier.
# Skip anything refreshed within fresh_hours (5 days: sentiment shifts slowly and
# the score window is 7 days).
sentiment_fresh_hours: int = 120
sentiment_top_composite: int = 30
fundamental_fetch_frequency: str = "weekly" # quarterly-ish data; weekly conserves API quota
rr_scan_frequency: str = "daily"
+114 -51
View File
@@ -20,7 +20,7 @@ from datetime import date, datetime, timedelta, timezone
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from sqlalchemy import case, func, or_, select
from sqlalchemy import and_, case, func, or_, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import settings
@@ -218,78 +218,141 @@ async def _get_ohlcv_priority_tickers(db: AsyncSession) -> list[str]:
return list(result.scalars().all())
async def _get_sentiment_priority_tickers(db: AsyncSession) -> list[str]:
"""Symbols to fetch sentiment for, budgeted to stay in the free search tier.
async def _get_top_pick_feeder_ids(db: AsyncSession) -> set[int]:
"""Ticker ids whose latest LONG setup makes them a top-pick feeder.
Scope: only tickers that matter — watchlist + open paper trades + top-N by
composite score + the momentum leaders the activation gate qualifies on. Skip
any refreshed within ``sentiment_fresh_hours``. Cap the run at
``sentiment_max_per_run``, oldest/missing first. Once the relevant set is
fresh, runs make zero grounded searches until it ages out.
A dashboard 'top pick' is the highest-momentum *qualified* setup. Sentiment
can never move a ticker's momentum percentile (the gate's core axis) — only
its confidence and EV ranking. So the only tickers that are, or could become
with positive sentiment, a top pick are momentum leaders that already have a
tradeable long setup clearing the R:R floor. That set is exactly:
latest long setup with momentum_percentile >= gate AND rr_ratio >= floor.
It contains both the currently-qualified setups and the near-miss ones held
back only by a neutral/missing sentiment — the cases the user saw surface as
top picks with no sentiment. Only meaningful with the momentum gate on
(min_momentum_percentile > 0); off, there is no leader axis to anchor on and we
defer to the filler set. Best-effort: a config failure must not stop collection.
"""
from app.models.trade_setup import TradeSetup
try:
from app.services.admin_service import get_activation_config
activation = await get_activation_config(db)
min_pct = float(activation.get("min_momentum_percentile", 0.0))
min_rr = float(activation.get("min_rr", 0.0))
except Exception:
logger.exception("Sentiment top-pick scoping failed; using filler set only")
return set()
if min_pct <= 0:
return set()
# Latest long setup per ticker, then keep those clearing the gate's momentum
# percentile and R:R floor. (Sentiment runs before the day's scan, so this
# reads the previous scan's setups — momentum is a slow, cross-sectional signal,
# so yesterday's leaders are the right anchor.)
latest_long = (
select(TradeSetup.ticker_id, func.max(TradeSetup.detected_at).label("md"))
.where(TradeSetup.direction == "long")
.group_by(TradeSetup.ticker_id)
.subquery()
)
rows = await db.execute(
select(TradeSetup.ticker_id)
.join(
latest_long,
and_(
TradeSetup.ticker_id == latest_long.c.ticker_id,
TradeSetup.detected_at == latest_long.c.md,
),
)
.where(
TradeSetup.direction == "long",
TradeSetup.rr_ratio >= min_rr,
TradeSetup.momentum_percentile.is_not(None),
TradeSetup.momentum_percentile >= min_pct,
)
)
return {r[0] for r in rows.all()}
async def _stale_sentiment_symbols(
    db: AsyncSession, ticker_ids: set[int], cutoff: datetime
) -> list[str]:
    """Return the symbols in ``ticker_ids`` that are due a sentiment refresh.

    A ticker is due when it has no sentiment row at all, or when its newest
    sentiment timestamp is older than ``cutoff``. The result is ordered
    never-fetched first, then by newest timestamp ascending (oldest first),
    then alphabetically by symbol.

    An empty ``ticker_ids`` short-circuits to ``[]`` without touching the
    database.
    """
    if not ticker_ids:
        return []

    # Newest sentiment timestamp per ticker; NULL when the outer join finds none.
    newest = func.max(SentimentScore.timestamp)
    never_fetched = newest.is_(None)
    due_for_refresh = or_(never_fetched, newest < cutoff)
    # Sort key that puts never-fetched tickers (0) ahead of merely stale ones (1).
    missing_rank = case((never_fetched, 0), else_=1)

    query = select(Ticker.symbol).outerjoin(
        SentimentScore, SentimentScore.ticker_id == Ticker.id
    )
    query = query.where(Ticker.id.in_(ticker_ids))
    query = query.group_by(Ticker.id, Ticker.symbol)
    query = query.having(due_for_refresh)
    query = query.order_by(missing_rank.asc(), newest.asc(), Ticker.symbol.asc())

    rows = await db.execute(query)
    return list(rows.scalars().all())
async def _get_sentiment_priority_tickers(db: AsyncSession) -> list[str]:
"""Symbols to fetch sentiment for, skipping anything refreshed within
``sentiment_fresh_hours``.
No per-run cap: the relevant set is naturally bounded (curated watchlist <= 20,
a handful of open trades and top-pick feeders, top-N composite), so refreshing
all of it stays well inside the free search tier — and everything that matters
is always fully covered. The two tiers only affect ORDER, so a mid-run provider
rate limit still lands the names we care about first:
Priority: top-pick feeders (momentum leaders with a tradeable long setup, see
``_get_top_pick_feeder_ids``) + the curated watchlist + open paper trades —
the set we never want shown without sentiment.
Filler: top-N by composite — a cheap discovery net for names not yet covered.
Once the set is fresh, runs make zero grounded searches until it ages out.
"""
from app.models.paper_trade import PaperTrade
from app.models.score import CompositeScore
from app.models.watchlist import WatchlistEntry
relevant: set[int] = set()
cutoff = datetime.now(timezone.utc) - timedelta(hours=settings.sentiment_fresh_hours)
# Priority: the set we always want fresh — top-pick feeders, the curated
# watchlist, and open positions.
priority_ids = await _get_top_pick_feeder_ids(db)
wl = await db.execute(
select(WatchlistEntry.ticker_id)
.where(WatchlistEntry.entry_type != "dismissed")
.distinct()
)
relevant.update(r[0] for r in wl.all())
priority_ids.update(r[0] for r in wl.all())
pt = await db.execute(
select(PaperTrade.ticker_id).where(PaperTrade.status == "open").distinct()
)
relevant.update(r[0] for r in pt.all())
priority_ids.update(r[0] for r in pt.all())
# Filler: top-N by composite, a discovery net for names not already covered.
top = await db.execute(
select(CompositeScore.ticker_id)
.order_by(CompositeScore.score.desc())
.limit(settings.sentiment_top_composite)
)
relevant.update(r[0] for r in top.all())
filler_ids = {r[0] for r in top.all()} - priority_ids
# Momentum leaders: the tickers that can clear the activation gate, which
# selects the top ``min_momentum_percentile`` slice by 12-1 momentum — a
# different axis than composite score. The gate qualifies setups on this
# percentile, so without including them a freshly-qualifying ticker carries no
# sentiment and gets enhanced as neutral. Pre-fetching their sentiment here (in
# the daily pipeline, sentiment runs right after the OHLCV refresh) means the
# following R:R scan reads real sentiment for the setups it qualifies.
# Best-effort: a momentum/config failure must not stop sentiment collection.
try:
from app.services import momentum_service
from app.services.admin_service import get_activation_config
activation = await get_activation_config(db)
min_pct = float(activation.get("min_momentum_percentile", 0.0))
if min_pct > 0:
percentiles = await momentum_service.compute_momentum_percentiles(db)
leaders = [sym for sym, pct in percentiles.items() if pct >= min_pct]
if leaders:
rows = await db.execute(
select(Ticker.id).where(Ticker.symbol.in_(leaders))
)
relevant.update(r[0] for r in rows.all())
except Exception:
logger.exception("Sentiment momentum-leader scoping failed; using base relevant set")
if not relevant:
if not priority_ids and not filler_ids:
return []
cutoff = datetime.now(timezone.utc) - timedelta(hours=settings.sentiment_fresh_hours)
latest_ts = func.max(SentimentScore.timestamp)
missing_first = case((latest_ts.is_(None), 0), else_=1)
result = await db.execute(
select(Ticker.symbol)
.outerjoin(SentimentScore, SentimentScore.ticker_id == Ticker.id)
.where(Ticker.id.in_(relevant))
.group_by(Ticker.id, Ticker.symbol)
.having(or_(latest_ts.is_(None), latest_ts < cutoff))
.order_by(missing_first.asc(), latest_ts.asc(), Ticker.symbol.asc())
.limit(settings.sentiment_max_per_run)
)
return list(result.scalars().all())
# No cap — fetch every stale name. Priority first so a rate limit mid-run still
# covers the curated/at-risk set before the discovery net.
priority_syms = await _stale_sentiment_symbols(db, priority_ids, cutoff)
filler_syms = await _stale_sentiment_symbols(db, filler_ids, cutoff)
return priority_syms + filler_syms
async def _get_fundamental_priority_tickers(db: AsyncSession) -> list[str]:
+206 -40
View File
@@ -1,21 +1,27 @@
"""Tests for sentiment-collection scoping (``_get_sentiment_priority_tickers``).
The activation gate qualifies setups on 12-1 momentum percentile, a different
axis than composite score. These tests pin the fix that adds the gate's momentum
leaders to the sentiment relevant-set so a freshly-qualifying ticker isn't left
without sentiment.
A dashboard 'top pick' is the highest-momentum *qualified* long setup. Sentiment
can never move a ticker's momentum percentile (the gate's core axis) — only its
confidence and EV ranking. So the tickers that are, or could become with positive
sentiment, a top pick are exactly the momentum leaders that already carry a
tradeable long setup over the R:R floor. These tests pin that priority tier
(always refreshed in full) and the top-N composite filler tier fetched behind
it; neither tier is truncated by a per-run cap.
"""
from __future__ import annotations
from datetime import date, datetime, timedelta, timezone
from datetime import datetime, timedelta, timezone
import pytest
from app import scheduler
from app.models.ohlcv import OHLCVRecord
from app.models.paper_trade import PaperTrade
from app.models.score import CompositeScore
from app.models.sentiment import SentimentScore
from app.models.settings import SystemSetting
from app.models.ticker import Ticker
from app.models.trade_setup import TradeSetup
from app.models.watchlist import WatchlistEntry
@pytest.fixture
@@ -26,56 +32,216 @@ async def session():
yield s
async def _seed_history(session, symbol: str, rate: float, n: int = 280) -> Ticker:
"""Seed a ticker with a full year+ of daily closes growing at ``rate``."""
async def _add_ticker(session, symbol: str) -> Ticker:
t = Ticker(symbol=symbol)
session.add(t)
await session.flush()
base = date(2024, 1, 1)
for i in range(n):
close = 100.0 * (rate ** i)
session.add(OHLCVRecord(
ticker_id=t.id,
date=base + timedelta(days=i),
open=close, high=close, low=close, close=close,
volume=1_000_000,
))
await session.commit()
return t
async def _set_min_momentum(session, value: str) -> None:
session.add(SystemSetting(
key="activation_min_momentum_percentile",
value=value,
updated_at=datetime.now(timezone.utc),
async def _add_setup(
    session,
    ticker: Ticker,
    *,
    direction: str = "long",
    momentum_percentile: float | None = 95.0,
    rr_ratio: float = 2.0,
    detected_at: datetime | None = None,
) -> TradeSetup:
    """Insert and commit a ``TradeSetup`` for ``ticker`` and return it.

    Prices and composite score are fixed placeholder values (entry 100, stop 95,
    target 110, composite 60); only the fields the sentiment scoping filters on
    are configurable.

    Args:
        session: async DB session provided by the test fixture.
        ticker: ticker the setup belongs to (its ``id`` is used).
        direction: setup direction, ``"long"`` by default.
        momentum_percentile: stored momentum percentile; may be ``None``.
        rr_ratio: stored reward-to-risk ratio.
        detected_at: detection timestamp; defaults to the current UTC time.

    Returns:
        The ``TradeSetup`` instance that was added and committed. (Previously the
        function fell off the end and returned ``None`` despite its annotation.)
    """
    setup = TradeSetup(
        ticker_id=ticker.id,
        direction=direction,
        entry_price=100.0,
        stop_loss=95.0,
        target=110.0,
        rr_ratio=rr_ratio,
        composite_score=60.0,
        momentum_percentile=momentum_percentile,
        detected_at=detected_at or datetime.now(timezone.utc),
    )
    session.add(setup)
    await session.commit()
    return setup
async def test_momentum_leader_is_included_without_composite_or_watchlist(session):
"""A top-percentile momentum ticker is fetched even when it has no composite
score, no watchlist entry, and no open trade — the case that previously left
qualifying setups with no sentiment."""
await _seed_history(session, "LEADER", rate=1.010) # strong uptrend → pct 100
await _seed_history(session, "LAGGARD", rate=0.999) # declining → pct 0
await _set_min_momentum(session, "80")
async def _add_composite(session, ticker: Ticker, score: float) -> None:
    """Persist a non-stale composite score of ``score`` for ``ticker``.

    The weights are stored as an empty JSON object and ``computed_at`` is set to
    the current UTC time; the row is committed before returning.
    """
    computed_now = datetime.now(timezone.utc)
    composite_row = CompositeScore(
        ticker_id=ticker.id,
        score=score,
        is_stale=False,
        weights_json="{}",
        computed_at=computed_now,
    )
    session.add(composite_row)
    await session.commit()
async def _add_watchlist(session, ticker: Ticker) -> None:
    """Put ``ticker`` on user 1's watchlist as a ``"manual"`` entry and commit.

    ``added_at`` is stamped with the current UTC time.
    """
    added_now = datetime.now(timezone.utc)
    entry = WatchlistEntry(
        user_id=1,
        ticker_id=ticker.id,
        entry_type="manual",
        added_at=added_now,
    )
    session.add(entry)
    await session.commit()
async def _add_open_trade(session, ticker: Ticker) -> None:
    """Open a long paper trade on ``ticker`` for user 1 and commit it.

    The trade uses fixed placeholder values (entry 100, 10 shares, stop 95,
    target 110), has status ``"open"``, and is stamped as opened at the current
    UTC time.
    """
    opened_now = datetime.now(timezone.utc)
    trade = PaperTrade(
        user_id=1,
        ticker_id=ticker.id,
        direction="long",
        entry_price=100.0,
        shares=10.0,
        stop_loss=95.0,
        target=110.0,
        status="open",
        opened_at=opened_now,
    )
    session.add(trade)
    await session.commit()
async def _add_sentiment(session, ticker: Ticker, hours_ago: float) -> None:
    """Record a bullish sentiment score for ``ticker`` dated ``hours_ago`` hours back.

    The row carries confidence 80 and source ``"test"``; its timestamp is the
    current UTC time minus ``hours_ago`` hours. The row is committed before
    returning.
    """
    scored_at = datetime.now(timezone.utc) - timedelta(hours=hours_ago)
    sentiment_row = SentimentScore(
        ticker_id=ticker.id,
        classification="bullish",
        confidence=80,
        source="test",
        timestamp=scored_at,
    )
    session.add(sentiment_row)
    await session.commit()
async def _set_setting(session, key: str, value: str) -> None:
    """Store a ``SystemSetting`` row mapping ``key`` to ``value`` and commit.

    ``updated_at`` is stamped with the current UTC time.
    """
    stamped_at = datetime.now(timezone.utc)
    setting_row = SystemSetting(key=key, value=value, updated_at=stamped_at)
    session.add(setting_row)
    await session.commit()
async def test_top_pick_feeder_included_below_cutoff_excluded(session):
"""A momentum leader with a tradeable long setup over the R:R floor is fetched;
one whose setup is below the gate's percentile is not."""
feeder = await _add_ticker(session, "FEEDER")
await _add_setup(session, feeder, momentum_percentile=95.0)
laggard = await _add_ticker(session, "LAGGARD")
await _add_setup(session, laggard, momentum_percentile=50.0) # below the gate
await _set_setting(session, "activation_min_momentum_percentile", "80")
symbols = await scheduler._get_sentiment_priority_tickers(session)
assert "LEADER" in symbols
# Below the gate's percentile and not otherwise relevant → not fetched.
assert "FEEDER" in symbols
assert "LAGGARD" not in symbols
async def test_momentum_leaders_skipped_when_gate_disabled(session):
"""With the momentum gate off (min percentile 0), the leader is no longer
pulled in solely on momentum — scoping falls back to the base relevant set."""
await _seed_history(session, "LEADER", rate=1.010)
await _seed_history(session, "LAGGARD", rate=0.999)
await _set_min_momentum(session, "0")
async def test_leader_without_a_setup_excluded(session):
"""A ticker with no long setup can't be a top pick, so it's no longer pulled in
on momentum alone — the budget goes to actual top-pick feeders."""
await _add_ticker(session, "NOSETUP")
await _set_setting(session, "activation_min_momentum_percentile", "80")
symbols = await scheduler._get_sentiment_priority_tickers(session)
assert "LEADER" not in symbols
assert "LAGGARD" not in symbols
assert "NOSETUP" not in symbols
async def test_short_only_setup_excluded(session):
    """A ticker whose only setup is short stays out of the sentiment scope.

    The feeder query keeps only setups with direction ``"long"``, so a short
    setup is not selected even with a momentum percentile above the gate.
    """
    short_only = await _add_ticker(session, "SHORTY")
    await _add_setup(
        session,
        short_only,
        direction="short",
        momentum_percentile=95.0,
    )
    await _set_setting(session, "activation_min_momentum_percentile", "80")

    fetched = await scheduler._get_sentiment_priority_tickers(session)

    assert "SHORTY" not in fetched
async def test_long_setup_below_rr_floor_excluded(session):
    """A long momentum leader under the R:R floor is not a top-pick feeder.

    The setup's ``rr_ratio`` (0.5) sits below the configured ``min_rr`` (1.2),
    so the ticker is out of scope even though its percentile clears the gate.
    """
    thin_rr = await _add_ticker(session, "THINRR")
    await _add_setup(session, thin_rr, momentum_percentile=95.0, rr_ratio=0.5)

    gate_settings = (
        ("activation_min_momentum_percentile", "80"),
        ("activation_min_rr", "1.2"),
    )
    for setting_key, setting_value in gate_settings:
        await _set_setting(session, setting_key, setting_value)

    fetched = await scheduler._get_sentiment_priority_tickers(session)

    assert "THINRR" not in fetched
async def test_gate_disabled_no_priority_tier(session):
    """With the momentum gate at 0, a strong long setup alone is not in scope.

    When ``min_momentum_percentile`` is not positive the feeder lookup returns
    an empty set, so a ticker with no watchlist entry, open trade or composite
    score is not fetched.
    """
    strong_long = await _add_ticker(session, "FEEDER")
    await _add_setup(session, strong_long, momentum_percentile=95.0)
    await _set_setting(session, "activation_min_momentum_percentile", "0")

    fetched = await scheduler._get_sentiment_priority_tickers(session)

    assert "FEEDER" not in fetched
async def test_fresh_feeder_skipped_stale_refetched(session):
    """Feeders inside the fresh window are skipped; those beyond it are re-fetched.

    ``FRESH`` has sentiment from one hour ago; ``STALE`` has sentiment dated 50
    hours past the configured fresh window.
    """
    beyond_window_hours = settings_fresh_hours() + 50

    recent = await _add_ticker(session, "FRESH")
    await _add_setup(session, recent, momentum_percentile=95.0)
    await _add_sentiment(session, recent, hours_ago=1.0)

    aged = await _add_ticker(session, "STALE")
    await _add_setup(session, aged, momentum_percentile=95.0)
    await _add_sentiment(session, aged, hours_ago=beyond_window_hours)

    await _set_setting(session, "activation_min_momentum_percentile", "80")

    fetched = await scheduler._get_sentiment_priority_tickers(session)

    assert "FRESH" not in fetched
    assert "STALE" in fetched
async def test_watchlist_and_open_trades_always_included(session):
    """Watchlist names and open paper trades are in scope without any setup.

    Neither ticker has a trade setup or composite score; membership of the
    watchlist or an open position is enough on its own.
    """
    await _set_setting(session, "activation_min_momentum_percentile", "80")

    watched = await _add_ticker(session, "WATCHED")
    await _add_watchlist(session, watched)

    position = await _add_ticker(session, "HELD")
    await _add_open_trade(session, position)

    fetched = await scheduler._get_sentiment_priority_tickers(session)

    for expected_symbol in ("WATCHED", "HELD"):
        assert expected_symbol in fetched
async def test_dismissed_watchlist_entry_excluded(session):
    """A watchlist entry of type ``"dismissed"`` does not bring its ticker into scope."""
    await _set_setting(session, "activation_min_momentum_percentile", "80")
    dismissed_ticker = await _add_ticker(session, "DISMISSED")

    # Built inline because the shared watchlist helper only creates "manual" entries.
    dismissed_entry = WatchlistEntry(
        user_id=1,
        ticker_id=dismissed_ticker.id,
        entry_type="dismissed",
        added_at=datetime.now(timezone.utc),
    )
    session.add(dismissed_entry)
    await session.commit()

    fetched = await scheduler._get_sentiment_priority_tickers(session)

    assert "DISMISSED" not in fetched
async def test_no_per_run_cap_everything_stale_is_fetched(session):
    """Every stale name is returned, priority tier ahead of the filler tier.

    Thirty feeders (more than the old per-run cap of 25) plus one composite-only
    filler ticker are seeded with no sentiment. All of them must come back, and
    the filler must sort after every feeder, because the scheduler returns the
    priority symbols followed by the filler symbols.
    """
    await _set_setting(session, "activation_min_momentum_percentile", "80")

    feeders = [f"F{i:02d}" for i in range(30)]  # more than the old cap of 25
    for sym in feeders:
        t = await _add_ticker(session, sym)
        await _add_setup(session, t, momentum_percentile=95.0)

    # Filler: in scope only through its composite score (no setup, watchlist or trade).
    filler = await _add_ticker(session, "FILL")
    await _add_composite(session, filler, score=99.0)

    symbols = await scheduler._get_sentiment_priority_tickers(session)

    assert set(feeders).issubset(set(symbols))  # all feeders, no truncation
    assert "FILL" in symbols  # filler fetched too — nothing crowded out
    # Ordering contract: the discovery net lands after every priority name.
    last_feeder_position = max(symbols.index(sym) for sym in feeders)
    assert symbols.index("FILL") > last_feeder_position
def settings_fresh_hours() -> float:
    """Return the scheduler's configured ``sentiment_fresh_hours`` as a float."""
    configured_hours = scheduler.settings.sentiment_fresh_hours
    return float(configured_hours)