From e5166ed668a05fcf94034a115725060a026516b1 Mon Sep 17 00:00:00 2001 From: Dennis Thiessen Date: Tue, 16 Jun 2026 16:34:19 +0200 Subject: [PATCH] sentiment: LLM buy/hold/avoid + full analysis, and search-budget scoping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Richer LLM output (same grounded call, ~no extra cost): - All providers now also return a recommendation (buy/hold/avoid) and a thorough reasoning paragraph; Gemini now actually captures reasoning + grounding citations (it was dropping them). Stored on sentiment_scores (migration 008), exposed in the API; display-only — NOT fed into the composite/EV. - Ticker Sentiment panel shows an "LLM view" badge and a "Full analysis & sources" expander with the complete reasoning + citations. Search-budget scoping (Gemini grounding free tier = 5000/mo): - collect_sentiment now targets only watchlist + open paper trades + top-N by composite, skips tickers refreshed within sentiment_fresh_hours (72h), and caps per run (sentiment_max_per_run). Once the relevant set is fresh, runs spend 0 searches until it ages out — bounding monthly usage well under the free tier. - Widened sentiment lookback to 7d (scoring + display) so sparser collection still feeds the dimension score. Deploy: alembic upgrade (sentiment_scores.recommendation). Switch provider to Gemini Flash in Admin for the cost win (grounded, cheapest). Co-Authored-By: Claude Opus 4.8 --- .../008_add_sentiment_recommendation.py | 29 ++++++++++++ app/config.py | 6 +++ app/models/sentiment.py | 1 + app/providers/gemini_sentiment.py | 44 +++++++++++++++--- app/providers/openai_compatible_sentiment.py | 17 +++++-- app/providers/openai_sentiment.py | 34 +++++++++----- app/providers/protocol.py | 1 + app/routers/sentiment.py | 3 +- app/scheduler.py | 44 +++++++++++++++--- app/schemas/sentiment.py | 1 + app/services/scoring_service.py | 2 +- app/services/sentiment_service.py | 2 + .../src/components/ticker/SentimentPanel.tsx | 20 ++++++-- frontend/src/lib/types.ts | 1 + tests/unit/test_scoring_service_sentiment.py | 4 +- tests/unit/test_sentiment_recommendation.py | 46 +++++++++++++++++++ 16 files changed, 219 insertions(+), 36 deletions(-) create mode 100644 alembic/versions/008_add_sentiment_recommendation.py create mode 100644 tests/unit/test_sentiment_recommendation.py diff --git a/alembic/versions/008_add_sentiment_recommendation.py b/alembic/versions/008_add_sentiment_recommendation.py new file mode 100644 index 0000000..ef8ccd0 --- /dev/null +++ b/alembic/versions/008_add_sentiment_recommendation.py @@ -0,0 +1,29 @@ +"""add recommendation to sentiment_scores + +Revision ID: 008 +Revises: 007 +Create Date: 2026-06-16 00:00:00.000000 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = "008" +down_revision: Union[str, None] = "007" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "sentiment_scores", + sa.Column("recommendation", sa.String(length=10), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("sentiment_scores", "recommendation") diff --git a/app/config.py b/app/config.py index e492099..005cb0f 100644 --- a/app/config.py +++ b/app/config.py @@ -44,6 +44,12 @@ class Settings(BaseSettings): # Scheduled Jobs data_collector_frequency: str = "daily" sentiment_poll_interval_minutes: int = 30 + # Sentiment search-budget controls (Gemini grounding free tier = 5000/month). + # Only fetch sentiment for relevant tickers (watchlist + open trades + top-N by + # composite), skip ones refreshed within fresh_hours, and cap per run. + sentiment_fresh_hours: int = 72 + sentiment_max_per_run: int = 25 + sentiment_top_composite: int = 30 fundamental_fetch_frequency: str = "daily" rr_scan_frequency: str = "daily" alerts_frequency: str = "hourly" diff --git a/app/models/sentiment.py b/app/models/sentiment.py index 334cb17..5ccf4b8 100644 --- a/app/models/sentiment.py +++ b/app/models/sentiment.py @@ -22,5 +22,6 @@ class SentimentScore(Base): reasoning: Mapped[str] = mapped_column(Text, nullable=False, default="") citations_json: Mapped[str] = mapped_column(Text, nullable=False, default="[]") + recommendation: Mapped[str | None] = mapped_column(String(10), nullable=True) ticker = relationship("Ticker", back_populates="sentiment_scores") diff --git a/app/providers/gemini_sentiment.py b/app/providers/gemini_sentiment.py index ac130fb..c4787be 100644 --- a/app/providers/gemini_sentiment.py +++ b/app/providers/gemini_sentiment.py @@ -30,19 +30,48 @@ if _CA_BUNDLE and Path(_CA_BUNDLE).exists(): logger.warning("Could not patch aiohttp SSL context", exc_info=True) _SENTIMENT_PROMPT = """\ -Analyze the current market sentiment for the stock ticker {ticker}. -Search the web for recent news articles, social media mentions, and analyst opinions. +Search the web for the latest news, analyst ratings/opinions, and retail/social \ +discussion (e.g. Reddit, StockTwits) about the stock ticker {ticker} from roughly \ +the past 1-2 weeks. -Respond ONLY with a JSON object in this exact format (no markdown, no extra text): -{{"classification": "", "confidence": <0-100>, "reasoning": ""}} +Assess (1) the current market sentiment and (2) whether BUYING here looks advisable now. + +Respond ONLY with a JSON object (no markdown, no extra text): +{{"classification": "", "confidence": <0-100>, "recommendation": "", "reasoning": ""}} Rules: -- classification must be exactly one of: bullish, bearish, neutral +- classification = overall mood/tone (bullish, bearish, neutral) +- recommendation = actionable view on buying now (buy, hold, avoid) - confidence must be an integer from 0 to 100 -- reasoning should be a brief one-sentence explanation +- reasoning should be several sentences citing specific, recent findings """ VALID_CLASSIFICATIONS = {"bullish", "bearish", "neutral"} +VALID_RECOMMENDATIONS = {"buy", "hold", "avoid"} + + +def _parse_recommendation(value: object) -> str | None: + v = str(value or "").strip().lower() + return v if v in VALID_RECOMMENDATIONS else None + + +def _extract_citations(response: object) -> list[dict[str, str]]: + """Pull source URLs/titles from Gemini's grounding metadata.""" + citations: list[dict[str, str]] = [] + try: + candidates = getattr(response, "candidates", None) or [] + for cand in candidates: + meta = getattr(cand, "grounding_metadata", None) + for chunk in (getattr(meta, "grounding_chunks", None) or []): + web = getattr(chunk, "web", None) + if web is not None: + citations.append({ + "url": getattr(web, "uri", "") or "", + "title": getattr(web, "title", "") or "", + }) + except Exception: + pass + return citations class GeminiSentimentProvider: @@ -90,6 +119,9 @@ class GeminiSentimentProvider: confidence=confidence, source="gemini", timestamp=datetime.now(timezone.utc), + reasoning=reasoning, + citations=_extract_citations(response), + recommendation=_parse_recommendation(parsed.get("recommendation")), ) except json.JSONDecodeError as exc: diff --git a/app/providers/openai_compatible_sentiment.py b/app/providers/openai_compatible_sentiment.py index 78f187e..ec4f7b4 100644 --- a/app/providers/openai_compatible_sentiment.py +++ b/app/providers/openai_compatible_sentiment.py @@ -28,18 +28,26 @@ _CA_BUNDLE = os.environ.get("SSL_CERT_FILE", "") _SENTIMENT_PROMPT = """\ Assess the CURRENT market sentiment for the stock ticker {ticker} based on your \ -knowledge of the company, its sector, and recent developments you are aware of. +knowledge of the company, its sector, and recent developments you are aware of, \ +and whether BUYING here looks advisable. -Respond ONLY with a JSON object in this exact format (no markdown, no extra text): -{{"classification": "", "confidence": <0-100>, "reasoning": ""}} +Respond ONLY with a JSON object (no markdown, no extra text): +{{"classification": "", "confidence": <0-100>, "recommendation": "", "reasoning": ""}} Rules: - classification must be exactly one of: bullish, bearish, neutral +- recommendation must be exactly one of: buy, hold, avoid - confidence must be an integer from 0 to 100 -- reasoning should be a brief one-sentence explanation +- reasoning should be several sentences """ VALID_CLASSIFICATIONS = {"bullish", "bearish", "neutral"} +VALID_RECOMMENDATIONS = {"buy", "hold", "avoid"} + + +def _parse_recommendation(value: object) -> str | None: + v = str(value or "").strip().lower() + return v if v in VALID_RECOMMENDATIONS else None def _clean_json_text(raw: str) -> str: @@ -116,6 +124,7 @@ class OpenAICompatibleSentimentProvider: source=self._source, timestamp=datetime.now(timezone.utc), reasoning=reasoning, + recommendation=_parse_recommendation(parsed.get("recommendation")), ) except json.JSONDecodeError as exc: diff --git a/app/providers/openai_sentiment.py b/app/providers/openai_sentiment.py index b1fd17c..806ae7e 100644 --- a/app/providers/openai_sentiment.py +++ b/app/providers/openai_sentiment.py @@ -19,39 +19,48 @@ logger = logging.getLogger(__name__) _CA_BUNDLE = os.environ.get("SSL_CERT_FILE", "") _SENTIMENT_PROMPT = """\ -Search the web for the LATEST news, analyst opinions, and market developments \ -about the stock ticker {ticker} from the past 24-48 hours. +Search the web for the latest news, analyst ratings/opinions, and retail/social \ +discussion (e.g. Reddit, StockTwits) about the stock ticker {ticker} from roughly \ +the past 1-2 weeks. -Based on your web search findings, analyze the CURRENT market sentiment. +Assess (1) the current market sentiment and (2) whether BUYING here looks advisable now. -Respond ONLY with a JSON object in this exact format (no markdown, no extra text): -{{"classification": "", "confidence": <0-100>, "reasoning": ""}} +Respond ONLY with a JSON object (no markdown, no extra text): +{{"classification": "", "confidence": <0-100>, "recommendation": "", "reasoning": ""}} Rules: -- classification must be exactly one of: bullish, bearish, neutral +- classification = overall mood/tone of the coverage (bullish, bearish, neutral) +- recommendation = actionable view on buying at the current price (buy, hold, avoid) - confidence must be an integer from 0 to 100 -- reasoning should cite specific recent news or events you found +- reasoning should be several sentences citing specific, recent findings """ _SENTIMENT_BATCH_PROMPT = """\ -Search the web for the LATEST news, analyst opinions, and market developments \ -about each stock ticker from the past 24-48 hours. +Search the web for the latest news, analyst ratings/opinions, and retail/social \ +discussion about each stock ticker from roughly the past 1-2 weeks. Tickers: {tickers_csv} Respond ONLY with a JSON array (no markdown, no extra text), one object per ticker: -[{{"ticker":"AAPL","classification":"bullish|bearish|neutral","confidence":0-100,"reasoning":"brief explanation"}}] +[{{"ticker":"AAPL","classification":"bullish|bearish|neutral","confidence":0-100,"recommendation":"buy|hold|avoid","reasoning":"thorough explanation citing findings"}}] Rules: -- Include every ticker exactly once -- ticker must be uppercase symbol +- Include every ticker exactly once; ticker must be the uppercase symbol - classification must be exactly one of: bullish, bearish, neutral +- recommendation must be exactly one of: buy, hold, avoid - confidence must be an integer from 0 to 100 - reasoning should cite specific recent news or events you found """ VALID_CLASSIFICATIONS = {"bullish", "bearish", "neutral"} +VALID_RECOMMENDATIONS = {"buy", "hold", "avoid"} + + +def parse_recommendation(value: object) -> str | None: + """Normalise a recommendation to buy/hold/avoid, or None if absent/invalid.""" + v = str(value or "").strip().lower() + return v if v in VALID_RECOMMENDATIONS else None class OpenAISentimentProvider: @@ -135,6 +144,7 @@ class OpenAISentimentProvider: timestamp=datetime.now(timezone.utc), reasoning=reasoning, citations=citations, + recommendation=parse_recommendation(parsed.get("recommendation")), ) async def fetch_sentiment(self, ticker: str) -> SentimentData: diff --git a/app/providers/protocol.py b/app/providers/protocol.py index 8911011..9ce892f 100644 --- a/app/providers/protocol.py +++ b/app/providers/protocol.py @@ -41,6 +41,7 @@ class SentimentData: timestamp: datetime reasoning: str = "" citations: list[dict[str, str]] = field(default_factory=list) # [{"url": ..., "title": ...}] + recommendation: str | None = None # "buy" | "hold" | "avoid" — actionable LLM view @dataclass(frozen=True, slots=True) diff --git a/app/routers/sentiment.py b/app/routers/sentiment.py index 653d65d..60d4026 100644 --- a/app/routers/sentiment.py +++ b/app/routers/sentiment.py @@ -30,7 +30,7 @@ def _parse_citations(citations_json: str) -> list[CitationItem]: @router.get("/sentiment/{symbol}", response_model=APIEnvelope) async def read_sentiment( symbol: str, - lookback_hours: float = Query(24, gt=0, description="Lookback window in hours"), + lookback_hours: float = Query(168, gt=0, description="Lookback window in hours"), _user=Depends(require_access), db: AsyncSession = Depends(get_db), ) -> APIEnvelope: @@ -51,6 +51,7 @@ async def read_sentiment( timestamp=s.timestamp, reasoning=s.reasoning, citations=_parse_citations(s.citations_json), + recommendation=s.recommendation, ) for s in scores ], diff --git a/app/scheduler.py b/app/scheduler.py index 2058dda..35526fc 100644 --- a/app/scheduler.py +++ b/app/scheduler.py @@ -16,10 +16,10 @@ from __future__ import annotations import json import logging import asyncio -from datetime import date, datetime, timezone +from datetime import date, datetime, timedelta, timezone from apscheduler.schedulers.asyncio import AsyncIOScheduler -from sqlalchemy import case, func, select +from sqlalchemy import case, func, or_, select from sqlalchemy.ext.asyncio import AsyncSession from app.config import settings @@ -281,20 +281,49 @@ async def _get_ohlcv_priority_tickers(db: AsyncSession) -> list[str]: async def _get_sentiment_priority_tickers(db: AsyncSession) -> list[str]: - """Return symbols prioritized for sentiment collection. + """Symbols to fetch sentiment for, budgeted to stay in the free search tier. - Priority: - 1) Tickers with no sentiment records - 2) Tickers with records, oldest latest sentiment timestamp first - 3) Alphabetical tiebreaker + Scope: only tickers that matter — watchlist + open paper trades + top-N by + composite score. Skip any refreshed within ``sentiment_fresh_hours``. Cap the + run at ``sentiment_max_per_run``, oldest/missing first. Once the relevant set + is fresh, runs make zero grounded searches until it ages out. """ + from app.models.paper_trade import PaperTrade + from app.models.score import CompositeScore + from app.models.watchlist import WatchlistEntry + + relevant: set[int] = set() + wl = await db.execute( + select(WatchlistEntry.ticker_id) + .where(WatchlistEntry.entry_type != "dismissed") + .distinct() + ) + relevant.update(r[0] for r in wl.all()) + pt = await db.execute( + select(PaperTrade.ticker_id).where(PaperTrade.status == "open").distinct() + ) + relevant.update(r[0] for r in pt.all()) + top = await db.execute( + select(CompositeScore.ticker_id) + .order_by(CompositeScore.score.desc()) + .limit(settings.sentiment_top_composite) + ) + relevant.update(r[0] for r in top.all()) + + if not relevant: + return [] + + cutoff = datetime.now(timezone.utc) - timedelta(hours=settings.sentiment_fresh_hours) latest_ts = func.max(SentimentScore.timestamp) missing_first = case((latest_ts.is_(None), 0), else_=1) result = await db.execute( select(Ticker.symbol) .outerjoin(SentimentScore, SentimentScore.ticker_id == Ticker.id) + .where(Ticker.id.in_(relevant)) .group_by(Ticker.id, Ticker.symbol) + .having(or_(latest_ts.is_(None), latest_ts < cutoff)) .order_by(missing_first.asc(), latest_ts.asc(), Ticker.symbol.asc()) + .limit(settings.sentiment_max_per_run) ) return list(result.scalars().all()) @@ -531,6 +560,7 @@ async def collect_sentiment() -> None: timestamp=data.timestamp, reasoning=data.reasoning, citations=data.citations, + recommendation=data.recommendation, ) _last_successful[job_name] = symbol processed += 1 diff --git a/app/schemas/sentiment.py b/app/schemas/sentiment.py index 0eae4f6..76d418d 100644 --- a/app/schemas/sentiment.py +++ b/app/schemas/sentiment.py @@ -25,6 +25,7 @@ class SentimentScoreResult(BaseModel): timestamp: datetime reasoning: str = "" citations: list[CitationItem] = [] + recommendation: Literal["buy", "hold", "avoid"] | None = None class SentimentResponse(BaseModel): diff --git a/app/services/scoring_service.py b/app/services/scoring_service.py index 16f05b4..507176d 100644 --- a/app/services/scoring_service.py +++ b/app/services/scoring_service.py @@ -347,7 +347,7 @@ async def _compute_sentiment_score( get_sentiment_scores, ) - lookback_hours: float = 24 + lookback_hours: float = 168 # 7 days — sentiment is collected sparsely to stay in free tier decay_rate: float = 0.1 try: diff --git a/app/services/sentiment_service.py b/app/services/sentiment_service.py index bd7252b..4c7d237 100644 --- a/app/services/sentiment_service.py +++ b/app/services/sentiment_service.py @@ -37,6 +37,7 @@ async def store_sentiment( timestamp: datetime | None = None, reasoning: str = "", citations: list[dict] | None = None, + recommendation: str | None = None, ) -> SentimentScore: """Store a new sentiment record for a ticker.""" ticker = await _get_ticker(db, symbol) @@ -55,6 +56,7 @@ async def store_sentiment( timestamp=timestamp, reasoning=reasoning, citations_json=json.dumps(citations), + recommendation=recommendation, ) db.add(record) await db.commit() diff --git a/frontend/src/components/ticker/SentimentPanel.tsx b/frontend/src/components/ticker/SentimentPanel.tsx index fbe0644..62d7652 100644 --- a/frontend/src/components/ticker/SentimentPanel.tsx +++ b/frontend/src/components/ticker/SentimentPanel.tsx @@ -12,6 +12,12 @@ const classificationColors: Record = { neutral: 'text-gray-300', }; +const recommendationStyle: Record = { + buy: 'bg-emerald-500/15 text-emerald-300 border-emerald-500/30', + hold: 'bg-amber-500/15 text-amber-300 border-amber-500/30', + avoid: 'bg-red-500/15 text-red-300 border-red-500/30', +}; + export function SentimentPanel({ data }: SentimentPanelProps) { const [expanded, setExpanded] = useState(false); const latest = data.scores[0]; @@ -21,6 +27,14 @@ export function SentimentPanel({ data }: SentimentPanelProps) {

Sentiment

{latest ? ( <> + {latest.recommendation && ( +
+ LLM view + + {latest.recommendation} + +
+ )}
Classification @@ -45,12 +59,12 @@ export function SentimentPanel({ data }: SentimentPanelProps) {