sentiment: LLM buy/hold/avoid + full analysis, and search-budget scoping

Richer LLM output (same grounded call, ~no extra cost):
- All providers now also return a recommendation (buy/hold/avoid) and a thorough reasoning paragraph; Gemini now actually captures reasoning + grounding citations (it was dropping them). Stored on sentiment_scores (migration 008), exposed in the API; display-only — NOT fed into the composite/EV.
- Ticker Sentiment panel shows an "LLM view" badge and a "Full analysis & sources" expander with the complete reasoning + citations.

Search-budget scoping (Gemini grounding free tier = 5000/mo):
- collect_sentiment now targets only watchlist + open paper trades + top-N by composite, skips tickers refreshed within sentiment_fresh_hours (72h), and caps per run (sentiment_max_per_run). Once the relevant set is fresh, runs spend 0 searches until it ages out — bounding monthly usage well under the free tier.
- Widened sentiment lookback to 7d (scoring + display) so sparser collection still feeds the dimension score.

Deploy: alembic upgrade (sentiment_scores.recommendation). Switch provider to Gemini Flash in Admin for the cost win (grounded, cheapest).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 16:34:19 +02:00
parent a69557f5d8
commit e5166ed668
16 changed files with 219 additions and 36 deletions
@@ -44,6 +44,12 @@ class Settings(BaseSettings):
    # Scheduled Jobs
    data_collector_frequency: str = "daily"
    sentiment_poll_interval_minutes: int = 30
+    # Sentiment search-budget controls (Gemini grounding free tier = 5000/month).
+    # Only fetch sentiment for relevant tickers (watchlist + open trades + top-N by
+    # composite), skip ones refreshed within fresh_hours, and cap per run.
+    sentiment_fresh_hours: int = 72
+    sentiment_max_per_run: int = 25
+    sentiment_top_composite: int = 30
    fundamental_fetch_frequency: str = "daily"
    rr_scan_frequency: str = "daily"
    alerts_frequency: str = "hourly"
@@ -22,5 +22,6 @@ class SentimentScore(Base):

    reasoning: Mapped[str] = mapped_column(Text, nullable=False, default="")
    citations_json: Mapped[str] = mapped_column(Text, nullable=False, default="[]")
+    recommendation: Mapped[str | None] = mapped_column(String(10), nullable=True)

    ticker = relationship("Ticker", back_populates="sentiment_scores")
@@ -30,19 +30,48 @@ if _CA_BUNDLE and Path(_CA_BUNDLE).exists():
        logger.warning("Could not patch aiohttp SSL context", exc_info=True)

 _SENTIMENT_PROMPT = """\
-Analyze the current market sentiment for the stock ticker {ticker}.
-Search the web for recent news articles, social media mentions, and analyst opinions.
+Search the web for the latest news, analyst ratings/opinions, and retail/social \
+discussion (e.g. Reddit, StockTwits) about the stock ticker {ticker} from roughly \
+the past 1-2 weeks.

-Respond ONLY with a JSON object in this exact format (no markdown, no extra text):
-{{"classification": "<bullish|bearish|neutral>", "confidence": <0-100>, "reasoning": "<brief explanation>"}}
+Assess (1) the current market sentiment and (2) whether BUYING here looks advisable now.
+
+Respond ONLY with a JSON object (no markdown, no extra text):
+{{"classification": "<bullish|bearish|neutral>", "confidence": <0-100>, "recommendation": "<buy|hold|avoid>", "reasoning": "<a thorough paragraph citing specific analyst views, news, and retail sentiment you found, and what drives the recommendation>"}}

 Rules:
- classification must be exactly one of: bullish, bearish, neutral
+- classification = overall mood/tone (bullish, bearish, neutral)
+- recommendation = actionable view on buying now (buy, hold, avoid)
 - confidence must be an integer from 0 to 100
- reasoning should be a brief one-sentence explanation
+- reasoning should be several sentences citing specific, recent findings
 """

 VALID_CLASSIFICATIONS = {"bullish", "bearish", "neutral"}
+VALID_RECOMMENDATIONS = {"buy", "hold", "avoid"}
+
+
+def _parse_recommendation(value: object) -> str | None:  # normalise a raw "recommendation" field to buy/hold/avoid, or None; NOTE(review): same helper body is added in the other provider hunks of this commit — candidate for one shared import
+    v = str(value or "").strip().lower()  # falsy input (None, "", 0) collapses to ""; match is whitespace- and case-insensitive
+    return v if v in VALID_RECOMMENDATIONS else None  # anything outside the allowed set maps to None
+
+
+def _extract_citations(response: object) -> list[dict[str, str]]:  # returns [{"url": ..., "title": ...}, ...]; empty list when nothing is found
+    """Pull source URLs/titles from Gemini's grounding metadata."""
+    citations: list[dict[str, str]] = []
+    try:
+        candidates = getattr(response, "candidates", None) or []  # missing or None attribute is treated as "no candidates"
+        for cand in candidates:
+            meta = getattr(cand, "grounding_metadata", None)
+            for chunk in (getattr(meta, "grounding_chunks", None) or []):  # also covers meta being None: getattr(None, ..., None) yields None
+                web = getattr(chunk, "web", None)
+                if web is not None:  # chunks without a `web` attribute are skipped
+                    citations.append({
+                        "url": getattr(web, "uri", "") or "",  # `or ""` turns a None uri into an empty string
+                        "title": getattr(web, "title", "") or "",
+                    })
+    except Exception:  # best-effort: a failure here never propagates to the caller
+        pass  # NOTE(review): swallowed without logging; citations gathered before the error are still returned
+    return citations


 class GeminiSentimentProvider:
@@ -90,6 +119,9 @@ class GeminiSentimentProvider:
                confidence=confidence,
                source="gemini",
                timestamp=datetime.now(timezone.utc),
+                reasoning=reasoning,
+                citations=_extract_citations(response),
+                recommendation=_parse_recommendation(parsed.get("recommendation")),
            )

        except json.JSONDecodeError as exc:
@@ -28,18 +28,26 @@ _CA_BUNDLE = os.environ.get("SSL_CERT_FILE", "")

 _SENTIMENT_PROMPT = """\
 Assess the CURRENT market sentiment for the stock ticker {ticker} based on your \
-knowledge of the company, its sector, and recent developments you are aware of.
+knowledge of the company, its sector, and recent developments you are aware of, \
+and whether BUYING here looks advisable.

-Respond ONLY with a JSON object in this exact format (no markdown, no extra text):
-{{"classification": "<bullish|bearish|neutral>", "confidence": <0-100>, "reasoning": "<brief explanation>"}}
+Respond ONLY with a JSON object (no markdown, no extra text):
+{{"classification": "<bullish|bearish|neutral>", "confidence": <0-100>, "recommendation": "<buy|hold|avoid>", "reasoning": "<a thorough explanation of the drivers>"}}

 Rules:
 - classification must be exactly one of: bullish, bearish, neutral
+- recommendation must be exactly one of: buy, hold, avoid
 - confidence must be an integer from 0 to 100
- reasoning should be a brief one-sentence explanation
+- reasoning should be several sentences
 """

 VALID_CLASSIFICATIONS = {"bullish", "bearish", "neutral"}
+VALID_RECOMMENDATIONS = {"buy", "hold", "avoid"}
+
+
+def _parse_recommendation(value: object) -> str | None:  # normalise a raw "recommendation" field to buy/hold/avoid, or None; NOTE(review): same helper body is added in the other provider hunks of this commit — candidate for one shared import
+    v = str(value or "").strip().lower()  # falsy input (None, "", 0) collapses to ""; match is whitespace- and case-insensitive
+    return v if v in VALID_RECOMMENDATIONS else None  # anything outside the allowed set maps to None


 def _clean_json_text(raw: str) -> str:
@@ -116,6 +124,7 @@ class OpenAICompatibleSentimentProvider:
                source=self._source,
                timestamp=datetime.now(timezone.utc),
                reasoning=reasoning,
+                recommendation=_parse_recommendation(parsed.get("recommendation")),
            )

        except json.JSONDecodeError as exc:
@@ -19,39 +19,48 @@ logger = logging.getLogger(__name__)
 _CA_BUNDLE = os.environ.get("SSL_CERT_FILE", "")

 _SENTIMENT_PROMPT = """\
-Search the web for the LATEST news, analyst opinions, and market developments \
-about the stock ticker {ticker} from the past 24-48 hours.
+Search the web for the latest news, analyst ratings/opinions, and retail/social \
+discussion (e.g. Reddit, StockTwits) about the stock ticker {ticker} from roughly \
+the past 1-2 weeks.

-Based on your web search findings, analyze the CURRENT market sentiment.
+Assess (1) the current market sentiment and (2) whether BUYING here looks advisable now.

-Respond ONLY with a JSON object in this exact format (no markdown, no extra text):
-{{"classification": "<bullish|bearish|neutral>", "confidence": <0-100>, "reasoning": "<brief explanation citing recent news>"}}
+Respond ONLY with a JSON object (no markdown, no extra text):
+{{"classification": "<bullish|bearish|neutral>", "confidence": <0-100>, "recommendation": "<buy|hold|avoid>", "reasoning": "<a thorough paragraph citing specific analyst views, news, and retail sentiment you found, and what drives the recommendation>"}}

 Rules:
- classification must be exactly one of: bullish, bearish, neutral
+- classification = overall mood/tone of the coverage (bullish, bearish, neutral)
+- recommendation = actionable view on buying at the current price (buy, hold, avoid)
 - confidence must be an integer from 0 to 100
- reasoning should cite specific recent news or events you found
+- reasoning should be several sentences citing specific, recent findings
 """

 _SENTIMENT_BATCH_PROMPT = """\
-Search the web for the LATEST news, analyst opinions, and market developments \
-about each stock ticker from the past 24-48 hours.
+Search the web for the latest news, analyst ratings/opinions, and retail/social \
+discussion about each stock ticker from roughly the past 1-2 weeks.

 Tickers:
 {tickers_csv}

 Respond ONLY with a JSON array (no markdown, no extra text), one object per ticker:
-[{{"ticker":"AAPL","classification":"bullish|bearish|neutral","confidence":0-100,"reasoning":"brief explanation"}}]
+[{{"ticker":"AAPL","classification":"bullish|bearish|neutral","confidence":0-100,"recommendation":"buy|hold|avoid","reasoning":"thorough explanation citing findings"}}]

 Rules:
- Include every ticker exactly once
- ticker must be uppercase symbol
+- Include every ticker exactly once; ticker must be the uppercase symbol
 - classification must be exactly one of: bullish, bearish, neutral
+- recommendation must be exactly one of: buy, hold, avoid
 - confidence must be an integer from 0 to 100
 - reasoning should cite specific recent news or events you found
 """

 VALID_CLASSIFICATIONS = {"bullish", "bearish", "neutral"}
+VALID_RECOMMENDATIONS = {"buy", "hold", "avoid"}
+
+
+def parse_recommendation(value: object) -> str | None:  # public (no underscore) variant; the other provider hunks in this commit add private copies with the same body
+    """Normalise a recommendation to buy/hold/avoid, or None if absent/invalid."""
+    v = str(value or "").strip().lower()  # falsy input (None, "", 0) collapses to ""; match is whitespace- and case-insensitive
+    return v if v in VALID_RECOMMENDATIONS else None  # anything outside the allowed set maps to None


 class OpenAISentimentProvider:
@@ -135,6 +144,7 @@ class OpenAISentimentProvider:
            timestamp=datetime.now(timezone.utc),
            reasoning=reasoning,
            citations=citations,
+            recommendation=parse_recommendation(parsed.get("recommendation")),
        )

    async def fetch_sentiment(self, ticker: str) -> SentimentData:
@@ -41,6 +41,7 @@ class SentimentData:
    timestamp: datetime
    reasoning: str = ""
    citations: list[dict[str, str]] = field(default_factory=list)  # [{"url": ..., "title": ...}]
+    recommendation: str | None = None  # "buy" | "hold" | "avoid" — actionable LLM view


@dataclass(frozen=True, slots=True)
@@ -30,7 +30,7 @@ def _parse_citations(citations_json: str) -> list[CitationItem]:
@router.get("/sentiment/{symbol}", response_model=APIEnvelope)
 async def read_sentiment(
    symbol: str,
-    lookback_hours: float = Query(24, gt=0, description="Lookback window in hours"),
+    lookback_hours: float = Query(168, gt=0, description="Lookback window in hours"),
    _user=Depends(require_access),
    db: AsyncSession = Depends(get_db),
 ) -> APIEnvelope:
@@ -51,6 +51,7 @@ async def read_sentiment(
                timestamp=s.timestamp,
                reasoning=s.reasoning,
                citations=_parse_citations(s.citations_json),
+                recommendation=s.recommendation,
            )
            for s in scores
        ],
@@ -16,10 +16,10 @@ from __future__ import annotations
 import json
 import logging
 import asyncio
-from datetime import date, datetime, timezone
+from datetime import date, datetime, timedelta, timezone

 from apscheduler.schedulers.asyncio import AsyncIOScheduler
-from sqlalchemy import case, func, select
+from sqlalchemy import case, func, or_, select
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.config import settings
@@ -281,20 +281,49 @@ async def _get_ohlcv_priority_tickers(db: AsyncSession) -> list[str]:


 async def _get_sentiment_priority_tickers(db: AsyncSession) -> list[str]:
-    """Return symbols prioritized for sentiment collection.
+    """Symbols to fetch sentiment for, budgeted to stay in the free search tier.

-    Priority:
-      1) Tickers with no sentiment records
-      2) Tickers with records, oldest latest sentiment timestamp first
-      3) Alphabetical tiebreaker
+    Scope: only tickers that matter — watchlist + open paper trades + top-N by
+    composite score. Skip any refreshed within ``sentiment_fresh_hours``. Cap the
+    run at ``sentiment_max_per_run``, oldest/missing first. Once the relevant set
+    is fresh, runs make zero grounded searches until it ages out.
    """
+    from app.models.paper_trade import PaperTrade  # function-scope imports — presumably to avoid a circular import; TODO confirm
+    from app.models.score import CompositeScore
+    from app.models.watchlist import WatchlistEntry
+
+    relevant: set[int] = set()  # union of ticker ids from the three sources below
+    wl = await db.execute(
+        select(WatchlistEntry.ticker_id)
+        .where(WatchlistEntry.entry_type != "dismissed")  # dismissed watchlist entries do not count as relevant
+        .distinct()
+    )
+    relevant.update(r[0] for r in wl.all())  # each row is a 1-tuple holding ticker_id
+    pt = await db.execute(
+        select(PaperTrade.ticker_id).where(PaperTrade.status == "open").distinct()  # only paper trades still open
+    )
+    relevant.update(r[0] for r in pt.all())
+    top = await db.execute(
+        select(CompositeScore.ticker_id)
+        .order_by(CompositeScore.score.desc())  # highest composite score first
+        .limit(settings.sentiment_top_composite)  # NOTE(review): no .distinct() here — if a ticker can have several CompositeScore rows the limit counts rows, not tickers; confirm
+    )
+    relevant.update(r[0] for r in top.all())
+
+    if not relevant:  # nothing in scope: return early and skip the priority query
+        return []
+
+    cutoff = datetime.now(timezone.utc) - timedelta(hours=settings.sentiment_fresh_hours)  # sentiment older than this counts as stale
    latest_ts = func.max(SentimentScore.timestamp)  # newest sentiment timestamp per ticker; NULL when the outer join finds no rows
    missing_first = case((latest_ts.is_(None), 0), else_=1)  # sort key: 0 for never-fetched tickers, 1 otherwise
    result = await db.execute(
        select(Ticker.symbol)
        .outerjoin(SentimentScore, SentimentScore.ticker_id == Ticker.id)
+        .where(Ticker.id.in_(relevant))  # restrict to the relevant set built above
        .group_by(Ticker.id, Ticker.symbol)
+        .having(or_(latest_ts.is_(None), latest_ts < cutoff))  # keep only never-fetched or stale tickers
        .order_by(missing_first.asc(), latest_ts.asc(), Ticker.symbol.asc())
+        .limit(settings.sentiment_max_per_run)  # per-run cap, applied after the ordering above
    )
    return list(result.scalars().all())

@@ -531,6 +560,7 @@ async def collect_sentiment() -> None:
                            timestamp=data.timestamp,
                            reasoning=data.reasoning,
                            citations=data.citations,
+                            recommendation=data.recommendation,
                        )
                        _last_successful[job_name] = symbol
                        processed += 1
@@ -25,6 +25,7 @@ class SentimentScoreResult(BaseModel):
    timestamp: datetime
    reasoning: str = ""
    citations: list[CitationItem] = []
+    recommendation: Literal["buy", "hold", "avoid"] | None = None


 class SentimentResponse(BaseModel):
@@ -347,7 +347,7 @@ async def _compute_sentiment_score(
        get_sentiment_scores,
    )

-    lookback_hours: float = 24
+    lookback_hours: float = 168  # 7 days — sentiment is collected sparsely to stay in free tier
    decay_rate: float = 0.1

    try:
@@ -37,6 +37,7 @@ async def store_sentiment(
    timestamp: datetime | None = None,
    reasoning: str = "",
    citations: list[dict] | None = None,
+    recommendation: str | None = None,
 ) -> SentimentScore:
    """Store a new sentiment record for a ticker."""
    ticker = await _get_ticker(db, symbol)
@@ -55,6 +56,7 @@ async def store_sentiment(
        timestamp=timestamp,
        reasoning=reasoning,
        citations_json=json.dumps(citations),
+        recommendation=recommendation,
    )
    db.add(record)
    await db.commit()