Files
signal-platform/app/providers/gemini_sentiment.py
T
dennisthiessen e5166ed668
Deploy / lint (push) Successful in 6s
Deploy / test (push) Successful in 34s
Deploy / deploy (push) Successful in 21s
sentiment: LLM buy/hold/avoid + full analysis, and search-budget scoping
Richer LLM output (same grounded call, ~no extra cost):
- All providers now also return a recommendation (buy/hold/avoid) and a thorough
  reasoning paragraph; Gemini now actually captures reasoning + grounding
  citations (it was dropping them). Stored on sentiment_scores (migration 008),
  exposed in the API; display-only — NOT fed into the composite/EV.
- Ticker Sentiment panel shows an "LLM view" badge and a "Full analysis & sources"
  expander with the complete reasoning + citations.

Search-budget scoping (Gemini grounding free tier = 5000/mo):
- collect_sentiment now targets only watchlist + open paper trades + top-N by
  composite, skips tickers refreshed within sentiment_fresh_hours (72h), and caps
  per run (sentiment_max_per_run). Once the relevant set is fresh, runs spend 0
  searches until it ages out — bounding monthly usage well under the free tier.
- Widened sentiment lookback to 7d (scoring + display) so sparser collection
  still feeds the dimension score.

Deploy: alembic upgrade (sentiment_scores.recommendation). Switch provider to
Gemini Flash in Admin for the cost win (grounded, cheapest).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 16:34:19 +02:00

138 lines
5.7 KiB
Python

"""Gemini sentiment provider using google-genai with search grounding."""
from __future__ import annotations

import json
import logging
import os
import re
from datetime import datetime, timezone
from pathlib import Path

from google import genai
from google.genai import types

from app.exceptions import ProviderError, RateLimitError
from app.providers.protocol import SentimentData

logger = logging.getLogger(__name__)
# aiohttp builds its verified SSL context (_SSL_CONTEXT_VERIFIED) once, at import
# time. To make google-genai's aiohttp session trust our corporate/proxy CA, the
# bundle named by SSL_CERT_FILE is loaded into that already-created context.
# This is best-effort: any failure is logged and the module still imports.
_CA_BUNDLE = os.environ.get("SSL_CERT_FILE", "")
if _CA_BUNDLE and Path(_CA_BUNDLE).exists():
    try:
        import aiohttp.connector as _aio_conn

        # A single lookup covers both "attribute missing" and "attribute is None".
        _verified_ctx = getattr(_aio_conn, "_SSL_CONTEXT_VERIFIED", None)
        if _verified_ctx is not None:
            _verified_ctx.load_verify_locations(cafile=_CA_BUNDLE)
            logger.debug("Patched aiohttp _SSL_CONTEXT_VERIFIED with %s", _CA_BUNDLE)
    except Exception:
        logger.warning("Could not patch aiohttp SSL context", exc_info=True)
# Prompt template for the grounded sentiment request. It is filled in with
# ``str.format(ticker=...)``, which is why the literal braces of the JSON
# example are doubled (``{{`` / ``}}``) while ``{ticker}`` is a placeholder.
_SENTIMENT_PROMPT = """\
Search the web for the latest news, analyst ratings/opinions, and retail/social \
discussion (e.g. Reddit, StockTwits) about the stock ticker {ticker} from roughly \
the past 1-2 weeks.
Assess (1) the current market sentiment and (2) whether BUYING here looks advisable now.
Respond ONLY with a JSON object (no markdown, no extra text):
{{"classification": "<bullish|bearish|neutral>", "confidence": <0-100>, "recommendation": "<buy|hold|avoid>", "reasoning": "<a thorough paragraph citing specific analyst views, news, and retail sentiment you found, and what drives the recommendation>"}}
Rules:
- classification = overall mood/tone (bullish, bearish, neutral)
- recommendation = actionable view on buying now (buy, hold, avoid)
- confidence must be an integer from 0 to 100
- reasoning should be several sentences citing specific, recent findings
"""
# Accepted values for the model's ``classification`` field; the provider
# rejects a response whose classification is not in this set.
VALID_CLASSIFICATIONS = {"bullish", "bearish", "neutral"}
# Accepted values for the model's ``recommendation`` field; anything else is
# mapped to None by ``_parse_recommendation``.
VALID_RECOMMENDATIONS = {"buy", "hold", "avoid"}
def _parse_recommendation(value: object) -> str | None:
    """Normalise a model-supplied recommendation.

    The value is stringified, trimmed and lower-cased. It is returned only if
    it is one of ``VALID_RECOMMENDATIONS``; falsy or unrecognised input
    yields ``None``.
    """
    text = str(value) if value else ""
    normalised = text.strip().lower()
    if normalised in VALID_RECOMMENDATIONS:
        return normalised
    return None
def _extract_citations(response: object) -> list[dict[str, str]]:
    """Collect web sources from Gemini's grounding metadata.

    Walks ``response.candidates[*].grounding_metadata.grounding_chunks`` and
    returns one ``{"url": ..., "title": ...}`` dict per chunk that carries a
    ``web`` entry. Missing or ``None`` attributes are treated as empty, and a
    missing ``uri``/``title`` becomes an empty string.

    Extraction is best-effort: on an unexpected error the citations gathered so
    far are returned, and the failure is logged at debug level rather than
    being silently discarded.
    """
    citations: list[dict[str, str]] = []
    try:
        for cand in getattr(response, "candidates", None) or []:
            meta = getattr(cand, "grounding_metadata", None)
            for chunk in getattr(meta, "grounding_chunks", None) or []:
                web = getattr(chunk, "web", None)
                if web is None:
                    # Chunks without a web source contribute no citation.
                    continue
                citations.append({
                    "url": getattr(web, "uri", "") or "",
                    "title": getattr(web, "title", "") or "",
                })
    except Exception:
        # Citations are display-only extras; never let them fail the request.
        logger.debug("Could not extract Gemini grounding citations", exc_info=True)
    return citations
class GeminiSentimentProvider:
    """Fetch sentiment for a ticker from Gemini with Google Search grounding.

    One grounded ``generate_content`` request is made per ticker; the model's
    JSON answer is validated and returned as a ``SentimentData`` record.
    """

    # Whole-token matchers used when classifying API failures. Plain substring
    # tests are not enough: "generate" contains "rate" and "14290" contains "429".
    _HTTP_429_RE = re.compile(r"(?<!\d)429(?!\d)")
    _RATE_WORD_RE = re.compile(r"(?<![a-z])rate")

    def __init__(self, api_key: str, model: str = "gemini-2.0-flash") -> None:
        """Create the Gemini client.

        Args:
            api_key: Gemini API key; must be non-empty.
            model: Model name passed to every ``generate_content`` call.

        Raises:
            ProviderError: If ``api_key`` is empty.
        """
        if not api_key:
            raise ProviderError("Gemini API key is required")
        self._client = genai.Client(api_key=api_key)
        self._model = model

    async def fetch_sentiment(self, ticker: str) -> SentimentData:
        """Send the structured prompt to Gemini and parse the JSON response.

        Args:
            ticker: Stock ticker symbol interpolated into the prompt.

        Returns:
            A ``SentimentData`` holding the classification, the confidence
            clamped to 0..100, the reasoning text, grounding citations and the
            (optional) recommendation.

        Raises:
            RateLimitError: If the API call fails with a quota / rate-limit error.
            ProviderError: For any other API failure, or for an empty, non-JSON
                or otherwise invalid response.
        """
        # Stage 1: the network call. Only failures of the call itself are
        # candidates for rate-limit classification.
        try:
            response = await self._client.aio.models.generate_content(
                model=self._model,
                contents=_SENTIMENT_PROMPT.format(ticker=ticker),
                config=types.GenerateContentConfig(
                    tools=[types.Tool(google_search=types.GoogleSearch())],
                    response_mime_type="application/json",
                ),
            )
        except Exception as exc:
            if self._is_rate_limit_error(exc):
                raise RateLimitError(f"Gemini rate limit hit for {ticker}") from exc
            logger.error("Gemini provider error for %s: %s", ticker, exc)
            raise ProviderError(f"Gemini provider error for {ticker}: {exc}") from exc

        # Stage 2: response parsing. Errors here are never rate limits, so they
        # are kept away from the message heuristics above.
        try:
            return self._to_sentiment(ticker, response)
        except json.JSONDecodeError as exc:
            logger.error("Failed to parse Gemini JSON for %s: %s", ticker, exc)
            raise ProviderError(f"Invalid JSON from Gemini for {ticker}") from exc
        except ProviderError:
            raise
        except Exception as exc:
            # Keep the contract that callers only ever see provider errors.
            logger.error("Gemini provider error for %s: %s", ticker, exc)
            raise ProviderError(f"Gemini provider error for {ticker}: {exc}") from exc

    def _to_sentiment(self, ticker: str, response: object) -> SentimentData:
        """Validate Gemini's JSON answer and convert it to ``SentimentData``."""
        # ``text`` is read defensively so a response without text produces a
        # clear provider error instead of an AttributeError on None.
        raw_text = self._strip_code_fence(getattr(response, "text", None) or "")
        logger.debug("Gemini raw response for %s: %s", ticker, raw_text)
        if not raw_text:
            raise ProviderError(f"Empty response from Gemini for {ticker}")

        parsed = json.loads(raw_text)
        if not isinstance(parsed, dict):
            raise ProviderError(f"Unexpected JSON shape from Gemini for {ticker}")

        # Normalised the same way as the recommendation: null-safe, trimmed,
        # lower-cased.
        classification = str(parsed.get("classification") or "").strip().lower()
        if classification not in VALID_CLASSIFICATIONS:
            raise ProviderError(
                f"Invalid classification '{classification}' from Gemini for {ticker}"
            )

        confidence = self._coerce_confidence(parsed.get("confidence"))

        reasoning = parsed.get("reasoning") or ""
        if not isinstance(reasoning, str):
            reasoning = str(reasoning)
        if reasoning:
            logger.info("Gemini sentiment for %s: %s (confidence=%d) — %s",
                        ticker, classification, confidence, reasoning)

        return SentimentData(
            ticker=ticker,
            classification=classification,
            confidence=confidence,
            source="gemini",
            timestamp=datetime.now(timezone.utc),
            reasoning=reasoning,
            citations=_extract_citations(response),
            recommendation=_parse_recommendation(parsed.get("recommendation")),
        )

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Return ``text`` trimmed, without a surrounding Markdown code fence."""
        stripped = text.strip()
        if not stripped.startswith("```"):
            return stripped
        # Drop the opening fence line, which may carry a language tag ("```json").
        head, newline, body = stripped.partition("\n")
        if not newline:
            body = head[3:]
        body = body.rstrip()
        if body.endswith("```"):
            body = body[:-3]
        return body.strip()

    @staticmethod
    def _coerce_confidence(value: object, default: int = 50) -> int:
        """Convert the model's ``confidence`` field to an int clamped to 0..100.

        ``None``, an empty string and NaN fall back to ``default``. Numeric
        strings (optionally with a trailing ``%``) and floats are accepted;
        fractional values are truncated. Non-numeric input raises
        ``ValueError`` or ``TypeError``.
        """
        if isinstance(value, str):
            value = value.strip().rstrip("%").strip()
        if value is None or value == "":
            return default
        if not isinstance(value, int):
            number = float(value)
            if number != number:  # NaN carries no usable information
                return default
            value = number
        # Clamp before truncating so infinities cannot overflow int().
        return int(max(0, min(100, value)))

    @classmethod
    def _is_rate_limit_error(cls, exc: BaseException) -> bool:
        """Return True when ``exc`` looks like a quota / rate-limit failure."""
        # Prefer a structured HTTP status when the exception exposes one.
        for attr in ("code", "status_code"):
            if getattr(exc, attr, None) == 429:
                return True
        msg = str(exc).lower()
        if cls._HTTP_429_RE.search(msg):
            return True
        if "resource exhausted" in msg or "resource_exhausted" in msg or "quota" in msg:
            return True
        # "rate" must start a word, so "generate ... limit" does not qualify.
        return bool(cls._RATE_WORD_RE.search(msg)) and "limit" in msg