e5166ed668
Richer LLM output (same grounded call, ~no extra cost): - All providers now also return a recommendation (buy/hold/avoid) and a thorough reasoning paragraph; Gemini now actually captures reasoning + grounding citations (it was dropping them). Stored on sentiment_scores (migration 008), exposed in the API; display-only — NOT fed into the composite/EV. - Ticker Sentiment panel shows an "LLM view" badge and a "Full analysis & sources" expander with the complete reasoning + citations. Search-budget scoping (Gemini grounding free tier = 5000/mo): - collect_sentiment now targets only watchlist + open paper trades + top-N by composite, skips tickers refreshed within sentiment_fresh_hours (72h), and caps per run (sentiment_max_per_run). Once the relevant set is fresh, runs spend 0 searches until it ages out — bounding monthly usage well under the free tier. - Widened sentiment lookback to 7d (scoring + display) so sparser collection still feeds the dimension score. Deploy: alembic upgrade (sentiment_scores.recommendation). Switch provider to Gemini Flash in Admin for the cost win (grounded, cheapest). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
234 lines
9.7 KiB
Python
234 lines
9.7 KiB
Python
"""OpenAI sentiment provider using the Responses API with web search."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
from openai import AsyncOpenAI
|
|
|
|
from app.exceptions import ProviderError, RateLimitError
|
|
from app.providers.protocol import SentimentData
|
|
|
|
# Module-level logger; handlers and levels are left to the application.
logger = logging.getLogger(__name__)

# Optional custom CA bundle path read from SSL_CERT_FILE at import time.
# An empty string means "not configured".
_CA_BUNDLE = os.getenv("SSL_CERT_FILE", "")
|
|
|
|
_SENTIMENT_PROMPT = """\
|
|
Search the web for the latest news, analyst ratings/opinions, and retail/social \
|
|
discussion (e.g. Reddit, StockTwits) about the stock ticker {ticker} from roughly \
|
|
the past 1-2 weeks.
|
|
|
|
Assess (1) the current market sentiment and (2) whether BUYING here looks advisable now.
|
|
|
|
Respond ONLY with a JSON object (no markdown, no extra text):
|
|
{{"classification": "<bullish|bearish|neutral>", "confidence": <0-100>, "recommendation": "<buy|hold|avoid>", "reasoning": "<a thorough paragraph citing specific analyst views, news, and retail sentiment you found, and what drives the recommendation>"}}
|
|
|
|
Rules:
|
|
- classification = overall mood/tone of the coverage (bullish, bearish, neutral)
|
|
- recommendation = actionable view on buying at the current price (buy, hold, avoid)
|
|
- confidence must be an integer from 0 to 100
|
|
- reasoning should be several sentences citing specific, recent findings
|
|
"""
|
|
|
|
_SENTIMENT_BATCH_PROMPT = """\
|
|
Search the web for the latest news, analyst ratings/opinions, and retail/social \
|
|
discussion about each stock ticker from roughly the past 1-2 weeks.
|
|
|
|
Tickers:
|
|
{tickers_csv}
|
|
|
|
Respond ONLY with a JSON array (no markdown, no extra text), one object per ticker:
|
|
[{{"ticker":"AAPL","classification":"bullish|bearish|neutral","confidence":0-100,"recommendation":"buy|hold|avoid","reasoning":"thorough explanation citing findings"}}]
|
|
|
|
Rules:
|
|
- Include every ticker exactly once; ticker must be the uppercase symbol
|
|
- classification must be exactly one of: bullish, bearish, neutral
|
|
- recommendation must be exactly one of: buy, hold, avoid
|
|
- confidence must be an integer from 0 to 100
|
|
- reasoning should cite specific recent news or events you found
|
|
"""
|
|
|
|
# Sentiment labels accepted from the model; any other classification is
# rejected when a result is normalised.
VALID_CLASSIFICATIONS = {
    "bullish",
    "bearish",
    "neutral",
}

# Recommendation labels accepted from the model; any other value is mapped to
# ``None`` by ``parse_recommendation``.
VALID_RECOMMENDATIONS = {
    "buy",
    "hold",
    "avoid",
}
|
|
|
|
|
|
def parse_recommendation(value: object) -> str | None:
    """Map *value* onto buy/hold/avoid.

    Falsy input is treated as an empty string; anything else is stringified,
    stripped of surrounding whitespace and lower-cased.

    Returns:
        The normalised label when it is a member of ``VALID_RECOMMENDATIONS``,
        otherwise ``None``.
    """
    text = str(value) if value else ""
    candidate = text.strip().lower()
    if candidate not in VALID_RECOMMENDATIONS:
        return None
    return candidate
|
|
|
|
|
|
class OpenAISentimentProvider:
    """Sentiment via the Responses API + web-search tool, with live grounding.

    Works against any provider implementing the OpenAI Responses API. OpenAI
    uses the ``web_search_preview`` tool; xAI Grok uses ``web_search`` at the
    ``https://api.x.ai/v1`` base URL.
    """

    # System instructions sent with every request (single and batch).
    _INSTRUCTIONS = (
        "You are a financial sentiment analyst. "
        "Always respond with valid JSON only, no markdown fences."
    )

    # Phrases that identify throttling/quota failures when the exception does
    # not expose an HTTP status code. Deliberately more specific than a bare
    # "rate", which also matches words such as "generate" or "moderate".
    _RATE_LIMIT_MARKERS = (
        "429",
        "rate limit",
        "rate_limit",
        "ratelimit",
        "too many requests",
        "quota",
    )

    def __init__(
        self,
        api_key: str,
        model: str = "gpt-4o-mini",
        base_url: str | None = None,
        tool_type: str = "web_search_preview",
        source: str = "openai",
    ) -> None:
        """Build the async client.

        Args:
            api_key: API key for the target provider; must be non-empty.
            model: Model name passed to ``responses.create``.
            base_url: Optional alternative API base URL.
            tool_type: ``type`` of the web-search tool sent with each request.
            source: Provider label stored on results and used in log/error text.

        Raises:
            ProviderError: If ``api_key`` is empty.
        """
        if not api_key:
            raise ProviderError(f"{source} API key is required")

        # Only hand httpx a CA bundle that is configured and actually exists.
        http_kwargs: dict = {}
        if _CA_BUNDLE and Path(_CA_BUNDLE).exists():
            http_kwargs["verify"] = _CA_BUNDLE
        http_client = httpx.AsyncClient(**http_kwargs)

        client_kwargs: dict = {"api_key": api_key, "http_client": http_client}
        if base_url:
            client_kwargs["base_url"] = base_url

        self._client = AsyncOpenAI(**client_kwargs)
        self._model = model
        self._tool_type = tool_type
        self._source = source

    @classmethod
    def _is_rate_limited(cls, exc: BaseException) -> bool:
        """Return True when *exc* looks like a rate-limit or quota failure.

        An HTTP 429 ``status_code`` attribute on the exception is checked
        first; otherwise the lower-cased message is searched for the phrases
        in ``_RATE_LIMIT_MARKERS``.
        """
        if getattr(exc, "status_code", None) == 429:
            return True
        text = str(exc).lower()
        return any(marker in text for marker in cls._RATE_LIMIT_MARKERS)

    @staticmethod
    def _extract_raw_text(response: object, ticker_context: str, source: str = "OpenAI") -> str:
        """Return the first non-empty text block of *response*, without code fences.

        Args:
            response: Responses API result exposing an ``output`` sequence.
            ticker_context: Ticker (or comma-joined tickers) used in error text.
            source: Provider label used in error text.

        Raises:
            ProviderError: If no ``message`` item carries any text.
        """
        raw_text = ""
        for item in response.output:
            if item.type != "message" or not item.content:
                continue
            raw_text = next(
                (block.text for block in item.content if getattr(block, "text", None)),
                "",
            )
            if raw_text:
                break

        if not raw_text:
            raise ProviderError(f"No text output from {source} for {ticker_context}")

        clean = raw_text.strip()
        if clean.startswith("```"):
            # Drop the whole opening fence line; it may carry a language tag
            # such as ```json.
            clean = clean.split("\n", 1)[1] if "\n" in clean else clean[3:]
        if clean.endswith("```"):
            clean = clean[:-3]
        return clean.strip()

    @staticmethod
    def _extract_citations(response: object) -> list[dict[str, str]]:
        """Collect ``url_citation`` annotations from the message blocks of *response*.

        Returns:
            A list of ``{"url": ..., "title": ...}`` dicts in response order;
            missing attributes default to an empty string.
        """
        citations: list[dict[str, str]] = []
        for item in response.output:
            if item.type != "message" or not item.content:
                continue
            for block in item.content:
                for annotation in getattr(block, "annotations", None) or ():
                    if getattr(annotation, "type", None) != "url_citation":
                        continue
                    citations.append({
                        "url": getattr(annotation, "url", ""),
                        "title": getattr(annotation, "title", ""),
                    })
        return citations

    def _normalize_single_result(self, parsed: dict, ticker: str, citations: list[dict[str, str]]) -> SentimentData:
        """Validate one decoded JSON object and convert it to ``SentimentData``.

        Args:
            parsed: Decoded JSON object returned by the model.
            ticker: Symbol the result belongs to.
            citations: Citation dicts to attach to the result.

        Raises:
            ProviderError: If *parsed* is not a JSON object, the classification
                is not in ``VALID_CLASSIFICATIONS``, or the confidence cannot
                be read as a number.
        """
        if not isinstance(parsed, dict):
            raise ProviderError(
                f"Expected a JSON object from {self._source} for {ticker}, "
                f"got {type(parsed).__name__}"
            )

        classification = str(parsed.get("classification", "")).strip().lower()
        if classification not in VALID_CLASSIFICATIONS:
            raise ProviderError(
                f"Invalid classification '{classification}' from {self._source} for {ticker}"
            )

        # A missing or null confidence falls back to the neutral midpoint.
        raw_confidence = parsed.get("confidence")
        if raw_confidence is None:
            confidence = 50
        else:
            try:
                # Going through float accepts 85, 85.5 and "85.5" alike.
                confidence = int(float(raw_confidence))
            except (TypeError, ValueError, OverflowError) as exc:
                raise ProviderError(
                    f"Invalid confidence {raw_confidence!r} from {self._source} for {ticker}"
                ) from exc
        confidence = max(0, min(100, confidence))

        # ``or ""`` keeps a JSON null from becoming the literal string "None".
        reasoning = str(parsed.get("reasoning") or "")

        if reasoning:
            logger.info(
                "%s sentiment for %s: %s (confidence=%d) — %s",
                self._source,
                ticker,
                classification,
                confidence,
                reasoning,
            )

        return SentimentData(
            ticker=ticker,
            classification=classification,
            confidence=confidence,
            source=self._source,
            timestamp=datetime.now(timezone.utc),
            reasoning=reasoning,
            citations=citations,
            recommendation=parse_recommendation(parsed.get("recommendation")),
        )

    async def fetch_sentiment(self, ticker: str) -> SentimentData:
        """Fetch live sentiment for one ticker via the Responses API and the configured web-search tool.

        Raises:
            RateLimitError: If the request failed with a rate-limit/quota error.
            ProviderError: If the response is empty, not valid JSON, fails
                validation, or the request failed for any other reason.
        """
        try:
            response = await self._client.responses.create(
                model=self._model,
                tools=[{"type": self._tool_type}],
                instructions=self._INSTRUCTIONS,
                input=_SENTIMENT_PROMPT.format(ticker=ticker),
            )
            clean = self._extract_raw_text(response, ticker, self._source)
            logger.debug("%s raw response for %s: %s", self._source, ticker, clean)
            parsed = json.loads(clean)
            citations = self._extract_citations(response)
            return self._normalize_single_result(parsed, ticker, citations)

        except json.JSONDecodeError as exc:
            logger.error("Failed to parse %s JSON for %s: %s", self._source, ticker, exc)
            raise ProviderError(f"Invalid JSON from {self._source} for {ticker}") from exc
        except ProviderError:
            raise
        except Exception as exc:
            if self._is_rate_limited(exc):
                raise RateLimitError(f"{self._source} rate limit hit for {ticker}") from exc
            logger.error("%s provider error for %s: %s", self._source, ticker, exc)
            raise ProviderError(f"{self._source} provider error for {ticker}: {exc}") from exc

    async def fetch_sentiment_batch(self, tickers: list[str]) -> dict[str, SentimentData]:
        """Fetch sentiment for multiple tickers in one request.

        Returns a map keyed by uppercase ticker symbol. Invalid/missing rows
        are skipped (invalid ones are logged). Batch rows carry no citations.

        Raises:
            RateLimitError: If the request failed with a rate-limit/quota error.
            ProviderError: If the response is empty, not a JSON array, or the
                request failed for any other reason.
        """
        # Normalise and de-duplicate while keeping the caller's order, so the
        # prompt's "every ticker exactly once" rule is satisfiable.
        normalized = list(
            dict.fromkeys(t.strip().upper() for t in tickers if t and t.strip())
        )
        if not normalized:
            return {}

        ticker_context = ",".join(normalized)
        try:
            response = await self._client.responses.create(
                model=self._model,
                tools=[{"type": self._tool_type}],
                instructions=self._INSTRUCTIONS,
                input=_SENTIMENT_BATCH_PROMPT.format(tickers_csv=", ".join(normalized)),
            )
            clean = self._extract_raw_text(response, ticker_context, self._source)
            logger.debug("%s batch raw response for %s: %s", self._source, ticker_context, clean)
            parsed = json.loads(clean)
            if not isinstance(parsed, list):
                raise ProviderError("Batch sentiment response must be a JSON array")

            out: dict[str, SentimentData] = {}
            requested = set(normalized)
            for row in parsed:
                if not isinstance(row, dict):
                    continue
                symbol = str(row.get("ticker", "")).strip().upper()
                if symbol not in requested:
                    continue
                try:
                    out[symbol] = self._normalize_single_result(row, symbol, citations=[])
                except Exception as exc:
                    # Best-effort: one bad row must not sink the whole batch,
                    # but leave a trace so dropped tickers can be diagnosed.
                    logger.warning(
                        "Skipping invalid %s batch row for %s: %s",
                        self._source,
                        symbol,
                        exc,
                    )
            return out

        except json.JSONDecodeError as exc:
            raise ProviderError(f"Invalid batch JSON from {self._source} for {ticker_context}") from exc
        except ProviderError:
            raise
        except Exception as exc:
            if self._is_rate_limited(exc):
                raise RateLimitError(f"{self._source} rate limit hit for batch {ticker_context}") from exc
            raise ProviderError(f"{self._source} batch provider error for {ticker_context}: {exc}") from exc
|