Files
signal-platform/app/providers/openai_sentiment.py
T
dennisthiessen e5166ed668
Deploy / lint (push) Successful in 6s
Deploy / test (push) Successful in 34s
Deploy / deploy (push) Successful in 21s
sentiment: LLM buy/hold/avoid + full analysis, and search-budget scoping
Richer LLM output (same grounded call, ~no extra cost):
- All providers now also return a recommendation (buy/hold/avoid) and a thorough
  reasoning paragraph; Gemini now actually captures reasoning + grounding
  citations (it was dropping them). Stored on sentiment_scores (migration 008),
  exposed in the API; display-only — NOT fed into the composite/EV.
- Ticker Sentiment panel shows an "LLM view" badge and a "Full analysis & sources"
  expander with the complete reasoning + citations.

Search-budget scoping (Gemini grounding free tier = 5000/mo):
- collect_sentiment now targets only watchlist + open paper trades + top-N by
  composite, skips tickers refreshed within sentiment_fresh_hours (72h), and caps
  per run (sentiment_max_per_run). Once the relevant set is fresh, runs spend 0
  searches until it ages out — bounding monthly usage well under the free tier.
- Widened sentiment lookback to 7d (scoring + display) so sparser collection
  still feeds the dimension score.

Deploy: alembic upgrade (sentiment_scores.recommendation). Switch provider to
Gemini Flash in Admin for the cost win (grounded, cheapest).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 16:34:19 +02:00

234 lines
9.7 KiB
Python

"""OpenAI sentiment provider using the Responses API with web search."""
from __future__ import annotations
import json
import logging
import os
from datetime import datetime, timezone
from pathlib import Path
import httpx
from openai import AsyncOpenAI
from app.exceptions import ProviderError, RateLimitError
from app.providers.protocol import SentimentData
logger = logging.getLogger(__name__)
_CA_BUNDLE = os.environ.get("SSL_CERT_FILE", "")
_SENTIMENT_PROMPT = """\
Search the web for the latest news, analyst ratings/opinions, and retail/social \
discussion (e.g. Reddit, StockTwits) about the stock ticker {ticker} from roughly \
the past 1-2 weeks.
Assess (1) the current market sentiment and (2) whether BUYING here looks advisable now.
Respond ONLY with a JSON object (no markdown, no extra text):
{{"classification": "<bullish|bearish|neutral>", "confidence": <0-100>, "recommendation": "<buy|hold|avoid>", "reasoning": "<a thorough paragraph citing specific analyst views, news, and retail sentiment you found, and what drives the recommendation>"}}
Rules:
- classification = overall mood/tone of the coverage (bullish, bearish, neutral)
- recommendation = actionable view on buying at the current price (buy, hold, avoid)
- confidence must be an integer from 0 to 100
- reasoning should be several sentences citing specific, recent findings
"""
_SENTIMENT_BATCH_PROMPT = """\
Search the web for the latest news, analyst ratings/opinions, and retail/social \
discussion about each stock ticker from roughly the past 1-2 weeks.
Tickers:
{tickers_csv}
Respond ONLY with a JSON array (no markdown, no extra text), one object per ticker:
[{{"ticker":"AAPL","classification":"bullish|bearish|neutral","confidence":0-100,"recommendation":"buy|hold|avoid","reasoning":"thorough explanation citing findings"}}]
Rules:
- Include every ticker exactly once; ticker must be the uppercase symbol
- classification must be exactly one of: bullish, bearish, neutral
- recommendation must be exactly one of: buy, hold, avoid
- confidence must be an integer from 0 to 100
- reasoning should cite specific recent news or events you found
"""
VALID_CLASSIFICATIONS = {"bullish", "bearish", "neutral"}
VALID_RECOMMENDATIONS = {"buy", "hold", "avoid"}
def parse_recommendation(value: object) -> str | None:
"""Normalise a recommendation to buy/hold/avoid, or None if absent/invalid."""
v = str(value or "").strip().lower()
return v if v in VALID_RECOMMENDATIONS else None
class OpenAISentimentProvider:
"""Sentiment via the Responses API + web-search tool, with live grounding.
Works against any provider implementing the OpenAI Responses API. OpenAI
uses the ``web_search_preview`` tool; xAI Grok uses ``web_search`` at the
``https://api.x.ai/v1`` base URL.
"""
def __init__(
self,
api_key: str,
model: str = "gpt-4o-mini",
base_url: str | None = None,
tool_type: str = "web_search_preview",
source: str = "openai",
) -> None:
if not api_key:
raise ProviderError(f"{source} API key is required")
http_kwargs: dict = {}
if _CA_BUNDLE and Path(_CA_BUNDLE).exists():
http_kwargs["verify"] = _CA_BUNDLE
http_client = httpx.AsyncClient(**http_kwargs)
client_kwargs: dict = {"api_key": api_key, "http_client": http_client}
if base_url:
client_kwargs["base_url"] = base_url
self._client = AsyncOpenAI(**client_kwargs)
self._model = model
self._tool_type = tool_type
self._source = source
@staticmethod
def _extract_raw_text(response: object, ticker_context: str) -> str:
raw_text = ""
for item in response.output:
if item.type == "message" and item.content:
for block in item.content:
if hasattr(block, "text") and block.text:
raw_text = block.text
break
if raw_text:
break
if not raw_text:
raise ProviderError(f"No text output from OpenAI for {ticker_context}")
clean = raw_text.strip()
if clean.startswith("```"):
clean = clean.split("\n", 1)[1] if "\n" in clean else clean[3:]
if clean.endswith("```"):
clean = clean[:-3]
return clean.strip()
def _normalize_single_result(self, parsed: dict, ticker: str, citations: list[dict[str, str]]) -> SentimentData:
classification = str(parsed.get("classification", "")).lower()
if classification not in VALID_CLASSIFICATIONS:
raise ProviderError(
f"Invalid classification '{classification}' from {self._source} for {ticker}"
)
confidence = int(parsed.get("confidence", 50))
confidence = max(0, min(100, confidence))
reasoning = str(parsed.get("reasoning", ""))
if reasoning:
logger.info(
"%s sentiment for %s: %s (confidence=%d) — %s",
self._source,
ticker,
classification,
confidence,
reasoning,
)
return SentimentData(
ticker=ticker,
classification=classification,
confidence=confidence,
source=self._source,
timestamp=datetime.now(timezone.utc),
reasoning=reasoning,
citations=citations,
recommendation=parse_recommendation(parsed.get("recommendation")),
)
async def fetch_sentiment(self, ticker: str) -> SentimentData:
"""Use the Responses API with web_search_preview to get live sentiment."""
try:
response = await self._client.responses.create(
model=self._model,
tools=[{"type": self._tool_type}],
instructions="You are a financial sentiment analyst. Always respond with valid JSON only, no markdown fences.",
input=_SENTIMENT_PROMPT.format(ticker=ticker),
)
clean = self._extract_raw_text(response, ticker)
logger.debug("OpenAI raw response for %s: %s", ticker, clean)
parsed = json.loads(clean)
# Extract url_citation annotations from response output
citations: list[dict[str, str]] = []
for item in response.output:
if item.type == "message" and item.content:
for block in item.content:
if hasattr(block, "annotations") and block.annotations:
for annotation in block.annotations:
if getattr(annotation, "type", None) == "url_citation":
citations.append({
"url": getattr(annotation, "url", ""),
"title": getattr(annotation, "title", ""),
})
return self._normalize_single_result(parsed, ticker, citations)
except json.JSONDecodeError as exc:
logger.error("Failed to parse OpenAI JSON for %s: %s", ticker, exc)
raise ProviderError(f"Invalid JSON from OpenAI for {ticker}") from exc
except ProviderError:
raise
except Exception as exc:
msg = str(exc).lower()
if "429" in msg or "rate" in msg or "quota" in msg:
raise RateLimitError(f"OpenAI rate limit hit for {ticker}") from exc
logger.error("OpenAI provider error for %s: %s", ticker, exc)
raise ProviderError(f"OpenAI provider error for {ticker}: {exc}") from exc
async def fetch_sentiment_batch(self, tickers: list[str]) -> dict[str, SentimentData]:
"""Fetch sentiment for multiple tickers in one OpenAI request.
Returns a map keyed by uppercase ticker symbol. Invalid/missing rows are skipped.
"""
normalized = [t.strip().upper() for t in tickers if t and t.strip()]
if not normalized:
return {}
ticker_context = ",".join(normalized)
try:
response = await self._client.responses.create(
model=self._model,
tools=[{"type": self._tool_type}],
instructions="You are a financial sentiment analyst. Always respond with valid JSON only, no markdown fences.",
input=_SENTIMENT_BATCH_PROMPT.format(tickers_csv=", ".join(normalized)),
)
clean = self._extract_raw_text(response, ticker_context)
logger.debug("OpenAI batch raw response for %s: %s", ticker_context, clean)
parsed = json.loads(clean)
if not isinstance(parsed, list):
raise ProviderError("Batch sentiment response must be a JSON array")
out: dict[str, SentimentData] = {}
requested = set(normalized)
for row in parsed:
if not isinstance(row, dict):
continue
symbol = str(row.get("ticker", "")).strip().upper()
if symbol not in requested:
continue
try:
out[symbol] = self._normalize_single_result(row, symbol, citations=[])
except Exception:
continue
return out
except json.JSONDecodeError as exc:
raise ProviderError(f"Invalid batch JSON from OpenAI for {ticker_context}") from exc
except ProviderError:
raise
except Exception as exc:
msg = str(exc).lower()
if "429" in msg or "rate" in msg or "quota" in msg:
raise RateLimitError(f"OpenAI rate limit hit for batch {ticker_context}") from exc
raise ProviderError(f"OpenAI batch provider error for {ticker_context}: {exc}") from exc