137 lines
5.5 KiB
Python
137 lines
5.5 KiB
Python
"""OpenAI sentiment provider using the Responses API with web search."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
from openai import AsyncOpenAI
|
|
|
|
from app.exceptions import ProviderError, RateLimitError
|
|
from app.providers.protocol import SentimentData
|
|
|
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Optional CA bundle path read from SSL_CERT_FILE; empty string when unset.
_CA_BUNDLE = os.getenv("SSL_CERT_FILE", "")

# Prompt template sent as the request input. ``{ticker}`` is substituted with
# str.format, which is why the braces of the JSON example are doubled.
_SENTIMENT_PROMPT = (
    "Search the web for the LATEST news, analyst opinions, and market developments "
    "about the stock ticker {ticker} from the past 24-48 hours.\n"
    "\n"
    "Based on your web search findings, analyze the CURRENT market sentiment.\n"
    "\n"
    "Respond ONLY with a JSON object in this exact format (no markdown, no extra text):\n"
    '{{"classification": "<bullish|bearish|neutral>", "confidence": <0-100>, '
    '"reasoning": "<brief explanation citing recent news>"}}\n'
    "\n"
    "Rules:\n"
    "- classification must be exactly one of: bullish, bearish, neutral\n"
    "- confidence must be an integer from 0 to 100\n"
    "- reasoning should cite specific recent news or events you found\n"
)

# The only values accepted for the "classification" field of the reply.
VALID_CLASSIFICATIONS = {"bullish", "bearish", "neutral"}
|
|
|
|
|
|
class OpenAISentimentProvider:
    """Fetches sentiment analysis from OpenAI Responses API with live web search.

    The provider owns an ``httpx.AsyncClient`` (configured with the CA bundle
    named by ``SSL_CERT_FILE`` when that file exists). Call :meth:`aclose`
    once the provider is no longer needed so its connections are released.
    """

    def __init__(self, api_key: str, model: str = "gpt-4o-mini") -> None:
        """Create the OpenAI client.

        Args:
            api_key: OpenAI API key; must be non-empty.
            model: Model name passed to the Responses API.

        Raises:
            ProviderError: If ``api_key`` is empty.
        """
        if not api_key:
            raise ProviderError("OpenAI API key is required")

        http_kwargs: dict = {}
        if _CA_BUNDLE and Path(_CA_BUNDLE).exists():
            http_kwargs["verify"] = _CA_BUNDLE

        # Keep a handle on the HTTP client so aclose() can release it.
        self._http_client = httpx.AsyncClient(**http_kwargs)
        self._client = AsyncOpenAI(api_key=api_key, http_client=self._http_client)
        self._model = model

    async def aclose(self) -> None:
        """Close the underlying HTTP client and its connection pool."""
        await self._http_client.aclose()

    async def fetch_sentiment(self, ticker: str) -> SentimentData:
        """Use the Responses API with web_search_preview to get live sentiment.

        Args:
            ticker: Stock ticker substituted into the prompt.

        Returns:
            A ``SentimentData`` with the parsed classification, a confidence
            clamped to 0-100, the model's reasoning and any URL citations.

        Raises:
            RateLimitError: If the API call fails with HTTP 429 or a
                rate-limit / quota message.
            ProviderError: For any other API failure, or when the reply has
                no text, is not a JSON object, or contains invalid fields.
        """
        try:
            response = await self._client.responses.create(
                model=self._model,
                tools=[{"type": "web_search_preview"}],
                instructions="You are a financial sentiment analyst. Always respond with valid JSON only, no markdown fences.",
                input=_SENTIMENT_PROMPT.format(ticker=ticker),
            )
        except Exception as exc:
            # Rate-limit detection is restricted to the API call itself so
            # that local parsing errors can never be reported as throttling.
            if self._is_rate_limited(exc):
                raise RateLimitError(f"OpenAI rate limit hit for {ticker}") from exc
            logger.error("OpenAI provider error for %s: %s", ticker, exc)
            raise ProviderError(f"OpenAI provider error for {ticker}: {exc}") from exc

        try:
            return self._build_sentiment(ticker, response)
        except ProviderError:
            raise
        except Exception as exc:
            # Unexpected response shapes still surface as ProviderError.
            logger.error("OpenAI provider error for %s: %s", ticker, exc)
            raise ProviderError(f"OpenAI provider error for {ticker}: {exc}") from exc

    @classmethod
    def _build_sentiment(cls, ticker: str, response: object) -> SentimentData:
        """Validate the model reply and convert it into ``SentimentData``."""
        raw_text = cls._extract_text(response)
        if not raw_text:
            raise ProviderError(f"No text output from OpenAI for {ticker}")

        raw_text = raw_text.strip()
        logger.debug("OpenAI raw response for %s: %s", ticker, raw_text)

        try:
            parsed = json.loads(cls._strip_code_fences(raw_text))
        except json.JSONDecodeError as exc:
            logger.error("Failed to parse OpenAI JSON for %s: %s — raw: %s", ticker, exc, raw_text)
            raise ProviderError(f"Invalid JSON from OpenAI for {ticker}") from exc

        # Valid JSON is not necessarily an object (e.g. a list or a string).
        if not isinstance(parsed, dict):
            raise ProviderError(
                f"Expected a JSON object from OpenAI for {ticker}, got {type(parsed).__name__}"
            )

        raw_label = parsed.get("classification")
        classification = "" if raw_label is None else str(raw_label).strip().lower()
        if classification not in VALID_CLASSIFICATIONS:
            raise ProviderError(
                f"Invalid classification '{classification}' from OpenAI for {ticker}"
            )

        raw_confidence = parsed.get("confidence", 50)
        try:
            confidence = cls._coerce_confidence(raw_confidence)
        except (TypeError, ValueError, OverflowError) as exc:
            raise ProviderError(
                f"Invalid confidence {raw_confidence!r} from OpenAI for {ticker}"
            ) from exc

        reasoning = parsed.get("reasoning", "")
        if reasoning:
            logger.info(
                "OpenAI sentiment for %s: %s (confidence=%d) — %s",
                ticker, classification, confidence, reasoning,
            )

        return SentimentData(
            ticker=ticker,
            classification=classification,
            confidence=confidence,
            source="openai",
            timestamp=datetime.now(timezone.utc),
            reasoning=reasoning,
            citations=cls._extract_citations(response),
        )

    @staticmethod
    def _is_rate_limited(exc: BaseException) -> bool:
        """Return True when ``exc`` looks like a throttling or quota failure."""
        if getattr(exc, "status_code", None) == 429:
            return True
        msg = str(exc).lower()
        # Match whole phrases: a bare "rate" also hits "generate", "moderate", ...
        markers = ("429", "rate limit", "rate_limit", "ratelimit", "quota", "too many requests")
        return any(marker in msg for marker in markers)

    @staticmethod
    def _message_blocks(response: object):
        """Yield the content blocks of every ``message`` item in the output."""
        for item in getattr(response, "output", None) or ():
            if getattr(item, "type", None) != "message":
                continue
            yield from getattr(item, "content", None) or ()

    @classmethod
    def _extract_text(cls, response: object) -> str:
        """Return the first non-empty text block of the response, or ``""``."""
        for block in cls._message_blocks(response):
            text = getattr(block, "text", None)
            if text:
                return text
        return ""

    @classmethod
    def _extract_citations(cls, response: object) -> list[dict[str, str]]:
        """Collect the url/title of every ``url_citation`` annotation."""
        citations: list[dict[str, str]] = []
        for block in cls._message_blocks(response):
            for annotation in getattr(block, "annotations", None) or ():
                if getattr(annotation, "type", None) != "url_citation":
                    continue
                citations.append({
                    "url": getattr(annotation, "url", ""),
                    "title": getattr(annotation, "title", ""),
                })
        return citations

    @staticmethod
    def _strip_code_fences(text: str) -> str:
        """Remove a leading/trailing Markdown code fence, if present."""
        clean = text
        if clean.startswith("```"):
            # Drop the whole opening fence line (it may carry a language tag).
            clean = clean.split("\n", 1)[1] if "\n" in clean else clean[3:]
        if clean.endswith("```"):
            clean = clean[:-3]
        return clean.strip()

    @staticmethod
    def _coerce_confidence(value: object) -> int:
        """Convert ``value`` to an int clamped to the 0-100 range.

        Raises:
            TypeError, ValueError, OverflowError: If ``value`` is not numeric.
        """
        number = float(value)  # type: ignore[arg-type]
        if number != number:  # NaN is the only value unequal to itself
            raise ValueError("confidence is NaN")
        # Clamp before truncating so infinities cannot overflow int().
        return int(max(0.0, min(100.0, number)))
|