"""OpenAI sentiment provider using the Responses API with web search.""" from __future__ import annotations import json import logging import os from datetime import datetime, timezone from pathlib import Path import httpx from openai import AsyncOpenAI from app.exceptions import ProviderError, RateLimitError from app.providers.protocol import SentimentData logger = logging.getLogger(__name__) _CA_BUNDLE = os.environ.get("SSL_CERT_FILE", "") _SENTIMENT_PROMPT = """\ Search the web for the LATEST news, analyst opinions, and market developments \ about the stock ticker {ticker} from the past 24-48 hours. Based on your web search findings, analyze the CURRENT market sentiment. Respond ONLY with a JSON object in this exact format (no markdown, no extra text): {{"classification": "", "confidence": <0-100>, "reasoning": ""}} Rules: - classification must be exactly one of: bullish, bearish, neutral - confidence must be an integer from 0 to 100 - reasoning should cite specific recent news or events you found """ VALID_CLASSIFICATIONS = {"bullish", "bearish", "neutral"} class OpenAISentimentProvider: """Fetches sentiment analysis from OpenAI Responses API with live web search.""" def __init__(self, api_key: str, model: str = "gpt-4o-mini") -> None: if not api_key: raise ProviderError("OpenAI API key is required") http_kwargs: dict = {} if _CA_BUNDLE and Path(_CA_BUNDLE).exists(): http_kwargs["verify"] = _CA_BUNDLE http_client = httpx.AsyncClient(**http_kwargs) self._client = AsyncOpenAI(api_key=api_key, http_client=http_client) self._model = model async def fetch_sentiment(self, ticker: str) -> SentimentData: """Use the Responses API with web_search_preview to get live sentiment.""" try: response = await self._client.responses.create( model=self._model, tools=[{"type": "web_search_preview"}], instructions="You are a financial sentiment analyst. Always respond with valid JSON only, no markdown fences.", input=_SENTIMENT_PROMPT.format(ticker=ticker), ) # Extract text from the ResponseOutputMessage in the output raw_text = "" for item in response.output: if item.type == "message" and item.content: for block in item.content: if hasattr(block, "text") and block.text: raw_text = block.text break if raw_text: break if not raw_text: raise ProviderError(f"No text output from OpenAI for {ticker}") raw_text = raw_text.strip() logger.debug("OpenAI raw response for %s: %s", ticker, raw_text) # Strip markdown fences if present clean = raw_text if clean.startswith("```"): clean = clean.split("\n", 1)[1] if "\n" in clean else clean[3:] if clean.endswith("```"): clean = clean[:-3] clean = clean.strip() parsed = json.loads(clean) classification = parsed.get("classification", "").lower() if classification not in VALID_CLASSIFICATIONS: raise ProviderError( f"Invalid classification '{classification}' from OpenAI for {ticker}" ) confidence = int(parsed.get("confidence", 50)) confidence = max(0, min(100, confidence)) reasoning = parsed.get("reasoning", "") if reasoning: logger.info("OpenAI sentiment for %s: %s (confidence=%d) — %s", ticker, classification, confidence, reasoning) # Extract url_citation annotations from response output citations: list[dict[str, str]] = [] for item in response.output: if item.type == "message" and item.content: for block in item.content: if hasattr(block, "annotations") and block.annotations: for annotation in block.annotations: if getattr(annotation, "type", None) == "url_citation": citations.append({ "url": getattr(annotation, "url", ""), "title": getattr(annotation, "title", ""), }) return SentimentData( ticker=ticker, classification=classification, confidence=confidence, source="openai", timestamp=datetime.now(timezone.utc), reasoning=reasoning, citations=citations, ) except json.JSONDecodeError as exc: logger.error("Failed to parse OpenAI JSON for %s: %s — raw: %s", ticker, exc, raw_text) raise ProviderError(f"Invalid JSON from OpenAI for {ticker}") from exc except ProviderError: raise except Exception as exc: msg = str(exc).lower() if "429" in msg or "rate" in msg or "quota" in msg: raise RateLimitError(f"OpenAI rate limit hit for {ticker}") from exc logger.error("OpenAI provider error for %s: %s", ticker, exc) raise ProviderError(f"OpenAI provider error for {ticker}: {exc}") from exc