major update

2026-02-27 16:08:09 +01:00
parent 61ab24490d
commit 181cfe6588
71 changed files with 7647 additions and 281 deletions
--- a/app/providers/openai_sentiment.py
+++ b/app/providers/openai_sentiment.py
@@ -0,0 +1,136 @@
+"""OpenAI sentiment provider using the Responses API with web search."""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+from datetime import datetime, timezone
+from pathlib import Path
+
+import httpx
+from openai import AsyncOpenAI
+
+from app.exceptions import ProviderError, RateLimitError
+from app.providers.protocol import SentimentData
+
+logger = logging.getLogger(__name__)
+# Optional CA bundle path, read once at import time from SSL_CERT_FILE ("" if unset).
+_CA_BUNDLE = os.environ.get("SSL_CERT_FILE", "")
+# Prompt template filled with str.format(ticker=...); literal JSON braces are doubled.
+_SENTIMENT_PROMPT = """\
+Search the web for the LATEST news, analyst opinions, and market developments \
+about the stock ticker {ticker} from the past 24-48 hours.
+
+Based on your web search findings, analyze the CURRENT market sentiment.
+
+Respond ONLY with a JSON object in this exact format (no markdown, no extra text):
+{{"classification": "<bullish|bearish|neutral>", "confidence": <0-100>, "reasoning": "<brief explanation citing recent news>"}}
+
+Rules:
+- classification must be exactly one of: bullish, bearish, neutral
+- confidence must be an integer from 0 to 100
+- reasoning should cite specific recent news or events you found
+"""
+# Classification labels accepted from the model's JSON reply; anything else is rejected.
+VALID_CLASSIFICATIONS = {"bullish", "bearish", "neutral"}
+
+
+class OpenAISentimentProvider:
+    """Fetches sentiment analysis from OpenAI Responses API with live web search."""
+
+    def __init__(self, api_key: str, model: str = "gpt-4o-mini") -> None:
+        if not api_key:
+            raise ProviderError("OpenAI API key is required")
+        http_kwargs: dict = {}
+        if _CA_BUNDLE and Path(_CA_BUNDLE).exists():
+            http_kwargs["verify"] = _CA_BUNDLE
+        http_client = httpx.AsyncClient(**http_kwargs)
+        self._client = AsyncOpenAI(api_key=api_key, http_client=http_client)
+        self._model = model
+
+    async def fetch_sentiment(self, ticker: str) -> SentimentData:
+        """Use the Responses API with web_search_preview to get live sentiment."""
+        try:
+            response = await self._client.responses.create(
+                model=self._model,
+                tools=[{"type": "web_search_preview"}],
+                instructions="You are a financial sentiment analyst. Always respond with valid JSON only, no markdown fences.",
+                input=_SENTIMENT_PROMPT.format(ticker=ticker),
+            )
+
+            # Extract text from the ResponseOutputMessage in the output
+            raw_text = ""
+            for item in response.output:
+                if item.type == "message" and item.content:
+                    for block in item.content:
+                        if hasattr(block, "text") and block.text:
+                            raw_text = block.text
+                            break
+                    if raw_text:
+                        break
+
+            if not raw_text:
+                raise ProviderError(f"No text output from OpenAI for {ticker}")
+
+            raw_text = raw_text.strip()
+            logger.debug("OpenAI raw response for %s: %s", ticker, raw_text)
+
+            # Strip markdown fences if present
+            clean = raw_text
+            if clean.startswith("```"):
+                clean = clean.split("\n", 1)[1] if "\n" in clean else clean[3:]
+            if clean.endswith("```"):
+                clean = clean[:-3]
+            clean = clean.strip()
+
+            parsed = json.loads(clean)
+
+            classification = parsed.get("classification", "").lower()
+            if classification not in VALID_CLASSIFICATIONS:
+                raise ProviderError(
+                    f"Invalid classification '{classification}' from OpenAI for {ticker}"
+                )
+
+            confidence = int(parsed.get("confidence", 50))
+            confidence = max(0, min(100, confidence))
+
+            reasoning = parsed.get("reasoning", "")
+            if reasoning:
+                logger.info("OpenAI sentiment for %s: %s (confidence=%d) — %s",
+                            ticker, classification, confidence, reasoning)
+
+            # Extract url_citation annotations from response output
+            citations: list[dict[str, str]] = []
+            for item in response.output:
+                if item.type == "message" and item.content:
+                    for block in item.content:
+                        if hasattr(block, "annotations") and block.annotations:
+                            for annotation in block.annotations:
+                                if getattr(annotation, "type", None) == "url_citation":
+                                    citations.append({
+                                        "url": getattr(annotation, "url", ""),
+                                        "title": getattr(annotation, "title", ""),
+                                    })
+
+            return SentimentData(
+                ticker=ticker,
+                classification=classification,
+                confidence=confidence,
+                source="openai",
+                timestamp=datetime.now(timezone.utc),
+                reasoning=reasoning,
+                citations=citations,
+            )
+
+        except json.JSONDecodeError as exc:
+            logger.error("Failed to parse OpenAI JSON for %s: %s — raw: %s", ticker, exc, raw_text)
+            raise ProviderError(f"Invalid JSON from OpenAI for {ticker}") from exc
+        except ProviderError:
+            raise
+        except Exception as exc:
+            msg = str(exc).lower()
+            if "429" in msg or "rate" in msg or "quota" in msg:
+                raise RateLimitError(f"OpenAI rate limit hit for {ticker}") from exc
+            logger.error("OpenAI provider error for %s: %s", ticker, exc)
+            raise ProviderError(f"OpenAI provider error for {ticker}: {exc}") from exc