major update
This commit is contained in:
@@ -1,9 +1,15 @@
|
||||
"""Financial Modeling Prep (FMP) fundamentals provider using httpx."""
|
||||
"""Financial Modeling Prep (FMP) fundamentals provider using httpx.
|
||||
|
||||
Uses the stable API endpoints (https://financialmodelingprep.com/stable/)
|
||||
which replaced the legacy /api/v3/ endpoints deprecated in Aug 2025.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
|
||||
@@ -12,7 +18,14 @@ from app.providers.protocol import FundamentalData
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_FMP_BASE_URL = "https://financialmodelingprep.com/api/v3"
|
||||
_FMP_STABLE_URL = "https://financialmodelingprep.com/stable"

# Resolve CA bundle for explicit httpx verify: prefer SSL_CERT_FILE when it
# points at an existing file, otherwise fall back to httpx's system default
# trust store (verify=True).
_CA_BUNDLE = os.environ.get("SSL_CERT_FILE", "")
if not _CA_BUNDLE or not Path(_CA_BUNDLE).exists():
    _CA_BUNDLE_PATH: str | bool = True  # use system default
else:
    _CA_BUNDLE_PATH = _CA_BUNDLE
|
||||
|
||||
|
||||
class FMPFundamentalProvider:
|
||||
@@ -23,17 +36,54 @@ class FMPFundamentalProvider:
|
||||
raise ProviderError("FMP API key is required")
|
||||
self._api_key = api_key
|
||||
|
||||
async def fetch_fundamentals(self, ticker: str) -> FundamentalData:
|
||||
"""Fetch P/E, revenue growth, earnings surprise, and market cap."""
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=30.0) as client:
|
||||
profile = await self._fetch_profile(client, ticker)
|
||||
earnings = await self._fetch_earnings_surprise(client, ticker)
|
||||
# Mapping from FMP endpoint name to the FundamentalData field it populates
|
||||
_ENDPOINT_FIELD_MAP: dict[str, str] = {
|
||||
"ratios-ttm": "pe_ratio",
|
||||
"financial-growth": "revenue_growth",
|
||||
"earnings": "earnings_surprise",
|
||||
}
|
||||
|
||||
pe_ratio = self._safe_float(profile.get("pe"))
|
||||
revenue_growth = self._safe_float(profile.get("revenueGrowth"))
|
||||
market_cap = self._safe_float(profile.get("mktCap"))
|
||||
earnings_surprise = self._safe_float(earnings)
|
||||
async def fetch_fundamentals(self, ticker: str) -> FundamentalData:
|
||||
"""Fetch P/E, revenue growth, earnings surprise, and market cap.
|
||||
|
||||
Fetches from multiple stable endpoints. If a supplementary endpoint
|
||||
(ratios, growth, earnings) returns 402 (paid tier), we gracefully
|
||||
degrade and return partial data rather than failing entirely, and
|
||||
record the affected field in ``unavailable_fields``.
|
||||
"""
|
||||
try:
|
||||
endpoints_402: set[str] = set()
|
||||
|
||||
async with httpx.AsyncClient(timeout=30.0, verify=_CA_BUNDLE_PATH) as client:
|
||||
params = {"symbol": ticker, "apikey": self._api_key}
|
||||
|
||||
# Profile is the primary source — must succeed
|
||||
profile = await self._fetch_json(client, "profile", params, ticker)
|
||||
|
||||
# Supplementary sources — degrade gracefully on 402
|
||||
ratios, was_402 = await self._fetch_json_optional(client, "ratios-ttm", params, ticker)
|
||||
if was_402:
|
||||
endpoints_402.add("ratios-ttm")
|
||||
|
||||
growth, was_402 = await self._fetch_json_optional(client, "financial-growth", params, ticker)
|
||||
if was_402:
|
||||
endpoints_402.add("financial-growth")
|
||||
|
||||
earnings, was_402 = await self._fetch_json_optional(client, "earnings", params, ticker)
|
||||
if was_402:
|
||||
endpoints_402.add("earnings")
|
||||
|
||||
pe_ratio = self._safe_float(ratios.get("priceToEarningsRatioTTM"))
|
||||
revenue_growth = self._safe_float(growth.get("revenueGrowth"))
|
||||
market_cap = self._safe_float(profile.get("marketCap"))
|
||||
earnings_surprise = self._compute_earnings_surprise(earnings)
|
||||
|
||||
# Build unavailable_fields from 402 endpoints
|
||||
unavailable_fields: dict[str, str] = {
|
||||
self._ENDPOINT_FIELD_MAP[ep]: "requires paid plan"
|
||||
for ep in endpoints_402
|
||||
if ep in self._ENDPOINT_FIELD_MAP
|
||||
}
|
||||
|
||||
return FundamentalData(
|
||||
ticker=ticker,
|
||||
@@ -42,6 +92,7 @@ class FMPFundamentalProvider:
|
||||
earnings_surprise=earnings_surprise,
|
||||
market_cap=market_cap,
|
||||
fetched_at=datetime.now(timezone.utc),
|
||||
unavailable_fields=unavailable_fields,
|
||||
)
|
||||
|
||||
except (ProviderError, RateLimitError):
|
||||
@@ -50,27 +101,52 @@ class FMPFundamentalProvider:
|
||||
logger.error("FMP provider error for %s: %s", ticker, exc)
|
||||
raise ProviderError(f"FMP provider error for {ticker}: {exc}") from exc
|
||||
|
||||
async def _fetch_profile(self, client: httpx.AsyncClient, ticker: str) -> dict:
|
||||
"""Fetch company profile (P/E, revenue growth, market cap)."""
|
||||
url = f"{_FMP_BASE_URL}/profile/{ticker}"
|
||||
resp = await client.get(url, params={"apikey": self._api_key})
|
||||
self._check_response(resp, ticker, "profile")
|
||||
async def _fetch_json(
    self,
    client: httpx.AsyncClient,
    endpoint: str,
    params: dict,
    ticker: str,
) -> dict:
    """Fetch a stable endpoint and return the first item (or empty dict).

    FMP stable endpoints return either a JSON list (first element carries the
    data for the requested symbol) or a bare JSON object. Non-200 responses
    are converted to exceptions by ``_check_response``.

    Args:
        client: shared httpx client for this fetch cycle.
        endpoint: stable endpoint name appended to ``_FMP_STABLE_URL``.
        params: query params (symbol + apikey).
        ticker: ticker symbol, used only for error messages.

    Raises:
        RateLimitError / ProviderError: via ``_check_response`` on HTTP errors.
    """
    url = f"{_FMP_STABLE_URL}/{endpoint}"
    resp = await client.get(url, params=params)
    self._check_response(resp, ticker, endpoint)
    data = resp.json()
    # Normalize both payload shapes to a single dict; unknown shapes → {}.
    if isinstance(data, list):
        return data[0] if data else {}
    return data if isinstance(data, dict) else {}
|
||||
|
||||
async def _fetch_earnings_surprise(
|
||||
self, client: httpx.AsyncClient, ticker: str
|
||||
) -> float | None:
|
||||
"""Fetch the most recent earnings surprise percentage."""
|
||||
url = f"{_FMP_BASE_URL}/earnings-surprises/{ticker}"
|
||||
resp = await client.get(url, params={"apikey": self._api_key})
|
||||
self._check_response(resp, ticker, "earnings-surprises")
|
||||
async def _fetch_json_optional(
    self,
    client: httpx.AsyncClient,
    endpoint: str,
    params: dict,
    ticker: str,
) -> tuple[dict, bool]:
    """Fetch a stable endpoint, returning ``({}, True)`` on 402 (paid tier).

    Returns a tuple of (data_dict, was_402) so callers can track which
    endpoints required a paid plan. Every return path yields a 2-tuple —
    stray single-value returns left over from the legacy earnings helper
    broke the declared contract and are removed.

    Raises:
        RateLimitError / ProviderError: via ``_check_response`` for any
        non-200 status other than 402.
    """
    url = f"{_FMP_STABLE_URL}/{endpoint}"
    resp = await client.get(url, params=params)
    # 402 means the endpoint is gated behind a paid FMP plan: degrade
    # gracefully instead of failing the whole fetch.
    if resp.status_code == 402:
        logger.warning("FMP %s requires paid plan — skipping for %s", endpoint, ticker)
        return {}, True
    self._check_response(resp, ticker, endpoint)
    data = resp.json()
    # Normalize both payload shapes (list-of-one vs bare object) to a dict.
    if isinstance(data, list):
        return (data[0] if data else {}, False)
    return (data if isinstance(data, dict) else {}, False)
|
||||
|
||||
def _compute_earnings_surprise(self, earnings_data: dict) -> float | None:
    """Return the latest earnings surprise as a percent of the EPS estimate.

    Uses ``epsActual`` and ``epsEstimated`` from the earnings payload; yields
    ``None`` when either value is missing or the estimate is zero (division
    guard).
    """
    eps_actual = self._safe_float(earnings_data.get("epsActual"))
    eps_estimated = self._safe_float(earnings_data.get("epsEstimated"))
    cannot_compute = (
        eps_actual is None
        or eps_estimated is None
        or eps_estimated == 0
    )
    if cannot_compute:
        return None
    delta = eps_actual - eps_estimated
    return (delta / abs(eps_estimated)) * 100
|
||||
|
||||
def _check_response(
|
||||
self, resp: httpx.Response, ticker: str, endpoint: str
|
||||
@@ -78,6 +154,10 @@ class FMPFundamentalProvider:
|
||||
"""Raise appropriate errors for non-200 responses."""
|
||||
if resp.status_code == 429:
|
||||
raise RateLimitError(f"FMP rate limit hit for {ticker} ({endpoint})")
|
||||
if resp.status_code == 403:
|
||||
raise ProviderError(
|
||||
f"FMP {endpoint} access denied for {ticker}: HTTP 403 — check API key validity and plan tier"
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
raise ProviderError(
|
||||
f"FMP {endpoint} error for {ticker}: HTTP {resp.status_code}"
|
||||
|
||||
@@ -4,7 +4,10 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import ssl
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
@@ -14,6 +17,19 @@ from app.providers.protocol import SentimentData
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Ensure aiohttp's cached SSL context includes our corporate CA bundle.
# aiohttp creates _SSL_CONTEXT_VERIFIED at import time; we must patch it
# after import so that google-genai's aiohttp session trusts our proxy CA.
_CA_BUNDLE = os.environ.get("SSL_CERT_FILE", "")
if _CA_BUNDLE and Path(_CA_BUNDLE).exists():
    try:
        # NOTE(review): _SSL_CONTEXT_VERIFIED is a private aiohttp attribute;
        # the hasattr/None guard keeps this working (as a no-op) if a future
        # aiohttp release removes or renames it.
        import aiohttp.connector as _aio_conn
        if hasattr(_aio_conn, "_SSL_CONTEXT_VERIFIED") and _aio_conn._SSL_CONTEXT_VERIFIED is not None:
            _aio_conn._SSL_CONTEXT_VERIFIED.load_verify_locations(cafile=_CA_BUNDLE)
            logger.debug("Patched aiohttp _SSL_CONTEXT_VERIFIED with %s", _CA_BUNDLE)
    except Exception:
        # Best-effort: failing to patch must never break module import;
        # TLS errors will surface later at request time instead.
        logger.warning("Could not patch aiohttp SSL context", exc_info=True)
|
||||
|
||||
_SENTIMENT_PROMPT = """\
|
||||
Analyze the current market sentiment for the stock ticker {ticker}.
|
||||
Search the web for recent news articles, social media mentions, and analyst opinions.
|
||||
@@ -84,7 +100,7 @@ class GeminiSentimentProvider:
|
||||
raise
|
||||
except Exception as exc:
|
||||
msg = str(exc).lower()
|
||||
if "rate" in msg or "quota" in msg or "429" in msg:
|
||||
if "429" in msg or "resource exhausted" in msg or "quota" in msg or ("rate" in msg and "limit" in msg):
|
||||
raise RateLimitError(f"Gemini rate limit hit for {ticker}") from exc
|
||||
logger.error("Gemini provider error for %s: %s", ticker, exc)
|
||||
raise ProviderError(f"Gemini provider error for {ticker}: {exc}") from exc
|
||||
|
||||
136
app/providers/openai_sentiment.py
Normal file
136
app/providers/openai_sentiment.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""OpenAI sentiment provider using the Responses API with web search."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
from app.exceptions import ProviderError, RateLimitError
|
||||
from app.providers.protocol import SentimentData
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Corporate CA bundle path (empty string when SSL_CERT_FILE is unset);
# existence is checked where the httpx client is built.
_CA_BUNDLE = os.environ.get("SSL_CERT_FILE", "")

# Prompt template for the Responses API call. Doubled braces keep the JSON
# example literal when ``.format(ticker=...)`` is applied.
_SENTIMENT_PROMPT = """\
Search the web for the LATEST news, analyst opinions, and market developments \
about the stock ticker {ticker} from the past 24-48 hours.

Based on your web search findings, analyze the CURRENT market sentiment.

Respond ONLY with a JSON object in this exact format (no markdown, no extra text):
{{"classification": "<bullish|bearish|neutral>", "confidence": <0-100>, "reasoning": "<brief explanation citing recent news>"}}

Rules:
- classification must be exactly one of: bullish, bearish, neutral
- confidence must be an integer from 0 to 100
- reasoning should cite specific recent news or events you found
"""

# Closed set of acceptable values for the model's "classification" field.
VALID_CLASSIFICATIONS = {"bullish", "bearish", "neutral"}
|
||||
|
||||
|
||||
class OpenAISentimentProvider:
    """Fetches sentiment analysis from OpenAI Responses API with live web search."""

    def __init__(self, api_key: str, model: str = "gpt-4o-mini") -> None:
        """Create the provider.

        Args:
            api_key: OpenAI API key (required).
            model: Responses API model name.

        Raises:
            ProviderError: if ``api_key`` is empty.
        """
        if not api_key:
            raise ProviderError("OpenAI API key is required")
        http_kwargs: dict = {}
        # Route TLS verification through the corporate CA bundle when present;
        # otherwise let httpx use its default trust store.
        if _CA_BUNDLE and Path(_CA_BUNDLE).exists():
            http_kwargs["verify"] = _CA_BUNDLE
        http_client = httpx.AsyncClient(**http_kwargs)
        self._client = AsyncOpenAI(api_key=api_key, http_client=http_client)
        self._model = model

    async def fetch_sentiment(self, ticker: str) -> SentimentData:
        """Use the Responses API with web_search_preview to get live sentiment.

        Returns:
            SentimentData with classification, clamped confidence (0-100),
            reasoning, and any url_citation annotations from the response.

        Raises:
            RateLimitError: on 429 / quota-style failures.
            ProviderError: on any other failure or malformed model output.
        """
        try:
            response = await self._client.responses.create(
                model=self._model,
                tools=[{"type": "web_search_preview"}],
                instructions="You are a financial sentiment analyst. Always respond with valid JSON only, no markdown fences.",
                input=_SENTIMENT_PROMPT.format(ticker=ticker),
            )

            raw_text = self._extract_text(response)
            if not raw_text:
                raise ProviderError(f"No text output from OpenAI for {ticker}")

            raw_text = raw_text.strip()
            logger.debug("OpenAI raw response for %s: %s", ticker, raw_text)

            parsed = json.loads(self._strip_fences(raw_text))

            classification = parsed.get("classification", "").lower()
            if classification not in VALID_CLASSIFICATIONS:
                raise ProviderError(
                    f"Invalid classification '{classification}' from OpenAI for {ticker}"
                )

            # Clamp confidence into the documented 0-100 range.
            confidence = max(0, min(100, int(parsed.get("confidence", 50))))

            reasoning = parsed.get("reasoning", "")
            if reasoning:
                logger.info("OpenAI sentiment for %s: %s (confidence=%d) — %s",
                            ticker, classification, confidence, reasoning)

            return SentimentData(
                ticker=ticker,
                classification=classification,
                confidence=confidence,
                source="openai",
                timestamp=datetime.now(timezone.utc),
                reasoning=reasoning,
                citations=self._extract_citations(response),
            )

        except json.JSONDecodeError as exc:
            logger.error("Failed to parse OpenAI JSON for %s: %s — raw: %s", ticker, exc, raw_text)
            raise ProviderError(f"Invalid JSON from OpenAI for {ticker}") from exc
        except ProviderError:
            raise
        except Exception as exc:
            msg = str(exc).lower()
            # Stricter rate-limit detection, matching the Gemini provider: a
            # bare "rate" substring also matches words like "operate" or
            # "generate" and would misclassify ordinary errors as rate limits.
            if "429" in msg or "resource exhausted" in msg or "quota" in msg or ("rate" in msg and "limit" in msg):
                raise RateLimitError(f"OpenAI rate limit hit for {ticker}") from exc
            logger.error("OpenAI provider error for %s: %s", ticker, exc)
            raise ProviderError(f"OpenAI provider error for {ticker}: {exc}") from exc

    @staticmethod
    def _extract_text(response) -> str:
        """Return the first non-empty text block from the response output."""
        for item in response.output:
            if item.type == "message" and item.content:
                for block in item.content:
                    if hasattr(block, "text") and block.text:
                        return block.text
        return ""

    @staticmethod
    def _strip_fences(text: str) -> str:
        """Strip surrounding markdown code fences (``` or ```json) if present."""
        clean = text
        if clean.startswith("```"):
            # Drop the opening fence line (which may carry a language tag).
            clean = clean.split("\n", 1)[1] if "\n" in clean else clean[3:]
        if clean.endswith("```"):
            clean = clean[:-3]
        return clean.strip()

    @staticmethod
    def _extract_citations(response) -> list[dict[str, str]]:
        """Collect url_citation annotations from all message content blocks."""
        citations: list[dict[str, str]] = []
        for item in response.output:
            if item.type == "message" and item.content:
                for block in item.content:
                    if hasattr(block, "annotations") and block.annotations:
                        for annotation in block.annotations:
                            if getattr(annotation, "type", None) == "url_citation":
                                citations.append({
                                    "url": getattr(annotation, "url", ""),
                                    "title": getattr(annotation, "title", ""),
                                })
        return citations
|
||||
@@ -7,7 +7,7 @@ transfer data between providers and the service layer.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import date, datetime
|
||||
from typing import Protocol
|
||||
|
||||
@@ -39,6 +39,8 @@ class SentimentData:
|
||||
confidence: int # 0-100
|
||||
source: str
|
||||
timestamp: datetime
|
||||
reasoning: str = ""
|
||||
citations: list[dict[str, str]] = field(default_factory=list) # [{"url": ..., "title": ...}]
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
@@ -51,6 +53,7 @@ class FundamentalData:
|
||||
earnings_surprise: float | None
|
||||
market_cap: float | None
|
||||
fetched_at: datetime
|
||||
unavailable_fields: dict[str, str] = field(default_factory=dict)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user