Big refactoring
Some checks failed
Deploy / lint (push) Failing after 21s
Deploy / test (push) Has been skipped
Deploy / deploy (push) Has been skipped

This commit is contained in:
Dennis Thiessen
2026-03-03 15:20:18 +01:00
parent 181cfe6588
commit 0a011d4ce9
55 changed files with 6898 additions and 544 deletions

View File

@@ -0,0 +1,253 @@
"""Chained fundamentals provider with fallback adapters.
Order:
1) FMP (if configured)
2) Finnhub (if configured)
3) Alpha Vantage (if configured)
"""
from __future__ import annotations
import logging
import os
from datetime import datetime, timezone
from pathlib import Path
import httpx
from app.config import settings
from app.exceptions import ProviderError, RateLimitError
from app.providers.fmp import FMPFundamentalProvider
from app.providers.protocol import FundamentalData, FundamentalProvider
logger = logging.getLogger(__name__)
_CA_BUNDLE = os.environ.get("SSL_CERT_FILE", "")
if not _CA_BUNDLE or not Path(_CA_BUNDLE).exists():
_CA_BUNDLE_PATH: str | bool = True
else:
_CA_BUNDLE_PATH = _CA_BUNDLE
def _safe_float(value: object) -> float | None:
if value is None:
return None
try:
return float(value)
except (TypeError, ValueError):
return None
class FinnhubFundamentalProvider:
    """Fundamentals provider backed by Finnhub free endpoints."""

    def __init__(self, api_key: str) -> None:
        # An empty key would just yield 401s on every call; fail fast instead.
        if not api_key:
            raise ProviderError("Finnhub API key is required")
        self._api_key = api_key
        self._base_url = "https://finnhub.io/api/v1"

    async def fetch_fundamentals(self, ticker: str) -> FundamentalData:
        """Collect P/E, revenue growth, earnings surprise and market cap for *ticker*.

        Raises RateLimitError on HTTP 429 and ProviderError on auth failures or
        any other non-200 status. Metrics absent from the payloads are recorded
        in ``unavailable_fields`` rather than raising.
        """
        missing: dict[str, str] = {}
        async with httpx.AsyncClient(timeout=30.0, verify=_CA_BUNDLE_PATH) as client:
            profile_resp = await client.get(
                f"{self._base_url}/stock/profile2",
                params={"symbol": ticker, "token": self._api_key},
            )
            metric_resp = await client.get(
                f"{self._base_url}/stock/metric",
                params={"symbol": ticker, "metric": "all", "token": self._api_key},
            )
            earnings_resp = await client.get(
                f"{self._base_url}/stock/earnings",
                params={"symbol": ticker, "limit": 1, "token": self._api_key},
            )
        # Validate every response before parsing any of them.
        for endpoint, resp in (
            ("profile2", profile_resp),
            ("stock/metric", metric_resp),
            ("stock/earnings", earnings_resp),
        ):
            status = resp.status_code
            if status == 429:
                raise RateLimitError(f"Finnhub rate limit hit for {ticker} ({endpoint})")
            if status in (401, 403):
                raise ProviderError(f"Finnhub access denied for {ticker} ({endpoint}): HTTP {status}")
            if status != 200:
                raise ProviderError(f"Finnhub error for {ticker} ({endpoint}): HTTP {status}")

        profile_payload = profile_resp.json() if profile_resp.text else {}
        metric_payload = metric_resp.json() if metric_resp.text else {}
        earnings_payload = earnings_resp.json() if earnings_resp.text else []

        metrics = metric_payload.get("metric", {}) if isinstance(metric_payload, dict) else {}
        market_cap = _safe_float((profile_payload or {}).get("marketCapitalization"))
        # Prefer trailing-twelve-month figures, then fall back to annual ones.
        pe_ratio = _safe_float(metrics.get("peTTM") or metrics.get("peNormalizedAnnual"))
        revenue_growth = _safe_float(metrics.get("revenueGrowthTTMYoy") or metrics.get("revenueGrowth5Y"))

        earnings_surprise = None
        if isinstance(earnings_payload, list) and earnings_payload:
            latest = earnings_payload[0] if isinstance(earnings_payload[0], dict) else {}
            earnings_surprise = _safe_float(latest.get("surprisePercent"))

        # Record which metrics the provider could not supply.
        for field, value in (
            ("pe_ratio", pe_ratio),
            ("revenue_growth", revenue_growth),
            ("earnings_surprise", earnings_surprise),
            ("market_cap", market_cap),
        ):
            if value is None:
                missing[field] = "not available from provider payload"

        return FundamentalData(
            ticker=ticker,
            pe_ratio=pe_ratio,
            revenue_growth=revenue_growth,
            earnings_surprise=earnings_surprise,
            market_cap=market_cap,
            fetched_at=datetime.now(timezone.utc),
            unavailable_fields=missing,
        )
class AlphaVantageFundamentalProvider:
    """Fundamentals provider backed by Alpha Vantage free endpoints."""

    def __init__(self, api_key: str) -> None:
        # Fail fast on a missing key rather than on the first request.
        if not api_key:
            raise ProviderError("Alpha Vantage API key is required")
        self._api_key = api_key
        self._base_url = "https://www.alphavantage.co/query"

    async def fetch_fundamentals(self, ticker: str) -> FundamentalData:
        """Collect fundamentals for *ticker* from three Alpha Vantage functions.

        Raises RateLimitError on HTTP 429 (or an in-body "Note" throttle
        marker) and ProviderError on other failures. Revenue growth is derived
        from the two most recent annual income statements as a percentage.
        """
        missing: dict[str, str] = {}
        async with httpx.AsyncClient(timeout=30.0, verify=_CA_BUNDLE_PATH) as client:
            overview_resp = await client.get(
                self._base_url,
                params={"function": "OVERVIEW", "symbol": ticker, "apikey": self._api_key},
            )
            earnings_resp = await client.get(
                self._base_url,
                params={"function": "EARNINGS", "symbol": ticker, "apikey": self._api_key},
            )
            income_resp = await client.get(
                self._base_url,
                params={"function": "INCOME_STATEMENT", "symbol": ticker, "apikey": self._api_key},
            )
        for endpoint, resp in (
            ("OVERVIEW", overview_resp),
            ("EARNINGS", earnings_resp),
            ("INCOME_STATEMENT", income_resp),
        ):
            if resp.status_code == 429:
                raise RateLimitError(f"Alpha Vantage rate limit hit for {ticker} ({endpoint})")
            if resp.status_code != 200:
                raise ProviderError(f"Alpha Vantage error for {ticker} ({endpoint}): HTTP {resp.status_code}")

        overview = overview_resp.json() if overview_resp.text else {}
        earnings = earnings_resp.json() if earnings_resp.text else {}
        income = income_resp.json() if income_resp.text else {}

        # Alpha Vantage reports plan/throttle problems as HTTP 200 with a body marker.
        if isinstance(overview, dict) and overview.get("Information"):
            raise ProviderError(f"Alpha Vantage unavailable for {ticker}: {overview.get('Information')}")
        if isinstance(overview, dict) and overview.get("Note"):
            raise RateLimitError(f"Alpha Vantage rate limit for {ticker}: {overview.get('Note')}")

        pe_ratio = _safe_float((overview or {}).get("PERatio"))
        market_cap = _safe_float((overview or {}).get("MarketCapitalization"))

        earnings_surprise = None
        quarterly = earnings.get("quarterlyEarnings", []) if isinstance(earnings, dict) else []
        if isinstance(quarterly, list) and quarterly:
            latest = quarterly[0] if isinstance(quarterly[0], dict) else {}
            earnings_surprise = _safe_float(latest.get("surprisePercentage"))

        revenue_growth = None
        annual = income.get("annualReports", []) if isinstance(income, dict) else []
        if isinstance(annual, list) and len(annual) >= 2:
            current_rev = _safe_float((annual[0] or {}).get("totalRevenue"))
            prior_rev = _safe_float((annual[1] or {}).get("totalRevenue"))
            # Guard against missing figures and division by zero.
            if current_rev is not None and prior_rev not in (None, 0):
                revenue_growth = ((current_rev - prior_rev) / abs(prior_rev)) * 100.0

        # Record which metrics the provider could not supply.
        for field, value in (
            ("pe_ratio", pe_ratio),
            ("revenue_growth", revenue_growth),
            ("earnings_surprise", earnings_surprise),
            ("market_cap", market_cap),
        ):
            if value is None:
                missing[field] = "not available from provider payload"

        return FundamentalData(
            ticker=ticker,
            pe_ratio=pe_ratio,
            revenue_growth=revenue_growth,
            earnings_surprise=earnings_surprise,
            market_cap=market_cap,
            fetched_at=datetime.now(timezone.utc),
            unavailable_fields=missing,
        )
class ChainedFundamentalProvider:
    """Try multiple fundamental providers in order until one succeeds."""

    def __init__(self, providers: list[tuple[str, FundamentalProvider]]) -> None:
        # An empty chain can never answer a request; reject it up front.
        if not providers:
            raise ProviderError("No fundamental providers configured")
        self._providers = providers

    async def fetch_fundamentals(self, ticker: str) -> FundamentalData:
        """Return the first provider result carrying at least one metric.

        Providers that raise, or that return all-None metrics, are skipped and
        their failure recorded. The winning provider's name is stamped into
        ``unavailable_fields["provider"]``. If the whole chain is exhausted a
        ProviderError summarising up to six attempts is raised.
        """
        failures: list[str] = []
        for name, provider in self._providers:
            try:
                result = await provider.fetch_fundamentals(ticker)
                metric_values = (
                    result.pe_ratio,
                    result.revenue_growth,
                    result.earnings_surprise,
                    result.market_cap,
                )
                if all(value is None for value in metric_values):
                    failures.append(f"{name}: no usable metrics returned")
                    continue
                annotated = dict(result.unavailable_fields)
                annotated["provider"] = name
                return FundamentalData(
                    ticker=result.ticker,
                    pe_ratio=result.pe_ratio,
                    revenue_growth=result.revenue_growth,
                    earnings_surprise=result.earnings_surprise,
                    market_cap=result.market_cap,
                    fetched_at=result.fetched_at,
                    unavailable_fields=annotated,
                )
            except Exception as exc:  # deliberate: any failure falls through to the next provider
                failures.append(f"{name}: {type(exc).__name__}: {exc}")
        summary = "; ".join(failures[:6]) if failures else "no provider attempts"
        raise ProviderError(f"All fundamentals providers failed for {ticker}. Attempts: {summary}")
def build_fundamental_provider_chain() -> FundamentalProvider:
    """Assemble the fundamentals fallback chain from configured API keys.

    Priority order is FMP, then Finnhub, then Alpha Vantage; a provider is
    included only when its key is set. Raises ProviderError when no key at
    all is configured.
    """
    candidates = (
        ("fmp", settings.fmp_api_key, FMPFundamentalProvider),
        ("finnhub", settings.finnhub_api_key, FinnhubFundamentalProvider),
        ("alpha_vantage", settings.alpha_vantage_api_key, AlphaVantageFundamentalProvider),
    )
    chain: list[tuple[str, FundamentalProvider]] = [
        (name, factory(key)) for name, key, factory in candidates if key
    ]
    if not chain:
        raise ProviderError(
            "No fundamentals provider configured. Set one of FMP_API_KEY, FINNHUB_API_KEY, ALPHA_VANTAGE_API_KEY"
        )
    logger.info("Fundamentals provider chain configured: %s", [name for name, _ in chain])
    return ChainedFundamentalProvider(chain)

View File

@@ -33,6 +33,24 @@ Rules:
- reasoning should cite specific recent news or events you found
"""
# Prompt for one-shot batch sentiment: {tickers_csv} is filled with a
# comma-separated list of uppercase symbols; the model must answer with a bare
# JSON array holding one object per requested ticker.
_SENTIMENT_BATCH_PROMPT = """\
Search the web for the LATEST news, analyst opinions, and market developments \
about each stock ticker from the past 24-48 hours.
Tickers:
{tickers_csv}
Respond ONLY with a JSON array (no markdown, no extra text), one object per ticker:
[{{"ticker":"AAPL","classification":"bullish|bearish|neutral","confidence":0-100,"reasoning":"brief explanation"}}]
Rules:
- Include every ticker exactly once
- ticker must be uppercase symbol
- classification must be exactly one of: bullish, bearish, neutral
- confidence must be an integer from 0 to 100
- reasoning should cite specific recent news or events you found
"""
# The only classification labels accepted from the model.
VALID_CLASSIFICATIONS = {"bullish", "bearish", "neutral"}
@@ -49,6 +67,59 @@ class OpenAISentimentProvider:
self._client = AsyncOpenAI(api_key=api_key, http_client=http_client)
self._model = model
@staticmethod
def _extract_raw_text(response: object, ticker_context: str) -> str:
    """Pull the first text block out of a Responses API result.

    Walks ``response.output`` for the first message item carrying a non-empty
    text block, strips surrounding whitespace and any markdown code fences,
    and raises ProviderError when the response contains no text at all.
    """
    text = ""
    for entry in response.output:
        if entry.type != "message" or not entry.content:
            continue
        for part in entry.content:
            if hasattr(part, "text") and part.text:
                text = part.text
                break
        if text:
            break
    if not text:
        raise ProviderError(f"No text output from OpenAI for {ticker_context}")
    stripped = text.strip()
    # Models sometimes wrap the JSON in ``` fences despite instructions.
    if stripped.startswith("```"):
        stripped = stripped.split("\n", 1)[1] if "\n" in stripped else stripped[3:]
    if stripped.endswith("```"):
        stripped = stripped[:-3]
    return stripped.strip()
@staticmethod
def _normalize_single_result(parsed: dict, ticker: str, citations: list[dict[str, str]]) -> SentimentData:
    """Validate one parsed JSON row and turn it into a SentimentData.

    Raises ProviderError when the classification label is not one of the
    accepted values; confidence is clamped into [0, 100]. A non-empty
    reasoning string is logged for traceability.
    """
    label = str(parsed.get("classification", "")).lower()
    if label not in VALID_CLASSIFICATIONS:
        raise ProviderError(
            f"Invalid classification '{label}' from OpenAI for {ticker}"
        )
    score = min(100, max(0, int(parsed.get("confidence", 50))))
    rationale = str(parsed.get("reasoning", ""))
    if rationale:
        logger.info(
            "OpenAI sentiment for %s: %s (confidence=%d) — %s",
            ticker,
            label,
            score,
            rationale,
        )
    return SentimentData(
        ticker=ticker,
        classification=label,
        confidence=score,
        source="openai",
        timestamp=datetime.now(timezone.utc),
        reasoning=rationale,
        citations=citations,
    )
async def fetch_sentiment(self, ticker: str) -> SentimentData:
"""Use the Responses API with web_search_preview to get live sentiment."""
try:
@@ -58,48 +129,10 @@ class OpenAISentimentProvider:
instructions="You are a financial sentiment analyst. Always respond with valid JSON only, no markdown fences.",
input=_SENTIMENT_PROMPT.format(ticker=ticker),
)
# Extract text from the ResponseOutputMessage in the output
raw_text = ""
for item in response.output:
if item.type == "message" and item.content:
for block in item.content:
if hasattr(block, "text") and block.text:
raw_text = block.text
break
if raw_text:
break
if not raw_text:
raise ProviderError(f"No text output from OpenAI for {ticker}")
raw_text = raw_text.strip()
logger.debug("OpenAI raw response for %s: %s", ticker, raw_text)
# Strip markdown fences if present
clean = raw_text
if clean.startswith("```"):
clean = clean.split("\n", 1)[1] if "\n" in clean else clean[3:]
if clean.endswith("```"):
clean = clean[:-3]
clean = clean.strip()
clean = self._extract_raw_text(response, ticker)
logger.debug("OpenAI raw response for %s: %s", ticker, clean)
parsed = json.loads(clean)
classification = parsed.get("classification", "").lower()
if classification not in VALID_CLASSIFICATIONS:
raise ProviderError(
f"Invalid classification '{classification}' from OpenAI for {ticker}"
)
confidence = int(parsed.get("confidence", 50))
confidence = max(0, min(100, confidence))
reasoning = parsed.get("reasoning", "")
if reasoning:
logger.info("OpenAI sentiment for %s: %s (confidence=%d) — %s",
ticker, classification, confidence, reasoning)
# Extract url_citation annotations from response output
citations: list[dict[str, str]] = []
for item in response.output:
@@ -112,19 +145,10 @@ class OpenAISentimentProvider:
"url": getattr(annotation, "url", ""),
"title": getattr(annotation, "title", ""),
})
return SentimentData(
ticker=ticker,
classification=classification,
confidence=confidence,
source="openai",
timestamp=datetime.now(timezone.utc),
reasoning=reasoning,
citations=citations,
)
return self._normalize_single_result(parsed, ticker, citations)
except json.JSONDecodeError as exc:
logger.error("Failed to parse OpenAI JSON for %s: %s — raw: %s", ticker, exc, raw_text)
logger.error("Failed to parse OpenAI JSON for %s: %s", ticker, exc)
raise ProviderError(f"Invalid JSON from OpenAI for {ticker}") from exc
except ProviderError:
raise
@@ -134,3 +158,49 @@ class OpenAISentimentProvider:
raise RateLimitError(f"OpenAI rate limit hit for {ticker}") from exc
logger.error("OpenAI provider error for %s: %s", ticker, exc)
raise ProviderError(f"OpenAI provider error for {ticker}: {exc}") from exc
async def fetch_sentiment_batch(self, tickers: list[str]) -> dict[str, SentimentData]:
    """Fetch sentiment for multiple tickers in one OpenAI request.

    Returns a map keyed by uppercase ticker symbol. Invalid/missing rows are skipped.
    """
    # Normalise to uppercase symbols and drop blank / whitespace-only entries.
    normalized = [t.strip().upper() for t in tickers if t and t.strip()]
    if not normalized:
        return {}
    ticker_context = ",".join(normalized)
    try:
        response = await self._client.responses.create(
            model=self._model,
            tools=[{"type": "web_search_preview"}],
            instructions="You are a financial sentiment analyst. Always respond with valid JSON only, no markdown fences.",
            input=_SENTIMENT_BATCH_PROMPT.format(tickers_csv=", ".join(normalized)),
        )
        clean = self._extract_raw_text(response, ticker_context)
        logger.debug("OpenAI batch raw response for %s: %s", ticker_context, clean)
        parsed = json.loads(clean)
        if not isinstance(parsed, list):
            raise ProviderError("Batch sentiment response must be a JSON array")
        out: dict[str, SentimentData] = {}
        requested = set(normalized)
        for row in parsed:
            # Ignore rows that are not objects, or that name a ticker we never asked about.
            if not isinstance(row, dict):
                continue
            symbol = str(row.get("ticker", "")).strip().upper()
            if symbol not in requested:
                continue
            try:
                out[symbol] = self._normalize_single_result(row, symbol, citations=[])
            except Exception:
                # A single malformed row must not sink the whole batch.
                continue
        return out
    except json.JSONDecodeError as exc:
        raise ProviderError(f"Invalid batch JSON from OpenAI for {ticker_context}") from exc
    except ProviderError:
        raise
    except Exception as exc:
        # Heuristic: the SDK surfaces throttling/quota problems in the message text.
        msg = str(exc).lower()
        if "429" in msg or "rate" in msg or "quota" in msg:
            raise RateLimitError(f"OpenAI rate limit hit for batch {ticker_context}") from exc
        raise ProviderError(f"OpenAI batch provider error for {ticker_context}: {exc}") from exc