Big refactoring
Some checks failed
Deploy / lint (push) Failing after 21s
Deploy / test (push) Has been skipped
Deploy / deploy (push) Has been skipped

This commit is contained in:
Dennis Thiessen
2026-03-03 15:20:18 +01:00
parent 181cfe6588
commit 0a011d4ce9
55 changed files with 6898 additions and 544 deletions

View File

@@ -0,0 +1,253 @@
"""Chained fundamentals provider with fallback adapters.
Order:
1) FMP (if configured)
2) Finnhub (if configured)
3) Alpha Vantage (if configured)
"""
from __future__ import annotations
import logging
import os
from datetime import datetime, timezone
from pathlib import Path
import httpx
from app.config import settings
from app.exceptions import ProviderError, RateLimitError
from app.providers.fmp import FMPFundamentalProvider
from app.providers.protocol import FundamentalData, FundamentalProvider
logger = logging.getLogger(__name__)
_CA_BUNDLE = os.environ.get("SSL_CERT_FILE", "")
if not _CA_BUNDLE or not Path(_CA_BUNDLE).exists():
_CA_BUNDLE_PATH: str | bool = True
else:
_CA_BUNDLE_PATH = _CA_BUNDLE
def _safe_float(value: object) -> float | None:
if value is None:
return None
try:
return float(value)
except (TypeError, ValueError):
return None
class FinnhubFundamentalProvider:
    """Fundamentals provider backed by Finnhub free endpoints."""

    def __init__(self, api_key: str) -> None:
        # An empty key would just yield 401s on every call; fail fast instead.
        if not api_key:
            raise ProviderError("Finnhub API key is required")
        self._api_key = api_key
        self._base_url = "https://finnhub.io/api/v1"

    async def fetch_fundamentals(self, ticker: str) -> FundamentalData:
        """Collect P/E, revenue growth, earnings surprise and market cap for *ticker*.

        Raises RateLimitError on HTTP 429 and ProviderError on auth failures or
        any other non-200 status. Metrics absent from the payloads are recorded
        in ``unavailable_fields`` rather than raising.
        """
        missing: dict[str, str] = {}
        async with httpx.AsyncClient(timeout=30.0, verify=_CA_BUNDLE_PATH) as client:
            profile_resp = await client.get(
                f"{self._base_url}/stock/profile2",
                params={"symbol": ticker, "token": self._api_key},
            )
            metric_resp = await client.get(
                f"{self._base_url}/stock/metric",
                params={"symbol": ticker, "metric": "all", "token": self._api_key},
            )
            earnings_resp = await client.get(
                f"{self._base_url}/stock/earnings",
                params={"symbol": ticker, "limit": 1, "token": self._api_key},
            )
        # Validate every response before parsing any of them.
        for endpoint, resp in (
            ("profile2", profile_resp),
            ("stock/metric", metric_resp),
            ("stock/earnings", earnings_resp),
        ):
            status = resp.status_code
            if status == 429:
                raise RateLimitError(f"Finnhub rate limit hit for {ticker} ({endpoint})")
            if status in (401, 403):
                raise ProviderError(f"Finnhub access denied for {ticker} ({endpoint}): HTTP {status}")
            if status != 200:
                raise ProviderError(f"Finnhub error for {ticker} ({endpoint}): HTTP {status}")

        profile_payload = profile_resp.json() if profile_resp.text else {}
        metric_payload = metric_resp.json() if metric_resp.text else {}
        earnings_payload = earnings_resp.json() if earnings_resp.text else []

        metrics = metric_payload.get("metric", {}) if isinstance(metric_payload, dict) else {}
        market_cap = _safe_float((profile_payload or {}).get("marketCapitalization"))
        # Prefer trailing-twelve-month figures, then fall back to annual ones.
        pe_ratio = _safe_float(metrics.get("peTTM") or metrics.get("peNormalizedAnnual"))
        revenue_growth = _safe_float(metrics.get("revenueGrowthTTMYoy") or metrics.get("revenueGrowth5Y"))

        earnings_surprise = None
        if isinstance(earnings_payload, list) and earnings_payload:
            latest = earnings_payload[0] if isinstance(earnings_payload[0], dict) else {}
            earnings_surprise = _safe_float(latest.get("surprisePercent"))

        # Record which metrics the provider could not supply.
        for field, value in (
            ("pe_ratio", pe_ratio),
            ("revenue_growth", revenue_growth),
            ("earnings_surprise", earnings_surprise),
            ("market_cap", market_cap),
        ):
            if value is None:
                missing[field] = "not available from provider payload"

        return FundamentalData(
            ticker=ticker,
            pe_ratio=pe_ratio,
            revenue_growth=revenue_growth,
            earnings_surprise=earnings_surprise,
            market_cap=market_cap,
            fetched_at=datetime.now(timezone.utc),
            unavailable_fields=missing,
        )
class AlphaVantageFundamentalProvider:
    """Fundamentals provider backed by Alpha Vantage free endpoints."""

    def __init__(self, api_key: str) -> None:
        # Fail fast on a missing key rather than on the first request.
        if not api_key:
            raise ProviderError("Alpha Vantage API key is required")
        self._api_key = api_key
        self._base_url = "https://www.alphavantage.co/query"

    async def fetch_fundamentals(self, ticker: str) -> FundamentalData:
        """Collect fundamentals for *ticker* from three Alpha Vantage functions.

        Raises RateLimitError on HTTP 429 (or an in-body "Note" throttle
        marker) and ProviderError on other failures. Revenue growth is derived
        from the two most recent annual income statements as a percentage.
        """
        missing: dict[str, str] = {}
        async with httpx.AsyncClient(timeout=30.0, verify=_CA_BUNDLE_PATH) as client:
            overview_resp = await client.get(
                self._base_url,
                params={"function": "OVERVIEW", "symbol": ticker, "apikey": self._api_key},
            )
            earnings_resp = await client.get(
                self._base_url,
                params={"function": "EARNINGS", "symbol": ticker, "apikey": self._api_key},
            )
            income_resp = await client.get(
                self._base_url,
                params={"function": "INCOME_STATEMENT", "symbol": ticker, "apikey": self._api_key},
            )
        for endpoint, resp in (
            ("OVERVIEW", overview_resp),
            ("EARNINGS", earnings_resp),
            ("INCOME_STATEMENT", income_resp),
        ):
            if resp.status_code == 429:
                raise RateLimitError(f"Alpha Vantage rate limit hit for {ticker} ({endpoint})")
            if resp.status_code != 200:
                raise ProviderError(f"Alpha Vantage error for {ticker} ({endpoint}): HTTP {resp.status_code}")

        overview = overview_resp.json() if overview_resp.text else {}
        earnings = earnings_resp.json() if earnings_resp.text else {}
        income = income_resp.json() if income_resp.text else {}

        # Alpha Vantage reports plan/throttle problems as HTTP 200 with a body marker.
        if isinstance(overview, dict) and overview.get("Information"):
            raise ProviderError(f"Alpha Vantage unavailable for {ticker}: {overview.get('Information')}")
        if isinstance(overview, dict) and overview.get("Note"):
            raise RateLimitError(f"Alpha Vantage rate limit for {ticker}: {overview.get('Note')}")

        pe_ratio = _safe_float((overview or {}).get("PERatio"))
        market_cap = _safe_float((overview or {}).get("MarketCapitalization"))

        earnings_surprise = None
        quarterly = earnings.get("quarterlyEarnings", []) if isinstance(earnings, dict) else []
        if isinstance(quarterly, list) and quarterly:
            latest = quarterly[0] if isinstance(quarterly[0], dict) else {}
            earnings_surprise = _safe_float(latest.get("surprisePercentage"))

        revenue_growth = None
        annual = income.get("annualReports", []) if isinstance(income, dict) else []
        if isinstance(annual, list) and len(annual) >= 2:
            current_rev = _safe_float((annual[0] or {}).get("totalRevenue"))
            prior_rev = _safe_float((annual[1] or {}).get("totalRevenue"))
            # Guard against missing figures and division by zero.
            if current_rev is not None and prior_rev not in (None, 0):
                revenue_growth = ((current_rev - prior_rev) / abs(prior_rev)) * 100.0

        # Record which metrics the provider could not supply.
        for field, value in (
            ("pe_ratio", pe_ratio),
            ("revenue_growth", revenue_growth),
            ("earnings_surprise", earnings_surprise),
            ("market_cap", market_cap),
        ):
            if value is None:
                missing[field] = "not available from provider payload"

        return FundamentalData(
            ticker=ticker,
            pe_ratio=pe_ratio,
            revenue_growth=revenue_growth,
            earnings_surprise=earnings_surprise,
            market_cap=market_cap,
            fetched_at=datetime.now(timezone.utc),
            unavailable_fields=missing,
        )
class ChainedFundamentalProvider:
    """Try multiple fundamental providers in order until one succeeds."""

    def __init__(self, providers: list[tuple[str, FundamentalProvider]]) -> None:
        # An empty chain can never answer a request; reject it up front.
        if not providers:
            raise ProviderError("No fundamental providers configured")
        self._providers = providers

    async def fetch_fundamentals(self, ticker: str) -> FundamentalData:
        """Return the first provider result carrying at least one metric.

        Providers that raise, or that return all-None metrics, are skipped and
        their failure recorded. The winning provider's name is stamped into
        ``unavailable_fields["provider"]``. If the whole chain is exhausted a
        ProviderError summarising up to six attempts is raised.
        """
        failures: list[str] = []
        for name, provider in self._providers:
            try:
                result = await provider.fetch_fundamentals(ticker)
                metric_values = (
                    result.pe_ratio,
                    result.revenue_growth,
                    result.earnings_surprise,
                    result.market_cap,
                )
                if all(value is None for value in metric_values):
                    failures.append(f"{name}: no usable metrics returned")
                    continue
                annotated = dict(result.unavailable_fields)
                annotated["provider"] = name
                return FundamentalData(
                    ticker=result.ticker,
                    pe_ratio=result.pe_ratio,
                    revenue_growth=result.revenue_growth,
                    earnings_surprise=result.earnings_surprise,
                    market_cap=result.market_cap,
                    fetched_at=result.fetched_at,
                    unavailable_fields=annotated,
                )
            except Exception as exc:  # deliberate: any failure falls through to the next provider
                failures.append(f"{name}: {type(exc).__name__}: {exc}")
        summary = "; ".join(failures[:6]) if failures else "no provider attempts"
        raise ProviderError(f"All fundamentals providers failed for {ticker}. Attempts: {summary}")
def build_fundamental_provider_chain() -> FundamentalProvider:
    """Assemble the fundamentals fallback chain from configured API keys.

    Priority order is FMP, then Finnhub, then Alpha Vantage; a provider is
    included only when its key is set. Raises ProviderError when no key at
    all is configured.
    """
    candidates = (
        ("fmp", settings.fmp_api_key, FMPFundamentalProvider),
        ("finnhub", settings.finnhub_api_key, FinnhubFundamentalProvider),
        ("alpha_vantage", settings.alpha_vantage_api_key, AlphaVantageFundamentalProvider),
    )
    chain: list[tuple[str, FundamentalProvider]] = [
        (name, factory(key)) for name, key, factory in candidates if key
    ]
    if not chain:
        raise ProviderError(
            "No fundamentals provider configured. Set one of FMP_API_KEY, FINNHUB_API_KEY, ALPHA_VANTAGE_API_KEY"
        )
    logger.info("Fundamentals provider chain configured: %s", [name for name, _ in chain])
    return ChainedFundamentalProvider(chain)

View File

@@ -33,6 +33,24 @@ Rules:
- reasoning should cite specific recent news or events you found
"""
# Prompt for one-shot batch sentiment: {tickers_csv} is filled with a
# comma-separated list of uppercase symbols; the model must answer with a bare
# JSON array holding one object per requested ticker.
_SENTIMENT_BATCH_PROMPT = """\
Search the web for the LATEST news, analyst opinions, and market developments \
about each stock ticker from the past 24-48 hours.
Tickers:
{tickers_csv}
Respond ONLY with a JSON array (no markdown, no extra text), one object per ticker:
[{{"ticker":"AAPL","classification":"bullish|bearish|neutral","confidence":0-100,"reasoning":"brief explanation"}}]
Rules:
- Include every ticker exactly once
- ticker must be uppercase symbol
- classification must be exactly one of: bullish, bearish, neutral
- confidence must be an integer from 0 to 100
- reasoning should cite specific recent news or events you found
"""
# The only classification labels accepted from the model.
VALID_CLASSIFICATIONS = {"bullish", "bearish", "neutral"}
@@ -49,6 +67,59 @@ class OpenAISentimentProvider:
self._client = AsyncOpenAI(api_key=api_key, http_client=http_client)
self._model = model
@staticmethod
def _extract_raw_text(response: object, ticker_context: str) -> str:
    """Pull the first text block out of a Responses API result.

    Walks ``response.output`` for the first message item carrying a non-empty
    text block, strips surrounding whitespace and any markdown code fences,
    and raises ProviderError when the response contains no text at all.
    """
    text = ""
    for entry in response.output:
        if entry.type != "message" or not entry.content:
            continue
        for part in entry.content:
            if hasattr(part, "text") and part.text:
                text = part.text
                break
        if text:
            break
    if not text:
        raise ProviderError(f"No text output from OpenAI for {ticker_context}")
    stripped = text.strip()
    # Models sometimes wrap the JSON in ``` fences despite instructions.
    if stripped.startswith("```"):
        stripped = stripped.split("\n", 1)[1] if "\n" in stripped else stripped[3:]
    if stripped.endswith("```"):
        stripped = stripped[:-3]
    return stripped.strip()
@staticmethod
def _normalize_single_result(parsed: dict, ticker: str, citations: list[dict[str, str]]) -> SentimentData:
    """Validate one parsed JSON row and turn it into a SentimentData.

    Raises ProviderError when the classification label is not one of the
    accepted values; confidence is clamped into [0, 100]. A non-empty
    reasoning string is logged for traceability.
    """
    label = str(parsed.get("classification", "")).lower()
    if label not in VALID_CLASSIFICATIONS:
        raise ProviderError(
            f"Invalid classification '{label}' from OpenAI for {ticker}"
        )
    score = min(100, max(0, int(parsed.get("confidence", 50))))
    rationale = str(parsed.get("reasoning", ""))
    if rationale:
        logger.info(
            "OpenAI sentiment for %s: %s (confidence=%d) — %s",
            ticker,
            label,
            score,
            rationale,
        )
    return SentimentData(
        ticker=ticker,
        classification=label,
        confidence=score,
        source="openai",
        timestamp=datetime.now(timezone.utc),
        reasoning=rationale,
        citations=citations,
    )
async def fetch_sentiment(self, ticker: str) -> SentimentData:
"""Use the Responses API with web_search_preview to get live sentiment."""
try:
@@ -58,48 +129,10 @@ class OpenAISentimentProvider:
instructions="You are a financial sentiment analyst. Always respond with valid JSON only, no markdown fences.",
input=_SENTIMENT_PROMPT.format(ticker=ticker),
)
# Extract text from the ResponseOutputMessage in the output
raw_text = ""
for item in response.output:
if item.type == "message" and item.content:
for block in item.content:
if hasattr(block, "text") and block.text:
raw_text = block.text
break
if raw_text:
break
if not raw_text:
raise ProviderError(f"No text output from OpenAI for {ticker}")
raw_text = raw_text.strip()
logger.debug("OpenAI raw response for %s: %s", ticker, raw_text)
# Strip markdown fences if present
clean = raw_text
if clean.startswith("```"):
clean = clean.split("\n", 1)[1] if "\n" in clean else clean[3:]
if clean.endswith("```"):
clean = clean[:-3]
clean = clean.strip()
clean = self._extract_raw_text(response, ticker)
logger.debug("OpenAI raw response for %s: %s", ticker, clean)
parsed = json.loads(clean)
classification = parsed.get("classification", "").lower()
if classification not in VALID_CLASSIFICATIONS:
raise ProviderError(
f"Invalid classification '{classification}' from OpenAI for {ticker}"
)
confidence = int(parsed.get("confidence", 50))
confidence = max(0, min(100, confidence))
reasoning = parsed.get("reasoning", "")
if reasoning:
logger.info("OpenAI sentiment for %s: %s (confidence=%d) — %s",
ticker, classification, confidence, reasoning)
# Extract url_citation annotations from response output
citations: list[dict[str, str]] = []
for item in response.output:
@@ -112,19 +145,10 @@ class OpenAISentimentProvider:
"url": getattr(annotation, "url", ""),
"title": getattr(annotation, "title", ""),
})
return SentimentData(
ticker=ticker,
classification=classification,
confidence=confidence,
source="openai",
timestamp=datetime.now(timezone.utc),
reasoning=reasoning,
citations=citations,
)
return self._normalize_single_result(parsed, ticker, citations)
except json.JSONDecodeError as exc:
logger.error("Failed to parse OpenAI JSON for %s: %s — raw: %s", ticker, exc, raw_text)
logger.error("Failed to parse OpenAI JSON for %s: %s", ticker, exc)
raise ProviderError(f"Invalid JSON from OpenAI for {ticker}") from exc
except ProviderError:
raise
@@ -134,3 +158,49 @@ class OpenAISentimentProvider:
raise RateLimitError(f"OpenAI rate limit hit for {ticker}") from exc
logger.error("OpenAI provider error for %s: %s", ticker, exc)
raise ProviderError(f"OpenAI provider error for {ticker}: {exc}") from exc
async def fetch_sentiment_batch(self, tickers: list[str]) -> dict[str, SentimentData]:
    """Fetch sentiment for multiple tickers in one OpenAI request.

    Returns a map keyed by uppercase ticker symbol. Invalid/missing rows are skipped.
    """
    # Normalise to uppercase symbols and drop blank / whitespace-only entries.
    normalized = [t.strip().upper() for t in tickers if t and t.strip()]
    if not normalized:
        return {}
    ticker_context = ",".join(normalized)
    try:
        response = await self._client.responses.create(
            model=self._model,
            tools=[{"type": "web_search_preview"}],
            instructions="You are a financial sentiment analyst. Always respond with valid JSON only, no markdown fences.",
            input=_SENTIMENT_BATCH_PROMPT.format(tickers_csv=", ".join(normalized)),
        )
        clean = self._extract_raw_text(response, ticker_context)
        logger.debug("OpenAI batch raw response for %s: %s", ticker_context, clean)
        parsed = json.loads(clean)
        if not isinstance(parsed, list):
            raise ProviderError("Batch sentiment response must be a JSON array")
        out: dict[str, SentimentData] = {}
        requested = set(normalized)
        for row in parsed:
            # Ignore rows that are not objects, or that name a ticker we never asked about.
            if not isinstance(row, dict):
                continue
            symbol = str(row.get("ticker", "")).strip().upper()
            if symbol not in requested:
                continue
            try:
                out[symbol] = self._normalize_single_result(row, symbol, citations=[])
            except Exception:
                # A single malformed row must not sink the whole batch.
                continue
        return out
    except json.JSONDecodeError as exc:
        raise ProviderError(f"Invalid batch JSON from OpenAI for {ticker_context}") from exc
    except ProviderError:
        raise
    except Exception as exc:
        # Heuristic: the SDK surfaces throttling/quota problems in the message text.
        msg = str(exc).lower()
        if "429" in msg or "rate" in msg or "quota" in msg:
            raise RateLimitError(f"OpenAI rate limit hit for batch {ticker_context}") from exc
        raise ProviderError(f"OpenAI batch provider error for {ticker_context}: {exc}") from exc