"""Ticker universe discovery and bootstrap service. Provides a minimal, provider-backed way to populate tracked tickers from well-known universes (S&P 500, NASDAQ-100, NASDAQ All). """ from __future__ import annotations import json import logging import os import re from collections.abc import Iterable from datetime import datetime, timezone from pathlib import Path import httpx from sqlalchemy import delete, select from sqlalchemy.ext.asyncio import AsyncSession from app.config import settings from app.exceptions import ProviderError, ValidationError from app.models.settings import SystemSetting from app.models.ticker import Ticker logger = logging.getLogger(__name__) SUPPORTED_UNIVERSES = {"sp500", "nasdaq100", "nasdaq_all"} _SYMBOL_PATTERN = re.compile(r"^[A-Z0-9-]{1,10}$") _SEED_UNIVERSES: dict[str, list[str]] = { "sp500": [ "AAPL", "MSFT", "NVDA", "AMZN", "META", "GOOGL", "GOOG", "BRK-B", "TSLA", "JPM", "V", "MA", "UNH", "XOM", "LLY", "AVGO", "COST", "PG", "JNJ", "HD", "MRK", "BAC", "ABBV", "PEP", "KO", "ADBE", "NFLX", "CRM", "CSCO", "WMT", "AMD", "TMO", "MCD", "ORCL", "ACN", "CVX", "LIN", "DHR", "ABT", "QCOM", "TXN", "PM", "DIS", "INTU", ], "nasdaq100": [ "AAPL", "MSFT", "NVDA", "AMZN", "META", "GOOGL", "GOOG", "TSLA", "AVGO", "COST", "NFLX", "ADBE", "CSCO", "AMD", "INTU", "QCOM", "AMGN", "TXN", "INTC", "BKNG", "GILD", "ISRG", "MDLZ", "ADP", "LRCX", "ADI", "PANW", "SNPS", "CDNS", "KLAC", "MELI", "MU", "SBUX", "CSX", "REGN", "VRTX", "MAR", "MNST", "CTAS", "ASML", "PYPL", "AMAT", "NXPI", ], "nasdaq_all": [ "AAPL", "MSFT", "NVDA", "AMZN", "META", "GOOGL", "TSLA", "AMD", "INTC", "QCOM", "CSCO", "ADBE", "NFLX", "PYPL", "AMAT", "MU", "SBUX", "GILD", "INTU", "BKNG", "ADP", "CTAS", "PANW", "SNPS", "CDNS", "LRCX", "KLAC", "MELI", "ASML", "REGN", "VRTX", "MDLZ", "AMGN", ], } _CA_BUNDLE = os.environ.get("SSL_CERT_FILE", "") if not _CA_BUNDLE or not Path(_CA_BUNDLE).exists(): _CA_BUNDLE_PATH: str | bool = True else: _CA_BUNDLE_PATH = _CA_BUNDLE def _validate_universe(universe: str) -> str: normalised = universe.strip().lower() if normalised not in SUPPORTED_UNIVERSES: supported = ", ".join(sorted(SUPPORTED_UNIVERSES)) raise ValidationError(f"Unsupported universe '{universe}'. Supported: {supported}") return normalised def _normalise_symbols(symbols: Iterable[str]) -> list[str]: deduped: set[str] = set() for raw_symbol in symbols: symbol = raw_symbol.strip().upper().replace(".", "-") if not symbol: continue if _SYMBOL_PATTERN.fullmatch(symbol) is None: continue deduped.add(symbol) return sorted(deduped) def _extract_symbols_from_fmp_payload(payload: object) -> list[str]: if not isinstance(payload, list): return [] symbols: list[str] = [] for item in payload: if not isinstance(item, dict): continue candidate = item.get("symbol") or item.get("ticker") if isinstance(candidate, str): symbols.append(candidate) return symbols async def _try_fmp_urls( client: httpx.AsyncClient, urls: list[str], ) -> tuple[list[str], list[str]]: failures: list[str] = [] for url in urls: endpoint = url.split("?")[0] try: response = await client.get(url) except httpx.HTTPError as exc: failures.append(f"{endpoint}: network error ({type(exc).__name__}: {exc})") continue if response.status_code != 200: failures.append(f"{endpoint}: HTTP {response.status_code}") continue try: payload = response.json() except ValueError: failures.append(f"{endpoint}: invalid JSON payload") continue symbols = _extract_symbols_from_fmp_payload(payload) if symbols: return symbols, failures failures.append(f"{endpoint}: empty/unsupported payload") return [], failures async def _fetch_universe_symbols_from_fmp(universe: str) -> list[str]: if not settings.fmp_api_key: raise ValidationError( "FMP API key is required for universe bootstrap (set FMP_API_KEY)" ) api_key = settings.fmp_api_key stable_base = "https://financialmodelingprep.com/stable" legacy_base = "https://financialmodelingprep.com/api/v3" stable_candidates: dict[str, list[str]] = { "sp500": [ f"{stable_base}/sp500-constituent?apikey={api_key}", f"{stable_base}/sp500-constituents?apikey={api_key}", ], "nasdaq100": [ f"{stable_base}/nasdaq-100-constituent?apikey={api_key}", f"{stable_base}/nasdaq100-constituent?apikey={api_key}", f"{stable_base}/nasdaq-100-constituents?apikey={api_key}", ], "nasdaq_all": [ f"{stable_base}/stock-screener?exchange=NASDAQ&isEtf=false&limit=10000&apikey={api_key}", f"{stable_base}/available-traded/list?apikey={api_key}", ], } legacy_candidates: dict[str, list[str]] = { "sp500": [ f"{legacy_base}/sp500_constituent?apikey={api_key}", f"{legacy_base}/sp500_constituent", ], "nasdaq100": [ f"{legacy_base}/nasdaq_constituent?apikey={api_key}", f"{legacy_base}/nasdaq_constituent", ], "nasdaq_all": [ f"{legacy_base}/stock-screener?exchange=NASDAQ&isEtf=false&limit=10000&apikey={api_key}", ], } failures: list[str] = [] async with httpx.AsyncClient(timeout=30.0, verify=_CA_BUNDLE_PATH) as client: stable_symbols, stable_failures = await _try_fmp_urls(client, stable_candidates[universe]) failures.extend(stable_failures) if stable_symbols: return stable_symbols legacy_symbols, legacy_failures = await _try_fmp_urls(client, legacy_candidates[universe]) failures.extend(legacy_failures) if legacy_symbols: return legacy_symbols if failures: reason = "; ".join(failures[:6]) logger.warning("FMP universe fetch failed for %s: %s", universe, reason) raise ProviderError( f"Failed to fetch universe symbols from FMP for '{universe}'. Attempts: {reason}" ) raise ProviderError(f"Failed to fetch universe symbols from FMP for '{universe}'") async def _fetch_html_symbols( client: httpx.AsyncClient, url: str, pattern: str, ) -> tuple[list[str], str | None]: try: response = await client.get(url) except httpx.HTTPError as exc: return [], f"{url}: network error ({type(exc).__name__}: {exc})" if response.status_code != 200: return [], f"{url}: HTTP {response.status_code}" matches = re.findall(pattern, response.text, flags=re.IGNORECASE) if not matches: return [], f"{url}: no symbols parsed" return list(matches), None async def _fetch_nasdaq_trader_symbols( client: httpx.AsyncClient, ) -> tuple[list[str], str | None]: url = "https://www.nasdaqtrader.com/dynamic/SymDir/nasdaqlisted.txt" try: response = await client.get(url) except httpx.HTTPError as exc: return [], f"{url}: network error ({type(exc).__name__}: {exc})" if response.status_code != 200: return [], f"{url}: HTTP {response.status_code}" symbols: list[str] = [] for line in response.text.splitlines(): if not line or line.startswith("Symbol|") or line.startswith("File Creation Time"): continue parts = line.split("|") if not parts: continue symbol = parts[0].strip() test_issue = parts[6].strip() if len(parts) > 6 else "N" if test_issue == "Y": continue symbols.append(symbol) if not symbols: return [], f"{url}: no symbols parsed" return symbols, None async def _fetch_universe_symbols_from_public(universe: str) -> tuple[list[str], list[str], str | None]: failures: list[str] = [] sp500_url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies" nasdaq100_url = "https://en.wikipedia.org/wiki/Nasdaq-100" wiki_symbol_pattern = r"