Refactor OpenAI sentiment provider: extract shared response-parsing helpers and add batch sentiment fetching
Some checks failed
Deploy / lint (push) Failing after 21s
Deploy / test (push) Has been skipped
Deploy / deploy (push) Has been skipped

This commit is contained in:
Dennis Thiessen
2026-03-03 15:20:18 +01:00
parent 181cfe6588
commit 0a011d4ce9
55 changed files with 6898 additions and 544 deletions

View File

@@ -33,6 +33,24 @@ Rules:
- reasoning should cite specific recent news or events you found
"""
# Prompt template for batch sentiment lookups. {tickers_csv} is filled with a
# comma-separated list of uppercase symbols; the model is told to answer with a
# bare JSON array (one object per ticker) so the reply can be json.loads()-ed
# directly after markdown-fence stripping. The doubled braces on the example
# row are escape syntax for str.format(), not part of the emitted prompt.
_SENTIMENT_BATCH_PROMPT = """\
Search the web for the LATEST news, analyst opinions, and market developments \
about each stock ticker from the past 24-48 hours.
Tickers:
{tickers_csv}
Respond ONLY with a JSON array (no markdown, no extra text), one object per ticker:
[{{"ticker":"AAPL","classification":"bullish|bearish|neutral","confidence":0-100,"reasoning":"brief explanation"}}]
Rules:
- Include every ticker exactly once
- ticker must be uppercase symbol
- classification must be exactly one of: bullish, bearish, neutral
- confidence must be an integer from 0 to 100
- reasoning should cite specific recent news or events you found
"""
# Closed set of classification values accepted from the model; any other
# string is rejected during result normalization.
VALID_CLASSIFICATIONS = {"bullish", "bearish", "neutral"}
@@ -49,6 +67,59 @@ class OpenAISentimentProvider:
self._client = AsyncOpenAI(api_key=api_key, http_client=http_client)
self._model = model
@staticmethod
def _extract_raw_text(response: object, ticker_context: str) -> str:
raw_text = ""
for item in response.output:
if item.type == "message" and item.content:
for block in item.content:
if hasattr(block, "text") and block.text:
raw_text = block.text
break
if raw_text:
break
if not raw_text:
raise ProviderError(f"No text output from OpenAI for {ticker_context}")
clean = raw_text.strip()
if clean.startswith("```"):
clean = clean.split("\n", 1)[1] if "\n" in clean else clean[3:]
if clean.endswith("```"):
clean = clean[:-3]
return clean.strip()
@staticmethod
def _normalize_single_result(parsed: dict, ticker: str, citations: list[dict[str, str]]) -> SentimentData:
    """Validate one parsed sentiment row and convert it to SentimentData.

    Args:
        parsed: One JSON object produced by the model (untrusted LLM
            output; keys: classification, confidence, reasoning).
        ticker: Uppercase symbol the row belongs to (used in log/error text).
        citations: url_citation annotations already extracted by the caller.

    Returns:
        A SentimentData stamped with the current UTC time and source "openai".

    Raises:
        ProviderError: if classification is not in VALID_CLASSIFICATIONS.
    """
    classification = str(parsed.get("classification", "")).lower()
    if classification not in VALID_CLASSIFICATIONS:
        raise ProviderError(
            f"Invalid classification '{classification}' from OpenAI for {ticker}"
        )
    # The model occasionally emits confidence as a non-numeric string
    # (e.g. "high") or null; fall back to a neutral 50 instead of letting
    # one malformed field discard an otherwise valid row.
    try:
        confidence = int(parsed.get("confidence", 50))
    except (TypeError, ValueError):
        confidence = 50
    confidence = max(0, min(100, confidence))  # clamp to documented 0-100 range
    reasoning = str(parsed.get("reasoning", ""))
    if reasoning:
        logger.info(
            "OpenAI sentiment for %s: %s (confidence=%d) — %s",
            ticker,
            classification,
            confidence,
            reasoning,
        )
    return SentimentData(
        ticker=ticker,
        classification=classification,
        confidence=confidence,
        source="openai",
        timestamp=datetime.now(timezone.utc),
        reasoning=reasoning,
        citations=citations,
    )
async def fetch_sentiment(self, ticker: str) -> SentimentData:
"""Use the Responses API with web_search_preview to get live sentiment."""
try:
@@ -58,48 +129,10 @@ class OpenAISentimentProvider:
instructions="You are a financial sentiment analyst. Always respond with valid JSON only, no markdown fences.",
input=_SENTIMENT_PROMPT.format(ticker=ticker),
)
# Extract text from the ResponseOutputMessage in the output
raw_text = ""
for item in response.output:
if item.type == "message" and item.content:
for block in item.content:
if hasattr(block, "text") and block.text:
raw_text = block.text
break
if raw_text:
break
if not raw_text:
raise ProviderError(f"No text output from OpenAI for {ticker}")
raw_text = raw_text.strip()
logger.debug("OpenAI raw response for %s: %s", ticker, raw_text)
# Strip markdown fences if present
clean = raw_text
if clean.startswith("```"):
clean = clean.split("\n", 1)[1] if "\n" in clean else clean[3:]
if clean.endswith("```"):
clean = clean[:-3]
clean = clean.strip()
clean = self._extract_raw_text(response, ticker)
logger.debug("OpenAI raw response for %s: %s", ticker, clean)
parsed = json.loads(clean)
classification = parsed.get("classification", "").lower()
if classification not in VALID_CLASSIFICATIONS:
raise ProviderError(
f"Invalid classification '{classification}' from OpenAI for {ticker}"
)
confidence = int(parsed.get("confidence", 50))
confidence = max(0, min(100, confidence))
reasoning = parsed.get("reasoning", "")
if reasoning:
logger.info("OpenAI sentiment for %s: %s (confidence=%d) — %s",
ticker, classification, confidence, reasoning)
# Extract url_citation annotations from response output
citations: list[dict[str, str]] = []
for item in response.output:
@@ -112,19 +145,10 @@ class OpenAISentimentProvider:
"url": getattr(annotation, "url", ""),
"title": getattr(annotation, "title", ""),
})
return SentimentData(
ticker=ticker,
classification=classification,
confidence=confidence,
source="openai",
timestamp=datetime.now(timezone.utc),
reasoning=reasoning,
citations=citations,
)
return self._normalize_single_result(parsed, ticker, citations)
except json.JSONDecodeError as exc:
logger.error("Failed to parse OpenAI JSON for %s: %s — raw: %s", ticker, exc, raw_text)
logger.error("Failed to parse OpenAI JSON for %s: %s", ticker, exc)
raise ProviderError(f"Invalid JSON from OpenAI for {ticker}") from exc
except ProviderError:
raise
@@ -134,3 +158,49 @@ class OpenAISentimentProvider:
raise RateLimitError(f"OpenAI rate limit hit for {ticker}") from exc
logger.error("OpenAI provider error for %s: %s", ticker, exc)
raise ProviderError(f"OpenAI provider error for {ticker}: {exc}") from exc
async def fetch_sentiment_batch(self, tickers: list[str]) -> dict[str, SentimentData]:
    """Fetch sentiment for multiple tickers in one OpenAI request.

    Args:
        tickers: Symbols to look up. Blanks are dropped, case is
            normalized, and duplicates are collapsed (first occurrence
            kept) so the prompt lists each ticker exactly once.

    Returns:
        Map keyed by uppercase ticker symbol. Invalid or missing rows are
        skipped rather than failing the whole batch.

    Raises:
        RateLimitError: when the API reports throttling or quota exhaustion.
        ProviderError: for malformed responses or any other API failure.
    """
    cleaned = (t.strip().upper() for t in tickers if t and t.strip())
    # dict.fromkeys dedupes while preserving first-seen order, so a repeated
    # input symbol cannot appear twice in the prompt (the prompt promises
    # "every ticker exactly once").
    normalized = list(dict.fromkeys(cleaned))
    if not normalized:
        return {}
    ticker_context = ",".join(normalized)
    try:
        response = await self._client.responses.create(
            model=self._model,
            tools=[{"type": "web_search_preview"}],
            instructions="You are a financial sentiment analyst. Always respond with valid JSON only, no markdown fences.",
            input=_SENTIMENT_BATCH_PROMPT.format(tickers_csv=", ".join(normalized)),
        )
        clean = self._extract_raw_text(response, ticker_context)
        logger.debug("OpenAI batch raw response for %s: %s", ticker_context, clean)
        parsed = json.loads(clean)
        if not isinstance(parsed, list):
            raise ProviderError("Batch sentiment response must be a JSON array")
        out: dict[str, SentimentData] = {}
        requested = set(normalized)
        for row in parsed:
            if not isinstance(row, dict):
                continue
            symbol = str(row.get("ticker", "")).strip().upper()
            if symbol not in requested:
                # Hallucinated or unrequested symbol; ignore it.
                continue
            try:
                out[symbol] = self._normalize_single_result(row, symbol, citations=[])
            except Exception:
                # One bad row must not sink the batch, but leave a trace
                # instead of swallowing the failure silently.
                logger.debug("Skipping invalid batch row for %s", symbol, exc_info=True)
                continue
        return out
    except json.JSONDecodeError as exc:
        raise ProviderError(f"Invalid batch JSON from OpenAI for {ticker_context}") from exc
    except ProviderError:
        raise
    except Exception as exc:
        msg = str(exc).lower()
        if "429" in msg or "rate" in msg or "quota" in msg:
            raise RateLimitError(f"OpenAI rate limit hit for batch {ticker_context}") from exc
        raise ProviderError(f"OpenAI batch provider error for {ticker_context}: {exc}") from exc