# signal-platform/app/services/sentiment_service.py

"""Sentiment service.

Stores sentiment records and computes the sentiment dimension score
using a time-decay weighted average over a configurable lookback window.
"""

from __future__ import annotations

import json
import math
from datetime import datetime, timedelta, timezone

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.exceptions import NotFoundError
from app.models.sentiment import SentimentScore
from app.models.ticker import Ticker


async def _get_ticker(db: AsyncSession, symbol: str) -> Ticker:
    """Fetch the ticker row whose symbol matches *symbol*.

    The symbol is trimmed of surrounding whitespace and upper-cased before
    it is compared against ``Ticker.symbol``.

    Raises:
        NotFoundError: if no ticker has the normalised symbol.
    """
    wanted = symbol.strip().upper()
    query = select(Ticker).where(Ticker.symbol == wanted)
    found = (await db.execute(query)).scalar_one_or_none()
    if found is not None:
        return found
    raise NotFoundError(f"Ticker not found: {wanted}")


async def store_sentiment(
    db: AsyncSession,
    symbol: str,
    classification: str,
    confidence: int,
    source: str,
    timestamp: datetime | None = None,
    reasoning: str = "",
    citations: list[dict] | None = None,
    recommendation: str | None = None,
) -> SentimentScore:
    """Persist one sentiment record for the ticker identified by *symbol*.

    Args:
        db: Session used to look up the ticker and write the record.
        symbol: Ticker symbol; resolved through ``_get_ticker``.
        classification: Sentiment label stored as given.
        confidence: Confidence value stored as given.
        source: Origin of the sentiment reading.
        timestamp: When the reading applies; the current UTC time is used
            when omitted.
        reasoning: Free-text explanation stored with the record.
        citations: Supporting citations, serialised to JSON; an empty list
            is stored when omitted.
        recommendation: Optional recommendation stored with the record.

    Returns:
        The committed ``SentimentScore``, refreshed from the database.

    Raises:
        NotFoundError: propagated from ``_get_ticker`` for an unknown symbol.
    """
    ticker = await _get_ticker(db, symbol)

    # Fill in the optional fields only after the ticker is known to exist.
    recorded_at = timestamp if timestamp is not None else datetime.now(timezone.utc)
    citation_payload = json.dumps(citations if citations is not None else [])

    entry = SentimentScore(
        ticker_id=ticker.id,
        classification=classification,
        confidence=confidence,
        source=source,
        timestamp=recorded_at,
        reasoning=reasoning,
        citations_json=citation_payload,
        recommendation=recommendation,
    )
    db.add(entry)
    await db.commit()
    # Reload the row after the commit before handing it back.
    await db.refresh(entry)
    return entry


async def get_sentiment_scores(
    db: AsyncSession,
    symbol: str,
    lookback_hours: float = 24,
) -> list[SentimentScore]:
    """Return the ticker's sentiment records from the last *lookback_hours*.

    Records are selected when their timestamp is at or after
    ``now(UTC) - lookback_hours`` and are returned newest first.

    Raises:
        NotFoundError: propagated from ``_get_ticker`` for an unknown symbol.
    """
    ticker = await _get_ticker(db, symbol)
    window_start = datetime.now(timezone.utc) - timedelta(hours=lookback_hours)

    in_window = (
        SentimentScore.ticker_id == ticker.id,
        SentimentScore.timestamp >= window_start,
    )
    stmt = (
        select(SentimentScore)
        .where(*in_window)
        .order_by(SentimentScore.timestamp.desc())
    )
    rows = await db.execute(stmt)
    return list(rows.scalars().all())


def _classification_to_base_score(classification: str, confidence: int) -> float:
    """Map classification + confidence to a base score (0-100).

    bullish  → confidence (high confidence = high score)
    bearish  → 100 - confidence (high confidence bearish = low score)
    neutral  → 50
    """
    cl = classification.lower()
    if cl == "bullish":
        return float(confidence)
    elif cl == "bearish":
        return float(100 - confidence)
    else:
        return 50.0


async def compute_sentiment_dimension_score(
    db: AsyncSession,
    symbol: str,
    lookback_hours: float = 24,
    decay_rate: float = 0.1,
) -> float | None:
    """Compute the sentiment dimension score using time-decay weighted average.

    Args:
        db: Session used to load the sentiment records.
        symbol: Ticker symbol to score.
        lookback_hours: Size of the window of records to consider.
        decay_rate: Exponential decay constant applied per hour of age.

    Returns:
        A score in [0, 100], or None if no scores exist in the window (or
        every weight underflowed to zero).

    Raises:
        NotFoundError: propagated from the ticker lookup for an unknown symbol.

    Algorithm:
      1. For each score in the lookback window, compute base_score from
         classification + confidence.
      2. Apply time decay: weight = exp(-decay_rate * hours_since_score),
         where hours_since_score is floored at 0.
      3. Weighted average: sum(base_score * weight) / sum(weight).
    """
    scores = await get_sentiment_scores(db, symbol, lookback_hours)
    if not scores:
        return None

    now = datetime.now(timezone.utc)
    weighted_sum = 0.0
    weight_total = 0.0

    for score in scores:
        ts = score.timestamp
        # Naive timestamps are treated as UTC so they can be subtracted
        # from the timezone-aware "now".
        if ts.tzinfo is None:
            ts = ts.replace(tzinfo=timezone.utc)
        # Floor the age at zero: a future-dated record (clock skew or a
        # caller-supplied timestamp) would otherwise get a weight above 1,
        # outweighing genuinely current records, and a far-future one could
        # overflow math.exp.
        hours_since = max(0.0, (now - ts).total_seconds() / 3600.0)
        weight = math.exp(-decay_rate * hours_since)
        base = _classification_to_base_score(score.classification, score.confidence)
        weighted_sum += base * weight
        weight_total += weight

    # All weights can underflow to 0.0 for a very large decay_rate.
    if weight_total == 0:
        return None

    result = weighted_sum / weight_total
    return max(0.0, min(100.0, result))