"""Sentiment service.
|
|
|
|
Stores sentiment records and computes the sentiment dimension score
|
|
using a time-decay weighted average over a configurable lookback window.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import math
|
|
from datetime import datetime, timedelta, timezone
|
|
|
|
from sqlalchemy import select
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.exceptions import NotFoundError
|
|
from app.models.sentiment import SentimentScore
|
|
from app.models.ticker import Ticker
|
|
|
|
|
|
async def _get_ticker(db: AsyncSession, symbol: str) -> Ticker:
    """Return the ticker row matching *symbol*.

    The symbol is trimmed and upper-cased before the lookup.

    Raises:
        NotFoundError: if no ticker exists for the normalised symbol.
    """
    normalised = symbol.strip().upper()
    row = (
        await db.execute(select(Ticker).where(Ticker.symbol == normalised))
    ).scalar_one_or_none()
    if row is None:
        raise NotFoundError(f"Ticker not found: {normalised}")
    return row
|
|
|
|
|
|
async def store_sentiment(
    db: AsyncSession,
    symbol: str,
    classification: str,
    confidence: int,
    source: str,
    timestamp: datetime | None = None,
) -> SentimentScore:
    """Persist one sentiment observation for *symbol* and return it.

    When *timestamp* is omitted, the current UTC time is used. The record
    is committed and refreshed so DB-generated fields are populated.

    Raises:
        NotFoundError: if the symbol does not match a known ticker.
    """
    ticker = await _get_ticker(db, symbol)

    record = SentimentScore(
        ticker_id=ticker.id,
        classification=classification,
        confidence=confidence,
        source=source,
        # Default the observation time to "now" in UTC.
        timestamp=datetime.now(timezone.utc) if timestamp is None else timestamp,
    )
    db.add(record)
    await db.commit()
    await db.refresh(record)
    return record
|
|
|
|
|
|
async def get_sentiment_scores(
    db: AsyncSession,
    symbol: str,
    lookback_hours: float = 24,
) -> list[SentimentScore]:
    """Return sentiment rows for *symbol* newer than the lookback cutoff.

    Rows are ordered newest-first.

    Raises:
        NotFoundError: if the symbol does not match a known ticker.
    """
    ticker = await _get_ticker(db, symbol)
    window_start = datetime.now(timezone.utc) - timedelta(hours=lookback_hours)

    stmt = (
        select(SentimentScore)
        .where(
            SentimentScore.ticker_id == ticker.id,
            SentimentScore.timestamp >= window_start,
        )
        .order_by(SentimentScore.timestamp.desc())
    )
    rows = await db.execute(stmt)
    return list(rows.scalars().all())
|
|
|
|
|
|
def _classification_to_base_score(classification: str, confidence: int) -> float:
|
|
"""Map classification + confidence to a base score (0-100).
|
|
|
|
bullish → confidence (high confidence = high score)
|
|
bearish → 100 - confidence (high confidence bearish = low score)
|
|
neutral → 50
|
|
"""
|
|
cl = classification.lower()
|
|
if cl == "bullish":
|
|
return float(confidence)
|
|
elif cl == "bearish":
|
|
return float(100 - confidence)
|
|
else:
|
|
return 50.0
|
|
|
|
|
|
async def compute_sentiment_dimension_score(
    db: AsyncSession,
    symbol: str,
    lookback_hours: float = 24,
    decay_rate: float = 0.1,
) -> float | None:
    """Compute an exponentially time-decayed average sentiment for *symbol*.

    Each record in the lookback window contributes its base score
    (via ``_classification_to_base_score``) weighted by
    ``exp(-decay_rate * age_in_hours)``, so fresher records dominate.

    Returns:
        A score clamped to [0, 100], or ``None`` when no records fall in
        the window (or every weight underflows to zero).
    """
    records = await get_sentiment_scores(db, symbol, lookback_hours)
    if not records:
        return None

    now = datetime.now(timezone.utc)
    numerator = 0.0
    denominator = 0.0

    for record in records:
        when = record.timestamp
        # Treat naive timestamps as UTC so subtraction never mixes naive
        # and aware datetimes (which would raise TypeError).
        if when.tzinfo is None:
            when = when.replace(tzinfo=timezone.utc)
        age_hours = (now - when).total_seconds() / 3600.0
        decay = math.exp(-decay_rate * age_hours)
        numerator += decay * _classification_to_base_score(
            record.classification, record.confidence
        )
        denominator += decay

    # All weights underflowed to zero — no meaningful average exists.
    if denominator == 0:
        return None
    return min(100.0, max(0.0, numerator / denominator))
|