Fix scoring/recommendation correctness and calibration

Triggered by CNC showing "LONG (High Confidence)" with SHORT reasoning and no long setup. - A: recommendation action + reasoning are ticker-level and identical on both setups; reasoning always matches the shown action - B: recommended_action only picks a direction with a tradeable setup; strong bias with no setup (e.g. price at ATH) → NEUTRAL with an explanatory reason instead of a fake LONG_HIGH - C: confidence is a directional-agreement model — opposing signals push it below 50 (SHORT on a 92-technical/99-momentum stock ~0%, not 55%) - D: fundamental score requires >=2 real metrics (market-cap-only no longer yields a high score) - E: RSI score peaks at healthy momentum (~60) and penalizes overbought/oversold extremes instead of treating RSI 90 as maximal - F: fundamentals chain merges fields across providers (FMP market cap + Finnhub P/E) instead of stopping at the first with any field - NEUTRAL label: "No Clear Setup" (covers untradeable-bias case) Scores recompute on next scan/scoring run; C and E shift score distributions intentionally. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-13 15:34:36 +02:00
parent ffb609d38f
commit d3eb8a2b97
9 changed files with 269 additions and 108 deletions
@@ -202,8 +202,18 @@ class AlphaVantageFundamentalProvider:
        )


+_FUNDAMENTAL_FIELDS = ("pe_ratio", "revenue_growth", "earnings_surprise", "market_cap")
+
+
 class ChainedFundamentalProvider:
-    """Try multiple fundamental providers in order until one succeeds."""
+    """Merge fundamentals across providers, filling gaps from later sources.
+
+    A single provider rarely covers everything on free tiers — FMP's free plan,
+    for example, returns only market cap (the ratios/growth/earnings endpoints
+    402). Rather than stop at the first provider with *any* field, we take each
+    field from the first provider that supplies it, so FMP's market cap is
+    combined with Finnhub's P/E and earnings surprise.
+    """

    def __init__(self, providers: list[tuple[str, FundamentalProvider]]) -> None:
        if not providers:
@@ -211,38 +221,49 @@ class ChainedFundamentalProvider:
        self._providers = providers

    async def fetch_fundamentals(self, ticker: str) -> FundamentalData:
+        merged: dict[str, float | None] = {f: None for f in _FUNDAMENTAL_FIELDS}
+        field_source: dict[str, str] = {}
        errors: list[str] = []

        for provider_name, provider in self._providers:
+            if all(merged[f] is not None for f in _FUNDAMENTAL_FIELDS):
+                break
            try:
                data = await provider.fetch_fundamentals(ticker)
-
-                has_any_metric = any(
-                    value is not None
-                    for value in (data.pe_ratio, data.revenue_growth, data.earnings_surprise, data.market_cap)
-                )
-                if not has_any_metric:
-                    errors.append(f"{provider_name}: no usable metrics returned")
-                    continue
-
-                unavailable = dict(data.unavailable_fields)
-                unavailable["provider"] = provider_name
-
-                return FundamentalData(
-                    ticker=data.ticker,
-                    pe_ratio=data.pe_ratio,
-                    revenue_growth=data.revenue_growth,
-                    earnings_surprise=data.earnings_surprise,
-                    market_cap=data.market_cap,
-                    fetched_at=data.fetched_at,
-                    unavailable_fields=unavailable,
-                )
            except Exception as exc:
                errors.append(f"{provider_name}: {type(exc).__name__}: {exc}")
+                continue

-        attempts = "; ".join(errors[:6]) if errors else "no provider attempts"
+            for field in _FUNDAMENTAL_FIELDS:
+                if merged[field] is None:
+                    value = getattr(data, field)
+                    if value is not None:
+                        merged[field] = value
+                        field_source[field] = provider_name
+
+        if all(merged[f] is None for f in _FUNDAMENTAL_FIELDS):
+            attempts = "; ".join(errors[:6]) if errors else "no usable metrics from any provider"
            raise ProviderError(f"All fundamentals providers failed for {ticker}. Attempts: {attempts}")

+        unavailable: dict[str, str] = {
+            field: "not available from any configured provider"
+            for field in _FUNDAMENTAL_FIELDS
+            if merged[field] is None
+        }
+        # Record which provider supplied each field for transparency.
+        for field, src in field_source.items():
+            unavailable[f"source_{field}"] = src
+
+        return FundamentalData(
+            ticker=ticker,
+            pe_ratio=merged["pe_ratio"],
+            revenue_growth=merged["revenue_growth"],
+            earnings_surprise=merged["earnings_surprise"],
+            market_cap=merged["market_cap"],
+            fetched_at=datetime.now(timezone.utc),
+            unavailable_fields=unavailable,
+        )
+

 def build_fundamental_provider_chain() -> FundamentalProvider:
    providers: list[tuple[str, FundamentalProvider]] = []
@@ -156,11 +156,27 @@ def compute_ema(
    }


+def _rsi_to_score(rsi: float) -> float:
+    """Map RSI to a 'healthy momentum' score, penalizing extremes.
+
+    Raw RSI as a score treats 90 as maximally bullish, but RSI 90 is extreme
+    overbought (exhaustion/reversal risk), not a green light. This peaks around
+    RSI 60 (healthy uptrend) and falls off toward both ends, with the overbought
+    side penalized harder than oversold (oversold can mean-revert upward).
+    """
+    peak = 60.0
+    if rsi <= peak:
+        score = 90.0 - (peak - rsi) * 0.9   # 60→90, 30→63, 0→36
+    else:
+        score = 90.0 - (rsi - peak) * 1.6   # 60→90, 80→58, 90→42, 100→26
+    return max(0.0, min(100.0, score))
+
+
 def compute_rsi(
    closes: list[float],
    period: int = 14,
 ) -> dict[str, Any]:
-    """Compute RSI. Score = RSI value (already 0-100)."""
+    """Compute RSI. Score is a peaked mapping (see _rsi_to_score), not raw RSI."""
    n = len(closes)
    if n < period + 1:
        raise ValidationError(
@@ -184,12 +200,10 @@ def compute_rsi(
        rs = avg_gain / avg_loss
        rsi = 100.0 - 100.0 / (1.0 + rs)

-    score = max(0.0, min(100.0, rsi))
-
    return {
        "rsi": round(rsi, 4),
        "period": period,
-        "score": round(score, 4),
+        "score": round(_rsi_to_score(rsi), 4),
    }


@@ -127,58 +127,45 @@ class DirectionAnalyzer:
        sentiment_classification: str | None,
        conflicts: list[str] | None = None,
    ) -> float:
-        confidence = 50.0
+        """Directional-agreement confidence around a 50 baseline.
+
+        Each dimension contributes in proportion to how strongly it agrees
+        with the proposed direction: a bullish dimension RAISES long confidence
+        and LOWERS short confidence (and vice-versa). Signals that oppose the
+        direction push confidence below 50 — so a short on a strongly bullish
+        stock scores near zero, not 55.
+        """
        technical = float(dimension_scores.get("technical", 50.0))
        momentum = float(dimension_scores.get("momentum", 50.0))
        fundamental = float(dimension_scores.get("fundamental", 50.0))
        sentiment = _sentiment_value(sentiment_classification)
+        dir_sign = 1.0 if direction == "long" else -1.0

-        if direction == "long":
-            if technical > 70:
-                confidence += 25.0
-            elif technical > 60:
-                confidence += 15.0
+        def agree(score: float) -> float:
+            # -1 (fully against) .. +1 (fully for) the proposed direction
+            return ((score - 50.0) / 50.0) * dir_sign

-            if momentum > 70:
-                confidence += 20.0
-            elif momentum > 60:
-                confidence += 15.0
+        sentiment_val = {"bullish": 1.0, "bearish": -1.0}.get(sentiment or "", 0.0)
+        sentiment_agree = sentiment_val * dir_sign

-            if sentiment == "bullish":
-                confidence += 15.0
-            elif sentiment == "neutral":
-                confidence += 5.0
-
-            if fundamental > 60:
-                confidence += 10.0
-        else:
-            if technical < 30:
-                confidence += 25.0
-            elif technical < 40:
-                confidence += 15.0
-
-            if momentum < 30:
-                confidence += 20.0
-            elif momentum < 40:
-                confidence += 15.0
-
-            if sentiment == "bearish":
-                confidence += 15.0
-            elif sentiment == "neutral":
-                confidence += 5.0
-
-            if fundamental < 40:
-                confidence += 10.0
+        confidence = 50.0 + (
+            agree(technical) * 25.0
+            + agree(momentum) * 20.0
+            + sentiment_agree * 15.0
+            + agree(fundamental) * 10.0
+        )

+        # Explicit conflict patterns trim a little more (the agreement terms
+        # already capture most disagreement, so penalties are modest).
        for conflict in conflicts or []:
            if "sentiment-technical" in conflict:
-                confidence -= 20.0
+                confidence -= 12.0
            elif "momentum-technical" in conflict:
-                confidence -= 15.0
-            elif "sentiment-momentum" in conflict:
-                confidence -= 20.0
-            elif "fundamental-technical" in conflict:
                confidence -= 10.0
+            elif "sentiment-momentum" in conflict:
+                confidence -= 12.0
+            elif "fundamental-technical" in conflict:
+                confidence -= 6.0

        return _clamp(confidence, 0.0, 100.0)

@@ -377,53 +364,83 @@ def _choose_recommended_action(
    long_confidence: float,
    short_confidence: float,
    config: dict[str, float],
+    available_directions: set[str] | None = None,
 ) -> str:
+    """Pick the ticker action — but only recommend a direction you can trade.
+
+    A direction is recommendable only if a tradeable setup exists for it
+    (``available_directions``). So a strong LONG bias on a stock at all-time
+    highs — where the scanner can build no long target — does NOT yield
+    LONG_HIGH; it falls through to NEUTRAL, and the reasoning explains why.
+    """
    high = float(config.get("recommendation_high_confidence_threshold", 70.0))
    moderate = float(config.get("recommendation_moderate_confidence_threshold", 50.0))
    diff = float(config.get("recommendation_confidence_diff_threshold", 20.0))

-    if long_confidence >= high and (long_confidence - short_confidence) >= diff:
+    long_ok = available_directions is None or "long" in available_directions
+    short_ok = available_directions is None or "short" in available_directions
+
+    if long_ok and long_confidence >= high and (long_confidence - short_confidence) >= diff:
        return "LONG_HIGH"
-    if short_confidence >= high and (short_confidence - long_confidence) >= diff:
+    if short_ok and short_confidence >= high and (short_confidence - long_confidence) >= diff:
        return "SHORT_HIGH"
-    if long_confidence >= moderate and (long_confidence - short_confidence) >= diff:
+    if long_ok and long_confidence >= moderate and (long_confidence - short_confidence) >= diff:
        return "LONG_MODERATE"
-    if short_confidence >= moderate and (short_confidence - long_confidence) >= diff:
+    if short_ok and short_confidence >= moderate and (short_confidence - long_confidence) >= diff:
        return "SHORT_MODERATE"
    return "NEUTRAL"


 def _build_reasoning(
-    direction: str,
-    confidence: float,
+    action: str,
+    long_confidence: float,
+    short_confidence: float,
    conflicts: list[str],
    dimension_scores: dict[str, float],
    sentiment_classification: str | None,
-    action: str,
+    config: dict[str, float],
+    available_directions: set[str] | None = None,
 ) -> str:
-    aligned, alignment_text = check_signal_alignment(
-        direction,
-        dimension_scores,
-        sentiment_classification,
-    )
+    """Ticker-level reasoning that always matches the recommended action.
+
+    Stored identically on both setups so the displayed summary can never mix a
+    SHORT setup's reasoning under a LONG action.
+    """
    sentiment = _sentiment_value(sentiment_classification) or "unknown"
    technical = float(dimension_scores.get("technical", 50.0))
    momentum = float(dimension_scores.get("momentum", 50.0))
+    signals = f"technical={technical:.0f}, momentum={momentum:.0f}, sentiment={sentiment}"
+    conflict_note = f" {len(conflicts)} conflict(s) detected, risk-adjusted." if conflicts else ""

-    direction_text = direction.upper()
-    alignment_summary = "aligned" if aligned else "mixed"
-    base = (
-        f"{direction_text} confidence {confidence:.1f}% with {alignment_summary} signals "
-        f"(technical={technical:.0f}, momentum={momentum:.0f}, sentiment={sentiment})."
-    )
-
-    if conflicts:
+    if action != "NEUTRAL":
+        direction = "long" if action.startswith("LONG") else "short"
+        tier = "high" if action.endswith("HIGH") else "moderate"
+        confidence = long_confidence if direction == "long" else short_confidence
+        aligned, _ = check_signal_alignment(direction, dimension_scores, sentiment_classification)
        return (
-            f"{base} {alignment_text} Detected {len(conflicts)} conflict(s), "
-            f"so recommendation is risk-adjusted. Action={action}."
+            f"{direction.upper()} ({tier} confidence): {confidence:.0f}% with "
+            f"{'aligned' if aligned else 'mixed'} signals ({signals}).{conflict_note}"
        )

-    return f"{base} {alignment_text} No major conflicts detected. Action={action}."
+    # NEUTRAL — explain whether it's a missing setup or genuinely mixed signals.
+    moderate = float(config.get("recommendation_moderate_confidence_threshold", 50.0))
+    avail = available_directions if available_directions is not None else {"long", "short"}
+    bias_dir = "long" if long_confidence >= short_confidence else "short"
+    bias_conf = max(long_confidence, short_confidence)
+
+    if bias_conf >= moderate and bias_dir not in avail:
+        other = "short" if bias_dir == "long" else "long"
+        extreme = "highs (no resistance target above)" if bias_dir == "long" else "lows (no support target below)"
+        return (
+            f"Ticker bias is {bias_dir.upper()} (confidence {bias_conf:.0f}%, {signals}) but price is "
+            f"extended near {extreme}, so no high-conviction {bias_dir} setup is available. "
+            f"The available {other.upper()} setup is counter-trend.{conflict_note}"
+        )
+
+    return (
+        f"No high-conviction setup: LONG {long_confidence:.0f}%, SHORT {short_confidence:.0f}% "
+        f"({signals}).{conflict_note}"
+    )


 async def enhance_trade_setup(
@@ -434,6 +451,7 @@ async def enhance_trade_setup(
    sr_levels: list[SRLevel],
    sentiment_classification: str | None,
    atr_value: float,
+    available_directions: set[str] | None = None,
 ) -> TradeSetup:
    config = await get_recommendation_config(db)

@@ -476,24 +494,32 @@ async def enhance_trade_setup(
            config=config,
        )

+    # Per-setup conflicts (target availability is specific to this setup)
+    setup_conflicts = list(conflicts)
    if len(targets) < 3:
-        conflicts = [*conflicts, "target-availability: Fewer than 3 valid S/R targets available"]
+        setup_conflicts.append("target-availability: Fewer than 3 valid S/R targets available")

-    action = _choose_recommended_action(long_confidence, short_confidence, config)
-    risk_level = _risk_level_from_conflicts(conflicts)
-
-    setup.confidence_score = round(confidence, 2)
-    setup.targets_json = json.dumps(targets)
-    setup.conflict_flags_json = json.dumps(conflicts)
-    setup.recommended_action = action
-    setup.reasoning = _build_reasoning(
-        direction=direction,
-        confidence=confidence,
+    # Action and reasoning are ticker-level: they consider both directions and
+    # which directions are actually tradeable, and are identical on every setup.
+    action = _choose_recommended_action(
+        long_confidence, short_confidence, config, available_directions
+    )
+    reasoning = _build_reasoning(
+        action=action,
+        long_confidence=long_confidence,
+        short_confidence=short_confidence,
        conflicts=conflicts,
        dimension_scores=dimension_scores,
        sentiment_classification=sentiment_classification,
-        action=action,
+        config=config,
+        available_directions=available_directions,
    )
-    setup.risk_level = risk_level
+
+    setup.confidence_score = round(confidence, 2)
+    setup.targets_json = json.dumps(targets)
+    setup.conflict_flags_json = json.dumps(setup_conflicts)
+    setup.recommended_action = action
+    setup.reasoning = reasoning
+    setup.risk_level = _risk_level_from_conflicts(setup_conflicts)

    return setup
@@ -202,6 +202,7 @@ async def scan_ticker(
                    detected_at=now,
                ))

+    available_directions = {s.direction for s in setups}
    enhanced_setups: list[TradeSetup] = []
    for setup in setups:
        try:
@@ -213,6 +214,7 @@ async def scan_ticker(
                sr_levels=sr_levels,
                sentiment_classification=sentiment_classification,
                atr_value=atr_value,
+                available_directions=available_directions,
            )
            enhanced_setups.append(enhanced)
        except Exception:
@@ -482,13 +482,22 @@ async def _compute_fundamental_score(
            "reason": "Earnings surprise data not available",
        })

+    # Require at least two real metrics — a single available metric (e.g. only
+    # market cap is free on FMP) does not make a meaningful fundamental score.
+    MIN_METRICS = 2
+    if len(scores) < MIN_METRICS:
+        unavailable.append({
+            "name": "insufficient_metrics",
+            "reason": f"Only {len(scores)} fundamental metric(s) available; need {MIN_METRICS}+ to score.",
+        })
+
    breakdown: dict = {
        "sub_scores": sub_scores,
        "formula": formula,
        "unavailable": unavailable,
    }

-    if not scores:
+    if len(scores) < MIN_METRICS:
        return None, breakdown

    return sum(scores) / len(scores), breakdown
@@ -7,7 +7,7 @@ export const RECOMMENDATION_ACTION_LABELS: Record<RecommendationAction, string>
  LONG_MODERATE: 'LONG (Moderate Confidence)',
  SHORT_HIGH: 'SHORT (High Confidence)',
  SHORT_MODERATE: 'SHORT (Moderate Confidence)',
-  NEUTRAL: 'NEUTRAL (Conflicting Signals)',
+  NEUTRAL: 'NEUTRAL (No Clear Setup)',
 };

 export const RECOMMENDATION_ACTION_GLOSSARY: Array<{ action: RecommendationAction; description: string }> = [
@@ -29,7 +29,7 @@ export const RECOMMENDATION_ACTION_GLOSSARY: Array<{ action: RecommendationActio
  },
  {
    action: 'NEUTRAL',
-    description: 'No strong directional edge. Signals are mixed or confidence gap is too small.',
+    description: 'No actionable setup — either signals are mixed, or the favored direction has no tradeable setup (e.g. price extended with no target). See the reasoning for which.',
  },
 ];

@@ -56,7 +56,35 @@ async def test_chained_provider_uses_fallback_provider_on_primary_failure():

    assert result.pe_ratio == 25.0
    assert result.market_cap == 1_000_000.0
-    assert result.unavailable_fields.get("provider") == "fallback"
+    assert result.unavailable_fields.get("source_pe_ratio") == "fallback"
+
+
+@pytest.mark.asyncio
+async def test_chained_provider_merges_fields_across_providers():
+    """Primary supplies only market cap; fallback fills P/E and earnings."""
+    primary_data = FundamentalData(
+        ticker="AAPL", pe_ratio=None, revenue_growth=None, earnings_surprise=None,
+        market_cap=2_000_000.0, fetched_at=datetime.now(timezone.utc), unavailable_fields={},
+    )
+    fallback_data = FundamentalData(
+        ticker="AAPL", pe_ratio=18.0, revenue_growth=12.0, earnings_surprise=4.0,
+        market_cap=999.0, fetched_at=datetime.now(timezone.utc), unavailable_fields={},
+    )
+
+    provider = ChainedFundamentalProvider([
+        ("fmp", _DataProvider(primary_data)),
+        ("finnhub", _DataProvider(fallback_data)),
+    ])
+
+    result = await provider.fetch_fundamentals("AAPL")
+
+    # market cap from primary (first to supply it), the rest from fallback
+    assert result.market_cap == 2_000_000.0
+    assert result.pe_ratio == 18.0
+    assert result.revenue_growth == 12.0
+    assert result.earnings_surprise == 4.0
+    assert result.unavailable_fields.get("source_market_cap") == "fmp"
+    assert result.unavailable_fields.get("source_pe_ratio") == "finnhub"


@pytest.mark.asyncio
@@ -89,6 +89,17 @@ class TestComputeRSI:
        with pytest.raises(ValidationError, match="RSI requires"):
            compute_rsi([100.0] * 5)

+    def test_overbought_rsi_is_penalized_not_maximal(self):
+        """RSI 100 (extreme overbought) must NOT score near 100."""
+        from app.services.indicator_service import _rsi_to_score
+
+        assert _rsi_to_score(100.0) < 40.0   # overbought penalized
+        assert _rsi_to_score(90.0) < _rsi_to_score(60.0)   # extreme < healthy
+        assert _rsi_to_score(60.0) > 80.0    # healthy momentum rewarded
+        # All gains → RSI 100 → low score, not 100
+        result = compute_rsi(_rising_closes(20, step=1))
+        assert result["score"] < 40.0
+

 # ---------------------------------------------------------------------------
 # ATR
@@ -3,12 +3,20 @@ from __future__ import annotations
 from dataclasses import dataclass

 from app.services.recommendation_service import (
+    _build_reasoning,
+    _choose_recommended_action,
    direction_analyzer,
    probability_estimator,
    signal_conflict_detector,
    target_generator,
 )

+_DEFAULT_CFG = {
+    "recommendation_high_confidence_threshold": 70.0,
+    "recommendation_moderate_confidence_threshold": 50.0,
+    "recommendation_confidence_diff_threshold": 20.0,
+}
+

@dataclass
 class _SRLevelStub:
@@ -52,6 +60,48 @@ def test_high_confidence_short_example():
    assert confidence > 70.0


+def test_short_confidence_low_on_strongly_bullish_stock():
+    """The CNC case: technical 92 / momentum 99 must make SHORT confidence low."""
+    dims = {"technical": 92.0, "momentum": 99.0, "fundamental": 96.0}
+
+    short_conf = direction_analyzer.calculate_confidence(
+        direction="short", dimension_scores=dims, sentiment_classification="neutral", conflicts=[],
+    )
+    long_conf = direction_analyzer.calculate_confidence(
+        direction="long", dimension_scores=dims, sentiment_classification="neutral", conflicts=[],
+    )
+
+    assert short_conf < 20.0   # not 55
+    assert long_conf > 90.0
+
+
+def test_action_neutral_when_bias_direction_has_no_setup():
+    """Strong LONG bias but only a SHORT setup is tradeable → NEUTRAL, not LONG_HIGH."""
+    action = _choose_recommended_action(
+        long_confidence=100.0, short_confidence=5.0, config=_DEFAULT_CFG,
+        available_directions={"short"},
+    )
+    assert action == "NEUTRAL"
+
+    # With the long setup available, the same numbers give LONG_HIGH
+    action_ok = _choose_recommended_action(
+        long_confidence=100.0, short_confidence=5.0, config=_DEFAULT_CFG,
+        available_directions={"long", "short"},
+    )
+    assert action_ok == "LONG_HIGH"
+
+
+def test_reasoning_explains_missing_setup():
+    reasoning = _build_reasoning(
+        action="NEUTRAL", long_confidence=100.0, short_confidence=5.0, conflicts=[],
+        dimension_scores={"technical": 92.0, "momentum": 99.0},
+        sentiment_classification="neutral", config=_DEFAULT_CFG,
+        available_directions={"short"},
+    )
+    assert "bias is LONG" in reasoning
+    assert "no high-conviction long setup" in reasoning.lower()
+
+
 def test_detects_sentiment_technical_conflict():
    conflicts = signal_conflict_detector.detect_conflicts(
        dimension_scores={"technical": 72.0, "momentum": 55.0, "fundamental": 50.0},