From d3eb8a2b97b24946662e52252b698cf3e72bf8fa Mon Sep 17 00:00:00 2001 From: Dennis Thiessen Date: Sat, 13 Jun 2026 15:34:36 +0200 Subject: [PATCH] Fix scoring/recommendation correctness and calibration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Triggered by CNC showing "LONG (High Confidence)" with SHORT reasoning and no long setup. - A: recommendation action + reasoning are ticker-level and identical on both setups; reasoning always matches the shown action - B: recommended_action only picks a direction with a tradeable setup; strong bias with no setup (e.g. price at ATH) → NEUTRAL with an explanatory reason instead of a fake LONG_HIGH - C: confidence is a directional-agreement model — opposing signals push it below 50 (SHORT on a 92-technical/99-momentum stock ~0%, not 55%) - D: fundamental score requires >=2 real metrics (market-cap-only no longer yields a high score) - E: RSI score peaks at healthy momentum (~60) and penalizes overbought/oversold extremes instead of treating RSI 90 as maximal - F: fundamentals chain merges fields across providers (FMP market cap + Finnhub P/E) instead of stopping at the first with any field - NEUTRAL label: "No Clear Setup" (covers untradeable-bias case) Scores recompute on next scan/scoring run; C and E shift score distributions intentionally. Co-Authored-By: Claude Fable 5 --- app/providers/fundamentals_chain.py | 69 ++++--- app/services/indicator_service.py | 22 ++- app/services/recommendation_service.py | 178 ++++++++++-------- app/services/rr_scanner_service.py | 2 + app/services/scoring_service.py | 11 +- frontend/src/lib/recommendation.ts | 4 +- .../unit/test_fundamentals_chain_provider.py | 30 ++- tests/unit/test_indicator_service.py | 11 ++ tests/unit/test_recommendation_service.py | 50 +++++ 9 files changed, 269 insertions(+), 108 deletions(-) diff --git a/app/providers/fundamentals_chain.py b/app/providers/fundamentals_chain.py index bb512be..9d78114 100644 --- a/app/providers/fundamentals_chain.py +++ b/app/providers/fundamentals_chain.py @@ -202,8 +202,18 @@ class AlphaVantageFundamentalProvider: ) +_FUNDAMENTAL_FIELDS = ("pe_ratio", "revenue_growth", "earnings_surprise", "market_cap") + + class ChainedFundamentalProvider: - """Try multiple fundamental providers in order until one succeeds.""" + """Merge fundamentals across providers, filling gaps from later sources. + + A single provider rarely covers everything on free tiers — FMP's free plan, + for example, returns only market cap (the ratios/growth/earnings endpoints + 402). Rather than stop at the first provider with *any* field, we take each + field from the first provider that supplies it, so FMP's market cap is + combined with Finnhub's P/E and earnings surprise. + """ def __init__(self, providers: list[tuple[str, FundamentalProvider]]) -> None: if not providers: @@ -211,37 +221,48 @@ class ChainedFundamentalProvider: self._providers = providers async def fetch_fundamentals(self, ticker: str) -> FundamentalData: + merged: dict[str, float | None] = {f: None for f in _FUNDAMENTAL_FIELDS} + field_source: dict[str, str] = {} errors: list[str] = [] for provider_name, provider in self._providers: + if all(merged[f] is not None for f in _FUNDAMENTAL_FIELDS): + break try: data = await provider.fetch_fundamentals(ticker) - - has_any_metric = any( - value is not None - for value in (data.pe_ratio, data.revenue_growth, data.earnings_surprise, data.market_cap) - ) - if not has_any_metric: - errors.append(f"{provider_name}: no usable metrics returned") - continue - - unavailable = dict(data.unavailable_fields) - unavailable["provider"] = provider_name - - return FundamentalData( - ticker=data.ticker, - pe_ratio=data.pe_ratio, - revenue_growth=data.revenue_growth, - earnings_surprise=data.earnings_surprise, - market_cap=data.market_cap, - fetched_at=data.fetched_at, - unavailable_fields=unavailable, - ) except Exception as exc: errors.append(f"{provider_name}: {type(exc).__name__}: {exc}") + continue - attempts = "; ".join(errors[:6]) if errors else "no provider attempts" - raise ProviderError(f"All fundamentals providers failed for {ticker}. Attempts: {attempts}") + for field in _FUNDAMENTAL_FIELDS: + if merged[field] is None: + value = getattr(data, field) + if value is not None: + merged[field] = value + field_source[field] = provider_name + + if all(merged[f] is None for f in _FUNDAMENTAL_FIELDS): + attempts = "; ".join(errors[:6]) if errors else "no usable metrics from any provider" + raise ProviderError(f"All fundamentals providers failed for {ticker}. Attempts: {attempts}") + + unavailable: dict[str, str] = { + field: "not available from any configured provider" + for field in _FUNDAMENTAL_FIELDS + if merged[field] is None + } + # Record which provider supplied each field for transparency. + for field, src in field_source.items(): + unavailable[f"source_{field}"] = src + + return FundamentalData( + ticker=ticker, + pe_ratio=merged["pe_ratio"], + revenue_growth=merged["revenue_growth"], + earnings_surprise=merged["earnings_surprise"], + market_cap=merged["market_cap"], + fetched_at=datetime.now(timezone.utc), + unavailable_fields=unavailable, + ) def build_fundamental_provider_chain() -> FundamentalProvider: diff --git a/app/services/indicator_service.py b/app/services/indicator_service.py index ab5a2e8..2e73e66 100644 --- a/app/services/indicator_service.py +++ b/app/services/indicator_service.py @@ -156,11 +156,27 @@ def compute_ema( } +def _rsi_to_score(rsi: float) -> float: + """Map RSI to a 'healthy momentum' score, penalizing extremes. + + Raw RSI as a score treats 90 as maximally bullish, but RSI 90 is extreme + overbought (exhaustion/reversal risk), not a green light. This peaks around + RSI 60 (healthy uptrend) and falls off toward both ends, with the overbought + side penalized harder than oversold (oversold can mean-revert upward). + """ + peak = 60.0 + if rsi <= peak: + score = 90.0 - (peak - rsi) * 0.9 # 60→90, 30→63, 0→36 + else: + score = 90.0 - (rsi - peak) * 1.6 # 60→90, 80→58, 90→42, 100→26 + return max(0.0, min(100.0, score)) + + def compute_rsi( closes: list[float], period: int = 14, ) -> dict[str, Any]: - """Compute RSI. Score = RSI value (already 0-100).""" + """Compute RSI. Score is a peaked mapping (see _rsi_to_score), not raw RSI.""" n = len(closes) if n < period + 1: raise ValidationError( @@ -184,12 +200,10 @@ def compute_rsi( rs = avg_gain / avg_loss rsi = 100.0 - 100.0 / (1.0 + rs) - score = max(0.0, min(100.0, rsi)) - return { "rsi": round(rsi, 4), "period": period, - "score": round(score, 4), + "score": round(_rsi_to_score(rsi), 4), } diff --git a/app/services/recommendation_service.py b/app/services/recommendation_service.py index 5068199..7d4bfad 100644 --- a/app/services/recommendation_service.py +++ b/app/services/recommendation_service.py @@ -127,58 +127,45 @@ class DirectionAnalyzer: sentiment_classification: str | None, conflicts: list[str] | None = None, ) -> float: - confidence = 50.0 + """Directional-agreement confidence around a 50 baseline. + + Each dimension contributes in proportion to how strongly it agrees + with the proposed direction: a bullish dimension RAISES long confidence + and LOWERS short confidence (and vice-versa). Signals that oppose the + direction push confidence below 50 — so a short on a strongly bullish + stock scores near zero, not 55. + """ technical = float(dimension_scores.get("technical", 50.0)) momentum = float(dimension_scores.get("momentum", 50.0)) fundamental = float(dimension_scores.get("fundamental", 50.0)) sentiment = _sentiment_value(sentiment_classification) + dir_sign = 1.0 if direction == "long" else -1.0 - if direction == "long": - if technical > 70: - confidence += 25.0 - elif technical > 60: - confidence += 15.0 + def agree(score: float) -> float: + # -1 (fully against) .. +1 (fully for) the proposed direction + return ((score - 50.0) / 50.0) * dir_sign - if momentum > 70: - confidence += 20.0 - elif momentum > 60: - confidence += 15.0 + sentiment_val = {"bullish": 1.0, "bearish": -1.0}.get(sentiment or "", 0.0) + sentiment_agree = sentiment_val * dir_sign - if sentiment == "bullish": - confidence += 15.0 - elif sentiment == "neutral": - confidence += 5.0 - - if fundamental > 60: - confidence += 10.0 - else: - if technical < 30: - confidence += 25.0 - elif technical < 40: - confidence += 15.0 - - if momentum < 30: - confidence += 20.0 - elif momentum < 40: - confidence += 15.0 - - if sentiment == "bearish": - confidence += 15.0 - elif sentiment == "neutral": - confidence += 5.0 - - if fundamental < 40: - confidence += 10.0 + confidence = 50.0 + ( + agree(technical) * 25.0 + + agree(momentum) * 20.0 + + sentiment_agree * 15.0 + + agree(fundamental) * 10.0 + ) + # Explicit conflict patterns trim a little more (the agreement terms + # already capture most disagreement, so penalties are modest). for conflict in conflicts or []: if "sentiment-technical" in conflict: - confidence -= 20.0 + confidence -= 12.0 elif "momentum-technical" in conflict: - confidence -= 15.0 - elif "sentiment-momentum" in conflict: - confidence -= 20.0 - elif "fundamental-technical" in conflict: confidence -= 10.0 + elif "sentiment-momentum" in conflict: + confidence -= 12.0 + elif "fundamental-technical" in conflict: + confidence -= 6.0 return _clamp(confidence, 0.0, 100.0) @@ -377,53 +364,83 @@ def _choose_recommended_action( long_confidence: float, short_confidence: float, config: dict[str, float], + available_directions: set[str] | None = None, ) -> str: + """Pick the ticker action — but only recommend a direction you can trade. + + A direction is recommendable only if a tradeable setup exists for it + (``available_directions``). So a strong LONG bias on a stock at all-time + highs — where the scanner can build no long target — does NOT yield + LONG_HIGH; it falls through to NEUTRAL, and the reasoning explains why. + """ high = float(config.get("recommendation_high_confidence_threshold", 70.0)) moderate = float(config.get("recommendation_moderate_confidence_threshold", 50.0)) diff = float(config.get("recommendation_confidence_diff_threshold", 20.0)) - if long_confidence >= high and (long_confidence - short_confidence) >= diff: + long_ok = available_directions is None or "long" in available_directions + short_ok = available_directions is None or "short" in available_directions + + if long_ok and long_confidence >= high and (long_confidence - short_confidence) >= diff: return "LONG_HIGH" - if short_confidence >= high and (short_confidence - long_confidence) >= diff: + if short_ok and short_confidence >= high and (short_confidence - long_confidence) >= diff: return "SHORT_HIGH" - if long_confidence >= moderate and (long_confidence - short_confidence) >= diff: + if long_ok and long_confidence >= moderate and (long_confidence - short_confidence) >= diff: return "LONG_MODERATE" - if short_confidence >= moderate and (short_confidence - long_confidence) >= diff: + if short_ok and short_confidence >= moderate and (short_confidence - long_confidence) >= diff: return "SHORT_MODERATE" return "NEUTRAL" def _build_reasoning( - direction: str, - confidence: float, + action: str, + long_confidence: float, + short_confidence: float, conflicts: list[str], dimension_scores: dict[str, float], sentiment_classification: str | None, - action: str, + config: dict[str, float], + available_directions: set[str] | None = None, ) -> str: - aligned, alignment_text = check_signal_alignment( - direction, - dimension_scores, - sentiment_classification, - ) + """Ticker-level reasoning that always matches the recommended action. + + Stored identically on both setups so the displayed summary can never mix a + SHORT setup's reasoning under a LONG action. + """ sentiment = _sentiment_value(sentiment_classification) or "unknown" technical = float(dimension_scores.get("technical", 50.0)) momentum = float(dimension_scores.get("momentum", 50.0)) + signals = f"technical={technical:.0f}, momentum={momentum:.0f}, sentiment={sentiment}" + conflict_note = f" {len(conflicts)} conflict(s) detected, risk-adjusted." if conflicts else "" - direction_text = direction.upper() - alignment_summary = "aligned" if aligned else "mixed" - base = ( - f"{direction_text} confidence {confidence:.1f}% with {alignment_summary} signals " - f"(technical={technical:.0f}, momentum={momentum:.0f}, sentiment={sentiment})." - ) - - if conflicts: + if action != "NEUTRAL": + direction = "long" if action.startswith("LONG") else "short" + tier = "high" if action.endswith("HIGH") else "moderate" + confidence = long_confidence if direction == "long" else short_confidence + aligned, _ = check_signal_alignment(direction, dimension_scores, sentiment_classification) return ( - f"{base} {alignment_text} Detected {len(conflicts)} conflict(s), " - f"so recommendation is risk-adjusted. Action={action}." + f"{direction.upper()} ({tier} confidence): {confidence:.0f}% with " + f"{'aligned' if aligned else 'mixed'} signals ({signals}).{conflict_note}" ) - return f"{base} {alignment_text} No major conflicts detected. Action={action}." + # NEUTRAL — explain whether it's a missing setup or genuinely mixed signals. + moderate = float(config.get("recommendation_moderate_confidence_threshold", 50.0)) + avail = available_directions if available_directions is not None else {"long", "short"} + bias_dir = "long" if long_confidence >= short_confidence else "short" + bias_conf = max(long_confidence, short_confidence) + + if bias_conf >= moderate and bias_dir not in avail: + other = "short" if bias_dir == "long" else "long" + extreme = "highs (no resistance target above)" if bias_dir == "long" else "lows (no support target below)" + return ( + f"Ticker bias is {bias_dir.upper()} (confidence {bias_conf:.0f}%, {signals}) but price is " + f"extended near {extreme}, so no high-conviction {bias_dir} setup is available. " + f"The available {other.upper()} setup is counter-trend.{conflict_note}" + ) + + return ( + f"No high-conviction setup: LONG {long_confidence:.0f}%, SHORT {short_confidence:.0f}% " + f"({signals}).{conflict_note}" + ) async def enhance_trade_setup( @@ -434,6 +451,7 @@ async def enhance_trade_setup( sr_levels: list[SRLevel], sentiment_classification: str | None, atr_value: float, + available_directions: set[str] | None = None, ) -> TradeSetup: config = await get_recommendation_config(db) @@ -476,24 +494,32 @@ async def enhance_trade_setup( config=config, ) + # Per-setup conflicts (target availability is specific to this setup) + setup_conflicts = list(conflicts) if len(targets) < 3: - conflicts = [*conflicts, "target-availability: Fewer than 3 valid S/R targets available"] + setup_conflicts.append("target-availability: Fewer than 3 valid S/R targets available") - action = _choose_recommended_action(long_confidence, short_confidence, config) - risk_level = _risk_level_from_conflicts(conflicts) - - setup.confidence_score = round(confidence, 2) - setup.targets_json = json.dumps(targets) - setup.conflict_flags_json = json.dumps(conflicts) - setup.recommended_action = action - setup.reasoning = _build_reasoning( - direction=direction, - confidence=confidence, + # Action and reasoning are ticker-level: they consider both directions and + # which directions are actually tradeable, and are identical on every setup. + action = _choose_recommended_action( + long_confidence, short_confidence, config, available_directions + ) + reasoning = _build_reasoning( + action=action, + long_confidence=long_confidence, + short_confidence=short_confidence, conflicts=conflicts, dimension_scores=dimension_scores, sentiment_classification=sentiment_classification, - action=action, + config=config, + available_directions=available_directions, ) - setup.risk_level = risk_level + + setup.confidence_score = round(confidence, 2) + setup.targets_json = json.dumps(targets) + setup.conflict_flags_json = json.dumps(setup_conflicts) + setup.recommended_action = action + setup.reasoning = reasoning + setup.risk_level = _risk_level_from_conflicts(setup_conflicts) return setup diff --git a/app/services/rr_scanner_service.py b/app/services/rr_scanner_service.py index ad11dcd..98d9bd1 100644 --- a/app/services/rr_scanner_service.py +++ b/app/services/rr_scanner_service.py @@ -202,6 +202,7 @@ async def scan_ticker( detected_at=now, )) + available_directions = {s.direction for s in setups} enhanced_setups: list[TradeSetup] = [] for setup in setups: try: @@ -213,6 +214,7 @@ async def scan_ticker( sr_levels=sr_levels, sentiment_classification=sentiment_classification, atr_value=atr_value, + available_directions=available_directions, ) enhanced_setups.append(enhanced) except Exception: diff --git a/app/services/scoring_service.py b/app/services/scoring_service.py index 9e28954..2bbb63d 100644 --- a/app/services/scoring_service.py +++ b/app/services/scoring_service.py @@ -482,13 +482,22 @@ async def _compute_fundamental_score( "reason": "Earnings surprise data not available", }) + # Require at least two real metrics — a single available metric (e.g. only + # market cap is free on FMP) does not make a meaningful fundamental score. + MIN_METRICS = 2 + if len(scores) < MIN_METRICS: + unavailable.append({ + "name": "insufficient_metrics", + "reason": f"Only {len(scores)} fundamental metric(s) available; need {MIN_METRICS}+ to score.", + }) + breakdown: dict = { "sub_scores": sub_scores, "formula": formula, "unavailable": unavailable, } - if not scores: + if len(scores) < MIN_METRICS: return None, breakdown return sum(scores) / len(scores), breakdown diff --git a/frontend/src/lib/recommendation.ts b/frontend/src/lib/recommendation.ts index 8fe1038..41f5b9b 100644 --- a/frontend/src/lib/recommendation.ts +++ b/frontend/src/lib/recommendation.ts @@ -7,7 +7,7 @@ export const RECOMMENDATION_ACTION_LABELS: Record LONG_MODERATE: 'LONG (Moderate Confidence)', SHORT_HIGH: 'SHORT (High Confidence)', SHORT_MODERATE: 'SHORT (Moderate Confidence)', - NEUTRAL: 'NEUTRAL (Conflicting Signals)', + NEUTRAL: 'NEUTRAL (No Clear Setup)', }; export const RECOMMENDATION_ACTION_GLOSSARY: Array<{ action: RecommendationAction; description: string }> = [ @@ -29,7 +29,7 @@ export const RECOMMENDATION_ACTION_GLOSSARY: Array<{ action: RecommendationActio }, { action: 'NEUTRAL', - description: 'No strong directional edge. Signals are mixed or confidence gap is too small.', + description: 'No actionable setup — either signals are mixed, or the favored direction has no tradeable setup (e.g. price extended with no target). See the reasoning for which.', }, ]; diff --git a/tests/unit/test_fundamentals_chain_provider.py b/tests/unit/test_fundamentals_chain_provider.py index aa4eb19..2440a7b 100644 --- a/tests/unit/test_fundamentals_chain_provider.py +++ b/tests/unit/test_fundamentals_chain_provider.py @@ -56,7 +56,35 @@ async def test_chained_provider_uses_fallback_provider_on_primary_failure(): assert result.pe_ratio == 25.0 assert result.market_cap == 1_000_000.0 - assert result.unavailable_fields.get("provider") == "fallback" + assert result.unavailable_fields.get("source_pe_ratio") == "fallback" + + +@pytest.mark.asyncio +async def test_chained_provider_merges_fields_across_providers(): + """Primary supplies only market cap; fallback fills P/E and earnings.""" + primary_data = FundamentalData( + ticker="AAPL", pe_ratio=None, revenue_growth=None, earnings_surprise=None, + market_cap=2_000_000.0, fetched_at=datetime.now(timezone.utc), unavailable_fields={}, + ) + fallback_data = FundamentalData( + ticker="AAPL", pe_ratio=18.0, revenue_growth=12.0, earnings_surprise=4.0, + market_cap=999.0, fetched_at=datetime.now(timezone.utc), unavailable_fields={}, + ) + + provider = ChainedFundamentalProvider([ + ("fmp", _DataProvider(primary_data)), + ("finnhub", _DataProvider(fallback_data)), + ]) + + result = await provider.fetch_fundamentals("AAPL") + + # market cap from primary (first to supply it), the rest from fallback + assert result.market_cap == 2_000_000.0 + assert result.pe_ratio == 18.0 + assert result.revenue_growth == 12.0 + assert result.earnings_surprise == 4.0 + assert result.unavailable_fields.get("source_market_cap") == "fmp" + assert result.unavailable_fields.get("source_pe_ratio") == "finnhub" @pytest.mark.asyncio diff --git a/tests/unit/test_indicator_service.py b/tests/unit/test_indicator_service.py index 8c0bad5..7fd6db2 100644 --- a/tests/unit/test_indicator_service.py +++ b/tests/unit/test_indicator_service.py @@ -89,6 +89,17 @@ class TestComputeRSI: with pytest.raises(ValidationError, match="RSI requires"): compute_rsi([100.0] * 5) + def test_overbought_rsi_is_penalized_not_maximal(self): + """RSI 100 (extreme overbought) must NOT score near 100.""" + from app.services.indicator_service import _rsi_to_score + + assert _rsi_to_score(100.0) < 40.0 # overbought penalized + assert _rsi_to_score(90.0) < _rsi_to_score(60.0) # extreme < healthy + assert _rsi_to_score(60.0) > 80.0 # healthy momentum rewarded + # All gains → RSI 100 → low score, not 100 + result = compute_rsi(_rising_closes(20, step=1)) + assert result["score"] < 40.0 + # --------------------------------------------------------------------------- # ATR diff --git a/tests/unit/test_recommendation_service.py b/tests/unit/test_recommendation_service.py index d9deb22..38fad98 100644 --- a/tests/unit/test_recommendation_service.py +++ b/tests/unit/test_recommendation_service.py @@ -3,12 +3,20 @@ from __future__ import annotations from dataclasses import dataclass from app.services.recommendation_service import ( + _build_reasoning, + _choose_recommended_action, direction_analyzer, probability_estimator, signal_conflict_detector, target_generator, ) +_DEFAULT_CFG = { + "recommendation_high_confidence_threshold": 70.0, + "recommendation_moderate_confidence_threshold": 50.0, + "recommendation_confidence_diff_threshold": 20.0, +} + @dataclass class _SRLevelStub: @@ -52,6 +60,48 @@ def test_high_confidence_short_example(): assert confidence > 70.0 +def test_short_confidence_low_on_strongly_bullish_stock(): + """The CNC case: technical 92 / momentum 99 must make SHORT confidence low.""" + dims = {"technical": 92.0, "momentum": 99.0, "fundamental": 96.0} + + short_conf = direction_analyzer.calculate_confidence( + direction="short", dimension_scores=dims, sentiment_classification="neutral", conflicts=[], + ) + long_conf = direction_analyzer.calculate_confidence( + direction="long", dimension_scores=dims, sentiment_classification="neutral", conflicts=[], + ) + + assert short_conf < 20.0 # not 55 + assert long_conf > 90.0 + + +def test_action_neutral_when_bias_direction_has_no_setup(): + """Strong LONG bias but only a SHORT setup is tradeable → NEUTRAL, not LONG_HIGH.""" + action = _choose_recommended_action( + long_confidence=100.0, short_confidence=5.0, config=_DEFAULT_CFG, + available_directions={"short"}, + ) + assert action == "NEUTRAL" + + # With the long setup available, the same numbers give LONG_HIGH + action_ok = _choose_recommended_action( + long_confidence=100.0, short_confidence=5.0, config=_DEFAULT_CFG, + available_directions={"long", "short"}, + ) + assert action_ok == "LONG_HIGH" + + +def test_reasoning_explains_missing_setup(): + reasoning = _build_reasoning( + action="NEUTRAL", long_confidence=100.0, short_confidence=5.0, conflicts=[], + dimension_scores={"technical": 92.0, "momentum": 99.0}, + sentiment_classification="neutral", config=_DEFAULT_CFG, + available_directions={"short"}, + ) + assert "bias is LONG" in reasoning + assert "no high-conviction long setup" in reasoning.lower() + + def test_detects_sentiment_technical_conflict(): conflicts = signal_conflict_detector.detect_conflicts( dimension_scores={"technical": 72.0, "momentum": 55.0, "fundamental": 50.0},