From 9d2e1e74bf2b10a279cc08d9aebe35fcf130c490 Mon Sep 17 00:00:00 2001 From: Dennis Thiessen Date: Mon, 15 Jun 2026 20:52:09 +0200 Subject: [PATCH] fix probability over-confidence: model target-before-stop, not just touch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backtest (32k setups) showed the touch-only probability model was ~2x over-confident — predicted 70% hit 39%, predicted 88% hit 46% — because it ignored the competing stop. estimate_probability now multiplies the reach probability (touch within horizon) by the two-barrier gambler's-ruin ratio 1/(R:R+1) = P(target before stop). A 3:1 setup now reads ~25% base, not ~70%, which lines up with realized rates. Strength/alignment modulation unchanged. Recalibrates every probability and the EV ranking; the min_target_probability gate threshold now means roughly what it says. Re-run the backtest to confirm the calibration table flattens toward the diagonal. Co-Authored-By: Claude Opus 4.8 --- app/services/recommendation_service.py | 28 +++++++++++++++-------- tests/unit/test_recommendation_service.py | 7 +++--- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/app/services/recommendation_service.py b/app/services/recommendation_service.py index 20e023f..98248fc 100644 --- a/app/services/recommendation_service.py +++ b/app/services/recommendation_service.py @@ -357,22 +357,32 @@ class ProbabilityEstimator: direction: str, config: dict[str, float], ) -> float: - """Probability the target is reached within the outcome horizon. + """Probability the target is hit BEFORE the stop, within the horizon. - Base = probability of price *touching* a level at the target's distance - within the evaluation window, under a driftless random walk (reflection - principle): 2·(1 − Φ(d / (ATR·√T))). Distance is in ATR multiples and T - is the horizon in trading days, so a far target is inherently unlikely — - no more 90% on a +39% move. Strength and signal alignment (drift toward - the target) then modulate it modestly. + Two factors (backtest-calibrated 2026-06-15 — the old touch-only model + was ~2× over-confident because it ignored the competing stop): + + reach = P(price touches the target within T) — driftless random walk, + reflection principle: 2·(1 − Φ(d / (ATR·√T))). Falls with + distance, so a far target is inherently unlikely. + ruin = P(target before stop | both reachable) — the two-barrier + gambler's-ruin ratio stop/(target+stop) = 1/(R:R + 1). A 3:1 + setup wins the race ~25% of the time, not ~70%. + + base = reach · ruin. Strength and signal alignment (drift toward target) + then modulate it. """ strength = float(target.get("sr_strength", 50.0)) atr_multiple = float(target.get("distance_atr_multiple", 1.0)) + rr = float(target.get("rr_ratio", 0.0)) expected_move_atr = math.sqrt(_TARGET_HORIZON_DAYS) # ≈ 5.48 ATR over 30d z = atr_multiple / expected_move_atr if expected_move_atr > 0 else 99.0 - touch_prob = 2.0 * (1.0 - _norm_cdf(z)) # 0..1 - probability = touch_prob * 100.0 + reach = 2.0 * (1.0 - _norm_cdf(z)) # 0..1, P(touch target in horizon) + # P(target before stop): stop distance / (target + stop) = 1/(rr+1). + # Without a known rr (e.g. isolated probability checks), assume an even race. + ruin = 1.0 / (rr + 1.0) if rr > 0 else 0.5 + probability = reach * ruin * 100.0 technical = float(dimension_scores.get("technical", 50.0)) momentum = float(dimension_scores.get("momentum", 50.0)) diff --git a/tests/unit/test_recommendation_service.py b/tests/unit/test_recommendation_service.py index a503194..6c5c59f 100644 --- a/tests/unit/test_recommendation_service.py +++ b/tests/unit/test_recommendation_service.py @@ -205,9 +205,10 @@ def test_probability_decreases_with_distance(): # Monotonic decay with distance assert near > mid > far - # Near target is genuinely likely; a 10-ATR target is a long shot - assert near > 60 - assert far < 25 + # Backtest-calibrated: even a near target with no R:R context (even race) is + # only a moderate probability, and a 10-ATR target is a long shot. + assert near > 30 + assert far < 15 def test_far_target_not_high_probability_even_with_strong_level():