From 9d2e1e74bf2b10a279cc08d9aebe35fcf130c490 Mon Sep 17 00:00:00 2001
From: Dennis Thiessen <dennis@thiessen.io>
Date: Mon, 15 Jun 2026 20:52:09 +0200
Subject: [PATCH] fix probability over-confidence: model target-before-stop,
 not just touch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Backtest (32k setups) showed the touch-only probability model was ~2x
over-confident — predicted 70% hit 39%, predicted 88% hit 46% — because it
ignored the competing stop. estimate_probability now multiplies the reach
probability (touch within horizon) by the two-barrier gambler's-ruin ratio
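1/(R:R+1) = P(target before stop), falling back to 0.5 (an even race) when
the target has no positive rr_ratio. A 3:1 setup's base is now reach × 25%,
not the ~70% the touch-only model gave, which lines up with realized rates.
Strength/alignment modulation is unchanged.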

Recalibrates every probability and the EV ranking; the min_target_probability
gate threshold now means roughly what it says. Re-run the backtest to confirm
that the calibration table now tracks the diagonal (predicted ≈ realized).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 app/services/recommendation_service.py    | 28 +++++++++++++++--------
 tests/unit/test_recommendation_service.py |  7 +++---
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/app/services/recommendation_service.py b/app/services/recommendation_service.py
index 20e023f..98248fc 100644
--- a/app/services/recommendation_service.py
+++ b/app/services/recommendation_service.py
@@ -357,22 +357,32 @@ class ProbabilityEstimator:
         direction: str,
         config: dict[str, float],
     ) -> float:
-        """Probability the target is reached within the outcome horizon.
+        """Probability the target is hit BEFORE the stop, within the horizon.
 
-        Base = probability of price *touching* a level at the target's distance
-        within the evaluation window, under a driftless random walk (reflection
-        principle): 2·(1 − Φ(d / (ATR·√T))). Distance is in ATR multiples and T
-        is the horizon in trading days, so a far target is inherently unlikely —
-        no more 90% on a +39% move. Strength and signal alignment (drift toward
-        the target) then modulate it modestly.
+        Two factors (backtest-calibrated 2026-06-15 — the old touch-only model
+        was ~2× over-confident because it ignored the competing stop):
+
+          reach  = P(price touches the target within T) — driftless random walk,
+                   reflection principle: 2·(1 − Φ(d / (ATR·√T))). Falls with
+                   distance, so a far target is inherently unlikely.
+          ruin   = P(target before stop | both reachable) — the two-barrier
+                   gambler's-ruin ratio stop/(target+stop) = 1/(R:R + 1). A 3:1
+                   setup wins the race ~25% of the time, not ~70%.
+
+        base = reach · ruin. Strength and signal alignment (drift toward target)
+        then modulate it.
         """
         strength = float(target.get("sr_strength", 50.0))
         atr_multiple = float(target.get("distance_atr_multiple", 1.0))
+        rr = float(target.get("rr_ratio", 0.0))
 
         expected_move_atr = math.sqrt(_TARGET_HORIZON_DAYS)  # ≈ 5.48 ATR over 30d
         z = atr_multiple / expected_move_atr if expected_move_atr > 0 else 99.0
-        touch_prob = 2.0 * (1.0 - _norm_cdf(z))  # 0..1
-        probability = touch_prob * 100.0
+        reach = 2.0 * (1.0 - _norm_cdf(z))  # 0..1, P(touch target in horizon)
+        # P(target before stop): stop distance / (target + stop) = 1/(rr+1).
+        # Without a known rr (e.g. isolated probability checks), assume an even race.
+        ruin = 1.0 / (rr + 1.0) if rr > 0 else 0.5
+        probability = reach * ruin * 100.0
 
         technical = float(dimension_scores.get("technical", 50.0))
         momentum = float(dimension_scores.get("momentum", 50.0))
diff --git a/tests/unit/test_recommendation_service.py b/tests/unit/test_recommendation_service.py
index a503194..6c5c59f 100644
--- a/tests/unit/test_recommendation_service.py
+++ b/tests/unit/test_recommendation_service.py
@@ -205,9 +205,10 @@ def test_probability_decreases_with_distance():
 
     # Monotonic decay with distance
     assert near > mid > far
-    # Near target is genuinely likely; a 10-ATR target is a long shot
-    assert near > 60
-    assert far < 25
+    # Backtest-calibrated: even a near target with no R:R context (even race) is
+    # only a moderate probability, and a 10-ATR target is a long shot.
+    assert near > 30
+    assert far < 15
 
 
 def test_far_target_not_high_probability_even_with_strong_level():