redesign activation gate to expected value + make pipelines cron-configurable
Deploy / lint (push) Successful in 9s
Deploy / test (push) Successful in 46s
Deploy / deploy (push) Successful in 28s

Diagnosing "no qualified signals for 5 days": setups were generated but none
qualified. The gate required BOTH a high min_rr (2.0) AND a high
min_target_probability (60), which became contradictory after the Jun-15
probability recalibration — probability already embeds R:R via the 1/(rr+1) ruin
term, so high-R:R targets are inherently low-probability and nothing cleared both.

Gate is now expected value (R): p*rr - (1-p) from the primary target's
probability. R:R and confidence stay as floors; high-conviction / exclude-conflicts
/ min-target-probability become optional tighteners (default off). Defaults:
min_expected_value=0.15, min_rr=1.2, min_confidence=55. EV is only enforced when
computable. Migration 009 clears stored activation_* rows so the new defaults
apply. Backtest sweeps min_expected_value instead of target probability.

Scheduling: pipelines are now cron-configurable in Admin -> Jobs. daily_pipeline
(full, default 0 7 * * *) plus a new light intraday_pipeline (OHLCV + outcome eval,
default hourly US session) that keeps prices/live-R:R current without setup churn.
Fundamentals on its own early weekly cron. Timezone configurable (default
Europe/Berlin). Moving interval->CronTrigger also fixes the restart-deferral bug
where an interval job's countdown resets on every process restart.

319 backend unit tests pass; frontend tsc clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-23 14:46:38 +02:00
parent d53b4ffb57
commit c34f3cb1a4
22 changed files with 777 additions and 171 deletions
+149 -26
View File
@@ -19,6 +19,7 @@ import asyncio
from datetime import date, datetime, timedelta, timezone
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from sqlalchemy import case, func, or_, select
from sqlalchemy.ext.asyncio import AsyncSession
@@ -168,6 +169,17 @@ _job_runtime: dict[str, dict[str, object]] = {
"finished_at": None,
"message": None,
},
"intraday_pipeline": {
"running": False,
"status": "idle",
"processed": 0,
"total": None,
"progress_pct": None,
"current_ticker": None,
"started_at": None,
"finished_at": None,
"message": None,
},
}
@@ -1000,7 +1012,9 @@ async def sync_ticker_universe() -> None:
# Steps run in dependency order: each uses fresh output from the previous one.
# (name, coroutine) — the names match the individual jobs so each step still
# updates its own runtime status while the pipeline runs.
_PIPELINE_STEPS = [
#
# Daily (full): the complete data→signal refresh, once a day.
_DAILY_PIPELINE_STEPS = [
("data_collector", "collect_ohlcv"),
("sentiment_collector", "collect_sentiment"),
("rr_scanner", "scan_rr"),
@@ -1008,28 +1022,41 @@ _PIPELINE_STEPS = [
("market_regime", "compute_market_regime"),
]
# Intraday (light): keep prices current and resolve outcomes through the day,
# without the expensive scan/sentiment. The dashboard recomputes live R:R from
# the latest price, so refreshing OHLCV is enough to stop prices lagging; the
# outcome step also closes paper trades that hit their stop/target intraday.
_INTRADAY_PIPELINE_STEPS = [
("data_collector", "collect_ohlcv"),
("outcome_evaluator", "evaluate_outcomes"),
]
async def run_daily_pipeline() -> None:
"""Run the daily data→signal flow in dependency order.
OHLCV → fundamentals → sentiment → R:R scan → outcome eval (+paper close) →
market regime. Each step respects its own enable flag and manages its own
runtime status; a failing step is logged and the pipeline continues.
async def _run_pipeline(job_name: str, steps: list[tuple[str, str]]) -> None:
"""Run an ordered list of (step_name, coroutine_name) steps.
Each step respects its own enable flag and manages its own runtime status; a
failing step is logged and the pipeline continues with the next one.
"""
job_name = "daily_pipeline"
logger.info(json.dumps({"event": "job_start", "job": job_name}))
total = len(_PIPELINE_STEPS)
async with async_session_factory() as db:
if not await _is_job_enabled(db, job_name):
logger.info(json.dumps({"event": "job_skipped", "job": job_name, "reason": "disabled"}))
_runtime_finish(job_name, "skipped", processed=0, total=0, message="Disabled")
return
total = len(steps)
_runtime_start(job_name, total=total)
funcs = globals()
done = 0
try:
for step_name, func_name in _PIPELINE_STEPS:
for step_name, func_name in steps:
_runtime_progress(job_name, processed=done, total=total, current_ticker=step_name)
try:
await funcs[func_name]()
except Exception:
logger.exception("Daily pipeline step %s failed", step_name)
logger.exception("%s step %s failed", job_name, step_name)
done += 1
_runtime_finish(job_name, "completed", processed=done, total=total, message="Pipeline complete")
logger.info(json.dumps({"event": "job_complete", "job": job_name}))
@@ -1041,6 +1068,17 @@ async def run_daily_pipeline() -> None:
}))
async def run_daily_pipeline() -> None:
"""Full daily flow: OHLCV → sentiment → R:R scan → outcome eval (+paper
close) → market regime."""
await _run_pipeline("daily_pipeline", _DAILY_PIPELINE_STEPS)
async def run_intraday_pipeline() -> None:
"""Light intraday flow: refresh OHLCV → evaluate outcomes (+paper close)."""
await _run_pipeline("intraday_pipeline", _INTRADAY_PIPELINE_STEPS)
# ---------------------------------------------------------------------------
# Frequency helpers
# ---------------------------------------------------------------------------
@@ -1057,22 +1095,91 @@ def _parse_frequency(freq: str) -> dict[str, int]:
return _FREQUENCY_MAP.get(freq.lower(), {"hours": 24})
# ---------------------------------------------------------------------------
# Schedule config (cron, admin-configurable)
# ---------------------------------------------------------------------------
#
# The cron-driven jobs read their schedule from SystemSettings so it can be
# tuned from Admin → Jobs without a redeploy. A wall-clock CronTrigger also fixes
# the interval-trigger pitfall: an interval job resets its countdown to now+N on
# every process restart, so on a box that's redeployed often it can keep being
# deferred and never fire. Cron fires at a fixed local time regardless.
SCHEDULE_DEFAULTS: dict[str, str] = {
"schedule_timezone": "Europe/Berlin",
"schedule_daily_pipeline_cron": "0 7 * * *", # full refresh, ready by ~8am
"schedule_intraday_pipeline_cron": "0 14-22 * * 1-5", # hourly across the US session
"schedule_fundamentals_cron": "0 4 * * 1", # weekly, early Monday (slow job)
}
# job id -> schedule setting key
_CRON_JOBS: dict[str, str] = {
"daily_pipeline": "schedule_daily_pipeline_cron",
"intraday_pipeline": "schedule_intraday_pipeline_cron",
"fundamental_collector": "schedule_fundamentals_cron",
}
def validate_cron(expr: str, timezone: str) -> None:
"""Raise ValueError if the cron expression or timezone is invalid."""
CronTrigger.from_crontab((expr or "").strip(), timezone=(timezone or "").strip())
def _cron_trigger(expr: str, timezone: str, fallback_key: str) -> CronTrigger:
"""Build a CronTrigger, falling back to the default (UTC) on a bad value."""
try:
return CronTrigger.from_crontab(expr.strip(), timezone=timezone.strip())
except Exception:
logger.warning(json.dumps({
"event": "invalid_cron", "expr": expr, "timezone": timezone,
"fallback": SCHEDULE_DEFAULTS[fallback_key],
}))
return CronTrigger.from_crontab(SCHEDULE_DEFAULTS[fallback_key], timezone="UTC")
async def load_schedule_config(db: AsyncSession) -> dict[str, str]:
"""Read the cron schedule config from SystemSettings, defaults for any unset."""
result = await db.execute(
select(SystemSetting).where(SystemSetting.key.in_(list(SCHEDULE_DEFAULTS)))
)
stored = {s.key: s.value for s in result.scalars().all()}
return {key: (stored.get(key) or default) for key, default in SCHEDULE_DEFAULTS.items()}
def reschedule_jobs(schedule_config: dict[str, str]) -> dict[str, str]:
"""Re-apply cron triggers to the running scheduler after a settings change."""
tz = schedule_config.get("schedule_timezone") or SCHEDULE_DEFAULTS["schedule_timezone"]
applied: dict[str, str] = {}
for job_id, key in _CRON_JOBS.items():
if scheduler.get_job(job_id) is None:
continue
expr = schedule_config.get(key) or SCHEDULE_DEFAULTS[key]
scheduler.reschedule_job(job_id, trigger=_cron_trigger(expr, tz, key))
applied[job_id] = expr
logger.info(json.dumps({"event": "jobs_rescheduled", "applied": applied, "timezone": tz}))
return applied
# ---------------------------------------------------------------------------
# Scheduler setup
# ---------------------------------------------------------------------------
def configure_scheduler() -> None:
"""Add all jobs to the scheduler with configured intervals.
def configure_scheduler(schedule_config: dict[str, str] | None = None) -> None:
"""Add all jobs to the scheduler.
Call this once before scheduler.start(). Removes any existing jobs first
to ensure idempotency.
Call this once before scheduler.start(). Removes any existing jobs first to
ensure idempotency. ``schedule_config`` supplies the cron strings + timezone
for the cron-driven jobs (daily/intraday pipelines, fundamentals); defaults
are used for anything missing.
"""
cfg = {**SCHEDULE_DEFAULTS, **(schedule_config or {})}
tz = cfg["schedule_timezone"]
scheduler.remove_all_jobs()
# Pipeline members: registered but PAUSED (next_run_time=None) so they never
# auto-fire on their own timer — the daily_pipeline drives them in order. The
# long interval is just a backstop after a manual trigger (which re-arms an
# auto-fire on their own timer — the pipelines drive them in order. The long
# interval is just a backstop after a manual trigger (which re-arms an
# interval job). They stay manually triggerable from Admin → Jobs.
_members = [
(collect_ohlcv, "data_collector", "Data Collector (OHLCV)"),
@@ -1087,23 +1194,30 @@ def configure_scheduler() -> None:
replace_existing=True, next_run_time=None,
)
# Daily Pipeline — the single ordered daily flow
# Cron-driven jobs (admin-configurable times)
scheduler.add_job(
run_daily_pipeline, "interval", hours=24,
run_daily_pipeline,
_cron_trigger(cfg["schedule_daily_pipeline_cron"], tz, "schedule_daily_pipeline_cron"),
id="daily_pipeline", name="Daily Pipeline", replace_existing=True,
)
scheduler.add_job(
run_intraday_pipeline,
_cron_trigger(cfg["schedule_intraday_pipeline_cron"], tz, "schedule_intraday_pipeline_cron"),
id="intraday_pipeline", name="Intraday Pipeline", replace_existing=True,
)
# Fundamentals — quarterly-ish data; weekly by default (conserves API quota).
# Its own early cron so the slow, rate-limited fetch finishes before the day.
scheduler.add_job(
collect_fundamentals,
_cron_trigger(cfg["schedule_fundamentals_cron"], tz, "schedule_fundamentals_cron"),
id="fundamental_collector", name="Fundamental Collector", replace_existing=True,
)
# Independent jobs (own cadence, no ordering dependency)
# Independent interval jobs (own cadence, no ordering dependency)
scheduler.add_job(
sync_ticker_universe, "interval", hours=24,
id="ticker_universe_sync", name="Ticker Universe Sync", replace_existing=True,
)
# Fundamentals — quarterly-ish data; weekly by default (conserves API quota)
fund_interval = _parse_frequency(settings.fundamental_fetch_frequency)
scheduler.add_job(
collect_fundamentals, "interval", **fund_interval,
id="fundamental_collector", name="Fundamental Collector", replace_existing=True,
)
alerts_interval = _parse_frequency(settings.alerts_frequency)
scheduler.add_job(
dispatch_alerts_job, "interval", **alerts_interval,
@@ -1117,7 +1231,16 @@ def configure_scheduler() -> None:
logger.info(
json.dumps({
"event": "scheduler_configured",
"daily_pipeline": [name for name, _ in _PIPELINE_STEPS],
"timezone": tz,
"daily_pipeline": {
"cron": cfg["schedule_daily_pipeline_cron"],
"steps": [name for name, _ in _DAILY_PIPELINE_STEPS],
},
"intraday_pipeline": {
"cron": cfg["schedule_intraday_pipeline_cron"],
"steps": [name for name, _ in _INTRADAY_PIPELINE_STEPS],
},
"fundamental_collector": {"cron": cfg["schedule_fundamentals_cron"]},
"independent": ["ticker_universe_sync", "alerts", "backtest"],
})
)