From 8bcbbfcfd0b757abf49d8f92b4c8f3652dfc0230 Mon Sep 17 00:00:00 2001 From: Dennis Thiessen Date: Sun, 28 Jun 2026 09:01:09 +0200 Subject: [PATCH] fix: show benchmark job in admin; harden + split deploy workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - admin_service: register benchmark_collector in VALID_JOB_NAMES, JOB_LABELS and PIPELINE_MEMBERS. The Admin → Jobs list is built from these hardcoded sets, not the scheduler, so the job was registered but invisible/untriggerable. - deploy.yml: - SSH: verify the host key (StrictHostKeyChecking=yes) now that known_hosts is supplied; move private-key cleanup to an `if: always()` step. - Add a concurrency guard so deploys serialize. - Health-check the service after restart (127.0.0.1:8998/api/v1/health). - Align CI Python to 3.12 (matches prod); pip + npm caching. - Clarify the Postgres service only validates migrations (tests use SQLite); drop the redundant DATABASE_URL from the pytest step. - Split the monolithic "Deploy to server" step into named steps. Co-Authored-By: Claude Opus 4.8 --- .gitea/workflows/deploy.yml | 83 +++++++++++++++++++++++------------ app/services/admin_service.py | 3 ++ 2 files changed, 57 insertions(+), 29 deletions(-) diff --git a/.gitea/workflows/deploy.yml b/.gitea/workflows/deploy.yml index 5a13643..349f80c 100644 --- a/.gitea/workflows/deploy.yml +++ b/.gitea/workflows/deploy.yml @@ -22,6 +22,12 @@ on: type: boolean default: false +# Serialize deploys so two quick pushes to main can't rsync/restart on top of +# each other. Don't cancel an in-flight deploy mid-restart. +concurrency: + group: deploy-main + cancel-in-progress: false + jobs: lint: runs-on: ubuntu-latest @@ -29,7 +35,8 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" + cache: "pip" - run: pip install ruff - run: ruff check app/ @@ -52,17 +59,21 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" + cache: "pip" - uses: actions/setup-node@v4 with: node-version: "20" + cache: "npm" + cache-dependency-path: frontend/package-lock.json - run: pip install -e ".[dev]" + # The Postgres service exists only to validate the migrations against real + # Postgres (what prod runs). The test suite itself uses an in-memory SQLite + # engine (tests/conftest.py), so pytest doesn't touch this service. - run: alembic upgrade head env: DATABASE_URL: postgresql+asyncpg://test_user:test_pass@postgres:5432/test_db - run: pytest --tb=short - env: - DATABASE_URL: postgresql+asyncpg://test_user:test_pass@postgres:5432/test_db - run: | cd frontend npm ci @@ -76,37 +87,43 @@ jobs: deploy: needs: test runs-on: ubuntu-latest + env: + DEPLOY_HOST: ${{ vars.DEPLOY_HOST }} + DEPLOY_USER: ${{ vars.DEPLOY_USER }} + DEPLOY_PATH: ${{ vars.DEPLOY_PATH }} + SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} + SSH_KNOWN_HOSTS: ${{ vars.SSH_KNOWN_HOSTS }} + SSH_PORT: ${{ vars.SSH_PORT || '22' }} steps: - uses: actions/checkout@v4 - uses: actions/setup-node@v4 with: node-version: "20" + cache: "npm" + cache-dependency-path: frontend/package-lock.json + - name: Build frontend run: | cd frontend npm ci npm run build - - name: Deploy to server - env: - DEPLOY_HOST: ${{ vars.DEPLOY_HOST }} - DEPLOY_USER: ${{ vars.DEPLOY_USER }} - DEPLOY_PATH: ${{ vars.DEPLOY_PATH }} - SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} - SSH_KNOWN_HOSTS: ${{ vars.SSH_KNOWN_HOSTS }} - SSH_PORT: ${{ vars.SSH_PORT || '22' }} - run: | - # Install tools missing from runner image - sudo apt-get update -qq && sudo apt-get install -y -qq rsync openssh-client > /dev/null 2>&1 || true - # Write SSH credentials + - name: Install deploy tools + run: sudo apt-get update -qq && sudo apt-get install -y -qq rsync openssh-client > /dev/null 2>&1 || true + + - name: Set up SSH + run: | mkdir -p ~/.ssh + chmod 700 ~/.ssh echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key chmod 600 ~/.ssh/deploy_key echo "$SSH_KNOWN_HOSTS" >> ~/.ssh/known_hosts + # known_hosts is supplied, so verify the host key instead of blindly + # trusting it (StrictHostKeyChecking=no would defeat the fingerprint). + echo "SSH_OPTS=-i $HOME/.ssh/deploy_key -o StrictHostKeyChecking=yes -p ${SSH_PORT}" >> "$GITHUB_ENV" - SSH_OPTS="-i ~/.ssh/deploy_key -o StrictHostKeyChecking=no -p $SSH_PORT" - - # Sync application files + - name: Sync files to server + run: | rsync -avz --delete \ --exclude '.git/' \ --exclude '.gitea/' \ @@ -118,10 +135,11 @@ jobs: --exclude '*.pyc' \ --exclude 'frontend/node_modules/' \ -e "ssh $SSH_OPTS" \ - ./ ${DEPLOY_USER}@${DEPLOY_HOST}:${DEPLOY_PATH}/ + ./ "${DEPLOY_USER}@${DEPLOY_HOST}:${DEPLOY_PATH}/" - # Install deps & restart on server - ssh $SSH_OPTS ${DEPLOY_USER}@${DEPLOY_HOST} << REMOTE_SCRIPT + - name: Install deps & run migrations + run: | + ssh $SSH_OPTS "${DEPLOY_USER}@${DEPLOY_HOST}" << REMOTE_SCRIPT set -e cd ${DEPLOY_PATH} @@ -141,12 +159,19 @@ jobs: else alembic upgrade head fi - - # Restart service - sudo systemctl restart signalplatform.service - echo "✓ signalplatform deployed" - REMOTE_SCRIPT - # Cleanup - rm -f ~/.ssh/deploy_key + - name: Restart service & health check + run: | + ssh $SSH_OPTS "${DEPLOY_USER}@${DEPLOY_HOST}" << REMOTE_SCRIPT + set -e + sudo systemctl restart signalplatform.service + sleep 3 + curl -fsS http://127.0.0.1:8998/api/v1/health > /dev/null \ + || { echo "✗ health check failed after restart"; exit 1; } + echo "✓ signalplatform deployed" + REMOTE_SCRIPT + + - name: Clean up SSH key + if: always() + run: rm -f ~/.ssh/deploy_key diff --git a/app/services/admin_service.py b/app/services/admin_service.py index 97c7ad3..79ea48d 100644 --- a/app/services/admin_service.py +++ b/app/services/admin_service.py @@ -512,6 +512,7 @@ async def get_pipeline_readiness(db: AsyncSession) -> list[dict]: VALID_JOB_NAMES = { "data_collector", "data_backfill", + "benchmark_collector", "sentiment_collector", "fundamental_collector", "rr_scanner", @@ -529,6 +530,7 @@ VALID_JOB_NAMES = { JOB_LABELS = { "data_collector": "Data Collector (OHLCV)", "data_backfill": "Data Backfill (deep history)", + "benchmark_collector": "Benchmark Collector", "sentiment_collector": "Sentiment Collector", "fundamental_collector": "Fundamental Collector", "rr_scanner": "R:R Scanner", @@ -546,6 +548,7 @@ JOB_LABELS = { # Jobs driven by the daily_pipeline (in order) rather than their own timer. PIPELINE_MEMBERS = { "data_collector", + "benchmark_collector", "sentiment_collector", "rr_scanner", "outcome_evaluator",