fix: show benchmark job in admin; harden + split deploy workflow
Deploy / lint (push) Successful in 8s
Deploy / test (push) Successful in 48s
Deploy / deploy (push) Successful in 28s

- admin_service: add benchmark_collector to VALID_JOB_NAMES, JOB_LABELS and
  PIPELINE_MEMBERS. The Admin → Jobs list is built from these hardcoded
  collections, not from the scheduler, so the job was registered with the
  scheduler but was invisible in, and could not be triggered from, the admin UI.

- deploy.yml:
  - SSH: verify the host key (StrictHostKeyChecking=yes) now that known_hosts is
    supplied; move private-key cleanup to an `if: always()` step.
  - Add a concurrency guard so deploys serialize.
  - Health-check the service after restart (127.0.0.1:8998/api/v1/health).
  - Align CI Python to 3.12 (matches prod); enable pip and npm caching.
  - Clarify the Postgres service only validates migrations (tests use SQLite);
    drop the redundant DATABASE_URL from the pytest step.
  - Split the monolithic "Deploy to server" step into named steps.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-28 09:01:09 +02:00
parent 0627787bfc
commit 8bcbbfcfd0
2 changed files with 57 additions and 29 deletions
+54 -29
View File
@@ -22,6 +22,12 @@ on:
type: boolean type: boolean
default: false default: false
# Serialize deploys so two quick pushes to main can't rsync/restart on top of
# each other. Don't cancel an in-flight deploy mid-restart.
concurrency:
group: deploy-main
cancel-in-progress: false
jobs: jobs:
lint: lint:
runs-on: ubuntu-latest runs-on: ubuntu-latest
@@ -29,7 +35,8 @@ jobs:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-python@v5 - uses: actions/setup-python@v5
with: with:
python-version: "3.11" python-version: "3.12"
cache: "pip"
- run: pip install ruff - run: pip install ruff
- run: ruff check app/ - run: ruff check app/
@@ -52,17 +59,21 @@ jobs:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-python@v5 - uses: actions/setup-python@v5
with: with:
python-version: "3.11" python-version: "3.12"
cache: "pip"
- uses: actions/setup-node@v4 - uses: actions/setup-node@v4
with: with:
node-version: "20" node-version: "20"
cache: "npm"
cache-dependency-path: frontend/package-lock.json
- run: pip install -e ".[dev]" - run: pip install -e ".[dev]"
# The Postgres service exists only to validate the migrations against real
# Postgres (what prod runs). The test suite itself uses an in-memory SQLite
# engine (tests/conftest.py), so pytest doesn't touch this service.
- run: alembic upgrade head - run: alembic upgrade head
env: env:
DATABASE_URL: postgresql+asyncpg://test_user:test_pass@postgres:5432/test_db DATABASE_URL: postgresql+asyncpg://test_user:test_pass@postgres:5432/test_db
- run: pytest --tb=short - run: pytest --tb=short
env:
DATABASE_URL: postgresql+asyncpg://test_user:test_pass@postgres:5432/test_db
- run: | - run: |
cd frontend cd frontend
npm ci npm ci
@@ -76,37 +87,43 @@ jobs:
deploy: deploy:
needs: test needs: test
runs-on: ubuntu-latest runs-on: ubuntu-latest
env:
DEPLOY_HOST: ${{ vars.DEPLOY_HOST }}
DEPLOY_USER: ${{ vars.DEPLOY_USER }}
DEPLOY_PATH: ${{ vars.DEPLOY_PATH }}
SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
SSH_KNOWN_HOSTS: ${{ vars.SSH_KNOWN_HOSTS }}
SSH_PORT: ${{ vars.SSH_PORT || '22' }}
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-node@v4 - uses: actions/setup-node@v4
with: with:
node-version: "20" node-version: "20"
cache: "npm"
cache-dependency-path: frontend/package-lock.json
- name: Build frontend - name: Build frontend
run: | run: |
cd frontend cd frontend
npm ci npm ci
npm run build npm run build
- name: Deploy to server
env:
DEPLOY_HOST: ${{ vars.DEPLOY_HOST }}
DEPLOY_USER: ${{ vars.DEPLOY_USER }}
DEPLOY_PATH: ${{ vars.DEPLOY_PATH }}
SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
SSH_KNOWN_HOSTS: ${{ vars.SSH_KNOWN_HOSTS }}
SSH_PORT: ${{ vars.SSH_PORT || '22' }}
run: |
# Install tools missing from runner image
sudo apt-get update -qq && sudo apt-get install -y -qq rsync openssh-client > /dev/null 2>&1 || true
# Write SSH credentials - name: Install deploy tools
run: sudo apt-get update -qq && sudo apt-get install -y -qq rsync openssh-client > /dev/null 2>&1 || true
- name: Set up SSH
run: |
mkdir -p ~/.ssh mkdir -p ~/.ssh
chmod 700 ~/.ssh
echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
chmod 600 ~/.ssh/deploy_key chmod 600 ~/.ssh/deploy_key
echo "$SSH_KNOWN_HOSTS" >> ~/.ssh/known_hosts echo "$SSH_KNOWN_HOSTS" >> ~/.ssh/known_hosts
# known_hosts is supplied, so verify the host key instead of blindly
# trusting it (StrictHostKeyChecking=no would defeat the fingerprint).
echo "SSH_OPTS=-i $HOME/.ssh/deploy_key -o StrictHostKeyChecking=yes -p ${SSH_PORT}" >> "$GITHUB_ENV"
SSH_OPTS="-i ~/.ssh/deploy_key -o StrictHostKeyChecking=no -p $SSH_PORT" - name: Sync files to server
run: |
# Sync application files
rsync -avz --delete \ rsync -avz --delete \
--exclude '.git/' \ --exclude '.git/' \
--exclude '.gitea/' \ --exclude '.gitea/' \
@@ -118,10 +135,11 @@ jobs:
--exclude '*.pyc' \ --exclude '*.pyc' \
--exclude 'frontend/node_modules/' \ --exclude 'frontend/node_modules/' \
-e "ssh $SSH_OPTS" \ -e "ssh $SSH_OPTS" \
./ ${DEPLOY_USER}@${DEPLOY_HOST}:${DEPLOY_PATH}/ ./ "${DEPLOY_USER}@${DEPLOY_HOST}:${DEPLOY_PATH}/"
# Install deps & restart on server - name: Install deps & run migrations
ssh $SSH_OPTS ${DEPLOY_USER}@${DEPLOY_HOST} << REMOTE_SCRIPT run: |
ssh $SSH_OPTS "${DEPLOY_USER}@${DEPLOY_HOST}" << REMOTE_SCRIPT
set -e set -e
cd ${DEPLOY_PATH} cd ${DEPLOY_PATH}
@@ -141,12 +159,19 @@ jobs:
else else
alembic upgrade head alembic upgrade head
fi fi
# Restart service
sudo systemctl restart signalplatform.service
echo "✓ signalplatform deployed"
REMOTE_SCRIPT REMOTE_SCRIPT
# Cleanup - name: Restart service & health check
rm -f ~/.ssh/deploy_key run: |
ssh $SSH_OPTS "${DEPLOY_USER}@${DEPLOY_HOST}" << REMOTE_SCRIPT
set -e
sudo systemctl restart signalplatform.service
sleep 3
curl -fsS http://127.0.0.1:8998/api/v1/health > /dev/null \
|| { echo "✗ health check failed after restart"; exit 1; }
echo "✓ signalplatform deployed"
REMOTE_SCRIPT
- name: Clean up SSH key
if: always()
run: rm -f ~/.ssh/deploy_key
+3
View File
@@ -512,6 +512,7 @@ async def get_pipeline_readiness(db: AsyncSession) -> list[dict]:
VALID_JOB_NAMES = { VALID_JOB_NAMES = {
"data_collector", "data_collector",
"data_backfill", "data_backfill",
"benchmark_collector",
"sentiment_collector", "sentiment_collector",
"fundamental_collector", "fundamental_collector",
"rr_scanner", "rr_scanner",
@@ -529,6 +530,7 @@ VALID_JOB_NAMES = {
JOB_LABELS = { JOB_LABELS = {
"data_collector": "Data Collector (OHLCV)", "data_collector": "Data Collector (OHLCV)",
"data_backfill": "Data Backfill (deep history)", "data_backfill": "Data Backfill (deep history)",
"benchmark_collector": "Benchmark Collector",
"sentiment_collector": "Sentiment Collector", "sentiment_collector": "Sentiment Collector",
"fundamental_collector": "Fundamental Collector", "fundamental_collector": "Fundamental Collector",
"rr_scanner": "R:R Scanner", "rr_scanner": "R:R Scanner",
@@ -546,6 +548,7 @@ JOB_LABELS = {
# Jobs driven by the daily_pipeline (in order) rather than their own timer. # Jobs driven by the daily_pipeline (in order) rather than their own timer.
PIPELINE_MEMBERS = { PIPELINE_MEMBERS = {
"data_collector", "data_collector",
"benchmark_collector",
"sentiment_collector", "sentiment_collector",
"rr_scanner", "rr_scanner",
"outcome_evaluator", "outcome_evaluator",