diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 2df8e8a..171c44d 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -82,7 +82,8 @@ "Bash(job_scout/.venv/Scripts/python.exe job_scout/scout.py --only=google)", "Bash(job_scout/.venv/Scripts/python.exe -c ' *)", "Bash(job_scout/.venv/Scripts/python.exe job_scout/scout.py --only=meta)", - "Bash(job_scout/.venv/Scripts/python.exe job_scout/scout.py --only=cisco --include-weak)" + "Bash(job_scout/.venv/Scripts/python.exe job_scout/scout.py --only=cisco --include-weak)", + "Bash(job_scout/.venv/Scripts/python.exe job_scout/scout.py --only=confluent)" ] } } diff --git a/CLAUDE.md b/CLAUDE.md index cd0c6b5..f491e67 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -136,6 +136,7 @@ _Update this section when starting/finishing a JD._ | Infineon AI Engineer | Critique DONE Pass 2 (78.5/100) | Submit or Tier 2 polish | | Apple Data Engineer (ISE, Zurich) | Critique DONE Pass 1 (78.5/100) | /edit-resume for Tier 1 fixes or submit | | Kraken AI Infrastructure | Critique DONE Pass 2 (84.5/100) — converged near max | Submit, or apply Tier 2 polish (agent orchestration / guardrails in skills) | +| Google FDE GenAI (Zurich) | PAUSED — GenAI evidence gap too large; redirecting to data-eng/MLOps roles | Likely abandon | --- diff --git a/job_scout/scout.py b/job_scout/scout.py index 8a953ac..9d0fd21 100644 --- a/job_scout/scout.py +++ b/job_scout/scout.py @@ -37,7 +37,7 @@ CH_LOCATION_KEYWORDS = [ "lausanne", "zug", "rüschlikon", "stäfa", "schweiz", "suisse", ] -REMOTE_KEYWORDS = ["remote"] +REMOTE_KEYWORDS = ["remote", "home based", "home-based", "anywhere", "distributed"] US_ONLY_PATTERNS = [ "remote - us", "remote, us", "remote-us", "us remote", "us-remote", @@ -49,7 +49,7 @@ EU_HINT_KEYWORDS = [ "germany", "france", "spain", "portugal", "ireland", "netherlands", "sweden", "norway", "finland", "denmark", "poland", "czech", "romania", "italy", "austria", "belgium", "uk", "united kingdom", - "europe", "emea", "global", + "europe", "emea", "global", "worldwide", ] + CH_LOCATION_KEYWORDS POSITIVE_KEYWORDS = { @@ -77,6 +77,19 @@ NEGATIVE_KEYWORDS = { "intern": -5, "internship": -5, "graduate program": -3, " junior ": -3, } +# Title prefilter for high-volume boards (all-remote tech orgs + commodity traders that +# post mostly non-tech roles). Only keep titles containing one of these specific role +# phrases — kept tight so "Sales Engineer"/"Staff Accountant"/"Data Privacy Counsel" +# don't leak in. Matched as case-insensitive substrings against the title only. +ENG_TITLE_FILTER = [ + "data engineer", "data engineering", "data platform", "platform engineer", + "data infrastructure", "data architect", "analytics engineer", + "mlops", "ml engineer", "ml platform", "machine learning engineer", + "site reliability", "sre", "backend engineer", "back-end engineer", + "devops engineer", "cloud engineer", "software engineer", "infrastructure engineer", + "kafka", "streaming", "big data", "quantitative developer", "quant developer", +] + # id, display, adapter, adapter_args COMPANIES = [ ("nvidia", "NVIDIA", "workday", { @@ -103,6 +116,15 @@ COMPANIES = [ ("sygnum", "Sygnum", "wp_ajax", { "url": "https://www.sygnum.com/wp-admin/admin-ajax.php?action=fetch_careers&_wpnonce=c036d1627c", }), + # --- Data-infra US tech (his exact stack; mostly all-remote — title-filtered to eng/data) --- + ("confluent", "Confluent", "ashby", {"slug": "confluent", "_title_filter": ENG_TITLE_FILTER}), + ("gitlab", "GitLab", "greenhouse", {"board": "gitlab", "_title_filter": ENG_TITLE_FILTER}), + ("clickhouse","ClickHouse","greenhouse", {"board": "clickhouse", "_title_filter": ENG_TITLE_FILTER}), + ("grafana", "Grafana Labs","greenhouse",{"board": "grafanalabs", "_title_filter": ENG_TITLE_FILTER}), + # --- Energy / commodity trading (SmartRecruiters; title-filtered to tech roles) --- + ("metgroup", "MET Group", "smartrecruiters", {"company": "METGroup", "_title_filter": ENG_TITLE_FILTER}), + ("vitol", "Vitol", "smartrecruiters", {"company": "Vitol", "_title_filter": ENG_TITLE_FILTER}), + ("ldc", "Louis Dreyfus","smartrecruiters",{"company": "LouisDreyfusCompany", "_title_filter": ENG_TITLE_FILTER}), # Headless-browser scrapers — slower (3-15s per company) but covers JS-rendered sites. # Google actively bot-detects; the STEALTH_JS init script (applied to every context) # is what makes its job list render. Cards are