diff --git a/job_scout/scout.py b/job_scout/scout.py
index 3b05323..5e8b8bc 100644
--- a/job_scout/scout.py
+++ b/job_scout/scout.py
@@ -1,8 +1,9 @@
"""Job scout for Dennis's quarterly target companies.
-Pulls latest openings from companies with known public ATS APIs (Workday/Ashby/Greenhouse),
-filters by Swiss location or remote eligibility, scores fit against profile keywords, tracks
-which job IDs we've already seen, writes a markdown report.
+Pulls latest openings from companies via public ATS APIs (Workday/Ashby/Greenhouse/
+SmartRecruiters/Eightfold/RSS) and, for JS-rendered careers sites, a headless-browser
+(playwright) adapter. Filters by Swiss location or remote eligibility, scores fit against
+profile keywords, tracks which job IDs we've already seen, writes a markdown report.
Usage:
py scout.py # Pull all configured companies (strong + medium only)
@@ -13,9 +14,9 @@ Usage:
State : state/seen_jobs.json
Output: reports/YYYY-MM-DD.md
-To add a company: append to COMPANIES with one of the existing adapter types.
-For companies behind custom careers sites (Google, MS, Meta, Apple, Roche, Novartis, IBM,
-Cisco, Sonova, Sygnum) — see TODO_ADAPTERS at the bottom.
+To add a company: append to COMPANIES with one of the existing adapter types. A few sites
+resist scraping even headless and stay in MANUAL_CHECK (surfaced as a report checklist).
+See the adapter-coverage notes at the bottom for the current automated/manual split.
"""
import json
@@ -131,6 +132,15 @@ COMPANIES = [
"url": "https://www.bis.org/doclist/vacancies.rss",
"default_location": "Basel, Switzerland",
}),
+ # Coinbase Ventures web3 talent network (Getro collection 1625). Aggregates roles
+ # across portfolio companies (Notion, Ashby, VALR, World, ...), NOT Coinbase itself —
+ # see fetch_getro. CH-filtered + eng title-filtered to stay relevant.
+ ("coinbase_ventures", "Coinbase Ventures (web3)", "getro", {
+ "collection": 1625,
+ "locations": ["Switzerland"],
+ "job_functions": ["Software Engineering", "IT", "Data Science"],
+ "_title_filter": ENG_TITLE_FILTER,
+ }),
# Headless-browser scrapers — slower (3-15s per company) but covers JS-rendered sites.
# Google actively bot-detects; the STEALTH_JS init script (applied to every context)
# is what makes its job list render. Cards are
with a "Learn more about "
@@ -207,16 +217,12 @@ COMPANIES = [
# Companies where adapter probing did not yield a reliable scrape. Reasons noted.
# These surface as a clickable checklist in the report so they're not forgotten.
MANUAL_CHECK = [
- ("Sonova", "PhenomPeople serves empty shell to automation (body never renders); widgets API rejects requests",
- "https://careers.sonova.com/us/en/search-results?keywords=Switzerland"),
("Coinbase", "/careers/positions 302-redirects to landing; no job links or ATS API exposed even with stealth",
"https://www.coinbase.com/careers"),
("AMINA Bank", "jobs are at /careers/ (#positions) via JS widget; only ~4 apply links, no scrapable list",
"https://aminagroup.com/careers/#positions"),
("Bitcoin Suisse", "jobs under /careers#open-positions load via JS widget; section empty at scrape time (likely no/few openings)",
"https://bitcoinsuisse.com/careers#open-positions"),
- # International org — qualifies (Basel, commutable from Bern, salary net of Swiss tax),
- # but uses a JS-heavy Taleo widget that doesn't render requisitions headless. Manual check.
]
@@ -416,6 +422,51 @@ def fetch_wp_ajax(args):
return jobs
+def fetch_getro(args):
+    """Fetch jobs from a Getro network job-board search API (POST JSON).
+
+    Getro powers VC portfolio talent networks — here the Coinbase Ventures web3
+    network (collection 1625). The board returns roles across ALL portfolio companies
+    (Notion, Ashby, VALR, World, ...), NOT Coinbase itself; Coinbase doesn't list its
+    own openings on its Ventures board. Because this is a multi-company board, the
+    organization name is folded into each job title.
+
+    Args:
+        args: adapter config dict. "collection" (required) is the Getro collection id.
+            Optional "locations" and "job_functions" lists are sent as the server-side
+            filters ``searchable_locations`` and ``job_functions``.
+
+    Returns:
+        A list of job dicts with the keys id, title, location, url, posted
+        (YYYY-MM-DD in UTC, or "" when created_at is not numeric) and description
+        (organization name followed by the skill strings).
+
+    Raises:
+        KeyError: if "collection" is missing from args.
+    """
+    # Paging limits: at most max_pages requests, i.e. 1000 jobs per run.
+    hits_per_page, max_pages = 100, 10
+    collection = args["collection"]
+    url = f"https://api.getro.com/api/v2/collections/{collection}/search/jobs"
+    filters = {}
+    if args.get("locations"):
+        filters["searchable_locations"] = args["locations"]
+    if args.get("job_functions"):
+        filters["job_functions"] = args["job_functions"]
+    jobs, page = [], 0
+    while True:
+        data = http_get_json(url, method="POST", data={
+            "hitsPerPage": hits_per_page, "page": page, "query": "", "filters": filters,
+        })
+        # dict.get(key, default) hands back an explicit JSON null unchanged, so every
+        # field used as a str/list/number below is normalised with `or` instead.
+        res = data.get("results") or {}
+        batch = res.get("jobs") or []
+        total = res.get("count") or 0
+        for j in batch:
+            org = (j.get("organization") or {}).get("name") or ""
+            locs = j.get("searchable_locations") or j.get("locations") or []
+            if isinstance(locs, list):
+                # str() + truthiness filter: one odd element must not fail the board.
+                loc_str = " | ".join(str(loc) for loc in locs if loc)
+            else:
+                loc_str = str(locs)
+            ts = j.get("created_at")
+            posted = ""
+            if isinstance(ts, (int, float)):
+                posted = datetime.fromtimestamp(ts, tz=timezone.utc).strftime("%Y-%m-%d")
+            title = j.get("title") or ""
+            job_url = j.get("url") or ""
+            # A missing id must not become the literal "None": seen-job tracking is
+            # keyed on id, so all id-less postings would collapse into one entry.
+            # Fall back to the posting URL, which is unique per job.
+            job_id = j.get("id")
+            skills = [s for s in (j.get("skills") or []) if isinstance(s, str)]
+            jobs.append({
+                "id": str(job_id) if job_id is not None else job_url,
+                "title": f"{title} @ {org}" if org else title,
+                "location": loc_str,
+                "url": job_url,
+                "posted": posted,
+                "description": " ".join(filter(None, [org] + skills)),
+            })
+        page += 1
+        # Stop on an empty page, once the reported total is reached, or at the cap.
+        if not batch or len(jobs) >= total or page >= max_pages:
+            break
+    return jobs
+
+
# Injected before page scripts run, to mask the most common headless-detection signals.
# Required for Google; harmless for the other sites.
STEALTH_JS = """
@@ -583,6 +634,7 @@ ADAPTERS = {
"wp_ajax": fetch_wp_ajax,
"smartrecruiters": fetch_smartrecruiters,
"rss": fetch_rss,
+ "getro": fetch_getro,
"playwright": fetch_playwright,
}
@@ -762,23 +814,33 @@ def main():
print(f"Errors: {len(errors)} - see report", file=sys.stderr)
-# === Adapter probe results (2026-05-21) =======================================
-# Tested all 15 target companies. The 5 working adapters are in COMPANIES above.
-# The remaining 10 are in MANUAL_CHECK. To upgrade one of those from manual to
-# automated, you'd need Playwright/Selenium (real browser) — different project.
+# === Adapter coverage (refreshed 2026-05-24) ==================================
+# 21 companies automated across 9 adapter types; 3 remain in MANUAL_CHECK.
#
-# Google careers.google.com 404 on documented API; auth-gated
-# Microsoft gcsservices.careers.ms.com TLS handshake hangs from non-MS clients
-# Apple jobs.apple.com/api/v1 endpoint exists, location filter codes opaque
-# Meta metacareers.com GraphQL with auth token
-# Roche careers.roche.com PhenomPeople/Eightfold, JS-rendered
-# IBM Research research.ibm.com static page, no API
-# Cisco jobs.cisco.com JS-rendered SPA
-# Sonova careers.sonova.com PhenomPeople SaaS, no public JSON
-# Sygnum sygnum.com/careers Cloudflare-protected
-# AMINA aminagroup.com/career static, low volume
-# Bitcoin Suisse bitcoinsuisse.com/careers static, low volume
-# Coinbase coinbase.com/careers Cloudflare-protected
+# Automated (COMPANIES above):
+# workday nvidia, novartis
+# ashby kraken, openai, confluent
+# greenhouse anthropic, gitlab, clickhouse, grafana
+# pcsx microsoft (Eightfold position-search endpoint)
+# wp_ajax sygnum (WordPress admin-ajax JSON)
+# smartrecruiters metgroup, vitol, ldc
+# rss bis (vacancies.rss — RSS 1.0/RDF)
+# getro coinbase_ventures (web3 portfolio network, collection 1625)
+# playwright google, apple, meta, roche, cisco (headless browser, 3-15s each)
+#
+# Since the 2026-05-21 probe, seven originally-manual sites moved to automated:
+# Google/Apple/Meta/Roche/Cisco via the playwright adapter, Microsoft via pcsx, and
+# Sygnum via its WordPress AJAX endpoint. BIS was added via the new rss adapter, and the
+# Coinbase Ventures web3 portfolio network via the new getro adapter. IBM Research and
+# Sonova were dropped from the target list (no API / low fit; Sonova is MedTech, off-thesis).
+#
+# Note: the Coinbase Ventures board (getro) covers PORTFOLIO companies, not Coinbase
+# itself — Coinbase-the-employer's own careers site stays in MANUAL_CHECK (listed below).
+#
+# Still manual (MANUAL_CHECK above) — to automate, each needs a real-browser probe:
+# Coinbase coinbase.com/careers Cloudflare-protected, 302 to landing
+# AMINA aminagroup.com/careers JS widget, ~4 apply links, low volume
+# Bitcoin Suisse bitcoinsuisse.com/careers JS widget, empty at scrape time, low volume
# ==============================================================================