From da66443aa89750017dfd9af4fd3bb77bfd423dda Mon Sep 17 00:00:00 2001 From: Dennis Thiessen Date: Mon, 1 Jun 2026 15:15:22 +0200 Subject: [PATCH] feat(job_scout): add 6 Swiss/EU companies, new adapters, and scan-stats table Automate Palantir, QuantCo, Swissgrid, RUAG, SBB, BKW (drop BFH/Dialectic); 25 companies automated, 0 manual. - adapters: lever (Palantir/QuantCo), generic json (Swissgrid), sbb, bkw - fetch_playwright: optional ?page=N pagination (page_param/max_pages) for RUAG - location_matches: treat pan-EU "Europe"/"EMEA" postings as eligible - per-company _score_floor so pre-filtered German-language boards stay visible - POSITIVE_KEYWORDS: add data scientist / data science (medium) - report: scan-stats table (scraped / CH-remote / match>=2 / newest / time) + totals Co-Authored-By: Claude Opus 4.8 --- job_scout/scout.py | 432 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 383 insertions(+), 49 deletions(-) diff --git a/job_scout/scout.py b/job_scout/scout.py index c720555..8cf99cb 100644 --- a/job_scout/scout.py +++ b/job_scout/scout.py @@ -1,7 +1,7 @@ """Job scout for Dennis's quarterly target companies. Pulls latest openings from companies via public ATS APIs (Workday/Ashby/Greenhouse/ -SmartRecruiters/Eightfold/RSS) and, for JS-rendered careers sites, a headless-browser +SmartRecruiters/Lever/Eightfold/RSS) and, for JS-rendered careers sites, a headless-browser (playwright) adapter. Filters by Swiss location or remote eligibility, scores fit against profile keywords, tracks which job IDs we've already seen, writes a markdown report. 
@@ -22,6 +22,7 @@ See the adapter-coverage notes at the bottom for the current automated/manual sp import json import re import sys +import time from functools import lru_cache import urllib.error import urllib.parse @@ -59,6 +60,9 @@ POSITIVE_KEYWORDS = { "applied ai": 3, "applied ml": 3, "ai engineer": 3, "ml engineer": 3, "mlops": 3, "ai platform": 3, "ml platform": 3, "python": 2, "java": 2, "data engineer": 2, "data engineering": 2, + # "data scientist" scored modestly (medium, not strong) — secondary to his data-eng/ + # platform thesis, but the targeted band at boutiques like QuantCo (see target memory). + "data scientist": 2, "data science": 2, "solutions architect": 2, "platform engineer": 2, "ai infrastructure": 2, "inference": 2, "rag": 2, "agentic": 2, "kubernetes": 1, "docker": 1, "etl": 1, "pipeline": 1, @@ -227,11 +231,72 @@ COMPANIES = [ "scroll_count": 5, "use_inner_text_as_blob": True, }), + # --- Zürich/Zug high-comp additions (2026-05-31 list review) --- + # Palantir (Lever). Verified: 221 postings on the public board. It's US/London-heavy, so + # Swiss/Schwyz roles are rare but self-surface when posted (the location filter drops the + # US/London bulk). No title filter: his target titles (Forward Deployed Software Engineer, + # Deployment Strategist) aren't in ENG_TITLE_FILTER, so filtering would hide them. + ("palantir", "Palantir", "lever", {"slug": "palantir"}), + # QuantCo (Lever — note the trailing-hyphen slug "quantco-"). ~16 roles, most tagged + # "Europe" (hybrid); QuantCo's continental hub is Zürich, so the EU-wide rule in + # location_matches surfaces them. No title filter: the target band is DS/Quant/AI/Cloud + # (see comp analysis), which ENG_TITLE_FILTER would drop; interns/frontend are caught by + # NEGATIVE_KEYWORDS instead. + ("quantco", "QuantCo", "lever", {"slug": "quantco-"}), + # --- Bern/Thun local tier — WLB & proximity exception (comp bar relaxed; 2026-06-01) --- + # Wired after live endpoint discovery. 
⚠️ German citizen: RUAG classified work may require + # Swiss citizenship — verify per-role before tailoring (see project_target_companies). + # Swissgrid (Aarau): Magnolia CMS JSON endpoint (verified). placeOfWork is a bare city + # (Aarau/Prilly/...), so loc_suffix tags it Switzerland for the CH filter. No title filter + # (small board ~13 roles; lets Data Scientist / Applied-ML roles surface). + ("swissgrid", "Swissgrid (Aarau)", "json", { + "url": "https://www.swissgrid.ch/.rest/cloud/component-data?path=%2Fswissgrid%2Fen%2Fhome%2Fcareer%2Fjobs%2Fmain%2Fjoblist_transferred_11", + "jobs_key": "jobs", + "field_title": "title", "field_location": "placeOfWork", + "field_url": "descriptionUrl", "field_date": "onlineSince", + "loc_suffix": " Switzerland", + "desc_keys": ["department", "typeOfEmployment", "entryLevel"], + }), + # RUAG (Thun/Bern/Emmen). Jobs render on the portal as anchors to jobs.ruag.ch; the first + # line of each anchor is the title. All sites are Swiss, so default_location=Switzerland + # passes the CH filter. ENG_TITLE_FILTER cuts the apprenticeship/Lehrstelle bulk. + # Drupal portal: 20 jobs/page, server-rendered, paginated via ?page=N (0-indexed). The + # first page is apprenticeship-heavy; eng roles (DevOps/Data/Cloud) are on later pages, + # so we page through until a page adds nothing new (~5-6 pages). + ("ruag", "RUAG (Thun/Bern)", "playwright", { + "url": "https://www.ruag.ch/en/working-us/job-portal", + "wait_for": "a[href*='/offene-stellen/']", + "card": "a[href*='/offene-stellen/']", + "title_attr": "text", + "link_attr": "href", + "default_location": "Switzerland", + "scroll_count": 1, + "page_param": "page", + "max_pages": 10, + "_title_filter": ENG_TITLE_FILTER, + }), + # SBB (company.sbb.ch — the correct host; company-jobs.sbb.ch was wrong). AEM job filter + # served as a flat JSON list; the fetch_sbb adapter replicates the user's IT + Bern-region + # filter. 
German/generic titles, so _score_floor keeps the pre-filtered results visible. + # ⚠️ DE-citizen limits may apply to some SBB security/critical-infra roles. + ("sbb", "SBB", "sbb", { + "topic": "IT / Telekommunikation", + "region": "Bern Mittelland", + "_score_floor": 2, + }), + # BKW Group (jobs.bkw.com — the real ATS host). PMS structured-data API; ~600 roles + # group-wide, so fetch_bkw keeps only Berufsfeld categories Informatik/Trading/Finanzen + # (IT/data + energy-trading, incl. the flagged Energiehandel roles). German/generic + # titles, so _score_floor keeps the pre-filtered set visible. + ("bkw", "BKW (Bern)", "bkw", {"_score_floor": 2}), ] # Companies where adapter probing did not yield a reliable scrape. Reasons noted. # These surface as a clickable checklist in the report so they're not forgotten. -# (Empty — all current target companies are automated.) +# Companies that resist scraping stay here as a clickable report checklist. Currently empty — +# every target company is automated. (Dropped 2026-06-01: BFH — academic FH pay below even the +# relaxed Bern/Thun floor, research-leaning, 403s anyway; Dialectic — ~50-person crypto VC, +# 0 open roles, crypto angle already covered by Kraken/Bitcoin Suisse/Coinbase Ventures.) MANUAL_CHECK = [] @@ -509,6 +574,145 @@ def fetch_onlyfy(args): return jobs +def fetch_lever(args): + """Lever public postings API. Palantir uses this. The board is US/London-heavy; + Swiss/Zurich (Schwyz hub) roles are rare on it but will surface here when posted — + location filtering downstream drops the US/London bulk. 
categories.allLocations + captures multi-location postings; createdAt is epoch-ms.""" + slug = args["slug"] + data = http_get_json(f"https://api.lever.co/v0/postings/{slug}?mode=json") + jobs = [] + for j in data: + cats = j.get("categories") or {} + all_locs = cats.get("allLocations") or [] + loc_blob = " | ".join(x for x in ([cats.get("location") or ""] + [str(a) for a in all_locs]) if x) + ts = j.get("createdAt") + posted = "" + if isinstance(ts, (int, float)): + posted = datetime.fromtimestamp(ts / 1000, tz=timezone.utc).strftime("%Y-%m-%d") + jobs.append({ + "id": j.get("id"), + "title": j.get("text", ""), + "location": loc_blob, + "url": j.get("hostedUrl"), + "posted": posted, + "description": (j.get("descriptionPlain") or "")[:2500], + }) + return jobs + + +def fetch_json(args): + """Generic JSON jobs API with configurable field names, for employer sites that expose + a clean public endpoint. Verified use: Swissgrid (Magnolia CMS + /.rest/cloud/component-data — {config, jobs:[...], filters}). Field names vary by site, + so they're configurable: field_title/field_location/field_url/field_date. loc_suffix + appends e.g. ' Switzerland' so the CH location filter matches city-only values such as + "Aarau"/"Prilly" (not every Swiss town is in CH_LOCATION_KEYWORDS). desc_keys fold extra + fields (department, employment type) into the description for keyword scoring. 
+ + Args: url, jobs_key (default "jobs"), field_* (defaults title/location/url/date), + url_prefix, loc_suffix, desc_keys.""" + data = http_get_json(args["url"]) + arr = data.get(args.get("jobs_key", "jobs"), []) if isinstance(data, dict) else (data or []) + ft, fl = args.get("field_title", "title"), args.get("field_location", "location") + fu, fd = args.get("field_url", "url"), args.get("field_date", "date") + prefix, suffix = args.get("url_prefix", ""), args.get("loc_suffix", "") + desc_keys = args.get("desc_keys", []) + jobs = [] + for j in arr: + url = j.get(fu, "") or "" + if url and not url.startswith("http") and prefix: + url = prefix.rstrip("/") + "/" + url.lstrip("/") + loc = (j.get(fl, "") or "").strip() + suffix + desc = " ".join(str(j.get(k)) for k in desc_keys if j.get(k)) + jobs.append({ + "id": str(j.get("id") or url), + "title": j.get(ft, ""), + "location": loc, + "url": url, + "posted": j.get(fd, "") or "", + "description": desc[:500], + }) + return jobs + + +def fetch_sbb(args): + """SBB (company.sbb.ch) AEM job filter. The whole board is served as a flat JSON list + at .../jobfilter.results.json (~145 roles); the website filters client-side via each + job's numbered `attributes`: '20'=Berufsfeld/topic, '110'=region, '100'=city, + 'links.directlink'=the jobs.sbb.ch URL. We replicate the user's IT + Bern-region filter + so only commutable IT roles surface. Titles are German/generic (Application Engineer, + Network Security Engineer, OT Architekt) and won't match ENG_TITLE_FILTER or the keyword + scorer, so this company is given a _score_floor in COMPANIES to keep its pre-filtered + results visible. 
topic/region are configurable substrings.""" + url = args.get("url", ("https://company.sbb.ch/content/internet/corporate/de/" + "jobs-karriere/jobs/job-suche/jcr:content/parmain/" + "jobfilter.results.json")) + topic = args.get("topic", "IT / Telekommunikation") + region = args.get("region", "Bern Mittelland") + data = http_get_json(url) + arr = data if isinstance(data, list) else (data.get("results") or data.get("jobs") or []) + jobs = [] + for j in arr: + a = j.get("attributes", {}) or {} + blob = " ".join(str(x) for v in a.values() for x in (v if isinstance(v, list) else [v])) + if topic and topic not in blob: + continue + if region and region not in blob: + continue + region_v = " ".join(a.get("110", []) or []) + city_v = " ".join(a.get("100", []) or []) + field_v = " ".join(a.get("20", []) or []) + jobs.append({ + "id": str(j.get("id") or j.get("viewkey") or ""), + "title": j.get("title", ""), + "location": f"{city_v} {region_v} Schweiz".strip(), + "url": (j.get("links") or {}).get("directlink", ""), + "posted": j.get("start_date", "") or "", + "description": (field_v + " " + (j.get("text", "") or ""))[:400], + }) + return jobs + + +def fetch_bkw(args): + """BKW Group (jobs.bkw.com) PMS structured-data API. The whole-group board is ~600 roles + dominated by building-tech / electrical / civil-engineering trades; we keep only the + Berufsfeld categories relevant to the user (Informatik / Trading / Finanzen), which + surfaces IT/data plus the energy-trading roles (Quant Risk Modeller, Solution Architect + Energiehandel, Energy Derivatives/Market-Risk analysts). locations[].address gives + city/country. Pre-filtered + German/generic titles, so paired with a _score_floor in + COMPANIES. The category allowlist is configurable.""" + url = args.get("url", ("https://jobs.bkw.com/_api/v1/structureddata?" 
+ "configFromContentElement=82381&language=de-ch")) + allow = [c.lower() for c in args.get("categories", ["Informatik", "Trading", "Finanzen"])] + data = http_get_json(url) + arr = data if isinstance(data, list) else [] + if not arr and isinstance(data, dict): + for v in data.values(): + if isinstance(v, list) and v and isinstance(v[0], dict) and "title" in v[0]: + arr = v + break + jobs = [] + for j in arr: + if j.get("type") and j.get("type") != "jobs": + continue + cats = [c.get("title", "") for c in (j.get("relations", {}) or {}).get("Berufsfeld", []) or []] + if allow and not any(any(a in c.lower() for a in allow) for c in cats): + continue + locs = j.get("locations") or [] + addr = (locs[0].get("address") if locs and isinstance(locs[0], dict) else {}) or {} + loc = " ".join(x for x in [addr.get("city", ""), addr.get("country", "")] if x) or "Schweiz" + jobs.append({ + "id": str(j.get("id") or j.get("url") or ""), + "title": j.get("title", ""), + "location": loc, + "url": j.get("url", ""), + "posted": "", + "description": " ".join(cats + [j.get("subtitle", "") or ""])[:300], + }) + return jobs + + # Injected before page scripts run, to mask the most common headless-detection signals. # Required for Google; harmless for the other sites. STEALTH_JS = """ @@ -577,18 +781,12 @@ def fetch_playwright(args): ctx.add_init_script(STEALTH_JS) page = ctx.new_page() jobs = [] - try: - page.goto(args["url"], timeout=45000, wait_until="domcontentloaded") - # Optional cookie banner acceptance - for sel in args.get("cookie_accept", []) or []: - try: - btn = page.locator(sel).first - if btn.is_visible(timeout=2000): - btn.click() - page.wait_for_timeout(500) - except Exception: - pass - # Wait for job content to render + seen_ids = set() + + def scrape_current(): + """Extract cards from the currently-loaded page; append new ones to `jobs`. 
+ Returns the count of newly-added (not-yet-seen) cards so a pagination loop can + stop once a page contributes nothing new.""" wait_for = args.get("wait_for") if wait_for: try: @@ -605,6 +803,7 @@ def fetch_playwright(args): cards = page.locator(args["card"]) n = min(cards.count(), args.get("max_cards", 150)) + added = 0 for i in range(n): card = cards.nth(i) try: @@ -638,6 +837,11 @@ def fetch_playwright(args): if not title: continue + jid = href or f"{page.url}#{i}" + if jid in seen_ids: + continue + seen_ids.add(jid) + added += 1 description = "" if args.get("use_inner_text_as_blob"): # Use the full card text as both location source and description @@ -646,26 +850,47 @@ def fetch_playwright(args): if not location: location = full[:300] jobs.append({ - "id": href or f"{args['url']}#{i}", + "id": jid, "title": title, "location": location, - "url": href or args["url"], + "url": href or page.url, "posted": "", "description": description, }) except Exception: continue + return added + + try: + page.goto(args["url"], timeout=45000, wait_until="domcontentloaded") + # Optional cookie banner acceptance (once, on the first page) + for sel in args.get("cookie_accept", []) or []: + try: + btn = page.locator(sel).first + if btn.is_visible(timeout=2000): + btn.click() + page.wait_for_timeout(500) + except Exception: + pass + # Optional query-param pagination (e.g. Drupal "?page=N", 0-indexed). The base URL is + # page 0 (already loaded); fetch successive pages until one adds no new cards. + page_param = args.get("page_param") + if page_param: + base = args["url"] + joiner = "&" if "?" in base else "?" 
+ for p in range(args.get("max_pages", 8)): + if p > 0: + page.goto(f"{base}{joiner}{page_param}={p}", timeout=45000, + wait_until="domcontentloaded") + added = scrape_current() + if p > 0 and added == 0: + break + else: + scrape_current() finally: ctx.close() - # Deduplicate within a single company by id - seen, deduped = set(), [] - for j in jobs: - if j["id"] in seen: - continue - seen.add(j["id"]) - deduped.append(j) - return deduped + return jobs ADAPTERS = { @@ -678,6 +903,10 @@ ADAPTERS = { "rss": fetch_rss, "getro": fetch_getro, "onlyfy": fetch_onlyfy, + "lever": fetch_lever, + "json": fetch_json, + "sbb": fetch_sbb, + "bkw": fetch_bkw, "playwright": fetch_playwright, } @@ -690,9 +919,12 @@ def location_matches(loc_text): has_remote = any(k in low for k in REMOTE_KEYWORDS) is_us_only = any(p in low for p in US_ONLY_PATTERNS) and not in_ch has_eu_hint = any(k in low for k in EU_HINT_KEYWORDS) - # Count as remote-eligible only if it isn't a US-only remote listing - # and it has at least one EU/global hint - is_remote = has_remote and not is_us_only and has_eu_hint + # Pan-European postings (location literally "Europe"/"EMEA", e.g. QuantCo's Lever board) + # are reachable for a DACH-based candidate even without an explicit "remote" keyword, so + # treat them as eligible too. City-specific EU roles (e.g. "Berlin or Munich") stay out. + is_eu_wide = any(k in low for k in ("europe", "emea")) and not is_us_only + # Count as remote/EU-eligible only if it isn't a US-only listing and has an EU/global hint + is_remote = (has_remote or is_eu_wide) and not is_us_only and has_eu_hint return in_ch, is_remote @@ -747,14 +979,65 @@ def save_seen(seen): STATE_FILE.write_text(json.dumps(seen, indent=2, ensure_ascii=False), encoding="utf-8") -def write_report(path, results, errors, new_only, include_weak): +def _parse_posted(s): + """Best-effort parse of an adapter's `posted` field into a date, across the mix of + formats the boards use (ISO 8601 incl. 
trailing Z, YYYY-MM-DD, DD.MM.YYYY). Returns None + for unparseable values (e.g. Workday's relative "Posted 5 Days Ago", or empty).""" + if not s or not isinstance(s, str): + return None + s = s.strip() + try: + return datetime.fromisoformat(s.replace("Z", "+00:00")).date() + except ValueError: + pass + for fmt in ("%Y-%m-%d", "%d.%m.%Y", "%Y/%m/%d", "%d/%m/%Y"): + try: + return datetime.strptime(s[:10], fmt).date() + except ValueError: + pass + m = re.search(r"\d{4}-\d{2}-\d{2}", s) + if m: + try: + return datetime.strptime(m.group(0), "%Y-%m-%d").date() + except ValueError: + pass + return None + + +def write_stats_table(stats, total_secs): + """Render the per-company scan stats as a markdown table (+ a totals row).""" + out = ["## Scan stats\n", + "| Company | Scraped | CH/Remote | Match ≥2 | Newest posting | Time (s) |", + "|---|--:|--:|--:|:--|--:|"] + t_scraped = t_elig = t_match = 0 + newest_all = None + for s in stats: + name = s["company"] + (" ⚠️" if s.get("error") else "") + newest = s["newest"].isoformat() if s["newest"] else "—" + out.append(f"| {name} | {s['scraped']:,} | {s['eligible']:,} | " + f"{s['match']:,} | {newest} | {s['secs']:.1f} |") + t_scraped += s["scraped"]; t_elig += s["eligible"]; t_match += s["match"] + if s["newest"] and (newest_all is None or s["newest"] > newest_all): + newest_all = s["newest"] + out.append(f"| **Total ({len(stats)})** | **{t_scraped:,}** | **{t_elig:,}** | " + f"**{t_match:,}** | **{newest_all.isoformat() if newest_all else '—'}** | " + f"**{total_secs:.1f}** |") + out.append("") + return out + + +def write_report(path, results, errors, new_only, include_weak, stats=None, total_secs=0.0): today = datetime.now().strftime("%Y-%m-%d") n_new = sum(1 for r in results if r["is_new"]) + n_match = sum(1 for r in results if r["score"] >= 2) lines = [ f"# Job scout report {today}{' (new only)' if new_only else ''}\n", f"Automated coverage: **{len(COMPANIES)}** companies. 
Manual checks: **{len(MANUAL_CHECK)}**.", - f"Total matches from automated companies: **{len(results)}** ({n_new} new since last run)\n", + f"Eligible (CH/remote): **{len(results)}** · interest matches (score ≥ 2): " + f"**{n_match}** · **{n_new}** new since last run\n", ] + if stats: + lines += write_stats_table(stats, total_secs) if errors: lines.append("## Errors\n") for company, err in errors: @@ -814,29 +1097,43 @@ def main(): seen = load_seen() today = datetime.now(timezone.utc).strftime("%Y-%m-%d") - all_results, errors = [], [] + all_results, errors, stats = [], [], [] + run_start = time.perf_counter() for cid, display, adapter, args in COMPANIES: if only and cid != only: continue print(f"Fetching {display}...", file=sys.stderr) + t0 = time.perf_counter() try: jobs = ADAPTERS[adapter](args) except (urllib.error.URLError, urllib.error.HTTPError, ValueError) as e: errors.append((display, repr(e))) + stats.append({"company": display, "scraped": 0, "eligible": 0, + "match": 0, "newest": None, "secs": time.perf_counter() - t0, + "error": True}) continue except Exception as e: errors.append((display, f"unexpected: {e!r}")) + stats.append({"company": display, "scraped": 0, "eligible": 0, + "match": 0, "newest": None, "secs": time.perf_counter() - t0, + "error": True}) continue + scraped = len(jobs) # Optional per-company title prefilter for high-volume boards title_filter = args.get("_title_filter") if title_filter: jobs = [j for j in jobs if any(_kw_in(k, (j.get("title") or "").lower()) for k in title_filter)] + # Newest posting on the board (board freshness), across parseable dates. 
+ dates = [d for j in jobs if (d := _parse_posted(j.get("posted")))] + newest = max(dates) if dates else None + company_seen = seen.setdefault(cid, {}) title_seen = set() + eligible = match = 0 for j in jobs: jid = str(j.get("id") or j.get("url")) in_ch, is_remote = location_matches(j.get("location", "")) @@ -848,8 +1145,17 @@ def main(): if norm_title in title_seen: continue title_seen.add(norm_title) + eligible += 1 is_new = jid not in company_seen score, pos, neg = score_job(j, title_only=bool(title_filter)) + # Pre-filtered boards (e.g. SBB, already narrowed to IT+Bern by the adapter) carry + # German/generic titles the profile scorer can't read; a _score_floor keeps their + # already-relevant results out of the hidden weak bucket. + floor = args.get("_score_floor") + if floor is not None and score < floor: + score = floor + if score >= 2: + match += 1 all_results.append({ "company": display, "company_id": cid, "title": j["title"], "location": j["location"], @@ -859,8 +1165,13 @@ def main(): }) company_seen[jid] = {"title": j["title"], "first_seen": today} + stats.append({"company": display, "scraped": scraped, "eligible": eligible, + "match": match, "newest": newest, + "secs": time.perf_counter() - t0, "error": False}) + save_seen(seen) _close_browser() + total_secs = time.perf_counter() - run_start if new_only: all_results = [r for r in all_results if r["is_new"]] @@ -869,43 +1180,66 @@ def main(): REPORTS_DIR.mkdir(parents=True, exist_ok=True) report_path = REPORTS_DIR / f"{today}.md" - write_report(report_path, all_results, errors, new_only, include_weak) + write_report(report_path, all_results, errors, new_only, include_weak, + stats=stats, total_secs=total_secs) n_new = sum(1 for r in all_results if r["is_new"]) print(f"\nReport written: {report_path}", file=sys.stderr) - print(f"Total matches: {len(all_results)} ({n_new} new)", file=sys.stderr) + print(f"Total matches: {len(all_results)} ({n_new} new) | " + f"scanned {len(stats)} companies in 
{total_secs:.1f}s", file=sys.stderr) if errors: print(f"Errors: {len(errors)} - see report", file=sys.stderr) -# === Adapter coverage (refreshed 2026-05-24) ================================== -# 22 companies automated across 10 adapter types; 0 remain in MANUAL_CHECK. +# === Adapter coverage (refreshed 2026-06-01) ================================== +# 25 companies automated across 13 adapter types; MANUAL_CHECK is empty. # # Automated (COMPANIES above): # workday nvidia, novartis # ashby kraken, openai, confluent -# greenhouse anthropic, gitlab, clickhouse, grafana +# greenhouse anthropic, gitlab, grafana # pcsx microsoft (Eightfold position-search endpoint) -# wp_ajax sygnum (WordPress admin-ajax JSON) -# smartrecruiters metgroup, vitol, ldc +# smartrecruiters metgroup, ldc # rss bis (vacancies.rss — RSS 1.0/RDF) # getro coinbase_ventures (web3 portfolio network, collection 1625) # onlyfy bitcoin_suisse (onlyfy.jobs ajax_list HTML fragment) -# playwright google, apple, meta, roche, cisco (headless browser, 3-15s each) +# lever palantir, quantco (api.lever.co; QuantCo slug is "quantco-") +# json swissgrid (Magnolia /.rest/cloud/component-data) +# sbb sbb (company.sbb.ch AEM jobfilter.results.json) +# bkw bkw (jobs.bkw.com PMS structureddata API) +# playwright google, apple, meta, roche, cisco, ruag (headless browser, 3-15s each) # -# Since the 2026-05-21 probe, six originally-manual sites moved to automated: -# Google/Apple/Meta/Roche/Cisco via the playwright adapter, Microsoft via pcsx, and -# Sygnum via its WordPress AJAX endpoint. BIS was added via the new rss adapter, the -# Coinbase Ventures web3 portfolio network via the new getro adapter, and Bitcoin Suisse -# via the new onlyfy adapter (its bitcoinsuisse.com page is a JS SPA, but the underlying -# onlyfy.jobs ATS serves a plain HTML list with locations). IBM Research and Sonova were -# dropped from the target list (no API / low fit; Sonova is MedTech, off-thesis). 
+# 2026-06-01 list review (verified live): +# - Palantir (lever): 221 postings, US/London-heavy so Swiss/Schwyz roles are rare but +# self-surface (FDSE/Deployment-Strategist titles map to his FDE drafts). +# - Swissgrid (json): Magnolia CMS endpoint; placeOfWork is bare city, so loc_suffix tags +# it Switzerland for the CH filter. ~13 roles incl. Data Scientist / Applied-ML. +# - RUAG (playwright + page_param): Drupal portal, 20 jobs/page, paginated ?page=N. Page 0 +# is apprenticeship-heavy; eng roles (DevOps/Data/Software) are on later pages, so we +# page through (max_pages). ENG_TITLE_FILTER cuts the Lehrstelle bulk. ⚠️ DE-citizen +# limits on RUAG classified roles — verify per-role. +# - SBB (sbb): correct host is company.sbb.ch (not company-jobs.sbb.ch). Flat JSON list; +# fetch_sbb replicates the user's IT + Bern-region filter. German/generic titles, so a +# _score_floor keeps the pre-filtered results visible. ⚠️ DE-citizen limits possible. +# - BKW (bkw): real host is jobs.bkw.com (PMS structureddata API), ~600 group-wide roles; +# fetch_bkw keeps Berufsfeld categories Informatik/Trading/Finanzen (IT/data + energy +# trading: Quant Risk, Solution Architect Energiehandel, ...). _score_floor as above. +# - QuantCo (lever, slug "quantco-"): ~16 roles, most tagged "Europe" (hybrid; Zürich is +# QuantCo's continental hub), surfaced via the EU-wide rule in location_matches. Strong: +# AI Engineer; medium: Cloud Engineer, AI Applied Scientist, Data Scientist, Quant +# Researcher, Software Engineer. Interns/frontend suppressed by NEGATIVE_KEYWORDS. +# The Bern/Thun tier intentionally relaxes the comp bar (see user_comp_bar memory). # -# Note: the Coinbase Ventures board (getro) covers PORTFOLIO companies, not Coinbase -# itself. Coinbase-the-employer was dropped (mass layoffs / hiring freeze as of 2026-05; -# re-add coinbase.com/careers if they reopen). AMINA Bank was dropped (poor Glassdoor). +# MANUAL_CHECK is empty — every target company is automated. 
Dropped 2026-06-01: BFH +# (academic FH pay below the relaxed Bern/Thun floor, research-leaning, and scraping it +# returns HTTP 403 anyway) and Dialectic (~50-person crypto VC, 0 open roles; crypto already +# covered by Kraken / Bitcoin Suisse / Coinbase Ventures). # -# MANUAL_CHECK is now empty — every current target company is automated. +# Earlier history: Google/Apple/Meta/Roche/Cisco automated via playwright; Microsoft via +# pcsx; BIS via rss; Coinbase Ventures via getro; Bitcoin Suisse via onlyfy. Dropped: +# ClickHouse, Vitol, Sygnum (Glassdoor/comp red flags), IBM Research and Sonova (low fit), +# Coinbase-the-employer (hiring freeze), AMINA (poor Glassdoor), Canonical (pay+culture). +# The Coinbase Ventures board (getro) covers PORTFOLIO companies, not Coinbase itself. # ==============================================================================