feat(job_scout): add data-infra adapters + Oracle manual-check
Add Databricks, Snowflake, Datadog, Elastic, dbt Labs (greenhouse/ashby, title-filtered). Snowflake slug surfaced the Zürich Observe SWE role. HashiCorp dropped (IBM acquisition killed public boards); Oracle moved to MANUAL_CHECK (ORC SPA resists scraping; REST endpoint documented in code).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
+38
-6
@@ -138,9 +138,19 @@ COMPANIES = [
|
|||||||
}),
|
}),
|
||||||
# --- Data-infra US tech (his exact stack; mostly all-remote — title-filtered to eng/data) ---
|
# --- Data-infra US tech (his exact stack; mostly all-remote — title-filtered to eng/data) ---
|
||||||
# Dropped: ClickHouse (Glassdoor 3.3, 36% recommend, toxic-culture flag — 2026-05).
|
# Dropped: ClickHouse (Glassdoor 3.3, 36% recommend, toxic-culture flag — 2026-05).
|
||||||
|
# Dropped: HashiCorp — acquired by IBM (deal closed 2025); greenhouse/ashby/lever boards all 404,
|
||||||
|
# roles folded into IBM's careers (no clean public ATS API). 2026-06-06.
|
||||||
("confluent", "Confluent", "ashby", {"slug": "confluent", "_title_filter": ENG_TITLE_FILTER}),
|
("confluent", "Confluent", "ashby", {"slug": "confluent", "_title_filter": ENG_TITLE_FILTER}),
|
||||||
("gitlab", "GitLab", "greenhouse", {"board": "gitlab", "_title_filter": ENG_TITLE_FILTER}),
|
("gitlab", "GitLab", "greenhouse", {"board": "gitlab", "_title_filter": ENG_TITLE_FILTER}),
|
||||||
("grafana", "Grafana Labs","greenhouse",{"board": "grafanalabs", "_title_filter": ENG_TITLE_FILTER}),
|
("grafana", "Grafana Labs","greenhouse",{"board": "grafanalabs", "_title_filter": ENG_TITLE_FILTER}),
|
||||||
|
# Added 2026-06-06 (Tier A/B data-infra). Databricks/Snowflake/Datadog have Zürich offices
|
||||||
|
# (Swiss-scale comp, clears bar); Elastic/dbt Labs are remote-EU (verify CH-equiv comp —
|
||||||
|
# may be geo-banded below 180k, like Grafana). All title-filtered (boards are 160-760 roles).
|
||||||
|
("databricks","Databricks","greenhouse", {"board": "databricks", "_title_filter": ENG_TITLE_FILTER}), # Zürich SWE + remote-EU
|
||||||
|
("snowflake", "Snowflake", "ashby", {"slug": "snowflake", "_title_filter": ENG_TITLE_FILTER}), # Zürich "Observe" observability SWE roles
|
||||||
|
("datadog", "Datadog", "greenhouse", {"board": "datadog", "_title_filter": ENG_TITLE_FILTER}), # Zürich branch + remote-EU
|
||||||
|
("elastic", "Elastic", "greenhouse", {"board": "elastic", "_title_filter": ENG_TITLE_FILTER}), # remote-first; ELK = his stack
|
||||||
|
("dbtlabs", "dbt Labs", "greenhouse", {"board": "dbtlabsinc", "_title_filter": ENG_TITLE_FILTER}), # remote-EU; analytics-eng
|
||||||
# --- Energy / commodity trading (SmartRecruiters; title-filtered to tech roles) ---
|
# --- Energy / commodity trading (SmartRecruiters; title-filtered to tech roles) ---
|
||||||
# Dropped: Vitol (Glassdoor 3.5, 55% recommend, grueling-hours/toxic flag — 2026-05).
|
# Dropped: Vitol (Glassdoor 3.5, 55% recommend, grueling-hours/toxic flag — 2026-05).
|
||||||
# Dropped: Sygnum (Glassdoor 3.4, 51% recommend, comp 2.3/5 — below 180k bar — 2026-05).
|
# Dropped: Sygnum (Glassdoor 3.4, 51% recommend, comp 2.3/5 — below 180k bar — 2026-05).
|
||||||
@@ -301,7 +311,19 @@ COMPANIES = [
|
|||||||
# every target company is automated. (Dropped 2026-06-01: BFH — academic FH pay below even the
|
# every target company is automated. (Dropped 2026-06-01: BFH — academic FH pay below even the
|
||||||
# relaxed Bern/Thun floor, research-leaning, 403s anyway; Dialectic — ~50-person crypto VC,
|
# relaxed Bern/Thun floor, research-leaning, 403s anyway; Dialectic — ~50-person crypto VC,
|
||||||
# 0 open roles, crypto angle already covered by Kraken/Bitcoin Suisse/Coinbase Ventures.)
|
# 0 open roles, crypto angle already covered by Kraken/Bitcoin Suisse/Coinbase Ventures.)
|
||||||
MANUAL_CHECK = []
|
MANUAL_CHECK = [
|
||||||
|
# Oracle (Tier C, requested 2026-06-06). Oracle Recruiting Cloud (ORC) resists clean
|
||||||
|
# scraping: careers.oracle.com renders job tiles that are NOT anchors, so the playwright
|
||||||
|
# selector pattern fails. The ORC REST endpoint works and returns data, but reliable CH
|
||||||
|
# filtering needs the Switzerland geography node id, and the facet-expand call that would return it fails with HTTP 400.
|
||||||
|
# Wireable later as a `json` adapter once that geographyId is resolved. For now, manual:
|
||||||
|
# https://eeho.fa.us2.oraclecloud.com/hcmRestApi/resources/latest/recruitingCEJobRequisitions
|
||||||
|
# ?onlyData=true&expand=requisitionList.workLocation
|
||||||
|
# &finder=findReqs;siteNumber=CX_45001,limit=200,sortBy=POSTING_DATES_DESC
|
||||||
|
# (then client-filter requisitionList[].PrimaryLocation for Switzerland/Zürich)
|
||||||
|
("Oracle", "ORC SPA resists scraping; REST endpoint known but needs CH geographyId (see code comment). Check Switzerland tech roles manually.",
|
||||||
|
"https://careers.oracle.com/en/sites/jobsearch/jobs?location=Switzerland"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def http_get_json(url, headers=None, data=None, method="GET"):
|
def http_get_json(url, headers=None, data=None, method="GET"):
|
||||||
@@ -1242,13 +1264,13 @@ def main():
|
|||||||
print(f"Errors: {len(errors)} - see report", file=sys.stderr)
|
print(f"Errors: {len(errors)} - see report", file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
# === Adapter coverage (refreshed 2026-06-01) ==================================
|
# === Adapter coverage (refreshed 2026-06-06) ==================================
|
||||||
# 25 companies automated across 13 adapter types; MANUAL_CHECK is empty.
|
# 31 companies automated across 13 adapter types; 1 in MANUAL_CHECK (Oracle).
|
||||||
#
|
#
|
||||||
# Automated (COMPANIES above):
|
# Automated (COMPANIES above):
|
||||||
# workday nvidia, novartis
|
# workday nvidia, novartis
|
||||||
# ashby kraken, openai, confluent
|
# ashby kraken, openai, confluent, snowflake
|
||||||
# greenhouse anthropic, gitlab, grafana
|
# greenhouse anthropic, gitlab, grafana, databricks, datadog, elastic, dbtlabs
|
||||||
# pcsx microsoft (Eightfold position-search endpoint)
|
# pcsx microsoft (Eightfold position-search endpoint)
|
||||||
# smartrecruiters metgroup, ldc
|
# smartrecruiters metgroup, ldc
|
||||||
# rss bis (vacancies.rss — RSS 1.0/RDF)
|
# rss bis (vacancies.rss — RSS 1.0/RDF)
|
||||||
@@ -1281,7 +1303,17 @@ def main():
|
|||||||
# Researcher, Software Engineer. Interns/frontend suppressed by NEGATIVE_KEYWORDS.
|
# Researcher, Software Engineer. Interns/frontend suppressed by NEGATIVE_KEYWORDS.
|
||||||
# The Bern/Thun tier intentionally relaxes the comp bar (see user_comp_bar memory).
|
# The Bern/Thun tier intentionally relaxes the comp bar (see user_comp_bar memory).
|
||||||
#
|
#
|
||||||
# MANUAL_CHECK is empty — every target company is automated. Dropped 2026-06-01: BFH
|
# 2026-06-06 additions (FAANG-adjacent data-infra, Tier A/B from the Zürich/Bern review):
|
||||||
|
# - greenhouse: Databricks (board "databricks", 762 roles, Zürich SWE), Datadog ("datadog",
|
||||||
|
# Zürich branch + remote-EU), Elastic ("elastic", remote-first ELK), dbt Labs ("dbtlabsinc",
|
||||||
|
# remote-EU). ashby: Snowflake (slug "snowflake", 392 roles incl. Zürich "Observe by
|
||||||
|
# Snowflake" observability SWE). All title-filtered (ENG_TITLE_FILTER) — large boards.
|
||||||
|
# - ⚠️ Comp split: Databricks/Snowflake/Datadog pay Swiss-scale (Zürich offices, clears bar);
|
||||||
|
# Elastic/dbt are remote-EU and may be geo-banded below 180k CHF (like Grafana — verify).
|
||||||
|
# - HashiCorp: NOT added — IBM acquisition (2025) killed its public boards; on IBM careers now.
|
||||||
|
# - Oracle: in MANUAL_CHECK — ORC SPA resists scraping; REST endpoint documented there.
|
||||||
|
#
|
||||||
|
# MANUAL_CHECK: Oracle (ORC needs CH geographyId resolved). Dropped 2026-06-01: BFH
|
||||||
# (academic FH pay below the relaxed Bern/Thun floor, research-leaning, 403s anyway) and
|
# (academic FH pay below the relaxed Bern/Thun floor, research-leaning, 403s anyway) and
|
||||||
# Dialectic (~50-person crypto VC, 0 open roles; crypto already covered by Kraken / Bitcoin
|
# Dialectic (~50-person crypto VC, 0 open roles; crypto already covered by Kraken / Bitcoin
|
||||||
# Suisse / Coinbase Ventures).
|
# Suisse / Coinbase Ventures).
|
||||||
|
|||||||
Reference in New Issue
Block a user