From 8a5955c0a82934d105496d521b9cdc8cb06c4e80 Mon Sep 17 00:00:00 2001 From: Dennis Thiessen Date: Mon, 1 Jun 2026 15:30:49 +0200 Subject: [PATCH] feat(job_scout): decision log + report annotations Track per-job decisions across runs so we don't re-evaluate roles. - state/decisions.json (keyed by URL: company/title/decision/note/date), now git-tracked while seen_jobs.json stays local - --decide "" [note] records a decision; --hide-decided gives an undecided-only view; report tags each role inline with its decision - usage docstring updated - seed 18 decisions (9 shortlist, 7 skip, 1 paused, 1 maybe); flags Google Staff FDE GenAI as the paused prior session Co-Authored-By: Claude Opus 4.8 --- .gitignore | 4 +- job_scout/scout.py | 71 +++++++++++++++--- job_scout/state/decisions.json | 128 +++++++++++++++++++++++++++++++++ 3 files changed, 192 insertions(+), 11 deletions(-) create mode 100644 job_scout/state/decisions.json diff --git a/.gitignore b/.gitignore index 876fe13..96233a2 100644 --- a/.gitignore +++ b/.gitignore @@ -23,7 +23,9 @@ __pycache__/ # job_scout runtime artifacts (keep scout.py + requirements.txt only) job_scout/.venv/ job_scout/reports/ -job_scout/state/ +job_scout/state/* +# ...but track the decision log (job application history), not the churny seen-state +!job_scout/state/decisions.json # One-off job-board data pulls (debug artifacts) *_jd.json diff --git a/job_scout/scout.py b/job_scout/scout.py index 8cf99cb..e493edc 100644 --- a/job_scout/scout.py +++ b/job_scout/scout.py @@ -10,9 +10,12 @@ Usage: py scout.py --only=nvidia # Pull a single company by id py scout.py --new-only # Report only jobs not seen before py scout.py --include-weak # Include weak/noise bucket (default hidden) + py scout.py --hide-decided # Drop roles already in the decision log (undecided-only view) + py scout.py --decide "" [note...] # Record a decision and exit + # status is free-text: shortlist | skip | applied | paused | ... 
-State : state/seen_jobs.json -Output: reports/YYYY-MM-DD.md +State : state/seen_jobs.json (job IDs seen) · state/decisions.json (per-URL decisions) +Output: reports/YYYY-MM-DD.md (scan-stats table + scored roles, decisions tagged inline) To add a company: append to COMPANIES with one of the existing adapter types. A few sites resist scraping even headless and stay in MANUAL_CHECK (surfaced as a report checklist). @@ -32,6 +35,7 @@ from pathlib import Path ROOT = Path(__file__).parent STATE_FILE = ROOT / "state" / "seen_jobs.json" +DECISIONS_FILE = ROOT / "state" / "decisions.json" REPORTS_DIR = ROOT / "reports" USER_AGENT = "Mozilla/5.0 (compatible; job-scout/0.1)" @@ -979,6 +983,20 @@ def save_seen(seen): STATE_FILE.write_text(json.dumps(seen, indent=2, ensure_ascii=False), encoding="utf-8") +def load_decisions(): + """Decision log keyed by job URL: {url: {company, title, decision, note, date}}. + Decisions persist across runs so we don't re-evaluate roles we've already judged + (shortlist / skip / applied / paused / rejected — free-text, not enforced).""" + if DECISIONS_FILE.exists(): + return json.loads(DECISIONS_FILE.read_text(encoding="utf-8")) + return {} + + +def save_decisions(decisions): + DECISIONS_FILE.parent.mkdir(parents=True, exist_ok=True) + DECISIONS_FILE.write_text(json.dumps(decisions, indent=2, ensure_ascii=False), encoding="utf-8") + + def _parse_posted(s): """Best-effort parse of an adapter's `posted` field into a date, across the mix of formats the boards use (ISO 8601 incl. trailing Z, YYYY-MM-DD, DD.MM.YYYY). 
Returns None @@ -1026,7 +1044,9 @@ def write_stats_table(stats, total_secs): return out -def write_report(path, results, errors, new_only, include_weak, stats=None, total_secs=0.0): +def write_report(path, results, errors, new_only, include_weak, stats=None, total_secs=0.0, + decisions=None, hide_decided=False): + decisions = decisions or {} today = datetime.now().strftime("%Y-%m-%d") n_new = sum(1 for r in results if r["is_new"]) n_match = sum(1 for r in results if r["score"] >= 2) @@ -1050,6 +1070,11 @@ def write_report(path, results, errors, new_only, include_weak, stats=None, tota if not include_weak and weak: lines.append(f"\n_Hiding {len(weak)} weak/noise roles (score < 2). Use --include-weak to show._") + n_decided = sum(1 for r in results if r["url"] in decisions) + if n_decided: + shown = "hidden" if hide_decided else "tagged inline" + lines.append(f"_{n_decided} role(s) already in the decision log ({shown}; " + f"see state/decisions.json)._") buckets = [("Strong fit (score >= 6)", strong), ("Medium fit (score 2-5)", medium)] @@ -1057,17 +1082,23 @@ def write_report(path, results, errors, new_only, include_weak, stats=None, tota buckets.append(("Weak / noise (score < 2)", weak)) for bucket_name, bucket in buckets: - if not bucket: + shown = [r for r in bucket if not (hide_decided and r["url"] in decisions)] + if not shown: continue - lines.append(f"\n## {bucket_name} - {len(bucket)} role(s)\n") - for r in bucket: + lines.append(f"\n## {bucket_name} - {len(shown)} role(s)\n") + for r in shown: + d = decisions.get(r["url"]) new_tag = " [NEW]" if r["is_new"] else "" + decided_tag = f" — 🗂 {d['decision'].upper()}" if d else "" loc_tag = "CH" if r["in_ch"] else ("Remote" if r["remote"] else "?") - lines.append(f"### [{r['score']}] {r['company']} - {r['title']}{new_tag}") + lines.append(f"### [{r['score']}] {r['company']} - {r['title']}{new_tag}{decided_tag}") lines.append(f"- Location: {r['location']} *({loc_tag})*") if r.get("posted"): lines.append(f"- 
Posted: {r['posted']}") lines.append(f"- URL: {r['url']}") + if d: + note = f" — {d['note']}" if d.get("note") else "" + lines.append(f"- 🗂 Decision: **{d['decision']}**{note} ({d.get('date','')})") if r["pos"]: lines.append(f"- Positive: {', '.join(r['pos'])}") if r["neg"]: @@ -1086,17 +1117,36 @@ def write_report(path, results, errors, new_only, include_weak, stats=None, tota def main(): - only, new_only, include_weak = None, False, False + today = datetime.now(timezone.utc).strftime("%Y-%m-%d") + + # Record a decision and exit: --decide "" [note words...] + if "--decide" in sys.argv: + rest = sys.argv[sys.argv.index("--decide") + 1:] + if len(rest) < 2: + print('Usage: --decide "" [note...]', file=sys.stderr) + return + url, status, note = rest[0], rest[1], " ".join(rest[2:]) + decisions = load_decisions() + prev = decisions.get(url, {}) + decisions[url] = {"company": prev.get("company", ""), "title": prev.get("title", ""), + "decision": status, "note": note, "date": today} + save_decisions(decisions) + print(f"Recorded: {status} — {url}", file=sys.stderr) + return + + only, new_only, include_weak, hide_decided = None, False, False, False for arg in sys.argv[1:]: if arg == "--new-only": new_only = True elif arg == "--include-weak": include_weak = True + elif arg == "--hide-decided": + hide_decided = True elif arg.startswith("--only="): only = arg.split("=", 1)[1] seen = load_seen() - today = datetime.now(timezone.utc).strftime("%Y-%m-%d") + decisions = load_decisions() all_results, errors, stats = [], [], [] run_start = time.perf_counter() @@ -1181,7 +1231,8 @@ def main(): REPORTS_DIR.mkdir(parents=True, exist_ok=True) report_path = REPORTS_DIR / f"{today}.md" write_report(report_path, all_results, errors, new_only, include_weak, - stats=stats, total_secs=total_secs) + stats=stats, total_secs=total_secs, + decisions=decisions, hide_decided=hide_decided) n_new = sum(1 for r in all_results if r["is_new"]) print(f"\nReport written: {report_path}", 
file=sys.stderr) diff --git a/job_scout/state/decisions.json b/job_scout/state/decisions.json new file mode 100644 index 0000000..4715a48 --- /dev/null +++ b/job_scout/state/decisions.json @@ -0,0 +1,128 @@ +{ + "https://job-boards.greenhouse.io/anthropic/jobs/5204086008": { + "company": "Anthropic", + "title": "Solutions Architect, Applied AI", + "decision": "shortlist", + "note": "Top fit: applied-AI SA in Zurich, on-thesis (platform/SA, not model-building). Top comp. Candidate for tailored package.", + "date": "2026-06-01" + }, + "https://www.google.com/about/careers/applications/jobs/results/116958340671513286-staff-forward-deployed-engineer-genai-google-cloud?location=Switzerland": { + "company": "Google", + "title": "Staff Forward Deployed Engineer, GenAI, Google Cloud", + "decision": "paused", + "note": "Overlaps the PAUSED 'Google FDE GenAI (Zurich)' session (GenAI evidence gap; was redirecting to data-eng/MLOps). Reassess via the Cloud/FDE platform angle rather than GenAI. Reuse existing FDE drafts.", + "date": "2026-06-01" + }, + "https://careers.cisco.com/global/en/job/2012401/Senior-Software-Engineer-Agent-Datapath-eBPF-Isovalent": { + "company": "Cisco", + "title": "Senior SW Engineer, Agent & Datapath (eBPF) - Isovalent", + "decision": "shortlist", + "note": "Cilium/eBPF cloud-native platform eng, CH-based. Strong on-thesis infra role.", + "date": "2026-06-01" + }, + "https://jobs.lever.co/quantco-/a7c4f142-b1ad-449c-a18e-bdbda41a3f86": { + "company": "QuantCo", + "title": "Cloud Engineer", + "decision": "shortlist", + "note": "k8s/cloud at high-comp boutique, Zurich hub. On platform thesis.", + "date": "2026-06-01" + }, + "https://jobs.lever.co/quantco-/67757fe8-3582-4bdd-aa7d-98fa490bde84": { + "company": "QuantCo", + "title": "AI Engineer", + "decision": "maybe", + "note": "High score but more ML-leaning than his platform thesis; prefer the Cloud Engineer role. 
Revisit only if framed as AI-platform/infra.", + "date": "2026-06-01" + }, + "https://nvidia.wd5.myworkdayjobs.com/job/UK-Remote/Senior-Solutions-Architect--HPC-and-AI_JR2007074": { + "company": "NVIDIA", + "title": "Senior Solutions Architect, HPC and AI", + "decision": "shortlist", + "note": "SA track (preferred over SWE at NVIDIA per profile). Remote UK/DE.", + "date": "2026-06-01" + }, + "https://jobs.ashbyhq.com/kraken.com/5d344194-bbf0-48ce-bafd-f27c51b185b8": { + "company": "Kraken", + "title": "Solutions Architect - Payward Services", + "decision": "shortlist", + "note": "Architect + crypto + trading; remote incl. CH. Kraken has a prior package (AI Infrastructure session, 84.5/100) to draw on. Crypto background differentiating.", + "date": "2026-06-01" + }, + "https://jobs.ashbyhq.com/kraken.com/4aacc5a1-a848-4b68-90b8-958b6e4a2d0b": { + "company": "Kraken", + "title": "AI Agents Solutions Architect - HR", + "decision": "shortlist", + "note": "Agentic + Python + crypto SA. HR-domain variant (also Finance/Compliance variants exist).", + "date": "2026-06-01" + }, + "https://bitcoin-suisse.onlyfy.jobs/job/fkt5dq31d58r2hdoa1pnyp6gykgpuu9": { + "company": "Bitcoin Suisse", + "title": "Solution Architect Trading", + "decision": "shortlist", + "note": "Crypto-native, architect, trading; Zug. Crypto background differentiating.", + "date": "2026-06-01" + }, + "https://job.bkw.com/offene-stellen/solution-architect-energiehandel-alle/99b88701-e999-4ff3-9585-b19963ed8237": { + "company": "BKW (Bern)", + "title": "Solution Architect Energiehandel", + "decision": "shortlist", + "note": "Bern WLB tier (lower comp OK). Energy-trading architect, local. The flagged Energiehandel role.", + "date": "2026-06-01" + }, + "https://job.bkw.com/offene-stellen/senior-quant-risk-modeller-all/1bb5e5b5-35d8-410f-b3d1-c156e6f9114c": { + "company": "BKW (Bern)", + "title": "Senior Quant Risk Modeller", + "decision": "shortlist", + "note": "Bern WLB tier; trading/quant, local. 
Fits energy-trading interest.", + "date": "2026-06-01" + }, + "https://www.google.com/about/careers/applications/jobs/results/100214884188201670-senior-research-data-scientist-merchant-shopping-data-science?location=Switzerland": { + "company": "Google", + "title": "Senior Research Data Scientist, Merchant Shopping", + "decision": "skip", + "note": "Research / model-building, off-thesis (he targets data-eng/platform/infra, not DS research).", + "date": "2026-06-01" + }, + "https://job-boards.greenhouse.io/anthropic/jobs/5197714008": { + "company": "Anthropic", + "title": "Senior Security SW Engineer, Linux Kernel Security", + "decision": "skip", + "note": "Kernel C / systems-security, off his stack (Python/Java/data-platform).", + "date": "2026-06-01" + }, + "https://jobs.ashbyhq.com/kraken.com/e7480aaf-0a1b-4ed4-a047-37cc5b3b88a3": { + "company": "Kraken", + "title": "Account Executive, Trading-as-a-Service", + "decision": "skip", + "note": "Sales (AE), not engineering. Scored high on crypto/trading keywords only.", + "date": "2026-06-01" + }, + "https://jobs.ashbyhq.com/kraken.com/083afacb-edf7-4920-8428-e6c6fb7aecc9": { + "company": "Kraken", + "title": "Senior Sales Trader, EMEA", + "decision": "skip", + "note": "Sales/trading desk, not engineering.", + "date": "2026-06-01" + }, + "https://jobs.ashbyhq.com/kraken.com/ff2129b4-2fc9-4e8e-9c92-777279b377fe": { + "company": "Kraken", + "title": "Product Manager - Prop Trading - Breakout", + "decision": "skip", + "note": "PM, not engineering.", + "date": "2026-06-01" + }, + "https://jobs.ashbyhq.com/kraken.com/2bb8285f-d45a-441b-8628-983a101006ce": { + "company": "Kraken", + "title": "Staff Product Designer - Consumer", + "decision": "skip", + "note": "Design, not engineering.", + "date": "2026-06-01" + }, + "https://www.google.com/about/careers/applications/jobs/results/126509648931889862-senior-staff-software-engineer-automotive-ai?location=Switzerland": { + "company": "Google", + "title": "Senior Staff SW 
Engineer, Automotive AI", + "decision": "skip", + "note": "C++/automotive, weaker fit vs his Python/data-platform stack.", + "date": "2026-06-01" + } +} \ No newline at end of file