feat(job_scout): decision log + report annotations

Track per-job decisions across runs so we don't re-evaluate roles.

- state/decisions.json (keyed by URL: company/title/decision/note/date), now git-tracked while seen_jobs.json stays local
- --decide "<url>" <status> [note] records a decision; --hide-decided gives an undecided-only view; report tags each role inline with its decision
- usage docstring updated
- seed 18 decisions (9 shortlist, 7 skip, 1 paused, 1 maybe); flags Google Staff FDE GenAI as overlapping the paused prior session

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-01 15:30:49 +02:00
parent da66443aa8
commit 8a5955c0a8
3 changed files with 192 additions and 11 deletions
@@ -23,7 +23,9 @@ __pycache__/
 # job_scout runtime artifacts (keep scout.py + requirements.txt only)
 job_scout/.venv/
 job_scout/reports/
-job_scout/state/
+job_scout/state/*
 # ...but track the decision log (job application history), not the churny seen-state
 !job_scout/state/decisions.json
 # One-off job-board data pulls (debug artifacts)
 *_jd.json
@@ -10,9 +10,12 @@ Usage:
    py scout.py --only=nvidia   # Pull a single company by id
    py scout.py --new-only      # Report only jobs not seen before
    py scout.py --include-weak  # Include weak/noise bucket (default hidden)
    py scout.py --hide-decided  # Drop roles already in the decision log (undecided-only view)
    py scout.py --decide "<url>" <status> [note...]   # Record a decision and exit
                                # status is free-text: shortlist | skip | applied | paused | ...
-State : state/seen_jobs.json
+State : state/seen_jobs.json (job IDs seen) · state/decisions.json (per-URL decisions)
-Output: reports/YYYY-MM-DD.md
+Output: reports/YYYY-MM-DD.md  (scan-stats table + scored roles, decisions tagged inline)
 To add a company: append to COMPANIES with one of the existing adapter types. A few sites
 resist scraping even headless and stay in MANUAL_CHECK (surfaced as a report checklist).
@@ -32,6 +35,7 @@ from pathlib import Path
 ROOT = Path(__file__).parent
 STATE_FILE = ROOT / "state" / "seen_jobs.json"
 DECISIONS_FILE = ROOT / "state" / "decisions.json"
 REPORTS_DIR = ROOT / "reports"
 USER_AGENT = "Mozilla/5.0 (compatible; job-scout/0.1)"
@@ -979,6 +983,20 @@ def save_seen(seen):
    STATE_FILE.write_text(json.dumps(seen, indent=2, ensure_ascii=False), encoding="utf-8")
 def load_decisions():
    """Read the persisted decision log from DECISIONS_FILE.

    Returns the parsed JSON content, or an empty dict when the file does not exist yet.
    The log is expected to map job URL -> {company, title, decision, note, date}; the
    decision value is free text (shortlist / skip / applied / paused / rejected / ...)
    and is not validated here. Keeping it across runs avoids re-evaluating roles that
    were already judged.
    """
    # No log on disk yet: start from an empty mapping.
    if not DECISIONS_FILE.exists():
        return {}
    raw = DECISIONS_FILE.read_text(encoding="utf-8")
    return json.loads(raw)
 def save_decisions(decisions):
    """Write `decisions` to DECISIONS_FILE as indented UTF-8 JSON.

    The parent directory is created first if it is missing. Non-ASCII characters are
    written as-is rather than escaped.
    """
    target_dir = DECISIONS_FILE.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(decisions, indent=2, ensure_ascii=False)
    DECISIONS_FILE.write_text(payload, encoding="utf-8")
 def _parse_posted(s):
    """Best-effort parse of an adapter's `posted` field into a date, across the mix of
    formats the boards use (ISO 8601 incl. trailing Z, YYYY-MM-DD, DD.MM.YYYY). Returns None
@@ -1026,7 +1044,9 @@ def write_stats_table(stats, total_secs):
    return out
-def write_report(path, results, errors, new_only, include_weak, stats=None, total_secs=0.0):
+def write_report(path, results, errors, new_only, include_weak, stats=None, total_secs=0.0,
                 decisions=None, hide_decided=False):
    decisions = decisions or {}
    today = datetime.now().strftime("%Y-%m-%d")
    n_new = sum(1 for r in results if r["is_new"])
    n_match = sum(1 for r in results if r["score"] >= 2)
@@ -1050,6 +1070,11 @@ def write_report(path, results, errors, new_only, include_weak, stats=None, tota
    if not include_weak and weak:
        lines.append(f"\n_Hiding {len(weak)} weak/noise roles (score < 2). Use --include-weak to show._")
    n_decided = sum(1 for r in results if r["url"] in decisions)
    if n_decided:
        shown = "hidden" if hide_decided else "tagged inline"
        lines.append(f"_{n_decided} role(s) already in the decision log ({shown}; "
                     f"see state/decisions.json)._")
    buckets = [("Strong fit (score >= 6)", strong),
               ("Medium fit (score 2-5)", medium)]
@@ -1057,17 +1082,23 @@ def write_report(path, results, errors, new_only, include_weak, stats=None, tota
        buckets.append(("Weak / noise (score < 2)", weak))
    for bucket_name, bucket in buckets:
-        if not bucket:
+        shown = [r for r in bucket if not (hide_decided and r["url"] in decisions)]
        if not shown:
            continue
-        lines.append(f"\n## {bucket_name} - {len(bucket)} role(s)\n")
+        lines.append(f"\n## {bucket_name} - {len(shown)} role(s)\n")
-        for r in bucket:
+        for r in shown:
            d = decisions.get(r["url"])
            new_tag = " [NEW]" if r["is_new"] else ""
            decided_tag = f" — 🗂 {d['decision'].upper()}" if d else ""
            loc_tag = "CH" if r["in_ch"] else ("Remote" if r["remote"] else "?")
-            lines.append(f"### [{r['score']}] {r['company']} - {r['title']}{new_tag}")
+            lines.append(f"### [{r['score']}] {r['company']} - {r['title']}{new_tag}{decided_tag}")
            lines.append(f"- Location: {r['location']} *({loc_tag})*")
            if r.get("posted"):
                lines.append(f"- Posted: {r['posted']}")
            lines.append(f"- URL: {r['url']}")
            if d:
                note = f" — {d['note']}" if d.get("note") else ""
                lines.append(f"- 🗂 Decision: **{d['decision']}**{note} ({d.get('date','')})")
            if r["pos"]:
                lines.append(f"- Positive: {', '.join(r['pos'])}")
            if r["neg"]:
@@ -1086,17 +1117,36 @@ def write_report(path, results, errors, new_only, include_weak, stats=None, tota
 def main():
-    only, new_only, include_weak = None, False, False
+    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    # Record a decision and exit:  --decide "<url>" <status> [note words...]
    if "--decide" in sys.argv:
        rest = sys.argv[sys.argv.index("--decide") + 1:]
        if len(rest) < 2:
            print('Usage: --decide "<url>" <status> [note...]', file=sys.stderr)
            return
        url, status, note = rest[0], rest[1], " ".join(rest[2:])
        decisions = load_decisions()
        prev = decisions.get(url, {})
        decisions[url] = {"company": prev.get("company", ""), "title": prev.get("title", ""),
                          "decision": status, "note": note, "date": today}
        save_decisions(decisions)
        print(f"Recorded: {status} — {url}", file=sys.stderr)
        return
    only, new_only, include_weak, hide_decided = None, False, False, False
    for arg in sys.argv[1:]:
        if arg == "--new-only":
            new_only = True
        elif arg == "--include-weak":
            include_weak = True
        elif arg == "--hide-decided":
            hide_decided = True
        elif arg.startswith("--only="):
            only = arg.split("=", 1)[1]
    seen = load_seen()
-    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
+    decisions = load_decisions()
    all_results, errors, stats = [], [], []
    run_start = time.perf_counter()
@@ -1181,7 +1231,8 @@ def main():
    REPORTS_DIR.mkdir(parents=True, exist_ok=True)
    report_path = REPORTS_DIR / f"{today}.md"
    write_report(report_path, all_results, errors, new_only, include_weak,
-                 stats=stats, total_secs=total_secs)
+                 stats=stats, total_secs=total_secs,
                 decisions=decisions, hide_decided=hide_decided)
    n_new = sum(1 for r in all_results if r["is_new"])
    print(f"\nReport written: {report_path}", file=sys.stderr)
@@ -0,0 +1,128 @@
 {
  "https://job-boards.greenhouse.io/anthropic/jobs/5204086008": {
    "company": "Anthropic",
    "title": "Solutions Architect, Applied AI",
    "decision": "shortlist",
    "note": "Top fit: applied-AI SA in Zurich, on-thesis (platform/SA, not model-building). Top comp. Candidate for tailored package.",
    "date": "2026-06-01"
  },
  "https://www.google.com/about/careers/applications/jobs/results/116958340671513286-staff-forward-deployed-engineer-genai-google-cloud?location=Switzerland": {
    "company": "Google",
    "title": "Staff Forward Deployed Engineer, GenAI, Google Cloud",
    "decision": "paused",
    "note": "Overlaps the PAUSED 'Google FDE GenAI (Zurich)' session (GenAI evidence gap; was redirecting to data-eng/MLOps). Reassess via the Cloud/FDE platform angle rather than GenAI. Reuse existing FDE drafts.",
    "date": "2026-06-01"
  },
  "https://careers.cisco.com/global/en/job/2012401/Senior-Software-Engineer-Agent-Datapath-eBPF-Isovalent": {
    "company": "Cisco",
    "title": "Senior SW Engineer, Agent & Datapath (eBPF) - Isovalent",
    "decision": "shortlist",
    "note": "Cilium/eBPF cloud-native platform eng, CH-based. Strong on-thesis infra role.",
    "date": "2026-06-01"
  },
  "https://jobs.lever.co/quantco-/a7c4f142-b1ad-449c-a18e-bdbda41a3f86": {
    "company": "QuantCo",
    "title": "Cloud Engineer",
    "decision": "shortlist",
    "note": "k8s/cloud at high-comp boutique, Zurich hub. On platform thesis.",
    "date": "2026-06-01"
  },
  "https://jobs.lever.co/quantco-/67757fe8-3582-4bdd-aa7d-98fa490bde84": {
    "company": "QuantCo",
    "title": "AI Engineer",
    "decision": "maybe",
    "note": "High score but more ML-leaning than his platform thesis; prefer the Cloud Engineer role. Revisit only if framed as AI-platform/infra.",
    "date": "2026-06-01"
  },
  "https://nvidia.wd5.myworkdayjobs.com/job/UK-Remote/Senior-Solutions-Architect--HPC-and-AI_JR2007074": {
    "company": "NVIDIA",
    "title": "Senior Solutions Architect, HPC and AI",
    "decision": "shortlist",
    "note": "SA track (preferred over SWE at NVIDIA per profile). Remote UK/DE.",
    "date": "2026-06-01"
  },
  "https://jobs.ashbyhq.com/kraken.com/5d344194-bbf0-48ce-bafd-f27c51b185b8": {
    "company": "Kraken",
    "title": "Solutions Architect - Payward Services",
    "decision": "shortlist",
    "note": "Architect + crypto + trading; remote incl. CH. Kraken has a prior package (AI Infrastructure session, 84.5/100) to draw on. Crypto background differentiating.",
    "date": "2026-06-01"
  },
  "https://jobs.ashbyhq.com/kraken.com/4aacc5a1-a848-4b68-90b8-958b6e4a2d0b": {
    "company": "Kraken",
    "title": "AI Agents Solutions Architect - HR",
    "decision": "shortlist",
    "note": "Agentic + Python + crypto SA. HR-domain variant (also Finance/Compliance variants exist).",
    "date": "2026-06-01"
  },
  "https://bitcoin-suisse.onlyfy.jobs/job/fkt5dq31d58r2hdoa1pnyp6gykgpuu9": {
    "company": "Bitcoin Suisse",
    "title": "Solution Architect Trading",
    "decision": "shortlist",
    "note": "Crypto-native, architect, trading; Zug. Crypto background differentiating.",
    "date": "2026-06-01"
  },
  "https://job.bkw.com/offene-stellen/solution-architect-energiehandel-alle/99b88701-e999-4ff3-9585-b19963ed8237": {
    "company": "BKW (Bern)",
    "title": "Solution Architect Energiehandel",
    "decision": "shortlist",
    "note": "Bern WLB tier (lower comp OK). Energy-trading architect, local. The flagged Energiehandel role.",
    "date": "2026-06-01"
  },
  "https://job.bkw.com/offene-stellen/senior-quant-risk-modeller-all/1bb5e5b5-35d8-410f-b3d1-c156e6f9114c": {
    "company": "BKW (Bern)",
    "title": "Senior Quant Risk Modeller",
    "decision": "shortlist",
    "note": "Bern WLB tier; trading/quant, local. Fits energy-trading interest.",
    "date": "2026-06-01"
  },
  "https://www.google.com/about/careers/applications/jobs/results/100214884188201670-senior-research-data-scientist-merchant-shopping-data-science?location=Switzerland": {
    "company": "Google",
    "title": "Senior Research Data Scientist, Merchant Shopping",
    "decision": "skip",
    "note": "Research / model-building, off-thesis (he targets data-eng/platform/infra, not DS research).",
    "date": "2026-06-01"
  },
  "https://job-boards.greenhouse.io/anthropic/jobs/5197714008": {
    "company": "Anthropic",
    "title": "Senior Security SW Engineer, Linux Kernel Security",
    "decision": "skip",
    "note": "Kernel C / systems-security, off his stack (Python/Java/data-platform).",
    "date": "2026-06-01"
  },
  "https://jobs.ashbyhq.com/kraken.com/e7480aaf-0a1b-4ed4-a047-37cc5b3b88a3": {
    "company": "Kraken",
    "title": "Account Executive, Trading-as-a-Service",
    "decision": "skip",
    "note": "Sales (AE), not engineering. Scored high on crypto/trading keywords only.",
    "date": "2026-06-01"
  },
  "https://jobs.ashbyhq.com/kraken.com/083afacb-edf7-4920-8428-e6c6fb7aecc9": {
    "company": "Kraken",
    "title": "Senior Sales Trader, EMEA",
    "decision": "skip",
    "note": "Sales/trading desk, not engineering.",
    "date": "2026-06-01"
  },
  "https://jobs.ashbyhq.com/kraken.com/ff2129b4-2fc9-4e8e-9c92-777279b377fe": {
    "company": "Kraken",
    "title": "Product Manager - Prop Trading - Breakout",
    "decision": "skip",
    "note": "PM, not engineering.",
    "date": "2026-06-01"
  },
  "https://jobs.ashbyhq.com/kraken.com/2bb8285f-d45a-441b-8628-983a101006ce": {
    "company": "Kraken",
    "title": "Staff Product Designer - Consumer",
    "decision": "skip",
    "note": "Design, not engineering.",
    "date": "2026-06-01"
  },
  "https://www.google.com/about/careers/applications/jobs/results/126509648931889862-senior-staff-software-engineer-automotive-ai?location=Switzerland": {
    "company": "Google",
    "title": "Senior Staff SW Engineer, Automotive AI",
    "decision": "skip",
    "note": "C++/automotive, weaker fit vs his Python/data-platform stack.",
    "date": "2026-06-01"
  }
 }