feat(job_scout): decision log + report annotations
Track per-job decisions across runs so we don't re-evaluate roles.

- state/decisions.json (keyed by URL: company/title/decision/note/date) is now git-tracked, while seen_jobs.json stays local
- --decide "<url>" <status> [note] records a decision; --hide-decided gives an undecided-only view; the report tags each role inline with its decision
- usage docstring updated
- seed 18 decisions (9 shortlist, 7 skip, 1 paused, 1 maybe); flags Google Staff FDE GenAI as paused because it overlaps the paused prior session

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
+3
-1
@@ -23,7 +23,9 @@ __pycache__/
|
|||||||
# job_scout runtime artifacts (keep scout.py + requirements.txt only)
|
# job_scout runtime artifacts (keep scout.py + requirements.txt only)
|
||||||
job_scout/.venv/
|
job_scout/.venv/
|
||||||
job_scout/reports/
|
job_scout/reports/
|
||||||
job_scout/state/
|
job_scout/state/*
|
||||||
|
# ...but track the decision log (job application history), not the churny seen-state
|
||||||
|
!job_scout/state/decisions.json
|
||||||
|
|
||||||
# One-off job-board data pulls (debug artifacts)
|
# One-off job-board data pulls (debug artifacts)
|
||||||
*_jd.json
|
*_jd.json
|
||||||
|
|||||||
+61
-10
@@ -10,9 +10,12 @@ Usage:
|
|||||||
py scout.py --only=nvidia # Pull a single company by id
|
py scout.py --only=nvidia # Pull a single company by id
|
||||||
py scout.py --new-only # Report only jobs not seen before
|
py scout.py --new-only # Report only jobs not seen before
|
||||||
py scout.py --include-weak # Include weak/noise bucket (default hidden)
|
py scout.py --include-weak # Include weak/noise bucket (default hidden)
|
||||||
|
py scout.py --hide-decided # Drop roles already in the decision log (undecided-only view)
|
||||||
|
py scout.py --decide "<url>" <status> [note...] # Record a decision and exit
|
||||||
|
# status is free-text: shortlist | skip | applied | paused | ...
|
||||||
|
|
||||||
State : state/seen_jobs.json
|
State : state/seen_jobs.json (job IDs seen) · state/decisions.json (per-URL decisions)
|
||||||
Output: reports/YYYY-MM-DD.md
|
Output: reports/YYYY-MM-DD.md (scan-stats table + scored roles, decisions tagged inline)
|
||||||
|
|
||||||
To add a company: append to COMPANIES with one of the existing adapter types. A few sites
|
To add a company: append to COMPANIES with one of the existing adapter types. A few sites
|
||||||
resist scraping even headless and stay in MANUAL_CHECK (surfaced as a report checklist).
|
resist scraping even headless and stay in MANUAL_CHECK (surfaced as a report checklist).
|
||||||
@@ -32,6 +35,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
ROOT = Path(__file__).parent
|
ROOT = Path(__file__).parent
|
||||||
STATE_FILE = ROOT / "state" / "seen_jobs.json"
|
STATE_FILE = ROOT / "state" / "seen_jobs.json"
|
||||||
|
DECISIONS_FILE = ROOT / "state" / "decisions.json"
|
||||||
REPORTS_DIR = ROOT / "reports"
|
REPORTS_DIR = ROOT / "reports"
|
||||||
USER_AGENT = "Mozilla/5.0 (compatible; job-scout/0.1)"
|
USER_AGENT = "Mozilla/5.0 (compatible; job-scout/0.1)"
|
||||||
|
|
||||||
@@ -979,6 +983,20 @@ def save_seen(seen):
|
|||||||
STATE_FILE.write_text(json.dumps(seen, indent=2, ensure_ascii=False), encoding="utf-8")
|
STATE_FILE.write_text(json.dumps(seen, indent=2, ensure_ascii=False), encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def load_decisions():
    """Read the decision log stored in DECISIONS_FILE.

    The log is keyed by job URL, each entry being a record of the form
    {company, title, decision, note, date}. It persists across runs so that
    roles which were already judged are not re-evaluated. The decision value
    is free text (shortlist / skip / applied / paused / rejected) and is not
    enforced here.

    Returns the parsed JSON content, or an empty dict when the file is absent.
    """
    # No log yet (first run, or nothing decided so far): start empty.
    if not DECISIONS_FILE.exists():
        return {}
    raw = DECISIONS_FILE.read_text(encoding="utf-8")
    return json.loads(raw)
|
||||||
|
|
||||||
|
|
||||||
|
def save_decisions(decisions):
    """Write `decisions` to DECISIONS_FILE as indented UTF-8 JSON.

    The parent directory is created first when it does not exist yet.
    Non-ASCII characters are written as-is rather than escaped.
    """
    state_dir = DECISIONS_FILE.parent
    state_dir.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(decisions, indent=2, ensure_ascii=False)
    DECISIONS_FILE.write_text(payload, encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
def _parse_posted(s):
|
def _parse_posted(s):
|
||||||
"""Best-effort parse of an adapter's `posted` field into a date, across the mix of
|
"""Best-effort parse of an adapter's `posted` field into a date, across the mix of
|
||||||
formats the boards use (ISO 8601 incl. trailing Z, YYYY-MM-DD, DD.MM.YYYY). Returns None
|
formats the boards use (ISO 8601 incl. trailing Z, YYYY-MM-DD, DD.MM.YYYY). Returns None
|
||||||
@@ -1026,7 +1044,9 @@ def write_stats_table(stats, total_secs):
|
|||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
def write_report(path, results, errors, new_only, include_weak, stats=None, total_secs=0.0):
|
def write_report(path, results, errors, new_only, include_weak, stats=None, total_secs=0.0,
|
||||||
|
decisions=None, hide_decided=False):
|
||||||
|
decisions = decisions or {}
|
||||||
today = datetime.now().strftime("%Y-%m-%d")
|
today = datetime.now().strftime("%Y-%m-%d")
|
||||||
n_new = sum(1 for r in results if r["is_new"])
|
n_new = sum(1 for r in results if r["is_new"])
|
||||||
n_match = sum(1 for r in results if r["score"] >= 2)
|
n_match = sum(1 for r in results if r["score"] >= 2)
|
||||||
@@ -1050,6 +1070,11 @@ def write_report(path, results, errors, new_only, include_weak, stats=None, tota
|
|||||||
|
|
||||||
if not include_weak and weak:
|
if not include_weak and weak:
|
||||||
lines.append(f"\n_Hiding {len(weak)} weak/noise roles (score < 2). Use --include-weak to show._")
|
lines.append(f"\n_Hiding {len(weak)} weak/noise roles (score < 2). Use --include-weak to show._")
|
||||||
|
n_decided = sum(1 for r in results if r["url"] in decisions)
|
||||||
|
if n_decided:
|
||||||
|
shown = "hidden" if hide_decided else "tagged inline"
|
||||||
|
lines.append(f"_{n_decided} role(s) already in the decision log ({shown}; "
|
||||||
|
f"see state/decisions.json)._")
|
||||||
|
|
||||||
buckets = [("Strong fit (score >= 6)", strong),
|
buckets = [("Strong fit (score >= 6)", strong),
|
||||||
("Medium fit (score 2-5)", medium)]
|
("Medium fit (score 2-5)", medium)]
|
||||||
@@ -1057,17 +1082,23 @@ def write_report(path, results, errors, new_only, include_weak, stats=None, tota
|
|||||||
buckets.append(("Weak / noise (score < 2)", weak))
|
buckets.append(("Weak / noise (score < 2)", weak))
|
||||||
|
|
||||||
for bucket_name, bucket in buckets:
|
for bucket_name, bucket in buckets:
|
||||||
if not bucket:
|
shown = [r for r in bucket if not (hide_decided and r["url"] in decisions)]
|
||||||
|
if not shown:
|
||||||
continue
|
continue
|
||||||
lines.append(f"\n## {bucket_name} - {len(bucket)} role(s)\n")
|
lines.append(f"\n## {bucket_name} - {len(shown)} role(s)\n")
|
||||||
for r in bucket:
|
for r in shown:
|
||||||
|
d = decisions.get(r["url"])
|
||||||
new_tag = " [NEW]" if r["is_new"] else ""
|
new_tag = " [NEW]" if r["is_new"] else ""
|
||||||
|
decided_tag = f" — 🗂 {d['decision'].upper()}" if d else ""
|
||||||
loc_tag = "CH" if r["in_ch"] else ("Remote" if r["remote"] else "?")
|
loc_tag = "CH" if r["in_ch"] else ("Remote" if r["remote"] else "?")
|
||||||
lines.append(f"### [{r['score']}] {r['company']} - {r['title']}{new_tag}")
|
lines.append(f"### [{r['score']}] {r['company']} - {r['title']}{new_tag}{decided_tag}")
|
||||||
lines.append(f"- Location: {r['location']} *({loc_tag})*")
|
lines.append(f"- Location: {r['location']} *({loc_tag})*")
|
||||||
if r.get("posted"):
|
if r.get("posted"):
|
||||||
lines.append(f"- Posted: {r['posted']}")
|
lines.append(f"- Posted: {r['posted']}")
|
||||||
lines.append(f"- URL: {r['url']}")
|
lines.append(f"- URL: {r['url']}")
|
||||||
|
if d:
|
||||||
|
note = f" — {d['note']}" if d.get("note") else ""
|
||||||
|
lines.append(f"- 🗂 Decision: **{d['decision']}**{note} ({d.get('date','')})")
|
||||||
if r["pos"]:
|
if r["pos"]:
|
||||||
lines.append(f"- Positive: {', '.join(r['pos'])}")
|
lines.append(f"- Positive: {', '.join(r['pos'])}")
|
||||||
if r["neg"]:
|
if r["neg"]:
|
||||||
@@ -1086,17 +1117,36 @@ def write_report(path, results, errors, new_only, include_weak, stats=None, tota
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
only, new_only, include_weak = None, False, False
|
today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
# Record a decision and exit: --decide "<url>" <status> [note words...]
|
||||||
|
if "--decide" in sys.argv:
|
||||||
|
rest = sys.argv[sys.argv.index("--decide") + 1:]
|
||||||
|
if len(rest) < 2:
|
||||||
|
print('Usage: --decide "<url>" <status> [note...]', file=sys.stderr)
|
||||||
|
return
|
||||||
|
url, status, note = rest[0], rest[1], " ".join(rest[2:])
|
||||||
|
decisions = load_decisions()
|
||||||
|
prev = decisions.get(url, {})
|
||||||
|
decisions[url] = {"company": prev.get("company", ""), "title": prev.get("title", ""),
|
||||||
|
"decision": status, "note": note, "date": today}
|
||||||
|
save_decisions(decisions)
|
||||||
|
print(f"Recorded: {status} — {url}", file=sys.stderr)
|
||||||
|
return
|
||||||
|
|
||||||
|
only, new_only, include_weak, hide_decided = None, False, False, False
|
||||||
for arg in sys.argv[1:]:
|
for arg in sys.argv[1:]:
|
||||||
if arg == "--new-only":
|
if arg == "--new-only":
|
||||||
new_only = True
|
new_only = True
|
||||||
elif arg == "--include-weak":
|
elif arg == "--include-weak":
|
||||||
include_weak = True
|
include_weak = True
|
||||||
|
elif arg == "--hide-decided":
|
||||||
|
hide_decided = True
|
||||||
elif arg.startswith("--only="):
|
elif arg.startswith("--only="):
|
||||||
only = arg.split("=", 1)[1]
|
only = arg.split("=", 1)[1]
|
||||||
|
|
||||||
seen = load_seen()
|
seen = load_seen()
|
||||||
today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
decisions = load_decisions()
|
||||||
all_results, errors, stats = [], [], []
|
all_results, errors, stats = [], [], []
|
||||||
run_start = time.perf_counter()
|
run_start = time.perf_counter()
|
||||||
|
|
||||||
@@ -1181,7 +1231,8 @@ def main():
|
|||||||
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
|
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
report_path = REPORTS_DIR / f"{today}.md"
|
report_path = REPORTS_DIR / f"{today}.md"
|
||||||
write_report(report_path, all_results, errors, new_only, include_weak,
|
write_report(report_path, all_results, errors, new_only, include_weak,
|
||||||
stats=stats, total_secs=total_secs)
|
stats=stats, total_secs=total_secs,
|
||||||
|
decisions=decisions, hide_decided=hide_decided)
|
||||||
|
|
||||||
n_new = sum(1 for r in all_results if r["is_new"])
|
n_new = sum(1 for r in all_results if r["is_new"])
|
||||||
print(f"\nReport written: {report_path}", file=sys.stderr)
|
print(f"\nReport written: {report_path}", file=sys.stderr)
|
||||||
|
|||||||
@@ -0,0 +1,128 @@
|
|||||||
|
{
|
||||||
|
"https://job-boards.greenhouse.io/anthropic/jobs/5204086008": {
|
||||||
|
"company": "Anthropic",
|
||||||
|
"title": "Solutions Architect, Applied AI",
|
||||||
|
"decision": "shortlist",
|
||||||
|
"note": "Top fit: applied-AI SA in Zurich, on-thesis (platform/SA, not model-building). Top comp. Candidate for tailored package.",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
},
|
||||||
|
"https://www.google.com/about/careers/applications/jobs/results/116958340671513286-staff-forward-deployed-engineer-genai-google-cloud?location=Switzerland": {
|
||||||
|
"company": "Google",
|
||||||
|
"title": "Staff Forward Deployed Engineer, GenAI, Google Cloud",
|
||||||
|
"decision": "paused",
|
||||||
|
"note": "Overlaps the PAUSED 'Google FDE GenAI (Zurich)' session (GenAI evidence gap; was redirecting to data-eng/MLOps). Reassess via the Cloud/FDE platform angle rather than GenAI. Reuse existing FDE drafts.",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
},
|
||||||
|
"https://careers.cisco.com/global/en/job/2012401/Senior-Software-Engineer-Agent-Datapath-eBPF-Isovalent": {
|
||||||
|
"company": "Cisco",
|
||||||
|
"title": "Senior SW Engineer, Agent & Datapath (eBPF) - Isovalent",
|
||||||
|
"decision": "shortlist",
|
||||||
|
"note": "Cilium/eBPF cloud-native platform eng, CH-based. Strong on-thesis infra role.",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
},
|
||||||
|
"https://jobs.lever.co/quantco-/a7c4f142-b1ad-449c-a18e-bdbda41a3f86": {
|
||||||
|
"company": "QuantCo",
|
||||||
|
"title": "Cloud Engineer",
|
||||||
|
"decision": "shortlist",
|
||||||
|
"note": "k8s/cloud at high-comp boutique, Zurich hub. On platform thesis.",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
},
|
||||||
|
"https://jobs.lever.co/quantco-/67757fe8-3582-4bdd-aa7d-98fa490bde84": {
|
||||||
|
"company": "QuantCo",
|
||||||
|
"title": "AI Engineer",
|
||||||
|
"decision": "maybe",
|
||||||
|
"note": "High score but more ML-leaning than his platform thesis; prefer the Cloud Engineer role. Revisit only if framed as AI-platform/infra.",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
},
|
||||||
|
"https://nvidia.wd5.myworkdayjobs.com/job/UK-Remote/Senior-Solutions-Architect--HPC-and-AI_JR2007074": {
|
||||||
|
"company": "NVIDIA",
|
||||||
|
"title": "Senior Solutions Architect, HPC and AI",
|
||||||
|
"decision": "shortlist",
|
||||||
|
"note": "SA track (preferred over SWE at NVIDIA per profile). Remote UK/DE.",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
},
|
||||||
|
"https://jobs.ashbyhq.com/kraken.com/5d344194-bbf0-48ce-bafd-f27c51b185b8": {
|
||||||
|
"company": "Kraken",
|
||||||
|
"title": "Solutions Architect - Payward Services",
|
||||||
|
"decision": "shortlist",
|
||||||
|
"note": "Architect + crypto + trading; remote incl. CH. Kraken has a prior package (AI Infrastructure session, 84.5/100) to draw on. Crypto background differentiating.",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
},
|
||||||
|
"https://jobs.ashbyhq.com/kraken.com/4aacc5a1-a848-4b68-90b8-958b6e4a2d0b": {
|
||||||
|
"company": "Kraken",
|
||||||
|
"title": "AI Agents Solutions Architect - HR",
|
||||||
|
"decision": "shortlist",
|
||||||
|
"note": "Agentic + Python + crypto SA. HR-domain variant (also Finance/Compliance variants exist).",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
},
|
||||||
|
"https://bitcoin-suisse.onlyfy.jobs/job/fkt5dq31d58r2hdoa1pnyp6gykgpuu9": {
|
||||||
|
"company": "Bitcoin Suisse",
|
||||||
|
"title": "Solution Architect Trading",
|
||||||
|
"decision": "shortlist",
|
||||||
|
"note": "Crypto-native, architect, trading; Zug. Crypto background differentiating.",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
},
|
||||||
|
"https://job.bkw.com/offene-stellen/solution-architect-energiehandel-alle/99b88701-e999-4ff3-9585-b19963ed8237": {
|
||||||
|
"company": "BKW (Bern)",
|
||||||
|
"title": "Solution Architect Energiehandel",
|
||||||
|
"decision": "shortlist",
|
||||||
|
"note": "Bern WLB tier (lower comp OK). Energy-trading architect, local. The flagged Energiehandel role.",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
},
|
||||||
|
"https://job.bkw.com/offene-stellen/senior-quant-risk-modeller-all/1bb5e5b5-35d8-410f-b3d1-c156e6f9114c": {
|
||||||
|
"company": "BKW (Bern)",
|
||||||
|
"title": "Senior Quant Risk Modeller",
|
||||||
|
"decision": "shortlist",
|
||||||
|
"note": "Bern WLB tier; trading/quant, local. Fits energy-trading interest.",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
},
|
||||||
|
"https://www.google.com/about/careers/applications/jobs/results/100214884188201670-senior-research-data-scientist-merchant-shopping-data-science?location=Switzerland": {
|
||||||
|
"company": "Google",
|
||||||
|
"title": "Senior Research Data Scientist, Merchant Shopping",
|
||||||
|
"decision": "skip",
|
||||||
|
"note": "Research / model-building, off-thesis (he targets data-eng/platform/infra, not DS research).",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
},
|
||||||
|
"https://job-boards.greenhouse.io/anthropic/jobs/5197714008": {
|
||||||
|
"company": "Anthropic",
|
||||||
|
"title": "Senior Security SW Engineer, Linux Kernel Security",
|
||||||
|
"decision": "skip",
|
||||||
|
"note": "Kernel C / systems-security, off his stack (Python/Java/data-platform).",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
},
|
||||||
|
"https://jobs.ashbyhq.com/kraken.com/e7480aaf-0a1b-4ed4-a047-37cc5b3b88a3": {
|
||||||
|
"company": "Kraken",
|
||||||
|
"title": "Account Executive, Trading-as-a-Service",
|
||||||
|
"decision": "skip",
|
||||||
|
"note": "Sales (AE), not engineering. Scored high on crypto/trading keywords only.",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
},
|
||||||
|
"https://jobs.ashbyhq.com/kraken.com/083afacb-edf7-4920-8428-e6c6fb7aecc9": {
|
||||||
|
"company": "Kraken",
|
||||||
|
"title": "Senior Sales Trader, EMEA",
|
||||||
|
"decision": "skip",
|
||||||
|
"note": "Sales/trading desk, not engineering.",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
},
|
||||||
|
"https://jobs.ashbyhq.com/kraken.com/ff2129b4-2fc9-4e8e-9c92-777279b377fe": {
|
||||||
|
"company": "Kraken",
|
||||||
|
"title": "Product Manager - Prop Trading - Breakout",
|
||||||
|
"decision": "skip",
|
||||||
|
"note": "PM, not engineering.",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
},
|
||||||
|
"https://jobs.ashbyhq.com/kraken.com/2bb8285f-d45a-441b-8628-983a101006ce": {
|
||||||
|
"company": "Kraken",
|
||||||
|
"title": "Staff Product Designer - Consumer",
|
||||||
|
"decision": "skip",
|
||||||
|
"note": "Design, not engineering.",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
},
|
||||||
|
"https://www.google.com/about/careers/applications/jobs/results/126509648931889862-senior-staff-software-engineer-automotive-ai?location=Switzerland": {
|
||||||
|
"company": "Google",
|
||||||
|
"title": "Senior Staff SW Engineer, Automotive AI",
|
||||||
|
"decision": "skip",
|
||||||
|
"note": "C++/automotive, weaker fit vs his Python/data-platform stack.",
|
||||||
|
"date": "2026-06-01"
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user