From 5d5d962cd6af393e6abd50b6aff3d2f0a497d6b6 Mon Sep 17 00:00:00 2001
From: killertcell428 <killertcell428@gmail.com>
Date: Mon, 18 May 2026 23:55:21 +0900
Subject: [PATCH] auto-improvement: daily paper-review loop from
 Awesome-LLM4Cybersecurity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a half-automated loop that, every day at 00:15 UTC:

1. Fetches the upstream LITERATURES.md (639 entries today, growing).
2. Picks ~10 unseen newest entries and asks Claude Haiku 4.5 whether
   each one yields a concrete regex/substring-scoped Aigis detector.
3. Drafts a pending/ stub per relevant candidate (humans promote to
   real rule PRs; ROTATION-loop conventions preserved).
4. Files one summary issue + opens a bot PR with the new pending files
   and updated state.json.

No detector code is touched by the workflow itself — this is the
semi-automatic mode the user picked: candidates + draft patches in,
human review out.

Cost target: <$1/day (10 papers x Haiku ~500 out tokens).

Requires repo secret ANTHROPIC_API_KEY. Dry-run via workflow_dispatch
input dry_run=true exercises the parser without burning API credits.

Parser verified locally against the live source: 639 entries parsed,
newest-first ordering produces the expected top picks (SecLens 2026.04,
CyberThreat-Eval 2026.03, Minerva 2026.01).

Signed-off-by: killertcell428 <killertcell428@gmail.com>
---
 .github/workflows/paper-review.yml       |  91 +++++
 auto-improvement/README.md               |  22 +-
 auto-improvement/paper_review_state.json |   4 +
 auto-improvement/scripts/paper_review.py | 455 +++++++++++++++++++++++
 4 files changed, 571 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/paper-review.yml
 create mode 100644 auto-improvement/paper_review_state.json
 create mode 100644 auto-improvement/scripts/paper_review.py

diff --git a/.github/workflows/paper-review.yml b/.github/workflows/paper-review.yml
new file mode 100644
index 0000000..2103394
--- /dev/null
+++ b/.github/workflows/paper-review.yml
@@ -0,0 +1,91 @@
+name: Auto-improvement / Daily paper review
+
+# Reads ~10 unseen entries from tmylla/Awesome-LLM4Cybersecurity each day,
+# asks Claude Haiku whether they suggest a concrete Aigis hardening, and
+# files candidates as auto-improvement/pending/ stubs + one GitHub issue
+# summarising the batch. Code changes are NEVER made by this workflow —
+# humans promote pending entries into rule PRs.
+#
+# Cost guard: 10 papers × Haiku 4.5 (≈500 tokens out) ≈ a few cents/day.
+# State (paper_review_state.json) reaches master only when the bot PR is
+# merged; until then the next run re-picks the same entries. If the commit
+# step fails, only the summary issue survives (pending files are not pushed).
+
+on:
+  schedule:
+    # 00:15 UTC daily — well clear of cflite/codeql peak times.
+    - cron: "15 0 * * *"
+  workflow_dispatch:
+    inputs:
+      dry_run:
+        description: "Parse + pick only, no API calls or writes"
+        required: false
+        default: "false"
+        type: choice
+        options: ["false", "true"]
+      max_papers:
+        description: "How many unseen papers to review this run"
+        required: false
+        default: "10"
+
+permissions:
+  contents: write       # to commit state.json + pending stubs
+  issues: write         # to file the daily summary issue
+  pull-requests: write  # so the bot can open a PR with the new pending files
+
+concurrency:
+  group: paper-review
+  cancel-in-progress: false
+
+jobs:
+  review:
+    name: Review 10 papers and file pending candidates
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+
+      - name: Set up Python 3.11
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: "3.11"
+
+      - name: Set up uv
+        uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
+
+      - name: Install anthropic SDK
+        run: uv pip install --system "anthropic>=0.40.0"
+
+      - name: Run paper review
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_REPOSITORY: ${{ github.repository }}
+          # Inputs are passed via env (never inlined into the script) to prevent shell injection.
+          DRY_RUN: ${{ inputs.dry_run || 'false' }}
+          MAX_PAPERS: ${{ inputs.max_papers || '10' }}
+        run: |
+          if [ "$DRY_RUN" = "true" ]; then set -- --dry-run; else set --; fi
+          python auto-improvement/scripts/paper_review.py --max-papers "$MAX_PAPERS" "$@"
+
+      - name: Commit new pending + research + state on a bot branch
+        if: ${{ inputs.dry_run != 'true' }}
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          if [ -z "$(git status --porcelain -- auto-improvement/)" ]; then  # also sees untracked stubs
+            echo "No changes under auto-improvement/ — nothing to commit."
+            exit 0
+          fi
+          DATE="$(date -u +%Y-%m-%d)"
+          BRANCH="bot/paper-review/${DATE}-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"  # unique per run/attempt
+          git config user.name  "aigis-paper-review[bot]"
+          git config user.email "aigis-paper-review@users.noreply.github.com"
+          git checkout -b "$BRANCH"
+          git add auto-improvement/
+          git commit -m "auto-improvement: daily paper review ${DATE}"
+          git push -u origin "$BRANCH"
+          gh pr create \
+            --title "auto-improvement: daily paper review ${DATE}" \
+            --body "Bot PR with the daily batch of pending/ stubs and updated state. See the linked issue for the candidate list." \
+            --label "auto-improvement" || echo "::warning::gh pr create failed; branch ${BRANCH} was pushed without a PR"
diff --git a/auto-improvement/README.md b/auto-improvement/README.md
index 0fb5daa..86e751d 100644
--- a/auto-improvement/README.md
+++ b/auto-improvement/README.md
@@ -9,9 +9,29 @@ aigis を 6 時間ごとに自動強化する保守ループの作業領域。
 |------|------|
 | `ROTATION.md` | 10 領域ローテ定義 + 現在のカウンタ。毎回 +1 (mod 10) される |
 | `INDEX.md` | 全実行回の時系列インデックス（1 行サマリ） |
-| `research/` | 各回のリサーチレポート (UTC 名: `YYYY-MM-DDTHH-MM_NN-<domain>.md`) |
+| `research/` | 各回のリサーチレポート (UTC 名: `YYYY-MM-DDTHH-MM_NN-<domain>.md` または `..._paper-batch.md`) |
 | `changes/` | 各回の改修記録（追加機能・テスト結果・対応リサーチへのリンク） |
 | `pending/` | 大幅方向転換の提案。実装は保留。人間が後で採否を判断 |
+| `paper_review_state.json` | 後述「論文レビューループ」で読み終えた URL/タイトルの台帳 |
+| `scripts/paper_review.py` | 論文レビューループ本体（毎日 GH Actions から起動） |
+
+## 論文レビューループ（2026-05 追加）
+
+[Awesome-LLM4Cybersecurity](https://github.com/tmylla/Awesome-LLM4Cybersecurity) を毎日 10 件ずつ読み進める半自動ループ。`.github/workflows/paper-review.yml` が 00:15 UTC に走り、`scripts/paper_review.py` が以下を行う：
+
+1. 上流 `LITERATURES.md` を fetch
+2. `paper_review_state.json` の既読 URL/タイトルを除外し、未読の新しい順から 10 件ピック
+3. 各論文を Claude Haiku 4.5 に渡し、「Aigis に regex/部分一致で落とせる検出器候補があるか」を JSON で判定
+4. relevant=true のものを `pending/YYYY-MM-DD_paper_<slug>.md` として draft 化
+5. バッチ全体のサマリを `research/YYYY-MM-DDTHH-MM_paper-batch.md` に書き出し
+6. `gh issue create` でレビュー依頼 Issue を 1 本オープン
+7. 変更を bot ブランチで PR 化（人間がレビュー → master へマージ）
+
+実装は一切しない。pending/ に積まれた候補は、既存のルール（[ROTATION.md](ROTATION.md)）と同じく、人間が個別 PR で `aigis/` 配下に昇格させる。
+
+**必要な secrets:** `ANTHROPIC_API_KEY`（Anthropic console から発行、Settings → Secrets → Actions に登録）。未設定でも workflow 自体は失敗せず、各論文の判定が `judge error` として記録される点に注意。API キーなしで動作確認する場合は `workflow_dispatch` から `dry_run=true` でドライ実行する。
+
+**コスト目安:** 10 件 × Haiku 4.5（≈500 出力トークン）≈ 数¢/日。月 $1 弱を想定。
 
 ## 運用ルール（保守エージェントが守る）
 
diff --git a/auto-improvement/paper_review_state.json b/auto-improvement/paper_review_state.json
new file mode 100644
index 0000000..23d93f7
--- /dev/null
+++ b/auto-improvement/paper_review_state.json
@@ -0,0 +1,4 @@
+{
+  "seen": {},
+  "runs": []
+}
diff --git a/auto-improvement/scripts/paper_review.py b/auto-improvement/scripts/paper_review.py
new file mode 100644
index 0000000..09f4be6
--- /dev/null
+++ b/auto-improvement/scripts/paper_review.py
@@ -0,0 +1,455 @@
+#!/usr/bin/env python3
+"""Daily paper-review loop for the Awesome-LLM4Cybersecurity list.
+
+Reads N (default 10) unseen entries from the upstream LITERATURES.md, asks
+Claude Haiku whether each one suggests a concrete Aigis hardening, and:
+
+  - writes a pending/ stub for each adopted candidate (human reviews & merges),
+  - writes a research/ roll-up for the batch (always, even with 0 candidates),
+  - opens one GitHub issue summarising the batch (one click per candidate to
+    promote into a real PR),
+  - records every seen URL/title into paper_review_state.json so the next run
+    moves forward.
+
+The script is intentionally additive: it only writes drafts. No detector code
+is touched. Human reviewers promote pending/ entries via a follow-up PR.
+
+Usage:
+    python auto-improvement/scripts/paper_review.py             # full run
+    python auto-improvement/scripts/paper_review.py --dry-run   # parse only
+    python auto-improvement/scripts/paper_review.py --no-issue  # skip gh
+
+Required env (non-dry-run): ANTHROPIC_API_KEY.
+Optional env: GITHUB_REPOSITORY (for issue URL composition under Actions).
+"""
+
+from __future__ import annotations
+
+import argparse
+import datetime as dt
+import hashlib
+import json
+import re
+import subprocess
+import sys
+import urllib.request
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Iterable
+
+REPO_ROOT = Path(__file__).resolve().parents[2]
+AI_DIR = REPO_ROOT / "auto-improvement"
+PENDING_DIR = AI_DIR / "pending"
+RESEARCH_DIR = AI_DIR / "research"
+STATE_PATH = AI_DIR / "paper_review_state.json"
+
+DEFAULT_SOURCE = (
+    "https://raw.githubusercontent.com/tmylla/Awesome-LLM4Cybersecurity/main/LITERATURES.md"
+)
+
+# Numbered list line: "12. Title | *Venue* | 2026.01.31 | [Paper Link](url)"
+# URL portion is optional; title may contain colons / pipes inside backticks.
+ENTRY_RE = re.compile(
+    r"^\s*\d+\.\s+(?P<title>.+?)\s*\|\s*\*(?P<venue>.+?)\*\s*\|\s*(?P<date>[\d.]+)"
+    r"(?:\s*\|\s*\[[^\]]*\]\((?P<url>[^)\s]+)\))?\s*$"
+)
+SECTION_RE = re.compile(r"^#{1,4}\s+(.+?)\s*$")
+
+# Aigis context used to bias the LLM toward concrete pattern proposals.
+AIGIS_CONTEXT = """\
+Aigis is a zero-dependency Python firewall for LLM agents.
+
+Detection model:
+- Each rule is a DetectionPattern (regex + score + input/output filter scope).
+- Rules live in modules like aigis/policies/, aigis/patterns.py, aigis/safety/,
+  aigis/supply_chain/, etc. Each rule has a stable rule_id (e.g. `sc_langflow_build_exec`,
+  `afe_python_mro_escape`, `pi_unicode_tag_block`).
+- The loop has a hard budget: one paper translates to at most ~10 LOC of pattern
+  code + a few regex tests. Proposals that need new dependencies, ML models,
+  trained classifiers, or rewriting >100 LOC are NOT relevant.
+
+What IS relevant:
+- A new attack technique with a string signature catchable by regex / substring /
+  small AST inspection (e.g. a specific API path, a specific f-string template,
+  a specific Unicode block, a specific obfuscation prefix).
+- A measurable improvement to an existing rule (e.g. extending a regex to catch
+  a documented bypass).
+- A new compliance template (NIST / EU AI Act / ISO) that maps cleanly onto an
+  existing policy_templates/ file.
+- A hardening guide (docs-only) that codifies operational guidance from the paper.
+
+What is NOT relevant:
+- ML-based detection, embeddings, training pipelines.
+- Pure benchmarks/datasets with no extractable detection rule.
+- Surveys that summarise prior work without introducing new attack signatures.
+- Agentic frameworks for offensive use that don't translate into a defensive signature.
+"""
+
+JUDGE_PROMPT = """\
+You are triaging one paper for the Aigis auto-improvement loop.
+
+{aigis_context}
+
+Paper:
+- Title: {title}
+- Venue: {venue}
+- Date: {date}
+- URL: {url}
+- List section: {section}
+
+Decide whether this paper plausibly yields a concrete, regex/substring-scoped
+detection rule or a small docs/template addition for Aigis. Be strict: when in
+doubt, say no. Most papers will be irrelevant; that is fine.
+
+Reply with one JSON object, no prose, no code fences:
+{{
+  "relevant": true | false,
+  "rule_id": "<short snake_case id if relevant, else null>",
+  "rule_category": "input | output | supply_chain | mcp | memory | compliance | docs | null",
+  "one_line": "<= 25 words explaining what the rule catches, or why irrelevant>",
+  "blocked_example": "<a literal example string the rule would flag, or null>",
+  "source_evidence": "<one sentence quoting or paraphrasing the paper's key finding>"
+}}
+"""
+
+
+@dataclass
+class Entry:
+    section: str
+    title: str
+    venue: str
+    date: str
+    url: str | None
+
+    @property
+    def key(self) -> str:
+        """Stable dedup key: URL when present, else title hash."""
+        if self.url:
+            return f"url:{self.url}"
+        return "title:" + hashlib.sha1(self.title.encode("utf-8")).hexdigest()[:16]
+
+    @property
+    def slug(self) -> str:
+        base = re.sub(r"[^a-z0-9]+", "-", self.title.lower()).strip("-")
+        return base[:60] or "paper"
+
+
+@dataclass
+class Verdict:
+    relevant: bool
+    rule_id: str | None
+    rule_category: str | None
+    one_line: str
+    blocked_example: str | None
+    source_evidence: str
+
+
+def fetch_source(url: str) -> str:
+    req = urllib.request.Request(url, headers={"User-Agent": "aigis-paper-review/1.0"})
+    with urllib.request.urlopen(req, timeout=30) as resp:
+        return resp.read().decode("utf-8")
+
+
+def parse_entries(text: str) -> list[Entry]:
+    entries: list[Entry] = []
+    section = "(unsectioned)"
+    for line in text.splitlines():
+        m = SECTION_RE.match(line)
+        if m:
+            section = m.group(1).strip()
+            continue
+        m = ENTRY_RE.match(line)
+        if not m:
+            continue
+        entries.append(
+            Entry(
+                section=section,
+                title=m.group("title").strip().strip("`"),
+                venue=m.group("venue").strip(),
+                date=m.group("date").strip(),
+                url=(m.group("url") or "").strip() or None,
+            )
+        )
+    return entries
+
+
+def load_state() -> dict:
+    if not STATE_PATH.exists():
+        return {"seen": {}, "runs": []}
+    data = json.loads(STATE_PATH.read_text(encoding="utf-8"))
+    data.setdefault("seen", {})
+    data.setdefault("runs", [])
+    return data
+
+
+def save_state(state: dict) -> None:
+    STATE_PATH.write_text(json.dumps(state, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+
+
+def pick_unseen(entries: list[Entry], seen: dict, limit: int) -> list[Entry]:
+    """Return at most `limit` entries whose dedup key is not in `seen`, newest date first."""
+    fresh = [e for e in entries if e.key not in seen]
+    fresh.sort(key=lambda e: tuple(int(p) for p in (e.date or "").split(".") if p), reverse=True)  # numeric parts: as strings "2026.9" > "2026.10"
+    return fresh[: max(limit, 0)]  # a negative limit must not slice from the tail
+
+
+def judge_with_anthropic(entry: Entry) -> Verdict:
+    import anthropic  # noqa: PLC0415 — optional dep, only needed in non-dry-run
+
+    client = anthropic.Anthropic()
+    prompt = JUDGE_PROMPT.format(
+        aigis_context=AIGIS_CONTEXT,
+        title=entry.title,
+        venue=entry.venue,
+        date=entry.date,
+        url=entry.url or "(no URL)",
+        section=entry.section,
+    )
+    msg = client.messages.create(
+        model="claude-haiku-4-5-20251001",
+        max_tokens=500,
+        messages=[{"role": "user", "content": prompt}],
+    )
+    raw = "".join(block.text for block in msg.content if getattr(block, "text", None))
+    return parse_verdict(raw)
+
+
+def parse_verdict(raw: str) -> Verdict:
+    """Parse the model's JSON reply into a Verdict; json.JSONDecodeError propagates on non-JSON text."""
+    text = raw.strip()
+    if text.startswith("```"):  # defensive: strip a code fence if the model added one
+        text = re.sub(r"^```[a-zA-Z]*\n?|```$", "", text).strip()
+    obj = json.loads(text)
+    return Verdict(
+        relevant=str(obj.get("relevant")).strip().lower() == "true",  # bool("false") would be True
+        rule_id=obj.get("rule_id") or None,
+        rule_category=obj.get("rule_category") or None,
+        one_line=str(obj.get("one_line") or "")[:400],  # JSON null must not become "None"
+        blocked_example=obj.get("blocked_example") or None,
+        source_evidence=str(obj.get("source_evidence") or "")[:400],
+    )
+
+
+def write_pending(entry: Entry, verdict: Verdict, today: str) -> Path:
+    fname = f"{today}_paper_{entry.slug}.md"
+    path = PENDING_DIR / fname
+    body = f"""# Pending: {verdict.rule_id or entry.slug}
+
+## Title
+
+{verdict.one_line}
+
+## Source paper
+
+- **{entry.title}**
+- Venue: {entry.venue} ({entry.date})
+- URL: {entry.url or "(no URL provided in source list)"}
+- Discovered via: Awesome-LLM4Cybersecurity / `{entry.section}`
+
+## Why it might matter for Aigis
+
+{verdict.source_evidence}
+
+## Proposed rule (draft)
+
+- **rule_id (proposed):** `{verdict.rule_id or "TBD"}`
+- **category:** {verdict.rule_category or "TBD"}
+- **what it catches:** {verdict.one_line}
+
+### Example the rule should flag
+
+```
+{verdict.blocked_example or "(LLM did not surface a concrete example — verify against the paper before implementing.)"}
+```
+
+## Why this is in pending/ not implemented
+
+This entry was drafted by `auto-improvement/scripts/paper_review.py` from the
+Awesome-LLM4Cybersecurity reading list. A human reviewer must:
+
+1. Read the actual paper to confirm the technique is real and current.
+2. Decide if the regex/string signature above survives realistic adversarial
+   variations (or if a stricter pattern is needed).
+3. Promote the rule into the appropriate `aigis/` module via a normal PR, with
+   tests covering both the example above and at least one near-miss benign case.
+4. Or close this file with a note on why it was rejected.
+"""
+    path.write_text(body, encoding="utf-8")
+    return path
+
+
+def write_research(
+    today_path: str, batch: list[tuple[Entry, Verdict | None]], source_url: str
+) -> Path:
+    path = RESEARCH_DIR / f"{today_path}_paper-batch.md"
+    lines: list[str] = [
+        f"# Research: paper-batch — {today_path}",
+        "",
+        f"Source: {source_url}",
+        f"Papers reviewed this run: {len(batch)}",
+        f"Candidates surfaced (relevant=true): "
+        + str(sum(1 for _, v in batch if v and v.relevant)),
+        "",
+        "---",
+        "",
+    ]
+    for entry, verdict in batch:
+        lines.append(f"## {entry.title}")
+        lines.append("")
+        lines.append(f"- Venue: {entry.venue} ({entry.date})")
+        lines.append(f"- URL: {entry.url or '(none)'}")
+        lines.append(f"- Section: {entry.section}")
+        if verdict is None:
+            lines.append("- Verdict: SKIPPED (no API call this run)")
+        else:
+            lines.append(
+                f"- Verdict: {'RELEVANT' if verdict.relevant else 'not relevant'} "
+                f"— {verdict.one_line}"
+            )
+            if verdict.relevant:
+                lines.append(f"- Proposed rule_id: `{verdict.rule_id}` ({verdict.rule_category})")
+        lines.append("")
+    path.write_text("\n".join(lines) + "\n", encoding="utf-8")
+    return path
+
+
+def open_issue(
+    today: str,
+    batch: list[tuple[Entry, Verdict]],
+    research_path: Path,
+    pending_paths: list[Path],
+) -> None:
+    relevant = [(e, v) for e, v in batch if v.relevant]
+    title = (
+        f"Paper review {today}: {len(batch)} papers reviewed, "
+        f"{len(relevant)} candidate rule(s)"
+    )
+    body_lines = [
+        f"Automated batch from `auto-improvement/scripts/paper_review.py`.",
+        "",
+        f"- Research roll-up: `{research_path.relative_to(REPO_ROOT).as_posix()}`",
+        f"- Candidates drafted under `auto-improvement/pending/`: {len(pending_paths)}",
+        "",
+        "## Candidates",
+        "",
+    ]
+    if not relevant:
+        body_lines.append("_No candidate surfaced relevant detector ideas this batch._")
+    for entry, verdict in relevant:
+        body_lines.extend(
+            [
+                f"### `{verdict.rule_id}` — {entry.title}",
+                "",
+                f"- Category: {verdict.rule_category}",
+                f"- Source: {entry.venue} ({entry.date}) — {entry.url or '(no URL)'}",
+                f"- Catches: {verdict.one_line}",
+                f"- Pending file: `auto-improvement/pending/{today}_paper_{entry.slug}.md`",
+                "",
+            ]
+        )
+    body_lines.extend(
+        [
+            "## All papers reviewed",
+            "",
+            *[
+                f"- {('✅' if v.relevant else '⏭️')} {e.title} ({e.venue}, {e.date})"
+                for e, v in batch
+            ],
+            "",
+            "---",
+            "",
+            "Reviewer checklist:",
+            "- [ ] Confirm each ✅ entry against the actual paper",
+            "- [ ] Promote accepted ones into the appropriate `aigis/` module via PR",
+            "- [ ] Close + delete pending files for rejected proposals",
+        ]
+    )
+    subprocess.run(
+        ["gh", "issue", "create", "--title", title, "--body", "\n".join(body_lines)],
+        check=True,
+        cwd=REPO_ROOT,
+    )
+
+
+def run(argv: Iterable[str] | None = None) -> int:
+    """CLI entry point: pick unseen papers, judge them, write drafts + state; returns the exit code."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--max-papers", type=int, default=10)
+    parser.add_argument("--source-url", default=DEFAULT_SOURCE)
+    parser.add_argument("--dry-run", action="store_true", help="Parse + pick only, no API/IO writes")
+    parser.add_argument("--no-issue", action="store_true", help="Skip the gh issue step")
+    args = parser.parse_args(list(argv) if argv is not None else None)
+
+    text = fetch_source(args.source_url)
+    entries = parse_entries(text)
+    if not entries:
+        print("ERROR: parsed 0 entries from source. Format may have changed.", file=sys.stderr)
+        return 2
+
+    state = load_state()
+    picked = pick_unseen(entries, state["seen"], args.max_papers)
+    print(f"Source entries: {len(entries)} | already seen: {len(state['seen'])} | picked: {len(picked)}")
+    for e in picked:
+        print(f"  - {e.date} | {e.title[:80]}")
+
+    if args.dry_run:
+        return 0
+
+    if not picked:
+        print("Nothing new to review.")
+        return 0
+
+    PENDING_DIR.mkdir(parents=True, exist_ok=True)  # created only past the dry-run gate (no writes in dry-run)
+    RESEARCH_DIR.mkdir(parents=True, exist_ok=True)
+    now = dt.datetime.now(dt.timezone.utc)  # aware UTC; datetime.utcnow() is deprecated
+    today = now.strftime("%Y-%m-%d")
+    today_path = now.strftime("%Y-%m-%dT%H-%M")
+
+    batch: list[tuple[Entry, Verdict]] = []
+    pending_paths: list[Path] = []
+    for entry in picked:
+        try:
+            verdict = judge_with_anthropic(entry)
+        except Exception as exc:  # pragma: no cover — surfaced in the issue body
+            print(f"WARN: judge failed for {entry.title!r}: {exc}", file=sys.stderr)
+            batch.append((entry, Verdict(False, None, None, f"judge error: {exc}", None, "")))
+            continue  # leave it unseen: an API/parse failure is not a verdict, so retry next run
+        batch.append((entry, verdict))
+        if verdict.relevant:
+            pending_paths.append(write_pending(entry, verdict, today))
+        state["seen"][entry.key] = {
+            "title": entry.title,
+            "date": entry.date,
+            "venue": entry.venue,
+            "url": entry.url,
+            "first_seen_utc": now.strftime("%Y-%m-%dT%H:%M:%SZ"),
+            "relevant": verdict.relevant,
+        }
+
+    research_path = write_research(today_path, list(batch), args.source_url)
+
+    state["runs"].append(
+        {
+            "run_utc": now.strftime("%Y-%m-%dT%H:%M:%SZ"),
+            "picked": len(picked),
+            "relevant": sum(1 for _, v in batch if v.relevant),
+            "research": research_path.relative_to(REPO_ROOT).as_posix(),
+        }
+    )
+    save_state(state)
+
+    if not args.no_issue:
+        try:
+            open_issue(today, batch, research_path, pending_paths)
+        except subprocess.CalledProcessError as exc:
+            print(f"WARN: gh issue create failed: {exc}", file=sys.stderr)
+
+    print(
+        f"Done. Reviewed {len(batch)} | relevant {len(pending_paths)} | "
+        f"research: {research_path.relative_to(REPO_ROOT)}"
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(run())