From 5d5d962cd6af393e6abd50b6aff3d2f0a497d6b6 Mon Sep 17 00:00:00 2001 From: killertcell428 Date: Mon, 18 May 2026 23:55:21 +0900 Subject: [PATCH] auto-improvement: daily paper-review loop from Awesome-LLM4Cybersecurity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a half-automated loop that, every day at 00:15 UTC: 1. Fetches the upstream LITERATURES.md (639 entries today, growing). 2. Picks ~10 unseen newest entries and asks Claude Haiku 4.5 whether each one yields a concrete regex/substring-scoped Aigis detector. 3. Drafts a pending/ stub per relevant candidate (humans promote to real rule PRs; ROTATION-loop conventions preserved). 4. Files one summary issue + opens a bot PR with the new pending files and updated state.json. No detector code is touched by the workflow itself — this is the semi-automatic mode the user picked: candidates + draft patches in, human review out. Cost target: <$1/day (10 papers x Haiku ~500 out tokens). Requires repo secret ANTHROPIC_API_KEY. Dry-run via workflow_dispatch input dry_run=true exercises the parser without burning API credits. Parser verified locally against the live source: 639 entries parsed, newest-first ordering produces the expected top picks (SecLens 2026.04, CyberThreat-Eval 2026.03, Minerva 2026.01). 
Signed-off-by: killertcell428 --- .github/workflows/paper-review.yml | 91 +++++ auto-improvement/README.md | 22 +- auto-improvement/paper_review_state.json | 4 + auto-improvement/scripts/paper_review.py | 455 +++++++++++++++++++++++ 4 files changed, 571 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/paper-review.yml create mode 100644 auto-improvement/paper_review_state.json create mode 100644 auto-improvement/scripts/paper_review.py diff --git a/.github/workflows/paper-review.yml b/.github/workflows/paper-review.yml new file mode 100644 index 0000000..2103394 --- /dev/null +++ b/.github/workflows/paper-review.yml @@ -0,0 +1,91 @@ +name: Auto-improvement / Daily paper review + +# Reads ~10 unseen entries from tmylla/Awesome-LLM4Cybersecurity each day, +# asks Claude Haiku whether they suggest a concrete Aigis hardening, and +# files candidates as auto-improvement/pending/ stubs + one GitHub issue +# summarising the batch. Code changes are NEVER made by this workflow — +# humans promote pending entries into rule PRs. +# +# Cost guard: 10 papers × Haiku 4.5 (≈500 tokens out) ≈ a few cents/day. +# State (paper_review_state.json) reaches master only via a bot PR, so the +# next run advances once that PR is merged; if the commit step fails, only +# the summary issue survives (pending files + state stay on the runner). + +on: + schedule: + # 00:15 UTC daily — well clear of cflite/codeql peak times. 
+ - cron: "15 0 * * *" + workflow_dispatch: + inputs: + dry_run: + description: "Parse + pick only, no API calls or writes" + required: false + default: "false" + type: choice + options: ["false", "true"] + max_papers: + description: "How many unseen papers to review this run" + required: false + default: "10" + +permissions: + contents: write # to commit state.json + pending stubs + issues: write # to file the daily summary issue + pull-requests: write # so the bot can open a PR with the new pending files + +concurrency: + group: paper-review + cancel-in-progress: false + +jobs: + review: + name: Review 10 papers and file pending candidates + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "3.11" + + - name: Set up uv + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 + + - name: Install anthropic SDK + run: uv pip install --system "anthropic>=0.40.0" + + - name: Run paper review + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_REPOSITORY: ${{ github.repository }} + run: | + DRY="${{ inputs.dry_run || 'false' }}" + MAX="${{ inputs.max_papers || '10' }}" + ARGS="--max-papers $MAX" + if [ "$DRY" = "true" ]; then ARGS="$ARGS --dry-run"; fi + python auto-improvement/scripts/paper_review.py $ARGS + + - name: Commit new pending + research + state on a bot branch + if: ${{ inputs.dry_run != 'true' }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + if git diff --quiet -- auto-improvement/; then + echo "No changes under auto-improvement/ — nothing to commit." 
+ exit 0 + fi + DATE="$(date -u +%Y-%m-%d)" + BRANCH="bot/paper-review/${DATE}" + git config user.name "aigis-paper-review[bot]" + git config user.email "aigis-paper-review@users.noreply.github.com" + git checkout -b "$BRANCH" + git add auto-improvement/ + git commit -m "auto-improvement: daily paper review ${DATE}" + git push -u origin "$BRANCH" + gh pr create \ + --title "auto-improvement: daily paper review ${DATE}" \ + --body "Bot PR with the daily batch of pending/ stubs and updated state. See the linked issue for the candidate list." \ + --label "auto-improvement" || true diff --git a/auto-improvement/README.md b/auto-improvement/README.md index 0fb5daa..86e751d 100644 --- a/auto-improvement/README.md +++ b/auto-improvement/README.md @@ -9,9 +9,29 @@ aigis を 6 時間ごとに自動強化する保守ループの作業領域。 |------|------| | `ROTATION.md` | 10 領域ローテ定義 + 現在のカウンタ。毎回 +1 (mod 10) される | | `INDEX.md` | 全実行回の時系列インデックス(1 行サマリ) | -| `research/` | 各回のリサーチレポート (UTC 名: `YYYY-MM-DDTHH-MM_NN-.md`) | +| `research/` | 各回のリサーチレポート (UTC 名: `YYYY-MM-DDTHH-MM_NN-.md` または `..._paper-batch.md`) | | `changes/` | 各回の改修記録(追加機能・テスト結果・対応リサーチへのリンク) | | `pending/` | 大幅方向転換の提案。実装は保留。人間が後で採否を判断 | +| `paper_review_state.json` | 後述「論文レビューループ」で読み終えた URL/タイトルの台帳 | +| `scripts/paper_review.py` | 論文レビューループ本体(毎日 GH Actions から起動) | + +## 論文レビューループ(2026-05 追加) + +[Awesome-LLM4Cybersecurity](https://github.com/tmylla/Awesome-LLM4Cybersecurity) を毎日 10 件ずつ読み進める半自動ループ。`.github/workflows/paper-review.yml` が 00:15 UTC に走り、`scripts/paper_review.py` が以下を行う: + +1. 上流 `LITERATURES.md` を fetch +2. `paper_review_state.json` の既読 URL/タイトルを除外し、未読の新しい順から 10 件ピック +3. 各論文を Claude Haiku 4.5 に渡し、「Aigis に regex/部分一致で落とせる検出器候補があるか」を JSON で判定 +4. relevant=true のものを `pending/YYYY-MM-DD_paper_<slug>.md` として draft 化 +5. バッチ全体のサマリを `research/YYYY-MM-DDTHH-MM_paper-batch.md` に書き出し +6. `gh issue create` でレビュー依頼 Issue を 1 本オープン +7. 
変更を bot ブランチで PR 化(人間がレビュー → master へマージ) + +実装は一切しない。pending/ に積まれた候補は、既存のルール([ROTATION.md](ROTATION.md))と同じく、人間が個別 PR で `aigis/` 配下に昇格させる。 + +**必要な secrets:** `ANTHROPIC_API_KEY`(Anthropic console から発行、Settings → Secrets → Actions に登録)。未設定でも workflow は失敗しない点に注意: 判定呼び出しの例外は `scripts/paper_review.py` 側で捕捉され、その回の論文は「judge error」として既読扱いになるため、必ず事前に登録すること。API を使わない確認は `workflow_dispatch` から `dry_run=true` で可能。 + +**コスト目安:** 10 件 × Haiku 4.5(≈500 出力トークン)≈ 数¢/日。月 $1 弱を想定。 ## 運用ルール(保守エージェントが守る) diff --git a/auto-improvement/paper_review_state.json b/auto-improvement/paper_review_state.json new file mode 100644 index 0000000..23d93f7 --- /dev/null +++ b/auto-improvement/paper_review_state.json @@ -0,0 +1,4 @@ +{ + "seen": {}, + "runs": [] +} diff --git a/auto-improvement/scripts/paper_review.py b/auto-improvement/scripts/paper_review.py new file mode 100644 index 0000000..09f4be6 --- /dev/null +++ b/auto-improvement/scripts/paper_review.py @@ -0,0 +1,455 @@ +#!/usr/bin/env python3 +"""Daily paper-review loop for the Awesome-LLM4Cybersecurity list. + +Reads N (default 10) unseen entries from the upstream LITERATURES.md, asks +Claude Haiku whether each one suggests a concrete Aigis hardening, and: + + - writes a pending/ stub for each adopted candidate (human reviews & merges), + - writes a research/ roll-up for the batch (always, even with 0 candidates), + - opens one GitHub issue summarising the batch (one click per candidate to + promote into a real PR), + - records every seen URL/title into paper_review_state.json so the next run + moves forward. + +The script is intentionally additive: it only writes drafts. No detector code +is touched. Human reviewers promote pending/ entries via a follow-up PR. + +Usage: + python auto-improvement/scripts/paper_review.py # full run + python auto-improvement/scripts/paper_review.py --dry-run # parse only + python auto-improvement/scripts/paper_review.py --no-issue # skip gh + +Required env (non-dry-run): ANTHROPIC_API_KEY. +Optional env: GITHUB_REPOSITORY (for issue URL composition under Actions). 
+""" + +from __future__ import annotations + +import argparse +import datetime as dt +import hashlib +import json +import re +import subprocess +import sys +import urllib.request +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable + +REPO_ROOT = Path(__file__).resolve().parents[2] +AI_DIR = REPO_ROOT / "auto-improvement" +PENDING_DIR = AI_DIR / "pending" +RESEARCH_DIR = AI_DIR / "research" +STATE_PATH = AI_DIR / "paper_review_state.json" + +DEFAULT_SOURCE = ( + "https://raw.githubusercontent.com/tmylla/Awesome-LLM4Cybersecurity/main/LITERATURES.md" +) + +# Numbered list line: "12. Title | *Venue* | 2026.01.31 | [Paper Link](url)" +# URL portion is optional; title may contain colons / pipes inside backticks. +ENTRY_RE = re.compile( + r"^\s*\d+\.\s+(?P<title>.+?)\s*\|\s*\*(?P<venue>.+?)\*\s*\|\s*(?P<date>[\d.]+)" + r"(?:\s*\|\s*\[[^\]]*\]\((?P<url>[^)\s]+)\))?\s*$" +) +SECTION_RE = re.compile(r"^#{1,4}\s+(.+?)\s*$") + +# Aigis context used to bias the LLM toward concrete pattern proposals. +AIGIS_CONTEXT = """\ +Aigis is a zero-dependency Python firewall for LLM agents. + +Detection model: +- Each rule is a DetectionPattern (regex + score + input/output filter scope). +- Rules live in modules like aigis/policies/, aigis/patterns.py, aigis/safety/, + aigis/supply_chain/, etc. Each rule has a stable rule_id (e.g. `sc_langflow_build_exec`, + `afe_python_mro_escape`, `pi_unicode_tag_block`). +- The loop has a hard budget: one paper translates to at most ~10 LOC of pattern + code + a few regex tests. Proposals that need new dependencies, ML models, + trained classifiers, or rewriting >100 LOC are NOT relevant. + +What IS relevant: +- A new attack technique with a string signature catchable by regex / substring / + small AST inspection (e.g. a specific API path, a specific f-string template, + a specific Unicode block, a specific obfuscation prefix). +- A measurable improvement to an existing rule (e.g. 
extending a regex to catch + a documented bypass). +- A new compliance template (NIST / EU AI Act / ISO) that maps cleanly onto an + existing policy_templates/ file. +- A hardening guide (docs-only) that codifies operational guidance from the paper. + +What is NOT relevant: +- ML-based detection, embeddings, training pipelines. +- Pure benchmarks/datasets with no extractable detection rule. +- Surveys that summarise prior work without introducing new attack signatures. +- Agentic frameworks for offensive use that don't translate into a defensive signature. +""" + +JUDGE_PROMPT = """\ +You are triaging one paper for the Aigis auto-improvement loop. + +{aigis_context} + +Paper: +- Title: {title} +- Venue: {venue} +- Date: {date} +- URL: {url} +- List section: {section} + +Decide whether this paper plausibly yields a concrete, regex/substring-scoped +detection rule or a small docs/template addition for Aigis. Be strict: when in +doubt, say no. Most papers will be irrelevant; that is fine. + +Reply with one JSON object, no prose, no code fences: +{{ + "relevant": true | false, + "rule_id": "<short snake_case id if relevant, else null>", + "rule_category": "input | output | supply_chain | mcp | memory | compliance | docs | null", + "one_line": "<= 25 words explaining what the rule catches, or why irrelevant>", + "blocked_example": "<a literal example string the rule would flag, or null>", + "source_evidence": "<one sentence quoting or paraphrasing the paper's key finding>" +}} +""" + + +@dataclass +class Entry: + section: str + title: str + venue: str + date: str + url: str | None + + @property + def key(self) -> str: + """Stable dedup key: URL when present, else title hash.""" + if self.url: + return f"url:{self.url}" + return "title:" + hashlib.sha1(self.title.encode("utf-8")).hexdigest()[:16] + + @property + def slug(self) -> str: + base = re.sub(r"[^a-z0-9]+", "-", self.title.lower()).strip("-") + return base[:60] or "paper" + + +@dataclass +class Verdict: + 
relevant: bool + rule_id: str | None + rule_category: str | None + one_line: str + blocked_example: str | None + source_evidence: str + + +def fetch_source(url: str) -> str: + req = urllib.request.Request(url, headers={"User-Agent": "aigis-paper-review/1.0"}) + with urllib.request.urlopen(req, timeout=30) as resp: + return resp.read().decode("utf-8") + + +def parse_entries(text: str) -> list[Entry]: + entries: list[Entry] = [] + section = "(unsectioned)" + for line in text.splitlines(): + m = SECTION_RE.match(line) + if m: + section = m.group(1).strip() + continue + m = ENTRY_RE.match(line) + if not m: + continue + entries.append( + Entry( + section=section, + title=m.group("title").strip().strip("`"), + venue=m.group("venue").strip(), + date=m.group("date").strip(), + url=(m.group("url") or "").strip() or None, + ) + ) + return entries + + +def load_state() -> dict: + if not STATE_PATH.exists(): + return {"seen": {}, "runs": []} + data = json.loads(STATE_PATH.read_text(encoding="utf-8")) + data.setdefault("seen", {}) + data.setdefault("runs", []) + return data + + +def save_state(state: dict) -> None: + STATE_PATH.write_text(json.dumps(state, indent=2, ensure_ascii=False) + "\n", encoding="utf-8") + + +def pick_unseen(entries: list[Entry], seen: dict, limit: int) -> list[Entry]: + fresh = [e for e in entries if e.key not in seen] + # Prefer newest first (date string sorts because format is YYYY.MM.DD). 
+ fresh.sort(key=lambda e: (e.date or ""), reverse=True) + return fresh[:limit] + + +def judge_with_anthropic(entry: Entry) -> Verdict: + import anthropic # noqa: PLC0415 — optional dep, only needed in non-dry-run + + client = anthropic.Anthropic() + prompt = JUDGE_PROMPT.format( + aigis_context=AIGIS_CONTEXT, + title=entry.title, + venue=entry.venue, + date=entry.date, + url=entry.url or "(no URL)", + section=entry.section, + ) + msg = client.messages.create( + model="claude-haiku-4-5-20251001", + max_tokens=500, + messages=[{"role": "user", "content": prompt}], + ) + raw = "".join(block.text for block in msg.content if getattr(block, "text", None)) + return parse_verdict(raw) + + +def parse_verdict(raw: str) -> Verdict: + # Defensive parse: strip code fences if the model added any. + text = raw.strip() + if text.startswith("```"): + text = re.sub(r"^```[a-zA-Z]*\n?|```$", "", text).strip() + obj = json.loads(text) + return Verdict( + relevant=bool(obj.get("relevant")), + rule_id=obj.get("rule_id") or None, + rule_category=obj.get("rule_category") or None, + one_line=str(obj.get("one_line", ""))[:400], + blocked_example=obj.get("blocked_example") or None, + source_evidence=str(obj.get("source_evidence", ""))[:400], + ) + + +def write_pending(entry: Entry, verdict: Verdict, today: str) -> Path: + fname = f"{today}_paper_{entry.slug}.md" + path = PENDING_DIR / fname + body = f"""# Pending: {verdict.rule_id or entry.slug} + +## Title + +{verdict.one_line} + +## Source paper + +- **{entry.title}** +- Venue: {entry.venue} ({entry.date}) +- URL: {entry.url or "(no URL provided in source list)"} +- Discovered via: Awesome-LLM4Cybersecurity / `{entry.section}` + +## Why it might matter for Aigis + +{verdict.source_evidence} + +## Proposed rule (draft) + +- **rule_id (proposed):** `{verdict.rule_id or "TBD"}` +- **category:** {verdict.rule_category or "TBD"} +- **what it catches:** {verdict.one_line} + +### Example the rule should flag + +``` +{verdict.blocked_example or 
"(LLM did not surface a concrete example — verify against the paper before implementing.)"} +``` + +## Why this is in pending/ not implemented + +This entry was drafted by `auto-improvement/scripts/paper_review.py` from the +Awesome-LLM4Cybersecurity reading list. A human reviewer must: + +1. Read the actual paper to confirm the technique is real and current. +2. Decide if the regex/string signature above survives realistic adversarial + variations (or if a stricter pattern is needed). +3. Promote the rule into the appropriate `aigis/` module via a normal PR, with + tests covering both the example above and at least one near-miss benign case. +4. Or close this file with a note on why it was rejected. +""" + path.write_text(body, encoding="utf-8") + return path + + +def write_research( + today_path: str, batch: list[tuple[Entry, Verdict | None]], source_url: str +) -> Path: + path = RESEARCH_DIR / f"{today_path}_paper-batch.md" + lines: list[str] = [ + f"# Research: paper-batch — {today_path}", + "", + f"Source: {source_url}", + f"Papers reviewed this run: {len(batch)}", + f"Candidates surfaced (relevant=true): " + + str(sum(1 for _, v in batch if v and v.relevant)), + "", + "---", + "", + ] + for entry, verdict in batch: + lines.append(f"## {entry.title}") + lines.append("") + lines.append(f"- Venue: {entry.venue} ({entry.date})") + lines.append(f"- URL: {entry.url or '(none)'}") + lines.append(f"- Section: {entry.section}") + if verdict is None: + lines.append("- Verdict: SKIPPED (no API call this run)") + else: + lines.append( + f"- Verdict: {'RELEVANT' if verdict.relevant else 'not relevant'} " + f"— {verdict.one_line}" + ) + if verdict.relevant: + lines.append(f"- Proposed rule_id: `{verdict.rule_id}` ({verdict.rule_category})") + lines.append("") + path.write_text("\n".join(lines) + "\n", encoding="utf-8") + return path + + +def open_issue( + today: str, + batch: list[tuple[Entry, Verdict]], + research_path: Path, + pending_paths: list[Path], +) -> None: + 
relevant = [(e, v) for e, v in batch if v.relevant] + title = ( + f"Paper review {today}: {len(batch)} papers reviewed, " + f"{len(relevant)} candidate rule(s)" + ) + body_lines = [ + f"Automated batch from `auto-improvement/scripts/paper_review.py`.", + "", + f"- Research roll-up: `{research_path.relative_to(REPO_ROOT).as_posix()}`", + f"- Candidates drafted under `auto-improvement/pending/`: {len(pending_paths)}", + "", + "## Candidates", + "", + ] + if not relevant: + body_lines.append("_No candidate surfaced relevant detector ideas this batch._") + for entry, verdict in relevant: + body_lines.extend( + [ + f"### `{verdict.rule_id}` — {entry.title}", + "", + f"- Category: {verdict.rule_category}", + f"- Source: {entry.venue} ({entry.date}) — {entry.url or '(no URL)'}", + f"- Catches: {verdict.one_line}", + f"- Pending file: `auto-improvement/pending/{today}_paper_{entry.slug}.md`", + "", + ] + ) + body_lines.extend( + [ + "## All papers reviewed", + "", + *[ + f"- {('✅' if v.relevant else '⏭️')} {e.title} ({e.venue}, {e.date})" + for e, v in batch + ], + "", + "---", + "", + "Reviewer checklist:", + "- [ ] Confirm each ✅ entry against the actual paper", + "- [ ] Promote accepted ones into the appropriate `aigis/` module via PR", + "- [ ] Close + delete pending files for rejected proposals", + ] + ) + subprocess.run( + ["gh", "issue", "create", "--title", title, "--body", "\n".join(body_lines)], + check=True, + cwd=REPO_ROOT, + ) + + +def run(argv: Iterable[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--max-papers", type=int, default=10) + parser.add_argument("--source-url", default=DEFAULT_SOURCE) + parser.add_argument("--dry-run", action="store_true", help="Parse + pick only, no API/IO writes") + parser.add_argument("--no-issue", action="store_true", help="Skip the gh issue step") + args = parser.parse_args(list(argv) if argv is not None else None) + + PENDING_DIR.mkdir(parents=True, exist_ok=True) + 
RESEARCH_DIR.mkdir(parents=True, exist_ok=True) + + text = fetch_source(args.source_url) + entries = parse_entries(text) + if not entries: + print("ERROR: parsed 0 entries from source. Format may have changed.", file=sys.stderr) + return 2 + + state = load_state() + picked = pick_unseen(entries, state["seen"], args.max_papers) + print(f"Source entries: {len(entries)} | already seen: {len(state['seen'])} | picked: {len(picked)}") + for e in picked: + print(f" - {e.date} | {e.title[:80]}") + + if args.dry_run: + return 0 + + if not picked: + print("Nothing new to review.") + return 0 + + now = dt.datetime.utcnow() + today = now.strftime("%Y-%m-%d") + today_path = now.strftime("%Y-%m-%dT%H-%M") + + batch: list[tuple[Entry, Verdict]] = [] + pending_paths: list[Path] = [] + for entry in picked: + try: + verdict = judge_with_anthropic(entry) + except Exception as exc: # pragma: no cover — surfaced in the issue body + print(f"WARN: judge failed for {entry.title!r}: {exc}", file=sys.stderr) + verdict = Verdict(False, None, None, f"judge error: {exc}", None, "") + batch.append((entry, verdict)) + if verdict.relevant: + pending_paths.append(write_pending(entry, verdict, today)) + # Mark seen regardless of verdict so we don't re-judge it tomorrow. 
+ state["seen"][entry.key] = { + "title": entry.title, + "date": entry.date, + "venue": entry.venue, + "url": entry.url, + "first_seen_utc": now.isoformat(timespec="seconds") + "Z", + "relevant": verdict.relevant, + } + + research_path = write_research(today_path, [(e, v) for e, v in batch], args.source_url) + + state["runs"].append( + { + "run_utc": now.isoformat(timespec="seconds") + "Z", + "picked": len(picked), + "relevant": sum(1 for _, v in batch if v.relevant), + "research": research_path.relative_to(REPO_ROOT).as_posix(), + } + ) + save_state(state) + + if not args.no_issue: + try: + open_issue(today, batch, research_path, pending_paths) + except subprocess.CalledProcessError as exc: + print(f"WARN: gh issue create failed: {exc}", file=sys.stderr) + + print( + f"Done. Reviewed {len(batch)} | relevant {len(pending_paths)} | " + f"research: {research_path.relative_to(REPO_ROOT)}" + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(run())