mathpluscode
diff --git a/‎.claude-plugin/marketplace.json‎
Lines changed: 1 addition & 1 deletion b/‎.claude-plugin/marketplace.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.claude-plugin/plugin.json‎
Lines changed: 1 addition & 1 deletion b/‎.claude-plugin/plugin.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md‎
Lines changed: 32 additions & 2 deletions b/‎README.md‎
Lines changed: 32 additions & 2 deletions
diff --git a/‎docs/build.py‎
Lines changed: 14 additions & 18 deletions b/‎docs/build.py‎
Lines changed: 14 additions & 18 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
       "name": "bibtools",
       "source": "./",
       "description": "A bibliography toolkit for LaTeX",
-      "version": "1.7.4",
+      "version": "1.7.5",
       "keywords": ["bibtex", "bibliography", "latex", "overleaf", "academic", "reference", "citation"],
       "category": "academic",
       "license": "MIT"
 
@@ -1,7 +1,7 @@
 {
   "name": "bibtools",
   "description": "A bibliography toolkit for LaTeX",
-  "version": "1.7.4",
+  "version": "1.7.5",
   "author": {
     "name": "Yunguan Fu"
   },
 
@@ -63,8 +63,38 @@ Or in Claude Code, use the slash command: `/bibtidy refs.bib`
 
 bibtidy verifies each entry against [Google Scholar](https://scholar.google.com/) and [CrossRef](https://search.crossref.org/), fixes errors, and upgrades stale preprints to published versions. Every change includes the original entry commented out above so you can compare or revert, plus one or more `% bibtidy:` URL lines for verification. We recommend using git to track changes. If using [Overleaf](https://www.overleaf.com/), this can be done with [git sync](https://docs.overleaf.com/integrations-and-add-ons/git-integration-and-github-synchronization). To remove bibtidy comments after review, ask your agent to remove all `bibtidy` comments from the file.
 
-Note that bibtidy assumes standard brace-style BibTeX like `@article{...}`. Parenthesized forms like `@article(...)` are not supported; convert them to brace style first.
+Note that bibtidy assumes standard brace-style BibTeX like `@article{...}`. Parenthesized forms like `@article(...)` are not supported. Special blocks such as `@string`, `@preamble`, and `@comment` are ignored by the parser.
+
+### How it works
+
+bibtidy walks each entry through a bounded state machine. Every entry has a **web-search budget of 1**, spent at most once across two possible waves:
+
+```mermaid
+flowchart TD
+    P1["Phase 1: duplicates.py (exact/subset, lossless)"]
+    P2["Phase 2: compare.py fetches CrossRef candidates"]
+    HAS{"candidates?"}
+    WA["Wave A web search<br/>(mandatory, budget spent)"]
+    P3{"Phase 3: agent decides per entry"}
+    BUDGET{"budget spent?"}
+    WB["Wave B web search<br/>(budget spent)"]
+    DECIDE2["decide again with combined info"]
+    REVIEW["add '% bibtidy: REVIEW' comment<br/>with URLs, bib entry unchanged"]
+    PATCH["build fix patch (or no-op)"]
+    P4["Phase 4: duplicates.py (post-fix)<br/>+ manual near-duplicate review"]
+
+    P1 --> P2 --> HAS
+    HAS -- yes --> P3
+    HAS -- no --> WA --> P3
+    P3 -- confident --> PATCH
+    P3 -- not confident --> BUDGET
+    BUDGET -- no --> WB --> DECIDE2 --> PATCH
+    BUDGET -- yes --> REVIEW
+    PATCH --> P4
+    REVIEW --> P4
+```
 
+Each entry ends in one of four states: **Clean** (no change, no comment), **Fix** (patch applied with URLs and an explanation), **Not found** (hallucinated, entry commented out), or **Review** (budget spent, entry unchanged, comment added for human attention).
 
 ### Examples
 
@@ -390,7 +420,7 @@ You shouldn't, and that's by design. The point of bibtidy is to surface potentia
 
 **Why does bibtidy flag so many page number errors?**
 
-Google Scholar extracts metadata by scraping PDFs rather than querying publisher databases, so page numbers are frequently incorrect. Even official sources can disagree, for example, the same CVPR 2020 paper "Momentum Contrast for Unsupervised Visual Representation Learning" has pages 9729--9738 on [CVF Open Access](https://openaccess.thecvf.com/content_CVPR_2020/html/He_Momentum_Contrast_for_Unsupervised_Visual_Representation_Learning_CVPR_2020_paper.html) but pages 9726--9735 on [IEEE Xplore](https://ieeexplore.ieee.org/document/9157636), because IEEE re-paginates when compiling the full proceedings volume. bibtidy uses CrossRef as the authoritative source for page numbers. CrossRef gets metadata directly from publishers via DOI registration, so for IEEE/CVF conferences it returns the IEEE Xplore pagination (9726--9735 in the example above). When sources conflict, bibtidy applies the DOI-linked version and flags the entry with `% bibtidy: REVIEW` so you can verify.
+Google Scholar extracts metadata by scraping PDFs rather than querying publisher databases, so page numbers are frequently incorrect. Even official sources can disagree. For example, the same CVPR 2020 paper "Momentum Contrast for Unsupervised Visual Representation Learning" has pages 9729--9738 on [CVF Open Access](https://openaccess.thecvf.com/content_CVPR_2020/html/He_Momentum_Contrast_for_Unsupervised_Visual_Representation_Learning_CVPR_2020_paper.html) but pages 9726--9735 on [IEEE Xplore](https://ieeexplore.ieee.org/document/9157636), because IEEE re-paginates when compiling the full proceedings volume. bibtidy uses CrossRef as the authoritative source for page numbers. CrossRef gets metadata directly from publishers via DOI registration, so for IEEE/CVF conferences it returns the IEEE Xplore pagination (9726--9735 in the example above). bibtidy applies the DOI-linked version; you can verify it via the DOI URL included in the `% bibtidy:` comments.
 
 ## License
 
 
@@ -35,8 +35,8 @@ def parse_entries(text: str) -> list[dict]:
     while i < len(lines):
         line = lines[i]
         stripped = line.strip()
-        # Skip @string, @preamble, and blank lines
-        if not stripped or re.match(r"^@(string|preamble)", stripped, re.IGNORECASE):
+        # Skip @string, @preamble, @comment, and blank lines
+        if not stripped or re.match(r"^@(string|preamble|comment)\b", stripped, re.IGNORECASE):
             i += 1
             continue
 
@@ -134,7 +134,7 @@ def classify_entry(bibtidy_comments: list[str], diff: list[tuple[str, str]]) ->
     return "badge-ok", "unchanged"
 
 
-_URL_RE = re.compile(r"(https?://[^\s,;)\"'&{}]+)")
+_URL_RE = re.compile(r"https?://[^\s,;)\"'{}<]+")
 
 
 def escape_html(s: str) -> str:
@@ -143,10 +143,17 @@ def escape_html(s: str) -> str:
 
 def linkify(s: str) -> str:
     """Escape HTML and convert URLs to clickable links."""
-    escaped = escape_html(s)
-    return _URL_RE.sub(
-        r'<a href="\1" target="_blank" rel="noopener" style="color:inherit;text-decoration:underline">\1</a>', escaped
-    )
+    parts = []
+    last = 0
+    for match in _URL_RE.finditer(s):
+        parts.append(escape_html(s[last : match.start()]))
+        url = escape_html(match.group(0))
+        parts.append(
+            f'<a href="{url}" target="_blank" rel="noopener" style="color:inherit;text-decoration:underline">{url}</a>'
+        )
+        last = match.end()
+    parts.append(escape_html(s[last:]))
+    return "".join(parts)
 
 
 def render_diff_row(typ: str, line: str) -> str:
@@ -599,17 +606,6 @@ def main() -> None:
     for key, inp in input_entries.items():
         if key in seen_keys:
             continue
-        # Find bibtidy comments for this key in expected text
-        bibtidy_comments = []
-        for line in expected_text.splitlines():
-            stripped = line.strip()
-            if (
-                stripped.startswith("% bibtidy:")
-                and key in expected_text[expected_text.index(stripped) : expected_text.index(stripped) + 500]
-            ):
-                bibtidy_comments.append(stripped)
-                break
-        # Simpler approach: scan expected text for bibtidy comments near the commented-out entry
         bibtidy_comments = []
         exp_lines = expected_text.splitlines()
         for idx, line in enumerate(exp_lines):
 
@@ -1,6 +1,6 @@
 [project]
 name = "bibtools"
-version = "1.7.4"
+version = "1.7.5"
 description = "A bibliography toolkit for LaTeX, built as agent skills"
 requires-python = ">=3.10"
 license = "MIT"
Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,7 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "bibtools",`
`3`	`3`	`"description": "A bibliography toolkit for LaTeX",`
`4`		`- "version": "1.7.4",`
	`4`	`+ "version": "1.7.5",`
`5`	`5`	`"author": {`
`6`	`6`	`"name": "Yunguan Fu"`
`7`	`7`	`},`