KosinskiLab · DimaMolod · May 20, 2026 · May 20, 2026 · May 20, 2026 · May 20, 2026
diff --git a/.gitignore b/.gitignore
@@ -256,3 +256,11 @@ test_data/**
 !test_data/af3/neg_dimers/
 !test_data/af3/neg_dimers/Q14974+Q13033/
 !test_data/af3/neg_dimers/Q14974+Q13033/**
+
+# Boltz-2 real prediction fixture
+!test_data/boltz2/
+!test_data/boltz2/6OGE_ABC_DSSO_CDI_seed_3/
+!test_data/boltz2/6OGE_ABC_DSSO_CDI_seed_3/6OGE_ABC_DSSO_CDI_Boltz2_model_0.cif
+!test_data/boltz2/6OGE_ABC_DSSO_CDI_seed_3/confidence_6OGE_ABC_DSSO_CDI_Boltz2_model_0.json
+!test_data/boltz2/6OGE_ABC_DSSO_CDI_seed_3/pae_6OGE_ABC_DSSO_CDI_Boltz2_model_0.npz
+!test_data/boltz2/6OGE_ABC_DSSO_CDI_seed_3/plddt_6OGE_ABC_DSSO_CDI_Boltz2_model_0.npz
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,6 @@
+# Changelog
+
+## 1.0.2 - 2026-05-20
+
+- Added official DeepMind AF3 layout and Boltz-2 parser support. `BaseParser.detect` is now a static method, so custom parser subclasses should declare it with `@staticmethod` and the signature `detect(d: Path) -> bool` (no `self`).
+- Unknown or missing AF3 confidence schemas now raise an error instead of fabricating default PAE values.
diff --git a/README.md b/README.md
@@ -13,7 +13,7 @@ AlphaJudge evaluates AlphaFold-predicted protein complexes by merging AI-derived
 
 ## What it does
 
-AlphaJudge parses AF2 and AF3 outputs and summarizes per-model / per-interface metrics:
+AlphaJudge parses AF2, AF3, and Boltz-2 outputs and summarizes per-model / per-interface metrics:
 
 | category | metrics (examples) | notes |
 | --- | --- | --- |
@@ -28,10 +28,10 @@ Use cases: rank poses, sanity-check AF confidences, or export features for ML.
 ## Pipeline overview
 
 ```
-AlphaFold models (AF2 or AF3)  →  AlphaJudge  →  interfaces.csv
+AlphaFold (AF2/AF3) or Boltz-2 models  →  AlphaJudge  →  interfaces.csv
 ```
 
-- Detects AF2 vs AF3 automatically from the run directory
+- Detects the output format (AF2, AF3, or Boltz-2) automatically from the run directory
 - Loads structure and confidences, computes interface descriptors
 - Writes `interfaces.csv` into the same directory
 
@@ -105,9 +105,12 @@ Examples
 # Single AF2 run (directory contains ranking_debug.json, pae_*.json, and model files)
 alphajudge test_data/af2/pos_dimers/Q13148+Q92900
 
-# Single AF3 run (directory contains ranking_scores.csv, per-model summary/confidence files, and model files)
+# Single AF3 run (AlphaPulldown-style or official DeepMind AF3 output layout)
 alphajudge test_data/af3/pos_dimers/Q13148+Q92900 --models_to_analyse all
 
+# Boltz-2 prediction directory (for example out_dir/predictions/my_input)
+alphajudge out_dir/predictions/my_input --models_to_analyse all
+
 # Aggregate multiple runs into one summary
 alphajudge test_data/af2/pos_dimers/Q13148+Q92900 \
            test_data/af3/pos_dimers/Q13148+Q92900 \
@@ -151,12 +154,13 @@ Key outputs per interface include: `average_interface_pae`, `interface_average_p
 
 ## Expected input layout
 
-AlphaJudge expects standard AlphaFold run outputs.
+AlphaJudge expects the standard output layout of a supported predictor (AlphaFold 2, AlphaFold 3, or Boltz-2).
 
 - AF2: directory with `ranking_debug.json`, `pae_<model>.json`, and model structure files (`model.cif` or `*.pdb/*.cif`)
-- AF3: directory with `ranking_scores.csv`, per-model `summary_confidences.json` and `confidences.json` (or top-level `ranked_0_summary_confidences.json`), and structure files
+- AF3: AlphaPulldown/normalized layout with `ranking_scores.csv` and per-model `summary_confidences.json`/`confidences.json`, or official DeepMind AF3 layout with `<job_name>_ranking_scores.csv` and prefixed per-sample files such as `<job_name>_seed-<seed>_sample-<sample>_model.cif`
+- Boltz-2: prediction directory with ranked files such as `<input>_model_0.cif`, `confidence_<input>_model_0.json`, and optional `pae_<input>_model_0.npz` / `plddt_<input>_model_0.npz`
 
-The tool searches for `model.cif` inside each model subdirectory first; otherwise it tries to match `*<model>*.cif` or `*<model>*.pdb` at the run root.
+The tool searches for `model.cif` inside each model subdirectory first; otherwise it tries to match `*<model>*.cif` or `*<model>*.pdb` at the run root. AlphaJudge currently scores protein and nucleic-acid interfaces; ligands present in AF3 or Boltz-2 structures are ignored for interface construction. When confidence arrays include ligand tokens, supported parsers align or trim them to the scored protein/nucleic-acid residue block.
 
 ---
 
@@ -184,7 +188,7 @@ pip install -e ".[test]"
 pytest -q
 ```
 
-Tests exercise both AF2 and AF3 parsers and validate the CSV fields against bundled fixtures in `test_data/`. The slow CCP4 SC regression suite is opt-in and can be enabled with `ALPHAJUDGE_RUN_SLOW_SC_REFERENCE=1`; CI always runs it across Python 3.10–3.13.
+Tests exercise AF2, AF3, and Boltz-2 parsers and validate the CSV fields against bundled fixtures in `test_data/`. The slow CCP4 SC regression suite is opt-in and can be enabled with `ALPHAJUDGE_RUN_SLOW_SC_REFERENCE=1`; CI always runs it across Python 3.10–3.13.
 
 ---
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "alphajudge"
-version = "1.0.1"
+version = "1.0.2"
 description = "Evaluate AlphaFold-predicted protein complexes using confidence metrics and interface biophysics."
 readme = { file = "README.md", content-type = "text/markdown" }
 requires-python = ">=3.10"

diff --git a/src/alphajudge/parsers/__init__.py b/src/alphajudge/parsers/__init__.py
@@ -4,9 +4,9 @@
 from typing import Any, Callable
 from abc import ABC, abstractmethod
 import json
-import numpy as np
 from Bio.PDB import PDBParser, MMCIFParser
 from ..confidence import Confidence
+from ..geometry import is_pae_token_residue, representative_atom
 
 @dataclass
 class Run:
@@ -17,8 +17,9 @@ class Run:
 class BaseParser(ABC):
     name: str = "base"
 
+    @staticmethod
     @abstractmethod
-    def detect(self, d: Path) -> bool: ...
+    def detect(d: Path) -> bool: ...
     @abstractmethod
     def parse_run(self, d: Path) -> Run: ...
 
@@ -45,30 +46,41 @@ def _guess_struct(d: Path, model: str) -> str:
 
     @staticmethod
     def _maps(struct):
-        model = next(struct.get_models()); chains = list(model.get_chains())
+        # Index PAE-token residues only (proteins with CA, nucleic acids with C1').
+        # This keeps `rim` / `cid` aligned with the residue-by-residue PAE matrix
+        # and with Complex._build_maps(), so plddt_residue[i] and pae_matrix[i, :]
+        # refer to the same residue.
+        model = next(struct.get_models())
+        chains = list(model.get_chains())
         rim, cid, idx = {}, {}, 0
         for ch in chains:
-            idxs = []
+            idxs: list[int] = []
             for res in ch:
+                if not is_pae_token_residue(res):
+                    continue
                 rim[(ch.id, res.id)] = idx
-                idxs.append(idx); idx += 1
+                idxs.append(idx)
+                idx += 1
+            # Always register the chain, even with an empty index list, so AF3 PAE
+            # normalisation can iterate over `chains` and look up cid[c.id] without KeyError.
             cid[ch.id] = idxs
         return chains, rim, cid
 
     @staticmethod
     def _plddt(chains, rim) -> list[float]:
-        n = len(rim); out = [float('nan')] * n
+        # Use representative_atom (CB->CA for proteins, C1' for nucleic acids) so this
+        # matches Interface._avg_plddt_union(): same residue, same B-factor.
+        n = len(rim)
+        out = [float("nan")] * n
         for ch in chains:
             for res in ch:
                 i = rim.get((ch.id, res.id))
-                if i is None: continue
+                if i is None:
+                    continue
                 try:
-                    out[i] = float(res["CA"].get_bfactor()); continue
-                except Exception:
-                    pass
-                vals = [float(a.get_bfactor()) for a in res.get_atoms()
-                        if a.element and a.element.upper()!="H"]
-                out[i] = float(np.mean(vals)) if vals else float('nan')
+                    out[i] = float(representative_atom(res).get_bfactor())
+                except (KeyError, AttributeError):
+                    continue
         return out
 
     @staticmethod
@@ -113,9 +125,10 @@ def pick(self, d: Path) -> BaseParser:
 # import concrete parsers and register
 from .af2 import AF2Parser
 from .af3 import AF3Parser
+from .boltz import Boltz2Parser
 manager.register(AF2Parser)
 manager.register(AF3Parser)
+manager.register(Boltz2Parser)
 
 pick_parser = manager.pick
 register_parser = manager.register
-
diff --git a/src/alphajudge/parsers/af3.py b/src/alphajudge/parsers/af3.py
@@ -1,30 +1,50 @@
 from __future__ import annotations
 from pathlib import Path
 from typing import Any
-import csv, numpy as np
+import csv
+import logging
+import numpy as np
 from . import BaseParser, Run
 from ..confidence import Confidence
 
+logger = logging.getLogger(__name__)
+
 class AF3Parser(BaseParser):
     name = "af3"
 
-    def detect(self, d: Path) -> bool:
-        return (d / "ranking_scores.csv").exists()
+    @staticmethod
+    def detect(d: Path) -> bool:
+        return AF3Parser._ranking_scores_file(d) is not None
 
     def parse_run(self, d: Path) -> Run:
-        order = self._read_csv_order(d / "ranking_scores.csv")
+        ranking_file = self._ranking_scores_file(d)
+        if ranking_file is None:
+            raise ValueError(f"AF3 ranking scores file not found in {d}")
+        order, ranking_scores = self._read_csv_order(ranking_file)
+        job_prefix = self._job_prefix_from_ranking_file(ranking_file)
 
         def load_model(model: str):
             model_dir = d / model
-            struct = self._load_structure(self._guess_struct(d, model))
+            is_best_model = bool(order and model == order[0])
+            struct = self._load_structure(
+                self._guess_af3_struct(d, model, job_prefix, is_best_model)
+            )
             chains, rim, cid = self._maps(struct)
 
-            summary = self._read_json(model_dir / "summary_confidences.json")
-            matrix  = self._read_json(model_dir / "confidences.json") or summary
+            summary = self._read_json(
+                self._find_af3_json(
+                    d, model, "summary_confidences", job_prefix, is_best_model
+                )
+            )
+            matrix = self._read_json(
+                self._find_af3_json(d, model, "confidences", job_prefix, is_best_model)
+            ) or summary
 
             iptm = self._safe_float(summary.get("iptm"))
             ptm  = self._safe_float(summary.get("ptm"))
             ranking_score = self._safe_float(summary.get("ranking_score"))
+            if ranking_score is None:
+                ranking_score = ranking_scores.get(model)
             iptm_ptm = 0.2 * ptm + 0.8 * iptm if (iptm is not None and ptm is not None) else None
 
             chain_pair_iptm_raw = summary.get("chain_pair_iptm")
@@ -44,14 +64,97 @@ def load_model(model: str):
 
     # ---- AF3-specific helpers ----
     @staticmethod
-    def _read_csv_order(p: Path) -> list[str]:
+    def _ranking_scores_file(d: Path) -> Path | None:
+        plain = d / "ranking_scores.csv"
+        if plain.exists():
+            return plain
+        hits = sorted(d.glob("*_ranking_scores.csv"))
+        return hits[0] if hits else None
+
+    @staticmethod
+    def _job_prefix_from_ranking_file(p: Path) -> str | None:
+        if p.name == "ranking_scores.csv":
+            return None
+        suffix = "_ranking_scores"
+        if p.stem.endswith(suffix):
+            return p.stem[: -len(suffix)]
+        return None
+
+    @staticmethod
+    def _read_csv_order(p: Path) -> tuple[list[str], dict[str, float]]:
         with p.open(newline="") as f:
             rows = [r for r in csv.DictReader(f) if r]
         def pf(x: str | None) -> float:
             try: return float(x)  # type: ignore[arg-type]
             except Exception: return float("nan")
         rows.sort(key=lambda r: pf(r.get("ranking_score")), reverse=True)
-        return [f"seed-{r['seed']}_sample-{r['sample']}" for r in rows if 'seed' in r and 'sample' in r]
+        order: list[str] = []
+        scores: dict[str, float] = {}
+        for r in rows:
+            if "seed" not in r or "sample" not in r:
+                continue
+            model = f"seed-{r['seed']}_sample-{r['sample']}"
+            order.append(model)
+            score = pf(r.get("ranking_score"))
+            if np.isfinite(score):
+                scores[model] = score
+        return order, scores
+
+    @staticmethod
+    def _find_existing(paths: list[Path]) -> Path | None:
+        for p in paths:
+            if p.exists():
+                return p
+        return None
+
+    @classmethod
+    def _find_af3_json(
+        cls,
+        d: Path,
+        model: str,
+        kind: str,
+        job_prefix: str | None,
+        is_best_model: bool,
+    ) -> Path:
+        model_dir = d / model
+        candidates = [model_dir / f"{kind}.json"]
+        if job_prefix:
+            candidates.append(model_dir / f"{job_prefix}_{model}_{kind}.json")
+            if is_best_model:
+                candidates.append(d / f"{job_prefix}_{kind}.json")
+        if model_dir.is_dir():
+            candidates.extend(sorted(model_dir.glob(f"*_{kind}.json")))
+        if is_best_model:
+            candidates.append(d / f"ranked_0_{kind}.json")
+        return cls._find_existing(candidates) or candidates[0]
+
+    @classmethod
+    def _guess_af3_struct(
+        cls,
+        d: Path,
+        model: str,
+        job_prefix: str | None,
+        is_best_model: bool,
+    ) -> str:
+        model_dir = d / model
+        candidates: list[Path] = []
+        for ext in ("cif", "pdb"):
+            candidates.append(model_dir / f"model.{ext}")
+            if job_prefix:
+                candidates.append(model_dir / f"{job_prefix}_{model}_model.{ext}")
+        if model_dir.is_dir():
+            for ext in ("cif", "pdb"):
+                candidates.extend(sorted(model_dir.glob(f"*{model}*_model.{ext}")))
+                candidates.extend(sorted(model_dir.glob(f"*.{ext}")))
+        if job_prefix and is_best_model:
+            for ext in ("cif", "pdb"):
+                candidates.append(d / f"{job_prefix}_model.{ext}")
+        for ext in ("cif", "pdb"):
+            candidates.extend(sorted(d.glob(f"*{model}*.{ext}")))
+        found = cls._find_existing(candidates)
+        if found is not None:
+            return str(found)
+        raise ValueError(f"struct for model {model} not found")
 
     @staticmethod
     def _normalize_pae_af3(matrix: dict, chains, cid) -> tuple[np.ndarray, float]:
@@ -62,7 +165,14 @@ def _normalize_pae_af3(matrix: dict, chains, cid) -> tuple[np.ndarray, float]:
         if "predicted_aligned_error" in matrix:
             # some AF3 builds still store a full matrix in confidences.json
             m = np.array(matrix["predicted_aligned_error"], dtype=float)
-            if m.size: pae[:, :] = m
+            if m.size:
+                if m.shape == pae.shape:
+                    pae[:, :] = m
+                else:
+                    logger.warning(
+                        f"predicted_aligned_error shape {m.shape} != expected {pae.shape}; "
+                        "skipping PAE assignment."
+                    )
             max_pae = float(matrix.get("max_predicted_aligned_error", np.nan))
             if not np.isfinite(max_pae):
                 max_pae = float(np.nanmax(m)) if m.size else float('nan')
@@ -107,6 +217,9 @@ def _normalize_pae_af3(matrix: dict, chains, cid) -> tuple[np.ndarray, float]:
                     if ri and rj:
                         pae[np.ix_(ri, rj)] = 100.0 if val is None else val
         else:
-            raise ValueError("unknown AF3 confidences schema")
+            raise ValueError(
+                "unknown AF3 confidences schema: expected predicted_aligned_error, "
+                "pae with token_chain_ids, or chain_pair_pae_min"
+            )
 
         return pae, float(max_pae)