baker-laboratory · willsheffler · Apr 2, 2025 · Apr 8, 2025 · Apr 8, 2025 · Apr 9, 2025
diff --git a/.github/workflows/build_docs.yaml b/.github/workflows/build_docs.yaml
@@ -13,6 +13,9 @@ jobs:
       - uses: actions/setup-python@v5
       - name: Install uv
         uses: astral-sh/setup-uv@v5
+        with:
+          version: "0.6.8"
+          enable-cache: true
       - name: Sphinx build
         run: |
           cd docs
@@ -21,7 +24,7 @@ jobs:
           uv run --extra docs sphinx-build -M html "." "_build"
       - name: Deploy to GitHub Pages
         uses: peaceiris/actions-gh-pages@v3
-        if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
+        if: ${{ github.event_name == 'push' }} # && github.ref == 'refs/heads/main' }}
-        if: ${{ github.event_name == 'push' }} # && github.ref == 'refs/heads/main' }}
+        if: ${{ github.event_name == 'push' }} # Example: Add '&& github.ref == "refs/heads/main"' for main branch only
-        if: ${{ github.event_name == 'push' }} # && github.ref == 'refs/heads/main' }}
+        if: ${{ github.event_name == 'push' }} # Example: Add '&& github.ref == "refs/heads/main"' for main branch only
         with:
           publish_branch: gh-pages
           github_token: ${{ secrets.GITHUB_TOKEN }}

diff --git a/.github/workflows/run_test_matrix.yaml b/.github/workflows/run_test_matrix.yaml
@@ -8,19 +8,14 @@ on:
 
 jobs:
   nox:
-    name: Run Nox - Python ${{ matrix.python }} (extra=${{ matrix.extra }})
-    runs-on: ubuntu-latest
+    name: Run Nox - Python ${{ matrix.python }} on ${{ matrix.platform }} (extra=${{ matrix.extra }})
+    runs-on: ${{ matrix.platform }}
     strategy:
       fail-fast: false
       matrix:
-        # python: [3.13]
-        python: [3.9, 3.10, 3.11, 3.12, 3.13]
+        platform: [ubuntu-latest, macos-latest]
+        python: ["3.10", "3.11", "3.12", "3.13"]  # quoted: a bare 3.10 is parsed as the float 3.1
         extra: ["", "all"]
-        # extra: [""]
-        exclude:
-          - python: 3.9
-            extra: "all"
-
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
@@ -31,9 +26,8 @@ jobs:
           version: "0.6.8"
           enable-cache: true
 
-      - name: Install nox using uv
-        run: uv venv --system-site-packages && uv pip install nox
-
       - name: Run Nox session
         run: |
+          uv venv --system-site-packages
+          uv pip install nox
           uv run nox --session test_matrix -- ${{ matrix.python }} ${{ matrix.extra }}
diff --git a/.github/workflows/run_tests_digs.yml b/.github/workflows/run_tests_digs.yml
diff --git a/.gitignore b/.gitignore
@@ -188,3 +188,4 @@ junit*.xml
 
 _autosummary
 _apidoc
+package-lock.json
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,50 @@
+# .pre-commit-config.yaml
+repos:
+
+  # ✅ Commitizen: Conventional Commit linter (Python-native)
+  - repo: https://github.com/commitizen-tools/commitizen
+    rev: v3.21.0
+    hooks:
+      - id: commitizen
+        stages: [commit-msg]
+        language_version: python3.12
+
+  # ✅ Ruff: linter, formatter, isort
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.3.3
+    hooks:
+      - id: ruff
+        args: [--fix, ipd]
+
+
+  # ✅ Basic hygiene: trailing whitespace, end-of-file newline, YAML syntax, large files
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-added-large-files
+
+  # ✅ Detect merge conflict markers
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: check-merge-conflict
+
+  # ✅ Validate pyproject.toml metadata and structure
+  - repo: https://github.com/abravalheri/validate-pyproject
+    rev: v0.24.1
+    hooks:
+      - id: validate-pyproject
+        # Optional extra validations from SchemaStore:
+        additional_dependencies: ["validate-pyproject-schema-store[all]"]
+
+#  - repo: https://github.com/christophmeissner/pytest-pre-commit
+#    rev: 1.0.0
+#    hooks:
+#      - id: pytest
+#        additional_dependencies: ['evn', 'typing_extensions', 'hypothesis', 'numpy', 'pytest-xdist', 'jinja2', 'icecream','rapidfuzz','yapf', 'rich', 'toolz', 'pyyaml', 'tomli', 'assertpy', 'opt_einsum', 'requests', 'requests_cache','hgeom', 'RestrictedPython', 'evn>=0.7.5']
+#        args: [-n,auto,--config-file,pyproject.toml,-k,'not slow',ipd/tests,--ignore=ipd/tests/homog/test_hgeom_library.py,--ignore=ipd/tests/_prelude/test_import_util.py]
+#        pass_filenames: false
+#        always_run: true
diff --git a/.python-version b/.python-version
@@ -0,0 +1 @@
+3.13
diff --git a/README.md b/README.md
@@ -6,3 +6,5 @@
 
 # ipd
 Various base libraries for code at the IPD
+
+<https://baker-laboratory.github.io/ipd/index.html>
diff --git a/docs/conf.py b/docs/conf.py
@@ -1,9 +1,8 @@
 import os
 import sys
 import inspect
-from typing import Type
 
-sys.path.insert(0, os.path.abspath('..'))
+sys.path.append(os.path.abspath('..'))
 import importlib.metadata
 
 # -- Project information -----------------------------------------------------
@@ -28,6 +27,12 @@
     # 'sphinx_gallery.gen_gallery',  # For example galleries like Biotite
 ]
 
+autodoc_mock_imports = [  # modules Sphinx autodoc mocks instead of importing
+    "ipd.cuda",
+    "ipd.cython",
+    "ipd.tests",  # dotted module name; a path such as "ipd/tests" never matches an import
+]
+
 # -- HTML Theme Configuration ------------------------------------------------
 pygments_style = 'sphinx'
 

diff --git a/docs/dev_guide/coding.rst b/docs/dev_guide/coding.rst
@@ -47,6 +47,3 @@ All modules, classes, and functions should have docstrings. Use google style, an
             3
         """
         return a + b
-
-
-
diff --git a/docs/dev_guide/dependencies.rst b/docs/dev_guide/dependencies.rst
@@ -25,6 +25,3 @@ numpy
 ~~~~~~~~
 You know what this is. (If you don't, you're in the wrong place.) (courtney: I don't know what this is)
 This strange and snarky comment courtesy of github copilot, thank you AI overlords.
-
-
-
diff --git a/docs/tutorials/contact_matrix.rst b/docs/tutorials/contact_matrix.rst
@@ -10,7 +10,7 @@ Cumulative sums primer and rapid contact matrix processing
 Cumulative sums, 1D basics
 --------------------------
 
-ContactMatrixStack uses a precomputed 2D partialsum array for efficient region-based queries. To explain
+ContactBlockMatrix uses a precomputed 2D partialsum array for efficient region-based queries. To explain
 start with the 1D partialsum case. ``DATA`` is a 1D array and ``SUMS`` is the cumulative sum
 of ``DATA`` (``ipd.partialsum(DATA)``. If we want the sum of ``DATA[i:j]`` we can compute it as
-of ``DATA`` (``ipd.partialsum(DATA)``. If we want the sum of ``DATA[i:j]`` we can compute it as
+of ``DATA`` (``ipd.partialsum(DATA)``). If we want the sum of ``DATA[i:j]`` we can compute it as
-of ``DATA`` (``ipd.partialsum(DATA)``. If we want the sum of ``DATA[i:j]`` we can compute it as
+of ``DATA`` (``ipd.partialsum(DATA)``). If we want the sum of ``DATA[i:j]`` we can compute it as
 ``SUMS[j] - SUMS[i]``.
@@ -83,11 +83,11 @@ Note how much faster the partialsum + broadcasting version was for the 1D versio
 It makes an even bigger difference in the 2D case because the arrays tend to be much larger.
 
 .. figure:: ../_static/img/partialsum2d.png
-   :alt: partialsum2d illustration 
+   :alt: partialsum2d illustration
 
    Illustration of data 2D with pink region to be "summed" and 2D cumulative sum array from which four points are needed to computs the "sum:" ``sum = CSUM[ub1,ub2] (red point) + CSUM[lb1,lb2] (green point) - CUSM[ub1,lb2] (blue point) - CSUM[lb1,lb2] (blue point``.
-   Illustration of data 2D with pink region to be "summed" and 2D cumulative sum array from which four points are needed to computs the "sum:" ``sum = CSUM[ub1,ub2] (red point) + CSUM[lb1,lb2] (green point) - CUSM[ub1,lb2] (blue point) - CSUM[lb1,lb2] (blue point``.
+   Illustration of 2D data with the pink region to be "summed" and the 2D cumulative sum array from which four points are needed to compute the "sum": ``sum = CSUM[ub1,ub2] (red point) + CSUM[lb1,lb2] (green point) - CSUM[ub1,lb2] (blue point) - CSUM[lb1,ub2] (blue point)``.
-   Illustration of data 2D with pink region to be "summed" and 2D cumulative sum array from which four points are needed to computs the "sum:" ``sum = CSUM[ub1,ub2] (red point) + CSUM[lb1,lb2] (green point) - CUSM[ub1,lb2] (blue point) - CSUM[lb1,lb2] (blue point``.
+   Illustration of 2D data with the pink region to be "summed" and the 2D cumulative sum array from which four points are needed to compute the "sum": ``sum = CSUM[ub1,ub2] (red point) + CSUM[lb1,lb2] (green point) - CSUM[ub1,lb2] (blue point) - CSUM[lb1,ub2] (blue point)``.
 
-The method :py:meth:`ContactMatrixStack.fragment_contact` uses this idea to compute the total contacts of all
+The method :py:meth:`ContactBlockMatrix.fragment_contact` uses this idea to compute the total contacts of all
 pairs of fragments of a given length using a 2D partialsum array. The stride parameter allows for computing only every Nth value. Note, even on large inputs, this function is fast enough to
 compute every fragment pair, so stride is mainly useful as a simple way to reduce redundancy.
 
@@ -100,16 +100,16 @@ compute every fragment pair, so stride is mainly useful as simple way to reduce
 
 This function returns an ``S x M x N`` array containing the total contacts for all pairs of fragments for each contact matrix ``s`` in the stack: ``fragment1`` starting at ``m`` and ending at ``m + fragsize``, to ``fragment2`` starting at ``n`` and ending at ``n + fragsize``.
 
-The method :py:meth:`ContactMatrixStack.topk_fragment_contact_by_subset_summary` uses the
+The method :py:meth:`ContactBlockMatrix.topk_fragment_contact_by_subset_summary` uses the
 arrays produced by
-:py:meth:`ContactMatrixStack.fragment_contact` to search for subsets of subunits that
+:py:meth:`ContactBlockMatrix.fragment_contact` to search for subsets of subunits that
 all make "multibody" contacts by enumerating all subsets of contacting subunits, and taking
 the minimum number of contacts for each fragment pair. See the example below.
 
 
 .. _contact_matrix_overview:
 
-ContactMatrixStack Example
+ContactBlockMatrix Example
 ---------------------------
 
 Setup, reading in and positioning some data
@@ -124,9 +124,9 @@ Setup, reading in and positioning some data
 
 Get best pair of fragment
 
->>> cmat = contacts.contact_matrix_stack()
+>>> cmat = contacts.contact_blocks()
 >>> cmat
-ContactMatrixStack(shape: (4, 92, 335) subs: [ 2  6  8 10])
+ContactBlockMatrix(shape: (4, 92, 335) subs: [ 2  6  8 10])
 >>> # 4 contact matrices, thus top7 contacts 4 (of 12) subunit in dxh
 >>> pair_frag_contacts = cmat.fragment_contact(fragsize=20, stride=5)
 >>> isub, itop7, idxh = np.unravel_index(np.argmax(pair_frag_contacts), pair_frag_contacts.shape)
@@ -183,3 +183,53 @@ and see if it's legit.
 Note: :py:func:`ipd.viz.pymol_viz.showme` (just call ipd.showme) is super useful for visualizing all kinds of things, mainly in pymol.
  It can show AtomArrays, Bodies, Symbodies, homogeneous transforms, stacks of xyz coords, symmetry
  elements, crystal lattices, etc etc. All you need is pymol in your conda environment, and runnable.
+
+
+Top-k Fragment Contact Subset Summary Output
+----------------------------------------------
+
+The method :meth:`ContactBlockMatrix.topk_fragment_contact_by_subset_summary` returns an
+:class:`ipd.Bunch` object that acts like a dictionary, with two primary entries:
+
+- ``index``: a dictionary mapping each subset of contacting subunits to the fragment pairs
+  that exhibit high contact across **all** members of that subset.
+- ``vals``: a dictionary mapping each subset to the corresponding contact values for the
+  fragment pairs stored in ``index``.
+
+Each key in these dictionaries is a tuple of subunit indices, corresponding to a subset of
+the ``ContactBlockMatrix``. For example, a key ``(0, 2)`` refers to fragment pairs that simultaneously contact
+**both** subunit 0 and subunit 2.
+
+Each ``index[subset]`` value is a 2D NumPy array of shape ``(2, k)``, where:
+
+- ``index[subset][0, i]`` is the start index of the fragment in the **target** structure (e.g. top7).
+- ``index[subset][1, i]`` is the start index of the fragment in the **subset** structure (e.g. symbody).
+- The fragments are assumed to span ``fragsize`` residues starting from these indices.
+- These indices are **unstrided**: the stride factor has been multiplied back in, so they reflect real positions.
+
+Each ``vals[subset]`` is a 1D NumPy array of length ``k``, storing the **summary contact value**
+associated with each fragment pair in ``index[subset]``. This value is computed using the
+user-supplied ``summary`` function (e.g., ``np.min``), which aggregates the contact counts
+across all matrices in the subset.
+
+Fragment pairs with zero contacts across all subset matrices are excluded from the result.
+
+Example:
+
+.. code-block:: python
+
+    result = cmat.topk_fragment_contact_by_subset_summary(fragsize=10, stride=4, k=20, summary=np.min)
+    result.index.keys()
+    # dict_keys([(0, 2), (0,), (1,), (2,), (3,)])
+
+    result.index[(0, 2)].shape
+    # (2, 7) – seven top fragment pairs for subset (0, 2)
+
+    result.vals[(0, 2)]
+    # array([11.,  6.,  6.,  1.,  1.,  1.,  1.])
+
+    # Each pair (top_idx, sym_idx, contact_value):
+    np.concatenate([result.index[(0, 2)].T, result.vals[(0, 2)][:, None]], axis=1)
+    # array([[ 32, 112,  11],
+    #        [ 28, 112,   6],
+    #        [ 28,
diff --git a/docs/tutorials/symmetry_detection.rst b/docs/tutorials/symmetry_detection.rst
@@ -202,4 +202,3 @@ Conclusion
 ^^^^^^^^^^^
 
 The symmetry detection system in IPD is highly flexible and powerful. It combines biological sequence matching with geometric reasoning to robustly identify symmetrical assemblies. The use of transform decomposition and axis consolidation enables consistent classification across structures with noise or imperfect symmetry.
-
diff --git a/docs/tutorials/using_homog.rst b/docs/tutorials/using_homog.rst
@@ -353,4 +353,3 @@ coordinate, then:
     >>> d = h.xform(T, d)  # updates d.coords via transformation
     >>> np.allclose(d.coords, T @ h.point([1, 2, 3]))
     True
-
diff --git a/docs/tutorials/working_with_atoms.rst b/docs/tutorials/working_with_atoms.rst
@@ -138,4 +138,3 @@ Notes
 
 ^ For real PDB/CIF/BCIF data, Biotite must be installed and able to access the internet or your test data.
 ^ The IPD module adds rich metadata and assembly parsing features over the base Biotite readers.
-
Original file line number	Diff line number	Diff line change
Expand Up		@@ -188,3 +188,4 @@ junit*.xml

		_autosummary
		_apidoc
		package-lock.json
Original file line number	Diff line number	Diff line change
Expand Up		@@ -6,3 +6,5 @@

		# ipd
		Various base libraries for code at the IPD

		<https://baker-laboratory.github.io/ipd/index.html>
Original file line number	Diff line number	Diff line change
Expand Up		@@ -47,6 +47,3 @@ All modules, classes, and functions should have docstrings. Use google style, an
		3
		"""
		return a + b
Original file line number	Diff line number	Diff line change
Expand Up		@@ -25,6 +25,3 @@ numpy
		~~~~~~~~
		You know what this is. (If you don't, you're in the wrong place.) (courtney: I don't know what this is)
		This strange and snarky comment courtesy of github copilot, thank you AI overloards.
Original file line number	Diff line number	Diff line change
Expand Up		@@ -202,4 +202,3 @@ Conclusion
		^^^^^^^^^^^

		The symmetry detection system in IPD is highly flexible and powerful. It combines biological sequence matching with geometric reasoning to robustly identify symmetrical assemblies. The use of transform decomposition and axis consolidation enables consistent classification across structures with noise or imperfect symmetry.
Original file line number	Diff line number	Diff line change
Expand Up		@@ -353,4 +353,3 @@ coordinate, then:
		>>> d = h.xform(T, d) # updates d.coords via transformation
		>>> np.allclose(d.coords, T @ h.point([1, 2, 3]))
		True
Original file line number	Diff line number	Diff line change
Expand Up		@@ -138,4 +138,3 @@ Notes

		^ For real PDB/CIF/BCIF data, Biotite must be installed and able to access the internet or your test data.
		^ The IPD module adds rich metadata and assembly parsing features over the base Biotite readers.