afloresep · afloresep · Apr 8, 2026 · Apr 8, 2026 · Apr 8, 2026 · Apr 8, 2026
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -0,0 +1,60 @@
+# Build the sdist and wheel on tag pushes (or manual dispatch) and publish
+# tagged builds to PyPI through the pypa publish action using an OIDC token.
+name: Build & Publish to PyPI
+
+on:
+  push:
+    tags: ["v*"]
+  workflow_dispatch:
+
+# Least privilege by default; the publish job declares its own permissions.
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  # Let a running release finish: cancelling mid-upload could leave PyPI with
+  # only part of the release's files.
+  cancel-in-progress: false
+
+jobs:
+  build:
+    name: Build sdist & wheel
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Build
+        run: |
+          python -m pip install build
+          python -m build
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/*
+          # Fail here rather than in the publish job if nothing was built.
+          if-no-files-found: error
+
+  publish:
+    name: Publish to PyPI
+    needs: [build]
+    runs-on: ubuntu-latest
+    # Manual runs from a branch only build; publishing requires a v* tag ref.
+    if: startsWith(github.ref, 'refs/tags/v')
+    permissions:
+      # Needed to request the OIDC token used for PyPI trusted publishing.
+      id-token: write
+
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          name: dist
+          path: dist/
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -16,7 +16,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.10", "3.11", "3.12"]
+        python-version: ["3.11", "3.12", "3.13"]
     steps:
       - name: Check out repository
         uses: actions/checkout@v4

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,33 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.2.2] - 2026-04-08
+
+### Added
+- Full GPU acceleration for the PQ pipeline (fit, transform, predict) via PyTorch + Triton kernels
+- Triton JIT-compiled `_pq_assign_kernel` for cluster assignment — never materializes the N×K distance matrix
+- GPU-accelerated `PQEncoder.fit()` with `device='gpu'|'auto'` parameter
+- GPU-accelerated `PQEncoder.transform()` with automatic VRAM-aware batching
+- GPU-accelerated `PQKMeans.fit()` and `.predict()` with automatic CPU fallback
+- Early-stopping tolerance (`tol`) parameter for `PQKMeans` GPU training path
+- New `_update_centers()` function with chunked histogram accumulation for bounded memory
+- New scripts: `benchmark_1B_pipeline.py`, `benchmark_gpu_predict.py`, `cluster_smiles.py`, `k_selection_gpu.py`
+- Comprehensive GPU test suite in `test_clustering.py`
+
+### Changed
+- `PQEncoder.fit()` now accepts a `device` parameter (`'cpu'`, `'gpu'`, `'auto'`)
+- `PQEncoder.transform()` automatically uses GPU when available
+- `PQKMeans` uses the GPU path when CUDA/Triton are detected, with transparent CPU fallback
+- MQN fingerprint dtype changed to `int16` for correctness
+- Migrated from `tmap-silicon` + `faerun` to `tmap2` for visualization
+- Rewrote `visualization.py` to use tmap2's `TMAP`, `TmapViz`, and chemistry utilities
+- Removed `pandarallel` dependency (parallelism now handled by tmap2)
+- Default Morgan fingerprint bits changed from 1024 to 2048 (tmap2 default)
+
+### Fixed
+- Stale codebook cache bug in encoder
+- PyTorch monkeypatch guard for CI compatibility
+
 ## [0.2.1] - 2026-03-06
 
 ### Added

diff --git a/chelombus/__init__.py b/chelombus/__init__.py
@@ -47,7 +47,7 @@ def _cluster_io_not_available(*args, **kwargs):
     query_clusters_batch = _cluster_io_not_available
     sample_from_cluster = _cluster_io_not_available
 
-__version__ = "0.2.1"
+__version__ = "0.2.2"
 __all__ = [
     # Core classes
     "DataStreamer",