diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..5c00379
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,48 @@
+name: Build & Publish to PyPI
+
+on:
+  push:
+    tags: ["v*"]
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    name: Build sdist & wheel
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Build
+        run: |
+          pip install build
+          python -m build
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/*
+
+  publish:
+    name: Publish to PyPI
+    needs: [build]
+    runs-on: ubuntu-latest
+    if: startsWith(github.ref, 'refs/tags/v')
+    permissions:
+      id-token: write
+
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          name: dist
+          path: dist/
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index fcdaf29..29b48dc 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -16,7 +16,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.10", "3.11", "3.12"]
+        python-version: ["3.11", "3.12", "3.13"]
     steps:
       - name: Check out repository
         uses: actions/checkout@v4
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b46da29..fe2d913 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,33 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.2.2] - 2026-04-08
+
+### Added
+- Full GPU acceleration for the PQ pipeline (fit, transform, predict) via PyTorch + Triton kernels
+- Triton JIT-compiled `_pq_assign_kernel` for cluster assignment — never materializes the N×K distance matrix
+- GPU-accelerated `PQEncoder.fit()` with `device='gpu'|'auto'` parameter
+- GPU-accelerated `PQEncoder.transform()` with automatic VRAM-aware batching
+- GPU-accelerated `PQKMeans.fit()` and `.predict()` with automatic CPU fallback
+- Early-stopping tolerance (`tol`) parameter for `PQKMeans` GPU training path
+- New `_update_centers()` function with chunked histogram accumulation for bounded memory
+- New scripts: `benchmark_1B_pipeline.py`, `benchmark_gpu_predict.py`, `cluster_smiles.py`, `k_selection_gpu.py`
+- Comprehensive GPU test suite in `test_clustering.py`
+
+### Changed
+- `PQEncoder.fit()` now accepts `device` parameter (`'cpu'`, `'gpu'`, `'auto'`)
+- `PQEncoder.transform()` automatically uses GPU when available
+- `PQKMeans` uses GPU path when CUDA/Triton are detected, with transparent CPU fallback
+- MQN fingerprint dtype changed to `int16` for correctness
+- Migrated from `tmap-silicon` + `faerun` to `tmap2` for visualization
+- Rewrote `visualization.py` to use tmap2's `TMAP`, `TmapViz`, and chemistry utilities
+- Removed `pandarallel` dependency (parallelism now handled by tmap2)
+- Default Morgan fingerprint bits changed from 1024 to 2048 (tmap2 default)
+
+### Fixed
+- Stale codebook cache bug in encoder
+- PyTorch monkeypatch guard for CI compatibility
+
 ## [0.2.1] - 2026-03-06
 
 ### Added
diff --git a/chelombus/__init__.py b/chelombus/__init__.py
index 1ad1a59..9524403 100644
--- a/chelombus/__init__.py
+++ b/chelombus/__init__.py
@@ -47,7 +47,7 @@ def _cluster_io_not_available(*args, **kwargs):
     query_clusters_batch = _cluster_io_not_available
     sample_from_cluster = _cluster_io_not_available
 
-__version__ = "0.2.1"
+__version__ = "0.2.2"
 __all__ = [
     # Core classes
     "DataStreamer",
diff --git a/chelombus/utils/visualization.py b/chelombus/utils/visualization.py
index 920a355..7a156eb 100644
--- a/chelombus/utils/visualization.py
+++ b/chelombus/utils/visualization.py
@@ -1,98 +1,23 @@
 """TMAP visualization utilities for molecular datasets.
 
 Provides functions to create TMAPs from SMILES strings with customizable
-molecular properties and fingerprint options.
+molecular properties and fingerprint options. Built on top of tmap2.
 """
-from rdkit import Chem
-from rdkit.Chem import Descriptors, rdMolDescriptors
-from faerun import Faerun
-import tmap as tm
 import argparse
-import pandas as pd
-from pandarallel import pandarallel
-from chelombus.utils.fingerprints import FingerprintCalculator
 import numpy as np
-from sklearn.neighbors import NearestNeighbors
-from typing import Callable
-
-# Available molecular properties with display names and calculation functions
-AVAILABLE_PROPERTIES: dict[str, tuple[str, Callable]] = {
-    'hac': ('HAC', lambda mol: mol.GetNumHeavyAtoms()),
-    'num_aromatic_atoms': ('Number Aromatic Atoms', lambda mol: sum(1 for atom in mol.GetAtoms() if atom.GetIsAromatic())),
-    'num_hba': ('NumHBA', rdMolDescriptors.CalcNumHBA),
-    'num_hbd': ('NumHBD', rdMolDescriptors.CalcNumHBD),
-    'num_rings': ('Number of Rings', rdMolDescriptors.CalcNumRings),
-    'mw': ('MW', Descriptors.ExactMolWt),
-    'clogp': ('cLogP', Descriptors.MolLogP),
-    'fsp3': ('Fraction Csp3', Descriptors.FractionCSP3),
-}
-
-DEFAULT_PROPERTIES = ['hac', 'num_aromatic_atoms', 'num_hba', 'num_hbd', 'num_rings', 'mw', 'clogp', 'fsp3']
-
-
-def _mol_properties_from_smiles(smiles: str, properties: list[str]) -> tuple | None:
-    """Get molecular properties from a single SMILES string."""
-    mol = Chem.MolFromSmiles(smiles)
-    if mol is None:
-        return None
-
-    values = []
-    for prop in properties:
-        display_name, calc_func = AVAILABLE_PROPERTIES[prop]
-        values.append(calc_func(mol))
-
-    return tuple(values)
-
-
-def calc_properties(smiles: list[str], properties: list[str] | None = None) -> pd.DataFrame:
-    """Calculate selected molecular properties for a list of SMILES.
-
-    Args:
-        smiles: List of SMILES strings.
-        properties: List of property identifiers to calculate.
-                   If None, uses DEFAULT_PROPERTIES.
-                   Available: hac, num_aromatic_atoms, num_hba, num_hbd,
-                             num_rings, mw, clogp, fsp3
-
-    Returns:
-        DataFrame with 'smiles' column and selected property columns.
-
-    Raises:
-        ValueError: If unknown property identifiers are provided.
-    """
-    if properties is None:
-        properties = DEFAULT_PROPERTIES
-
-    # Validate properties
-    invalid = set(properties) - set(AVAILABLE_PROPERTIES.keys())
-    if invalid:
-        raise ValueError(f"Unknown properties: {invalid}. Available: {list(AVAILABLE_PROPERTIES.keys())}")
-
-    pandarallel.initialize(progress_bar=False, verbose=0)
-
-    dataframe = pd.DataFrame({'smiles': smiles})
-
-    # Get display names for columns
-    column_names = [AVAILABLE_PROPERTIES[prop][0] for prop in properties]
-
-    dataframe[column_names] = dataframe['smiles'].apply(
-        lambda s: _mol_properties_from_smiles(s, properties)
-    ).parallel_apply(pd.Series)
-
-    return dataframe
+import pandas as pd
+from typing import Sequence
 
+from tmap import TMAP
+from tmap.utils import fingerprints_from_smiles, molecular_properties, AVAILABLE_PROPERTIES
 
-def _calculate_fingerprint(smiles: list[str], fp: str, bits: int = 1024, radius: int = 2) -> np.ndarray:
-    """Calculate fingerprints for a list of SMILES."""
-    fp_calc = FingerprintCalculator()
-    fp_arr = fp_calc.FingerprintFromSmiles(smiles, fp=fp, fpSize=bits, radius=radius)
-    return fp_arr
+DEFAULT_PROPERTIES = list(AVAILABLE_PROPERTIES)
 
 
 def create_tmap(
     smiles: list[str],
     fingerprint: str = 'morgan',
-    fingerprint_bits: int = 1024,
+    fingerprint_bits: int = 2048,
     fingerprint_radius: int = 2,
     properties: list[str] | None = None,
     tmap_name: str = 'my_tmap',
@@ -100,199 +25,121 @@ def create_tmap(
 ) -> None:
     """Create a TMAP visualization from SMILES strings.
 
-    Uses LSH Forest for layout generation.
-
     Args:
         smiles: List of SMILES strings to visualize.
         fingerprint: Fingerprint type ('morgan' or 'mqn').
-        fingerprint_bits: Number of bits for fingerprint (default: 1024).
+        fingerprint_bits: Number of bits for Morgan fingerprint (default: 2048).
         fingerprint_radius: Radius for Morgan fingerprint (default: 2).
         properties: List of molecular properties to display as color channels.
-                   If None, uses all default properties.
-        tmap_name: Name for the output TMAP file.
+                   If None, uses all available properties.
+        tmap_name: Name for the output TMAP HTML file.
         k_neighbors: Number of neighbors for layout (default: 20).
     """
     if properties is None:
         properties = DEFAULT_PROPERTIES
 
-    # Validate properties
-    invalid = set(properties) - set(AVAILABLE_PROPERTIES.keys())
-    if invalid:
-        raise ValueError(f"Unknown properties: {invalid}. Available: {list(AVAILABLE_PROPERTIES.keys())}")
-
-    # Calculate fingerprints
-    fp_arr = _calculate_fingerprint(smiles, fingerprint, fingerprint_bits, fingerprint_radius)
-
-    # Calculate molecular properties
-    descriptors_df = calc_properties(smiles, properties)
-
-    # Create TMAP fingerprints
-    tm_fingerprint = [tm.VectorUint(fp) for fp in fp_arr]
-
-    # Build LSH Forest
-    lf = tm.LSHForest(fingerprint_bits)
-    lf.batch_add(tm_fingerprint)
-    lf.index()
-
-    # Layout configuration
-    cfg = tm.LayoutConfiguration()
-    cfg.node_size = 1/30
-    cfg.mmm_repeats = 2
-    cfg.sl_extra_scaling_steps = 10
-    cfg.k = k_neighbors
-    cfg.sl_scaling_type = tm.RelativeToAvgLength
-    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, cfg)
-
-    # Create labels
-    labels = descriptors_df['smiles'].tolist()
-
-    # Get color data for selected properties
-    column_names = [AVAILABLE_PROPERTIES[prop][0] for prop in properties]
-    c = [descriptors_df[col].to_numpy() for col in column_names]
-
-    # Plotting
-    f = Faerun(
-        view="front",
-        coords=False,
-        title="",
-        clear_color="#FFFFFF",
-    )
+    _validate_properties(properties)
 
-    f.add_scatter(
-        tmap_name + "_TMAP",
-        {
-            "x": x,
-            "y": y,
-            "c": np.array(c),
-            "labels": labels,
-        },
-        shader="smoothCircle",
-        point_scale=2.5,
-        max_point_size=20,
-        interactive=True,
-        series_title=column_names,
-        has_legend=True,
-        colormap=['rainbow'] * len(properties),
-        categorical=[False] * len(properties),
-    )
+    # Compute fingerprints via tmap2
+    fp_kwargs = {}
+    if fingerprint == 'morgan':
+        fp_kwargs = {'n_bits': fingerprint_bits, 'radius': fingerprint_radius}
+
+    fps = fingerprints_from_smiles(smiles, fp_type=fingerprint, **fp_kwargs)
+
+    # Fit TMAP
+    model = TMAP(metric='jaccard', n_neighbors=k_neighbors).fit(fps)
+
+    # Build visualization
+    viz = model.to_tmapviz(include_edges=True)
+    viz.title = tmap_name
+    viz.background_color = "#FFFFFF"
 
-    f.add_tree(tmap_name + "_TMAP_tree", {"from": s, "to": t}, point_helper=tmap_name + "_TMAP")
-    f.plot(tmap_name + "_TMAP", template='smiles')
+    # Add molecular properties as color layers
+    props = molecular_properties(smiles, properties=properties)
+    for prop_name, values in props.items():
+        viz.add_color_layout(prop_name, values, categorical=False, color='rainbow')
+
+    # Add SMILES for structure rendering in tooltips
+    viz.add_smiles(smiles)
+
+    viz.write_html(f"{tmap_name}.html")
 
 
 def representative_tmap(
     smiles: list[str],
-    cluster_ids: list[int | str] | None = None,
+    cluster_ids: Sequence[int | str] | None = None,
     fingerprint: str = 'mqn',
-    fingerprint_bits: int = 1024,
+    fingerprint_bits: int = 2048,
+    fingerprint_radius: int = 2,
     properties: list[str] | None = None,
     tmap_name: str = 'representative_tmap',
     k_neighbors: int = 21,
 ) -> None:
-    """Create a TMAP for representative molecules using edge list layout.
+    """Create a TMAP for representative molecules.
 
-    Uses scikit-learn's NearestNeighbors for edge list construction and
-    TMAP's layout_from_edge_list for positioning. Recommended for primary
-    TMAPs showing cluster representatives.
+    Recommended for primary TMAPs showing cluster representatives.
 
     Args:
         smiles: List of SMILES strings (typically cluster representatives).
         cluster_ids: Optional list of cluster IDs corresponding to each SMILES.
-                    If provided, cluster ID will be shown in labels.
+                    If provided, added as a categorical color channel.
         fingerprint: Fingerprint type ('mqn' recommended, or 'morgan').
-        fingerprint_bits: Number of bits for fingerprint (default: 1024).
+        fingerprint_bits: Number of bits for Morgan fingerprint (default: 2048).
+        fingerprint_radius: Radius for Morgan fingerprint (default: 2).
         properties: List of molecular properties to display as color channels.
-                   If None, uses all default properties.
-        tmap_name: Name for the output TMAP file.
+                   If None, uses all available properties.
+        tmap_name: Name for the output TMAP HTML file.
         k_neighbors: Number of neighbors for KNN graph (default: 21).
     """
     if properties is None:
         properties = DEFAULT_PROPERTIES
 
-    # Validate properties
-    invalid = set(properties) - set(AVAILABLE_PROPERTIES.keys())
-    if invalid:
-        raise ValueError(f"Unknown properties: {invalid}. Available: {list(AVAILABLE_PROPERTIES.keys())}")
-
-    # Calculate fingerprints (default MQN for representatives)
-    fp_arr = _calculate_fingerprint(smiles, fingerprint, fingerprint_bits)
-
-    # Build edge list using scikit-learn
-    nbrs = NearestNeighbors(n_neighbors=k_neighbors, metric='euclidean').fit(fp_arr)
-    distances, indices = nbrs.kneighbors(fp_arr)
-
-    edge_list = []
-    for i in range(len(fp_arr)):
-        for neighbor_idx, dist in zip(indices[i, 1:], distances[i, 1:]):
-            edge_list.append((i, neighbor_idx, float(dist)))
-
-    # Calculate molecular properties
-    descriptors_df = calc_properties(smiles, properties)
-
-    # Layout configuration
-    cfg = tm.LayoutConfiguration()
-    cfg.node_size = 1/30
-    cfg.mmm_repeats = 2
-    cfg.sl_extra_scaling_steps = 10
-    cfg.k = k_neighbors
-    cfg.sl_scaling_type = tm.RelativeToAvgLength
-    x, y, s, t, _ = tm.layout_from_edge_list(len(fp_arr), edge_list, cfg)
-
-    # Create labels with optional cluster_id
-    labels = []
-    for idx, row in descriptors_df.iterrows():
-        smi = row['smiles']
-        if cluster_ids is not None:
-            labels.append(f"{smi}" + "__" + f"Cluster ID: {cluster_ids[idx]}")
-        else:
-            labels.append(smi)
+    _validate_properties(properties)
 
-    # Get color data for selected properties
-    column_names = [AVAILABLE_PROPERTIES[prop][0] for prop in properties]
-    c = [descriptors_df[col].to_numpy() for col in column_names]
+    # Compute fingerprints via tmap2
+    fp_kwargs = {}
+    if fingerprint == 'morgan':
+        fp_kwargs = {'n_bits': fingerprint_bits, 'radius': fingerprint_radius}
 
-    # Add cluster_id as a color channel if provided
+    fps = fingerprints_from_smiles(smiles, fp_type=fingerprint, **fp_kwargs)
+
+    # Fit TMAP — use euclidean for dense descriptors like MQN
+    metric = 'euclidean' if fingerprint == 'mqn' else 'jaccard'
+    model = TMAP(metric=metric, n_neighbors=k_neighbors).fit(fps)
+
+    # Build visualization
+    viz = model.to_tmapviz(include_edges=True)
+    viz.title = tmap_name
+    viz.background_color = "#FFFFFF"
+
+    # Add molecular properties as color layers
+    props = molecular_properties(smiles, properties=properties)
+    for prop_name, values in props.items():
+        viz.add_color_layout(prop_name, values, categorical=False, color='rainbow')
+
+    # Add cluster ID as categorical color channel
     if cluster_ids is not None:
-        # Convert cluster_ids to numeric for coloring
-        unique_clusters = list(set(cluster_ids))
-        cluster_to_idx = {c: i for i, c in enumerate(unique_clusters)}
-        cluster_colors = np.array([cluster_to_idx[cid] for cid in cluster_ids])
-        c.append(cluster_colors)
-        column_names.append('Cluster ID')
-        properties = list(properties) + ['cluster_id']  # For categorical list
-
-    # Plotting
-    f = Faerun(
-        view="front",
-        coords=False,
-        title="",
-        clear_color="#FFFFFF",
-    )
+        viz.add_color_layout(
+            'Cluster ID',
+            [str(cid) for cid in cluster_ids],
+            categorical=True,
+            color='tab20',
+        )
+
+    # Add SMILES for structure rendering in tooltips
+    viz.add_smiles(smiles)
+
+    viz.write_html(f"{tmap_name}.html")
 
-    # Set categorical for cluster_id if present
-    categorical = [False] * (len(properties) - 1) + [True] if cluster_ids is not None else [False] * len(properties)
-    colormaps = ['rainbow'] * (len(properties) - 1) + ['tab20'] if cluster_ids is not None else ['rainbow'] * len(properties)
-
-    f.add_scatter(
-        tmap_name + "_TMAP",
-        {
-            "x": x,
-            "y": y,
-            "c": np.array(c),
-            "labels": labels,
-        },
-        shader="smoothCircle",
-        point_scale=2.5,
-        max_point_size=20,
-        interactive=True,
-        series_title=column_names,
-        has_legend=True,
-        colormap=colormaps,
-        categorical=categorical,
-    )
 
-    f.add_tree(tmap_name + "_TMAP_tree", {"from": s, "to": t}, point_helper=tmap_name + "_TMAP")
-    f.plot(tmap_name + "_TMAP", template='smiles')
+def _validate_properties(properties: list[str]) -> None:
+    """Raise ValueError if any property name is not recognized by tmap2."""
+    invalid = set(properties) - set(AVAILABLE_PROPERTIES)
+    if invalid:
+        raise ValueError(
+            f"Unknown properties: {invalid}. Available: {AVAILABLE_PROPERTIES}"
+        )
 
 
 def main():
@@ -301,11 +148,11 @@ def main():
         description="Generate TMAP visualizations from SMILES",
         formatter_class=argparse.RawDescriptionHelpFormatter,
         epilog=f"""
-Available properties: {', '.join(AVAILABLE_PROPERTIES.keys())}
+Available properties: {', '.join(AVAILABLE_PROPERTIES)}
 
 Examples:
   chelombus-tmap --smiles molecules.smi
-  chelombus-tmap --smiles molecules.smi --fingerprint mqn --properties hac,mw,clogp
+  chelombus-tmap --smiles molecules.smi --fingerprint mqn --properties mw,logp,qed
   chelombus-tmap --smiles molecules.smi --cluster-file clusters.csv
         """
     )
@@ -314,16 +161,16 @@ def main():
     parser.add_argument('--fingerprint', type=str, default='morgan',
                        choices=['morgan', 'mqn'],
                        help="Fingerprint type (default: morgan)")
-    parser.add_argument('--bits', type=int, default=1024,
-                       help="Fingerprint bits (default: 1024)")
+    parser.add_argument('--bits', type=int, default=2048,
+                       help="Morgan fingerprint bits (default: 2048)")
     parser.add_argument('--radius', type=int, default=2,
                        help="Morgan fingerprint radius (default: 2)")
     parser.add_argument('--properties', type=str, default=None,
-                       help=f"Comma-separated properties (default: all). Available: {', '.join(AVAILABLE_PROPERTIES.keys())}")
+                       help=f"Comma-separated properties (default: all). Available: {', '.join(AVAILABLE_PROPERTIES)}")
     parser.add_argument('--output', type=str, default='my_tmap',
                        help="Output TMAP name (default: my_tmap)")
     parser.add_argument('--cluster-file', type=str, default=None,
-                       help="Optional CSV/parquet file with 'smiles' and 'cluster_id' columns for representative TMAP")
+                       help="CSV/parquet file with 'smiles' and 'cluster_id' columns for representative TMAP")
     parser.add_argument('--representative', action='store_true',
                        help="Use representative_tmap layout (edge list based)")
 
@@ -336,7 +183,6 @@ def main():
 
     # Load data
     if args.cluster_file:
-        # Load from CSV/parquet with cluster_ids
         if args.cluster_file.endswith('.parquet'):
             df = pd.read_parquet(args.cluster_file)
         elif args.cluster_file.endswith('.csv'):
@@ -352,11 +198,11 @@ def main():
             cluster_ids=cluster_ids,
             fingerprint=args.fingerprint,
             fingerprint_bits=args.bits,
+            fingerprint_radius=args.radius,
             properties=properties,
             tmap_name=args.output,
         )
     else:
-        # Load from simple SMILES file
         with open(args.smiles, 'r') as file:
             smiles = [line.strip() for line in file.readlines() if line.strip()]
 
@@ -365,6 +211,7 @@ def main():
                 smiles=smiles,
                 fingerprint=args.fingerprint,
                 fingerprint_bits=args.bits,
+                fingerprint_radius=args.radius,
                 properties=properties,
                 tmap_name=args.output,
             )
diff --git a/docs/api.md b/docs/api.md
index 50ab3ed..c05995a 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -18,6 +18,12 @@
 
 ::: chelombus.clustering.PyQKmeans.PQKMeans
 
+## Visualization
+
+::: chelombus.utils.visualization.create_tmap
+
+::: chelombus.utils.visualization.representative_tmap
+
 ## Cluster I/O
 
 ::: chelombus.utils.cluster_io
diff --git a/docs/choosing-k.md b/docs/choosing-k.md
index de866b5..5b54407 100644
--- a/docs/choosing-k.md
+++ b/docs/choosing-k.md
@@ -41,11 +41,24 @@ Benchmark configuration: AMD Ryzen 7, 64GB RAM, 10 PQk-means iterations per k.
 - **k = 100,000** if you need tighter clusters and can tolerate ~27% empty clusters.
 - Beyond 200K, over a third of clusters are empty — diminishing returns.
 
+## GPU-accelerated k selection
+
+If you have a CUDA GPU, use `scripts/k_selection_gpu.py` for significantly faster sweeps:
+
+```bash
+python scripts/k_selection_gpu.py \
+    --pq-codes data/pq_codes.npy \
+    --encoder models/encoder.joblib \
+    --k-values 10000 50000 100000 200000
+```
+
+The GPU path uses Triton kernels for the predict step and is typically 8-10x faster than CPU for large k values.
+
 ## Scaling estimates
 
 Fit time scales linearly with both n (number of molecules) and k:
 
-| Scenario | Estimated Fit Time |
+| Scenario | CPU Fit Time |
 |---|---|
 | 1B molecules, k=50K | ~2.6 days |
 | 1B molecules, k=100K | ~5.2 days |
diff --git a/docs/getting-started.md b/docs/getting-started.md
index 9fcebb1..c8f843a 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -8,6 +8,16 @@
 pip install chelombus
 ```
 
+### GPU Support (optional)
+
+GPU acceleration requires PyTorch and Triton. Install them separately for your CUDA version:
+
+```bash
+pip install torch triton
+```
+
+When CUDA is available, `PQEncoder.fit()`, `.transform()`, `PQKMeans.fit()`, and `.predict()` automatically use the GPU. No code changes needed — the library falls back to CPU transparently if CUDA is not detected.
+
 ### From Source
 
 ```bash
@@ -25,17 +35,18 @@ pip install -e .
 from chelombus import DataStreamer, FingerprintCalculator, PQEncoder, PQKMeans
 
 # 1. Stream SMILES in chunks
-streamer = DataStreamer(path='molecules.smi', chunksize=100000)
+streamer = DataStreamer()
+smiles_gen = streamer.parse_input('molecules.smi', chunksize=100000, smiles_col=0)
 
 # 2. Calculate MQN fingerprints
 fp_calc = FingerprintCalculator()
-for smiles_chunk in streamer.parse_input():
+for smiles_chunk in smiles_gen:
     fingerprints = fp_calc.FingerprintFromSmiles(smiles_chunk, fp='mqn')
     # Save fingerprints...
 
-# 3. Train PQ encoder on sample
+# 3. Train PQ encoder (uses GPU automatically if available)
 encoder = PQEncoder(k=256, m=6, iterations=20)
-encoder.fit(training_fingerprints)
+encoder.fit(training_fingerprints, device='auto')
 
 # 4. Transform all fingerprints to PQ codes
 pq_codes = encoder.transform(fingerprints)
@@ -45,19 +56,82 @@ clusterer = PQKMeans(encoder, k=100000)
 labels = clusterer.fit_predict(pq_codes)
 ```
 
+## GPU Acceleration
+
+When PyTorch and Triton are installed with CUDA support, the entire PQ pipeline runs on GPU.
+
+**GPU benchmarks on 10M Enamine MQN fingerprints** (RTX 4070 Ti SUPER 16GB, k=10,000):
+
+| Stage | Time |
+|:---|---:|
+| `encoder.fit()` (10M, m=6, k=256) | 12.1s |
+| `encoder.transform()` (10M) | 1.8s |
+| `clusterer.fit()` (10M, k=10K, 5 iters) | 15.4s |
+| `clusterer.predict()` (10M) | 2.1s |
+| **Total** | **31.4s** |
+
+At 1B scale with k=100K, the full pipeline completes in ~2.9 hours on the same GPU.
+
+### Explicit device control
+
+```python
+# Force GPU
+encoder.fit(X_train, device='gpu')
+
+# Force CPU
+encoder.fit(X_train, device='cpu')
+
+# Auto-detect (default for fit; transform/predict auto-detect always)
+encoder.fit(X_train, device='auto')
+```
+
+The GPU path uses VRAM-aware batching — it queries free GPU memory and sizes batches automatically so you don't need to worry about OOM errors.
+
+## Visualization
+
+Chelombus uses [tmap2](https://github.com/afloresep/tmap2) for interactive TMAP visualizations.
+
+### From Python
+
+```python
+from chelombus import sample_from_cluster
+from chelombus.utils.visualization import create_tmap, representative_tmap
+
+# Visualize molecules from a cluster
+df = sample_from_cluster('results/', cluster_id=42, n=1000)
+create_tmap(
+    smiles=df['smiles'].tolist(),
+    fingerprint='morgan',
+    properties=['mw', 'logp', 'qed', 'n_rings'],
+    tmap_name='cluster_42',
+)
+
+# Representative TMAP with cluster IDs
+representative_tmap(
+    smiles=rep_smiles,
+    cluster_ids=rep_cluster_ids,
+    fingerprint='mqn',
+    tmap_name='representatives',
+)
+```
+
+### From CLI
+
+```bash
+chelombus-tmap --smiles molecules.smi --fingerprint morgan --output my_tmap
+chelombus-tmap --cluster-file representatives.csv --output rep_tmap
+```
+
 ## Pipeline Scripts
 
 For large-scale processing, use the pipeline scripts in `scripts/`:
 
 | Script | Description |
 |---|---|
-| `calculate_fingerprints.py` | Compute MQN fingerprints from SMILES |
-| `train_encoder.py` | Train the PQ encoder on a sample |
-| `generate_pqcodes.py` | Transform fingerprints to PQ codes |
-| `fit_pqkmeans.py` | Fit PQk-means clustering |
-| `assign_clusters.py` | Assign molecules to clusters |
-| `select_k.py` | Hyperparameter sweep for choosing k |
-| `run_pipeline.py` | Run the full pipeline end-to-end |
+| `cluster_smiles.py` | End-to-end: SMILES to clustered parquet |
+| `benchmark_1B_pipeline.py` | Full pipeline benchmark at billion scale |
+| `benchmark_gpu_predict.py` | GPU vs CPU predict benchmarks |
+| `k_selection_gpu.py` | GPU-accelerated k hyperparameter sweep |
 
 ## Testing
 
diff --git a/docs/index.md b/docs/index.md
index 12ce0d0..0d2efd8 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -18,19 +18,20 @@ SMILES → MQN Fingerprints → PQ Encoding → PQk-means Clustering → Nested
 
 - **Scalability**: Stream billions of molecules without loading everything into memory
 - **Efficiency**: Compress 42-dimensional MQN vectors to 6-byte PQ codes (28x compression)
-- **Visualization**: Navigate from global overview to individual molecules in two clicks
-- **Accessibility**: Runs on commodity hardware (tested: AMD Ryzen 7, 64GB RAM)
+- **GPU Acceleration**: Full CUDA pipeline via PyTorch + Triton kernels for fit, transform, and predict
+- **Visualization**: Interactive TMAPs powered by [tmap2](https://github.com/afloresep/tmap2) — navigate from global overview to individual molecules in two clicks
+- **Accessibility**: Runs on commodity hardware (tested: AMD Ryzen 7, 64GB RAM); GPU optional
 
 ## Project Structure
 
 ```
 chelombus/
 ├── chelombus/
-│   ├── encoder/          # Product Quantization encoder
-│   ├── clustering/       # PQk-means wrapper
+│   ├── encoder/          # Product Quantization encoder (CPU + GPU)
+│   ├── clustering/       # PQk-means wrapper + Triton kernels
 │   ├── streamer/         # Memory-efficient data streaming
-│   └── utils/            # Fingerprints, visualization, helpers
-├── scripts/              # Pipeline scripts
+│   └── utils/            # Fingerprints, visualization, cluster I/O
+├── scripts/              # Pipeline and benchmark scripts
 ├── examples/             # Tutorial notebooks
 └── tests/                # Unit tests
 ```
diff --git a/examples/enamine_1B_clustering.ipynb b/examples/enamine_1B_clustering.ipynb
index b550bee..ea25073 100644
--- a/examples/enamine_1B_clustering.ipynb
+++ b/examples/enamine_1B_clustering.ipynb
@@ -89,18 +89,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "id": "67096f1e",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Shape: (10000000, 42), MB: 840.0\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import numpy as np \n",
     "\n",
@@ -118,18 +110,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "id": "3108cd8f",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Training PQ-codes: 100%|██████████| 6/6 [15:30<00:00, 155.03s/it]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from chelombus import PQEncoder\n",
     "pq_encoder = PQEncoder(k=256, m=6, iterations=10)\n",
@@ -146,7 +130,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "id": "ef22b025",
    "metadata": {},
    "outputs": [],
@@ -174,20 +158,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "id": "10b8a8c1",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The shape of the codebook is:  (6, 256, 7)\n",
-      "Is the encoder trained?  True\n",
-      "The total number of lables [164 183 183 ...   9  20  86]  is: 10000000\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "print(\"The shape of the codebook is: \", pq_encoder.codewords.shape)\n",
     "print(\"Is the encoder trained? \", pq_encoder.encoder_is_trained)\n",
@@ -205,32 +179,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "id": "763f58a5",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:03<00:00,  1.50it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Shape: (10000000, 6), MB: 60.0\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "pq_codes = pq_encoder.transform(chunk)\n",
     "print(f\"Shape: {pq_codes.shape}, MB: {pq_codes.nbytes/1e6}\")\n"
@@ -247,37 +199,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "id": "c8af696e",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[23  0  0  0  0  0  0  1  4  1  0 29 13  1  0  9  8  0  8  7  6  1  1  0\n",
-      "  0  5  4  3  0 11  6  0  0  0  1  2  0  0  0  0  0  0] (1000000, 42)\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 15.82it/s]\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[  3 140 163  49  11   1]\n",
-      "[23  0  0  0  0  0  0  1  4  1  0 29 13  0  0  8  8  0  8  8  5  1  1  0\n",
-      "  0  4  3  3  0 11  6  0  0  0  1  2  0  0  0  0  0  0] (1000000, 42)\n",
-      "Reconversion process:\n",
-      "Reconstruction fidelity: 92.02%\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Load a different chunk\n",
     "mqn_chunk_2 = np.load('/mnt/samsung_2tb/tmp/enamine-chunk_00001.npy')\n",
@@ -368,29 +293,7 @@
    "execution_count": null,
    "id": "1f2e9277",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array([49161, 13845, 59452, 70857, 92866, 53390, 86229, 76216, 10800,\n",
-       "       35907, 28614, 36577, 37133,  7373, 78079, 86816, 32896, 54950,\n",
-       "       50991, 61871, 41623, 62782, 13150, 18079, 17881, 90621, 16919,\n",
-       "       89129, 41955, 51549, 28733,  6753,  5147, 10976, 94913, 24994,\n",
-       "       41253, 39846, 23494, 39025, 92625, 32737, 32737, 92866, 10623,\n",
-       "       66455, 41056, 32782, 60229, 86723, 70828, 25645, 19690, 66414,\n",
-       "       57948, 62575, 37133, 79107, 80172, 25645, 42460, 87881, 64548,\n",
-       "       84159, 22125, 75078, 86370, 85735, 45628, 55806, 71367, 31446,\n",
-       "        7686,  2500, 65864, 95833, 19290, 87959, 84159, 86816, 90085,\n",
-       "       60947, 17623, 66380, 68923, 12279, 32309, 55541, 30332, 76162,\n",
-       "       94913, 18079, 79672, 24077, 50207, 88405, 33924, 89928, 25457,\n",
-       "       92866])"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "kmeans.predict(pq_codes[:100])\n"
    ]
@@ -400,35 +303,7 @@
    "execution_count": null,
    "id": "00100a94",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 13.59it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 14.19it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 13.64it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 14.00it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 13.99it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 13.34it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 14.17it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 14.37it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 14.33it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 13.97it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 14.26it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 14.07it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 14.15it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 13.54it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 13.23it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 13.58it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 14.01it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 13.85it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 14.38it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 14.25it/s]\n",
-      "Generating PQ-codes: 100%|██████████| 6/6 [00:00<00:00, 14.50it/s]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "\n",
     "# Cluster the file with the SMILES strings\n",
@@ -451,11 +326,126 @@
     "    df.to_parquet(os.path.join(output_path, f'clustered_chunk_{i}.parquet'), index=False)\n",
     "    \n"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5d670042",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cluster the file with the SMILES strings\n",
+    "from chelombus import PQEncoder, DataStreamer, FingerprintCalculator\n",
+    "from chelombus.clustering.PyQKmeans import PQKMeans\n",
+    "import pyarrow as pa, pyarrow.parquet as pq\n",
+    "from chelombus.utils import format_time\n",
+    "encoder = PQEncoder.load('../models/paper_encoder.joblib')\n",
+    "clusterer = PQKMeans.load('../models/paper_clusterer.joblib')\n",
+    "stream = DataStreamer()\n",
+    "fp_calc = FingerprintCalculator()\n",
+    "import timeit\n",
+    "\n",
+    "for i, chunk in enumerate(stream.parse_input(\n",
+    "    '/mnt/10tb_hdd/cleaned_enamine_10b/output_file_0.cxsmiles',\n",
+    "    chunksize=1_000_000, smiles_col=1, verbose=0,\n",
+    ")):\n",
+    "    fps = fp_calc.FingerprintFromSmiles(chunk, 'mqn')\n",
+    "    t = timeit.timeit()\n",
+    "    pq_codes = encoder.transform(fps, verbose=0,  device='gpu')     \n",
+    "    print(\"GPU transform: \", timeit.timeit() - t)\n",
+    "    t = timeit.timeit()\n",
+    "    pq_codes = encoder.transform(fps, verbose=0,  device='cpu')     \n",
+    "    print(\"CPU transform: \", format_time(timeit.timeit() - t))\n",
+    "    t = timeit.timeit()\n",
+    "    labels = clusterer.predict(pq_codes, device='cpu')             \n",
+    "    print(\"CPU predict: \", format_time(timeit.timeit() - t))\n",
+    "    t = timeit.timeit()\n",
+    "    labels = clusterer.predict(pq_codes, device='gpu')             \n",
+    "    print(\"GPU predict: \", format_time(timeit.timeit() - t))\n",
+    "    # table = pa.table({\"smiles\": chunk, \"cluster_id\": labels})\n",
+    "    # pq.write_table(table, f'/mnt/samsung_2tb/tmp/chunk_{i:05d}.parquet')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fa294f09",
+   "metadata": {},
+   "source": [
+    "# Visualization with TMAP\n",
+    "\n",
+    "After clustering, we can create interactive TMAPs to explore the chemical space. The simplest approach: sample representatives from each cluster, compute MQN fingerprints, and let `TMAP` handle the layout."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "55a7899a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from chelombus import sample_from_cluster\n",
+    "from chelombus.utils.visualization import representative_tmap\n",
+    "# Sample representatives from several clusters\n",
+    "results_dir = '/mnt/2tb/tmp/'  # Path to your clustered parquet files\n",
+    "rep_smiles = []\n",
+    "rep_cids = []\n",
+    "\n",
+    "for cid in range(100):  # First 100 clusters\n",
+    "    df = sample_from_cluster(results_dir, cluster_id=cid, n=1)\n",
+    "    if len(df) > 0:\n",
+    "        rep_smiles.extend(df['smiles'].tolist())\n",
+    "        rep_cids.extend([cid] * len(df))\n",
+    "\n",
+    "print(f\"Collected {len(rep_smiles)} representatives from {len(set(rep_cids))} clusters\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "adf5d309",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create the representative TMAP, MQN + euclidean is the natural choice for representatives\n",
+    "representative_tmap(\n",
+    "    smiles=rep_smiles,\n",
+    "    cluster_ids=rep_cids,\n",
+    "    fingerprint='mqn',\n",
+    "    properties=['mw', 'logp', 'qed', 'n_rings', 'hba', 'hbd'],\n",
+    "    tmap_name='enamine_representative',\n",
+    "    k_neighbors=20,\n",
+    ")\n",
+    "\n",
+    "print(\"Representative TMAP generated: enamine_representative.html\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "269e44c0",
+   "metadata": {},
+   "source": [
+    "You can also build a TMAP directly using tmap2 if you need more control:\n",
+    "\n",
+    "```python\n",
+    "from tmap import TMAP\n",
+    "from tmap.utils import fingerprints_from_smiles, molecular_properties\n",
+    "\n",
+    "fps = fingerprints_from_smiles(rep_smiles, fp_type='mqn')\n",
+    "model = TMAP(metric='euclidean', n_neighbors=20).fit(fps)\n",
+    "\n",
+    "viz = model.to_tmapviz(include_edges=True)\n",
+    "props = molecular_properties(rep_smiles, properties=['mw', 'logp', 'qed'])\n",
+    "for name, values in props.items():\n",
+    "    viz.add_color_layout(name, values, categorical=False, color='rainbow')\n",
+    "viz.add_smiles(rep_smiles)\n",
+    "viz.write_html('custom_tmap.html')\n",
+    "```"
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "spiq",
+   "display_name": "venv (3.12.6)",
    "language": "python",
    "name": "python3"
   },
@@ -469,7 +459,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.13.2"
+   "version": "3.12.6"
   }
  },
  "nbformat": 4,
diff --git a/examples/tutorial.ipynb b/examples/tutorial.ipynb
index d291536..a1e7b58 100644
--- a/examples/tutorial.ipynb
+++ b/examples/tutorial.ipynb
@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -33,18 +33,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Fingerprints shape: (4, 1024)\n",
-      "Fingerprint 1 [0 0 1 ... 0 0 0]\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "# Define a list of SMILES strings\n",
     "smiles_list = [\"CCO\", \"C1CCCCC1\", \"O=C=O\", \"O=C=O\"]\n",
@@ -72,18 +63,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'generator'>\n",
-      " Fingerprints calculated: 100,000"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "# Import iterator method\n",
     "ds = DataStreamer()\n",
@@ -108,17 +90,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      " Fingerprints calculated: 100,000"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "from chelombus.utils.helper_functions import save_chunk\n",
     "\n",
@@ -141,17 +115,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "(100000, 1024) 102.4  MB\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "# At this step we could load Fingerprints computed in the previous step -likely what you'd do for large datasets\n",
     "# or in case it fits in memory just pass the fingerprint matrix\n",
@@ -177,17 +143,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Training PQ-encoder: 100%|██████████| 4/4 [00:12<00:00,  3.12s/it]\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "from chelombus.encoder.encoder import PQEncoder\n",
     "\n",
@@ -210,19 +168,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The shape of the codebook is:  (4, 256, 256)\n",
-      "Is the encoder trained?  True\n",
-      "The lables: KMeans(max_iter=10, n_clusters=256) have length: 100000\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "print(\"The shape of the codebook is: \", pq_encoder.codewords.shape)\n",
     "print(\"Is the encoder trained? \", pq_encoder.encoder_is_trained)\n",
@@ -239,38 +187,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "10,000 fingerprints of 1024 dimensions to be transformed into PQ-codes\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Generating PQ-codes: 100%|██████████| 4/4 [00:00<00:00, 85.00it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Transforming 10,000 fingeprints took 0.05 seconds\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "import numpy as np \n",
     "import time\n",
@@ -294,43 +213,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array([[137,  84, 139,  81],\n",
-       "       [137,  36,  39,  81],\n",
-       "       [137,  84, 139,  53],\n",
-       "       ...,\n",
-       "       [250,  44, 200,  16],\n",
-       "       [250,  44, 200, 238],\n",
-       "       [144, 253, 218, 166]], shape=(10000, 4), dtype=uint8)"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from chelombus import PQEncoder\n",
+    "\n",
+    "encoder = PQEncoder.load('/home/afloresep/work/Chelombus/models/paper_encoder.joblib')\n",
+    "encoder.transform(np.random.randint(0, 200, (10_000_000, 42)))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "X_pq_code\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Number of unique 4-dim vectors: 6116\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "import numpy as np\n",
     "# Count unique rows\n",
@@ -348,18 +254,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Original input of shape: (10000, 1024) and size of 10,240,000 bytes is now transformed into shape (10000, 4) and size of 40,000 bytes\n",
-      "This is 256 times more memory efficient\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "print(f\"Original input of shape: {X_test.shape} and size of {X_test.nbytes:,} bytes is now transformed into shape {X_pq_code.shape} and size of {X_pq_code.nbytes:,} bytes\")\n",
     "print(f\"This is {int(X_test.nbytes / X_pq_code.nbytes)} times more memory efficient\")\n"
@@ -376,19 +273,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Assigned 10000 molecules to 100 clusters\n",
-      "Labels shape: (10000,)\n",
-      "Sample labels: [83 76 35 76 60 94 94 94 76 94]\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "from chelombus import PQKMeans\n",
     "\n",
@@ -414,22 +301,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Cluster centers shape: (100, 4)\n",
-      "\n",
-      "Cluster size statistics:\n",
-      "  Min cluster size: 4\n",
-      "  Max cluster size: 346\n",
-      "  Mean cluster size: 100.0\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "# Cluster centers are stored as PQ codes\n",
     "print(f\"Cluster centers shape: {clusterer.cluster_centers_.shape}\")\n",
@@ -451,17 +325,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Predicted labels: [47 27 20 47 20 27 33 27 47 33]\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "# Fit on training data\n",
     "clusterer2 = PQKMeans(encoder=pq_encoder, k=50, iteration=15)\n",
@@ -483,17 +349,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Loaded model predictions match: True\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "# Save the trained clusterer\n",
     "clusterer2.save(\"data/pqkmeans_model.joblib\")\n",
@@ -517,17 +375,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Created 2 parquet chunks\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "# First, let's create sample parquet files for demonstration\n",
     "import os\n",
@@ -551,31 +401,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Number of clusters: 100\n",
-      "Total molecules: 10,000\n",
-      "\n",
-      "Top 5 largest clusters:\n",
-      "    cluster_id  molecule_count\n",
-      "76          76             346\n",
-      "93          93             285\n",
-      "58          58             273\n",
-      "73          73             239\n",
-      "60          60             234\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "from chelombus import get_cluster_stats, query_cluster, sample_from_cluster\n",
     "\n",
     "# Get statistics for all clusters\n",
-    "stats = get_cluster_stats(\"data/results/\")\n",
+    "stats = get_cluster_stats(\"/mnt/10tb_hdd/enamine_clustered_october_2025/build_clickhouse_db_enamine_files\", \"2B_cluster_id\")\n",
     "print(f\"Number of clusters: {len(stats)}\")\n",
     "print(f\"Total molecules: {stats['molecule_count'].sum():,}\")\n",
     "print(f\"\\nTop 5 largest clusters:\")\n",
@@ -584,23 +417,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Cluster 0 contains 51 molecules\n",
-      "                                              smiles  cluster_id\n",
-      "0  CC1=CC=CC=C1N1N=NN=C1SCC(=O)N1CCN(C2=CC=C(F)C=...           0\n",
-      "1  [N-]=[N+]=NCCCC(=O)CSC1=NN=C(C2=CC=C(F)C=C2)N1...           0\n",
-      "2  O=C(O)C1=CN=C(SCC(=O)N2CCSC3=CC=CC=C32)N1C1=CC...           0\n",
-      "3  CC(C1=NN=C(SCCC2=CC=NC=C2)N1C1=CC=C(F)C=C1)N1C...           0\n",
-      "4  CC(C1=NN=C(SCC2CC(=O)N(C)C2)N1C1=CC=C(F)C=C1)N...           0\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "# Query all molecules from a specific cluster\n",
     "cluster_id = stats.iloc[0]['cluster_id']  # Get largest cluster\n",
@@ -611,18 +430,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Random sample of 5 molecules from cluster 0:\n",
-      "['CCC(C1=NN=C(SCC(=O)[C@]2(C)C[C@H]2CC)N1C1=CC=C(OC)C=C1)N(C)C', 'CC(C1=NN=C(SCCC2=CC=NC=C2)N1C1=CC=C(F)C=C1)N1CCCCC1', 'CC(C)OC(=O)CCSC1=NN=C(C(C)N2CCCCC2)N1C1=CC=C(Cl)C=C1', 'NC1=NC(Cl)=CC(CSC2=NN=C(CN3CCCCC3)N2C2=CC=C(F)C=C2)=N1', 'CC1=CC(C(=O)CSC2=NN=CN2C2=CC=CC=C2)=C(C)N1C1=CC=C(F)C=C1']\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "# Get a random sample from a cluster (useful for previewing)\n",
     "sample = sample_from_cluster(\"data/results/\", cluster_id=int(cluster_id), n=5, random_state=42)\n",
@@ -632,17 +442,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Exported 51 molecules to data/cluster_export.parquet\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "from chelombus import export_cluster\n",
     "\n",
@@ -658,144 +460,21 @@
   {
    "cell_type": "markdown",
    "metadata": {},
-   "source": [
-    "### Visualization with TMAP\n",
-    "\n",
-    "Create interactive tree maps (TMAPs) to visualize molecular clusters. TMAPs use LSH Forest to find nearest neighbors and create a tree-like layout."
-   ]
+   "source": "### Visualization with TMAP\n\nCreate interactive tree maps (TMAPs) to visualize molecular clusters. Chelombus uses [tmap2](https://github.com/afloresep/tmap2) under the hood, just pass SMILES and it handles fingerprints, layout, and rendering."
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/afloresep/miniforge3/envs/tmapnew/lib/python3.12/site-packages/faerun/faerun.py:409: RuntimeWarning: invalid value encountered in divide\n",
-      "  data_c[s] = (data_c[s] - min_c[s]) / (max_c[s] - min_c[s])\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "        <iframe\n",
-       "            width=\"100%\"\n",
-       "            height=\"500\"\n",
-       "            src=\"./my_tmap_TMAP.html\"\n",
-       "            frameborder=\"0\"\n",
-       "            allowfullscreen\n",
-       "            \n",
-       "        ></iframe>\n",
-       "        "
-      ],
-      "text/plain": [
-       "<IPython.lib.display.IFrame at 0x7bd86477ff80>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<a href='./my_tmap_TMAP.html' target='_blank'>./my_tmap_TMAP.html</a><br>"
-      ],
-      "text/plain": [
-       "/home/afloresep/work/Chelombus/examples/my_tmap_TMAP.html"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "TMAP generated: sample_tmap_TMAP.html\n"
-     ]
-    }
-   ],
-   "source": [
-    "from chelombus.utils.visualization import create_tmap\n",
-    "# Create a TMAP for a subset of molecules (for demonstration)\n",
-    "sample_smiles = smiles[:1000]\n",
-    "\n",
-    "# This will generate an interactive HTML file\n",
-    "create_tmap(\n",
-    "      smiles=sample_smiles, \n",
-    "      fingerprint = 'morgan', # or 'mqn'\n",
-    "      fingerprint_bits= 1024,      # for 'morgan'\n",
-    "      fingerprint_radius = 2, # for 'morgan'\n",
-    "    #   properties= [\"MW\", \"HAC\"], # default is all properties included\n",
-    "      tmap_name= 'my_tmap',\n",
-    "      k_neighbors= 20,\n",
-    "  )\n",
-    "\n",
-    "print(\"TMAP generated: sample_tmap_TMAP.html\")\n"
-   ]
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": "from chelombus.utils.visualization import create_tmap\n\n# Create a TMAP for a subset of molecules\nsample_smiles = smiles[:1000]\n\n# This will generate an interactive HTML file\ncreate_tmap(\n    smiles=sample_smiles,\n    fingerprint='morgan',\n    properties=['mw', 'logp', 'qed', 'n_rings'],\n    tmap_name='my_tmap',\n    k_neighbors=20,\n)\n\nprint(\"TMAP generated: my_tmap.html\")"
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/afloresep/miniforge3/envs/tmapnew/lib/python3.12/site-packages/faerun/faerun.py:409: RuntimeWarning: invalid value encountered in divide\n",
-      "  data_c[s] = (data_c[s] - min_c[s]) / (max_c[s] - min_c[s])\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "        <iframe\n",
-       "            width=\"100%\"\n",
-       "            height=\"500\"\n",
-       "            src=\"./my_tmap_TMAP.html\"\n",
-       "            frameborder=\"0\"\n",
-       "            allowfullscreen\n",
-       "            \n",
-       "        ></iframe>\n",
-       "        "
-      ],
-      "text/plain": [
-       "<IPython.lib.display.IFrame at 0x7bd86477fe00>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<a href='./my_tmap_TMAP.html' target='_blank'>./my_tmap_TMAP.html</a><br>"
-      ],
-      "text/plain": [
-       "/home/afloresep/work/Chelombus/examples/my_tmap_TMAP.html"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "# representative_tmap for the first tmap\n",
-    "import numpy as np\n",
-    "from chelombus.utils.visualization import representative_tmap\n",
-    "representative_tmap(\n",
-    "      smiles=smiles[:2000], \n",
-    "      cluster_ids = list(np.random.randint(0, 100000, 2000)), # cluster_ids, this generates the label visible when clicking on the node\n",
-    "      tmap_name= 'my_tmap',\n",
-    "      k_neighbors= 20,\n",
-    "      #properties = [\"MW\"], default is all properties included\n",
-    "  )\n"
-   ]
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": "# Representative TMAP with cluster IDs\nimport numpy as np\nfrom chelombus.utils.visualization import representative_tmap\n\nrepresentative_tmap(\n    smiles=smiles[:2000],\n    cluster_ids=list(np.random.randint(0, 100000, 2000)),\n    fingerprint='mqn',\n    tmap_name='representative_tmap',\n    k_neighbors=20,\n)\n\nprint(\"Representative TMAP generated: representative_tmap.html\")"
   },
   {
    "cell_type": "markdown",
@@ -832,7 +511,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "tmapnew",
+   "display_name": "venv (3.12.6)",
    "language": "python",
    "name": "python3"
   },
@@ -846,9 +525,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.12"
+   "version": "3.12.6"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 7aa2ca5..7dd84a4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,20 +4,19 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "chelombus"
-version = "0.2.1"
+version = "0.2.2"
 description = "Billion-scale molecular clustering and visualization using Product Quantization and nested TMAPs."
 readme = "README.md"
 license = {text = "MIT"}
 authors = [
   { name = "Alejandro Flores", email = "afloresep01@gmail.com" }
 ]
-requires-python = ">=3.9"
+requires-python = ">=3.11"
 classifiers = [
   "Programming Language :: Python :: 3",
-  "Programming Language :: Python :: 3.9",
-  "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
   "License :: OSI Approved :: MIT License",
   "Operating System :: OS Independent",
   "Intended Audience :: Science/Research",
@@ -30,9 +29,7 @@ dependencies = [
   "rdkit>=2022.03",
   "tqdm>=4.60.0",
   "scikit-learn>=1.0.0",
-  "tmap-silicon>=1.0.19",
-  "pandarallel>=1.6.0",
-  "faerun>=0.4.0",
+  "tmap2>=0.2.0",
   "pyarrow>=10.0.0",
   "duckdb>=0.9.0",
   "pqkmeans",
diff --git a/requirements.txt b/requirements.txt
index 5a15640..e8d61c2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,9 +13,8 @@ numba>=0.57.0
 pyarrow>=10.0.0
 duckdb>=0.9.0
 
-# Visualization (optional)
-# tmap>=1.0.0
-# faerun>=0.4.0
+# Visualization
+tmap2>=0.2.0
 
 # Development
 pytest>=7.0.0