DamLabResources · SamuelKrause133 · Feb 27, 2026
diff --git a/example_config.toml b/example_config.toml
@@ -44,24 +44,54 @@ leiden_resolution = "0.2,0.5,1.0"
 # Generate and save UMAP plots
 save_plots = true
 
-[annotate]
+[quantitate]
 # Path to input clustered .zarr file
 input = "clustered.zarr"
 # Modify the input file in place instead of creating a new file
 inplace = false
 # Path to output .zarr file (required unless inplace = true)
-output = "annotated.zarr"
+output = "scored.zarr"
 # Path to CSV file with marker genes (columns: cell_type, gene)
+# At least one of markers or preset_resources is required
 markers = "markers.csv"
-# Cluster column key to use for annotation (e.g., "leiden_res0p5")
-# If null, will use all leiden_res* columns found
-cluster_key = null
-# Pre-calculate MLM enrichment scores for pathway/TF resources
-calculate_ulm = true
-# Minimum sensitivity for PanglaoDB markers in MLM (default: 0.5)
-panglao_min_sensitivity = 0.5
-# Minimum number of marker genes per cell type for MLM annotation
+# Key suffix for custom marker scores (stored as obsm['score_mlm_<score_key>'])
+score_key = "custom"
+# Scoring method: "mlm" (default) or "ulm"
+method = "mlm"
+# Minimum number of targets per source for decoupler
 tmin = 2
+# Comma-separated string of built-in resources to score (e.g. "panglao,hallmark"); options: panglao, hallmark, collectri, dorothea, progeny
+# Leave as null or remove the key to skip preset scoring
+preset_resources = null
+# Minimum sensitivity for PanglaoDB markers (used when panglao is in preset_resources)
+panglao_min_sensitivity = 0.5
+# Only use canonical PanglaoDB markers
+panglao_canonical_only = true
+# Optional cell filter: "column==value" (e.g. "cell_type==Fibroblast")
+# If null, all cells are scored
+filter_obs = null
+# Generate and save enrichment heatmap plots
+save_plots = false
+
+[assign]
+# Path to input scored .zarr file (produced by quantitate)
+input = "scored.zarr"
+# Modify the input file in place instead of creating a new file
+inplace = false
+# Path to output .zarr file (required unless inplace = true)
+output = "annotated.zarr"
+# Full obsm key of the score matrix to use (must match quantitate output)
+# e.g. "score_mlm_custom" or "score_mlm_PanglaoDB"
+score_key = "score_mlm_custom"
+# Cluster column key to assign (e.g. "leiden_res0p5")
+# If null, all leiden_res* columns are used
+cluster_key = null
+# obs column name for cell type labels; defaults to "cell_type_res{resolution}"
+annotation_key = null
+# Assignment strategy: "top_positive" (default)
+strategy = "top_positive"
+# Run differential expression per cluster key
+run_de = true
 # Generate and save annotation plots
 save_plots = true
 

diff --git a/makefile b/makefile
@@ -19,7 +19,7 @@ help:
 	@echo "  make format            - Format code with black"
 	@echo "  make clean             - Remove build artifacts and caches"
 	@echo "  make clean-all         - Remove build artifacts, caches, and venv"
-	@echo "  make run ROOT=/path    - Run full pipeline using config.toml in ROOT directory"
+	@echo "  make run ROOT=/path    - Run full pipeline (6 steps) using config.toml in ROOT directory"
 
 # Create virtual environment
 venv:
@@ -140,11 +140,15 @@ run:
 	xenium_process cluster --config "config.toml" || exit 1
 	@cd "$(ROOT)" && \
 	echo "" && \
-	echo "Step 4: Annotate cell types" && \
-	xenium_process annotate --config "config.toml" || exit 1
+	echo "Step 4: Quantitate enrichment scores" && \
+	xenium_process quantitate --config "config.toml" || exit 1
 	@cd "$(ROOT)" && \
 	echo "" && \
-	echo "Step 5: Differential expression analysis" && \
+	echo "Step 5: Assign cell type labels" && \
+	xenium_process assign --config "config.toml" || exit 1
+	@cd "$(ROOT)" && \
+	echo "" && \
+	echo "Step 6: Differential expression analysis" && \
 	xenium_process differential --config "config.toml" || exit 1
 	@echo ""
 	@echo "=========================================="

diff --git a/projects/PDAC_HIV/config.toml b/projects/PDAC_HIV/config.toml
@@ -33,23 +33,47 @@ leiden_resolution = "0.2,0.5,1.0"
 save_plots = true
 resume = true
 
-[annotate]
+[quantitate]
 # Path to input clustered .zarr file
 input = "data.zarr"
 # Modify the input file in place instead of creating a new file
 inplace = true
 # Path to CSV file with marker genes (columns: cell_type, gene)
 markers = "markers.csv"
-
-# Pre-calculate MLM enrichment scores for pathway/TF resources
-calculate_ulm = true
-# Minimum sensitivity for PanglaoDB markers in MLM (default: 0.5)
-panglao_min_sensitivity = 0.5
-# Minimum number of marker genes per cell type for MLM annotation
+# Key suffix for custom marker scores
+score_key = "custom"
+# Scoring method
+method = "mlm"
+# Minimum number of targets per source for decoupler
 tmin = 2
+# Also score against built-in decoupler resources
+preset_resources = "panglao"
+# Minimum sensitivity for PanglaoDB markers
+panglao_min_sensitivity = 0.5
+# Only use canonical PanglaoDB markers
+panglao_canonical_only = true
+# No cell filter — score all cells
+filter_obs = null
+# Generate and save enrichment heatmap plots
+save_plots = true
+
+[assign]
+# Path to input scored .zarr file
+input = "data.zarr"
+# Modify the input file in place
+inplace = true
+# obsm key of the scores to assign from (produced by quantitate)
+score_key = "score_mlm_custom"
+# Use all leiden_res* columns
+cluster_key = null
+# Default annotation key naming
+annotation_key = null
+# Assignment strategy
+strategy = "top_positive"
+# Run differential expression
+run_de = true
 # Generate and save annotation plots
 save_plots = true
-resume = true
 
 #[differential]
 # Path to input .zarr file with annotations

diff --git a/tests/functional/test_config_integration.py b/tests/functional/test_config_integration.py
@@ -33,12 +33,27 @@ def test_config_file(tmp_path):
 leiden_resolution = "0.3,0.6"
 save_plots = true
 
-[annotate]
+[quantitate]
 input = "placeholder.zarr"
-output = "config_annotated.zarr"
-calculate_ulm = true
-panglao_min_sensitivity = 0.6
+output = "config_scored.zarr"
+markers = "markers.csv"
+score_key = "custom"
+method = "mlm"
 tmin = 3
+preset_resources = null
+panglao_min_sensitivity = 0.6
+panglao_canonical_only = true
+filter_obs = null
+save_plots = false
+
+[assign]
+input = "config_scored.zarr"
+output = "config_annotated.zarr"
+score_key = "score_mlm_custom"
+cluster_key = null
+annotation_key = null
+strategy = "top_positive"
+run_de = true
 save_plots = false
 
 [differential]

diff --git a/tests/functional/test_full_pipeline.py b/tests/functional/test_full_pipeline.py
@@ -59,18 +59,31 @@ def test_full_pipeline_end_to_end(test_samples_csv, test_markers_csv, tmp_zarr_c
 
     assert result.returncode == 0, f"Cluster failed: {result.stderr}"
 
-    # Step 4: Annotate (inplace)
+    # Step 4: Quantitate – score cells against the marker gene list (inplace)
     result = subprocess.run([
         sys.executable, '-m', 'xenium_process.cli',
-        'annotate',
+        'quantitate',
         '--input', str(concat_output),
         '--inplace',
-        '--markers', str(test_markers_csv)
+        '--markers', str(test_markers_csv),
+        '--tmin', '1',
     ], capture_output=True, text=True)
-
-    assert result.returncode == 0, f"Annotate failed: {result.stderr}"
-
-    # Step 5: Differential analysis
+
+    assert result.returncode == 0, f"Quantitate failed: {result.stderr}"
+
+    # Step 5: Assign – label clusters from the scored obsm matrix (inplace)
+    result = subprocess.run([
+        sys.executable, '-m', 'xenium_process.cli',
+        'assign',
+        '--input', str(concat_output),
+        '--inplace',
+        '--score-key', 'score_mlm_custom',
+        '--cluster-key', 'leiden_res0p5',
+    ], capture_output=True, text=True)
+
+    assert result.returncode == 0, f"Assign failed: {result.stderr}"
+
+    # Step 6: Differential analysis
     diff_output_dir = tmp_zarr_cleanup / "differential"
     result = subprocess.run([
         sys.executable, '-m', 'xenium_process.cli',