Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 40 additions & 10 deletions example_config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,24 +44,54 @@ leiden_resolution = "0.2,0.5,1.0"
# Generate and save UMAP plots
save_plots = true

[annotate]
[quantitate]
# Path to input clustered .zarr file
input = "clustered.zarr"
# Modify the input file in place instead of creating a new file
inplace = false
# Path to output .zarr file (required unless inplace = true)
output = "annotated.zarr"
output = "scored.zarr"
# Path to CSV file with marker genes (columns: cell_type, gene)
# At least one of markers or preset_resources is required
markers = "markers.csv"
# Cluster column key to use for annotation (e.g., "leiden_res0p5")
# If null, will use all leiden_res* columns found
cluster_key = null
# Pre-calculate MLM enrichment scores for pathway/TF resources
calculate_ulm = true
# Minimum sensitivity for PanglaoDB markers in MLM (default: 0.5)
panglao_min_sensitivity = 0.5
# Minimum number of marker genes per cell type for MLM annotation
# Key suffix for custom marker scores (stored as obsm['score_mlm_<score_key>'])
score_key = "custom"
# Scoring method: "mlm" (default) or "ulm"
method = "mlm"
# Minimum number of targets per source for decoupler
tmin = 2
# Comma-separated built-in resources to score: panglao, hallmark, collectri, dorothea, progeny
# Left unset here to skip preset scoring. TOML has no null value, so "no value"
# is expressed by omitting the key; uncomment and edit to enable.
# NOTE(review): assumes the loader treats a missing key like the former null - confirm.
# preset_resources = "panglao,hallmark"
# Minimum sensitivity for PanglaoDB markers (used when panglao is in preset_resources)
panglao_min_sensitivity = 0.5
# Only use canonical PanglaoDB markers
panglao_canonical_only = true
# Optional cell filter: "column==value" (e.g. "cell_type==Fibroblast")
# Left unset here so that all cells are scored; uncomment and edit to filter.
# filter_obs = "cell_type==Fibroblast"
# Generate and save enrichment heatmap plots
save_plots = false

[assign]
# Path to input scored .zarr file (produced by quantitate)
input = "scored.zarr"
# Modify the input file in place instead of creating a new file
inplace = false
# Path to output .zarr file (required unless inplace = true)
output = "annotated.zarr"
# Full obsm key of the score matrix to use (must match quantitate output)
# e.g. "score_mlm_custom" or "score_mlm_PanglaoDB"
score_key = "score_mlm_custom"
# Cluster column key to assign (e.g. "leiden_res0p5")
# Left unset here so that all leiden_res* columns are used. TOML has no null
# value, so "no value" is expressed by omitting the key; uncomment to pin one.
# NOTE(review): assumes the loader treats a missing key like the former null - confirm.
# cluster_key = "leiden_res0p5"
# obs column name for cell type labels
# Left unset here so that it defaults to "cell_type_res{resolution}"
# annotation_key = "cell_type"
# Assignment strategy: "top_positive" (default)
strategy = "top_positive"
# Run differential expression per cluster key
run_de = true
# Generate and save annotation plots
save_plots = true

Expand Down
12 changes: 8 additions & 4 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ help:
@echo " make format - Format code with black"
@echo " make clean - Remove build artifacts and caches"
@echo " make clean-all - Remove build artifacts, caches, and venv"
@echo " make run ROOT=/path - Run full pipeline using config.toml in ROOT directory"
@echo " make run ROOT=/path - Run full pipeline (6 steps) using config.toml in ROOT directory"

# Create virtual environment
venv:
Expand Down Expand Up @@ -140,11 +140,15 @@ run:
xenium_process cluster --config "config.toml" || exit 1
@cd "$(ROOT)" && \
echo "" && \
echo "Step 4: Annotate cell types" && \
xenium_process annotate --config "config.toml" || exit 1
echo "Step 4: Quantitate enrichment scores" && \
xenium_process quantitate --config "config.toml" || exit 1
@cd "$(ROOT)" && \
echo "" && \
echo "Step 5: Differential expression analysis" && \
echo "Step 5: Assign cell type labels" && \
xenium_process assign --config "config.toml" || exit 1
@cd "$(ROOT)" && \
echo "" && \
echo "Step 6: Differential expression analysis" && \
xenium_process differential --config "config.toml" || exit 1
@echo ""
@echo "=========================================="
Expand Down
40 changes: 32 additions & 8 deletions projects/PDAC_HIV/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,23 +33,47 @@ leiden_resolution = "0.2,0.5,1.0"
save_plots = true
resume = true

[annotate]
[quantitate]
# Path to input clustered .zarr file
input = "data.zarr"
# Modify the input file in place instead of creating a new file
inplace = true
# Path to CSV file with marker genes (columns: cell_type, gene)
markers = "markers.csv"

# Pre-calculate MLM enrichment scores for pathway/TF resources
calculate_ulm = true
# Minimum sensitivity for PanglaoDB markers in MLM (default: 0.5)
panglao_min_sensitivity = 0.5
# Minimum number of marker genes per cell type for MLM annotation
# Key suffix for custom marker scores
score_key = "custom"
# Scoring method
method = "mlm"
# Minimum number of targets per source for decoupler
tmin = 2
# Also score against built-in decoupler resources
preset_resources = "panglao"
# Minimum sensitivity for PanglaoDB markers
panglao_min_sensitivity = 0.5
# Only use canonical PanglaoDB markers
panglao_canonical_only = true
# No cell filter — score all cells. The key is omitted rather than set to
# null because TOML has no null value; uncomment and edit to filter.
# NOTE(review): assumes the loader treats a missing key like the former null - confirm.
# filter_obs = "column==value"
# Generate and save enrichment heatmap plots
save_plots = true

[assign]
# Path to input scored .zarr file
input = "data.zarr"
# Modify the input file in place
inplace = true
# obsm key of the scores to assign from (produced by quantitate)
score_key = "score_mlm_custom"
# Use all leiden_res* columns. The key is omitted rather than set to null
# because TOML has no null value; uncomment to pin a single cluster column.
# NOTE(review): assumes the loader treats a missing key like the former null - confirm.
# cluster_key = "leiden_res0p5"
# Default annotation key naming (key omitted so the default applies)
# annotation_key = "cell_type"
# Assignment strategy
strategy = "top_positive"
# Run differential expression
run_de = true
# Generate and save annotation plots
save_plots = true
resume = true

#[differential]
# Path to input .zarr file with annotations
Expand Down
23 changes: 19 additions & 4 deletions tests/functional/test_config_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,27 @@ def test_config_file(tmp_path):
leiden_resolution = "0.3,0.6"
save_plots = true

[annotate]
[quantitate]
input = "placeholder.zarr"
output = "config_annotated.zarr"
calculate_ulm = true
panglao_min_sensitivity = 0.6
output = "config_scored.zarr"
markers = "markers.csv"
score_key = "custom"
method = "mlm"
tmin = 3
preset_resources = null
panglao_min_sensitivity = 0.6
panglao_canonical_only = true
filter_obs = null
save_plots = false

[assign]
input = "config_scored.zarr"
output = "config_annotated.zarr"
score_key = "score_mlm_custom"
cluster_key = null
annotation_key = null
strategy = "top_positive"
run_de = true
save_plots = false

[differential]
Expand Down
27 changes: 20 additions & 7 deletions tests/functional/test_full_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,18 +59,31 @@ def test_full_pipeline_end_to_end(test_samples_csv, test_markers_csv, tmp_zarr_c

assert result.returncode == 0, f"Cluster failed: {result.stderr}"

# Step 4: Annotate (inplace)
# Step 4: Quantitate – score cells against the marker gene list (inplace)
result = subprocess.run([
sys.executable, '-m', 'xenium_process.cli',
'annotate',
'quantitate',
'--input', str(concat_output),
'--inplace',
'--markers', str(test_markers_csv)
'--markers', str(test_markers_csv),
'--tmin', '1',
], capture_output=True, text=True)

assert result.returncode == 0, f"Annotate failed: {result.stderr}"

# Step 5: Differential analysis

assert result.returncode == 0, f"Quantitate failed: {result.stderr}"

# Step 5: Assign – label clusters from the scored obsm matrix (inplace)
result = subprocess.run([
sys.executable, '-m', 'xenium_process.cli',
'assign',
'--input', str(concat_output),
'--inplace',
'--score-key', 'score_mlm_custom',
'--cluster-key', 'leiden_res0p5',
], capture_output=True, text=True)

assert result.returncode == 0, f"Assign failed: {result.stderr}"

# Step 6: Differential analysis
diff_output_dir = tmp_zarr_cleanup / "differential"
result = subprocess.run([
sys.executable, '-m', 'xenium_process.cli',
Expand Down
Loading