From a1da3a26918ef73de6006243ca3e44db201bd9b2 Mon Sep 17 00:00:00 2001 From: Thomas Cokelaer Date: Tue, 9 Jun 2026 12:06:34 +0200 Subject: [PATCH] Add pairtools min_mapq option and release 0.2.1 Expose pairtools min_mapq in config.yaml and schema.yaml, pass --min-mapq to pairtools parse so multi-mapped reads (BWA MAPQ 0) can be kept in the contact map. Switch bwa_split options to -5SP and raise pairtools memory to 8G. --- README.rst | 3 +++ pyproject.toml | 2 +- sequana_pipelines/hic/config.yaml | 9 +++++++-- sequana_pipelines/hic/hic.rules | 3 ++- sequana_pipelines/hic/schema.yaml | 3 +++ 5 files changed, 16 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 65ca8c5..8b9f4f9 100644 --- a/README.rst +++ b/README.rst @@ -111,6 +111,9 @@ Changelog ========= ==================================================================== Version Description ========= ==================================================================== +0.2.1 Expose pairtools ``min_mapq`` in config/schema and pass ``--min-mapq`` + to pairtools parse; switch bwa_split options to ``-5SP`` for Hi-C; + raise pairtools memory to 8G. 0.2.0 Production release. 0.1.0 Migration to modern sequana_pipetools framework (get_shell/get_run, schema validation, apptainer support, Python 3.10+). diff --git a/pyproject.toml b/pyproject.toml index 8023e22..034d6a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "sequana-hic" -version = "0.2.0" +version = "0.2.1" description = "Hi-C analysis, snakemake, sequana, container, reproducibility" authors = ["Sequana Team"] license = "BSD-3" diff --git a/sequana_pipelines/hic/config.yaml b/sequana_pipelines/hic/config.yaml index cde1abd..b7b440a 100644 --- a/sequana_pipelines/hic/config.yaml +++ b/sequana_pipelines/hic/config.yaml @@ -97,11 +97,16 @@ fastp: mem: 8G +# 5any vs 5unique: takes 5'-most alignment regardless of uniqueness → repeat reads survive pairtools: walk_policy: "5unique" + # Minimum MAPQ to keep an alignment in pairtools parse (default pairtools value is 1). + # Set to 0 to keep multi-mapped reads (BWA assigns MAPQ 0 to multimappers), so they + # appear in the Hi-C contact map. + min_mapq: 1 threads: 4 resources: - mem: 4G + mem: 8G ############################################################################# @@ -134,7 +139,7 @@ bwa: bwa_split: nreads: 100000 index_algorithm: is - options: -5SP -M + options: -5SP threads: 4 tmp_directory: ./tmp resources: diff --git a/sequana_pipelines/hic/hic.rules b/sequana_pipelines/hic/hic.rules index 832a626..c13f74c 100644 --- a/sequana_pipelines/hic/hic.rules +++ b/sequana_pipelines/hic/hic.rules @@ -422,6 +422,7 @@ rule pairtools: params: # walk_policy could be mask, 5any, 5unique, 3any|3unique|all walk_policy=config["pairtools"]["walk_policy"], + min_mapq=config["pairtools"].get("min_mapq", 1), mem=config["pairtools"]["resources"]["mem"], threads: config["pairtools"]["threads"] resources: @@ -431,7 +432,7 @@ rule pairtools: # pairtools parse generate 1 large file. pairtools sort as well. Better to pipe them to get # only one file. pairs number is identical of course. same content but sorted so statistics # would be the same - pairtools parse --nproc-in {threads} --nproc-out {threads} {input.bam} -c {input.chrom_size} --walks-policy {params.walk_policy} | \ + pairtools parse --nproc-in {threads} --nproc-out {threads} {input.bam} -c {input.chrom_size} --walks-policy {params.walk_policy} --min-mapq {params.min_mapq} | \ pairtools sort --nproc {threads} --memory {params.mem} -o {output.sort} >{log} 2>&1 """ diff --git a/sequana_pipelines/hic/schema.yaml b/sequana_pipelines/hic/schema.yaml index e23537c..49019cd 100644 --- a/sequana_pipelines/hic/schema.yaml +++ b/sequana_pipelines/hic/schema.yaml @@ -163,6 +163,9 @@ mapping: type: str required: True enum: ["mask", "5any", "5unique", "3any", "3unique", "all"] + "min_mapq": + type: int + required: False "threads": type: int required: True