From ac6f4edcb83b190af4c28b85af9ff289cf66eb54 Mon Sep 17 00:00:00 2001 From: Thomas Cokelaer Date: Mon, 8 Jun 2026 12:33:27 +0200 Subject: [PATCH] Release 0.2.0: fix Hi-C QC report rendering and pipeline improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix per-sample QC report showing raw Markdown instead of HTML: the report template opened with a block-level
tag, which made python-markdown skip conversion of all enclosed content. Enable the md_in_html extension and add markdown="1" to the wrapping block tags. - Add table styling to the QC report CSS (collapsed borders, padded cells, dark header row). - bwa_split: replace 32-bit seqkit split2 (crashes on large inputs) with sequana fastq-split; use -5SP -M bwa options and split_fasta container. - Add qc3c report to pipeline targets for both chromap and bwa paths. - Bump version to 0.2.0, set status to Production, update changelog. - Expand test suite with lightweight setup/config-validation tests. 🤖 Generated with [Claude Code](https://claude.com/claude-code) --- README.rst | 3 +- pyproject.toml | 2 +- sequana_pipelines/hic/config.yaml | 8 +-- sequana_pipelines/hic/hic.rules | 11 ++- sequana_pipelines/hic/src/hic_qc.py | 2 +- sequana_pipelines/hic/src/report_template.md | 12 ++-- sequana_pipelines/hic/src/style.css | 16 +++++ test/test_main.py | 72 ++++++++++++++++++++ 8 files changed, 110 insertions(+), 16 deletions(-) diff --git a/README.rst b/README.rst index cfbb2d7..65ca8c5 100644 --- a/README.rst +++ b/README.rst @@ -19,7 +19,7 @@ This is the **Hi-C** pipeline from the `Sequana {log} 2>&1 + # seqkit split2 (32-bit binary) crashes on large inputs; use sequana fastq-split instead + mkdir -p {wildcards.sample}/split + sequana fastq-split {input[0]} --by-size {params.nreads} --gzip --pattern {wildcards.sample}/split/{wildcards.sample}_R1.#.fastq.gz > {log} 2>&1 + sequana fastq-split {input[1]} --by-size {params.nreads} --gzip --pattern {wildcards.sample}/split/{wildcards.sample}_R2.#.fastq.gz >> {log} 2>&1 """ def get_bwa_input(): diff --git a/sequana_pipelines/hic/src/hic_qc.py b/sequana_pipelines/hic/src/hic_qc.py index 1189bac..930bafb 100644 --- a/sequana_pipelines/hic/src/hic_qc.py +++ b/sequana_pipelines/hic/src/hic_qc.py @@ -1067,7 +1067,7 @@ def write_report(self, quiet=False): sub_str = template_string.format(**self.out_stats) # splat the statistics and path into the markdown, render as html html = """""" - html += md.markdown(sub_str, extensions=['markdown.extensions.tables', 'markdown.extensions.nl2br']) + html += md.markdown(sub_str, extensions=['markdown.extensions.tables', 'markdown.extensions.nl2br', 'md_in_html']) html += "" # write out just html diff --git a/sequana_pipelines/hic/src/report_template.md b/sequana_pipelines/hic/src/report_template.md index 56557e3..cd97845 100644 --- a/sequana_pipelines/hic/src/report_template.md +++ b/sequana_pipelines/hic/src/report_template.md @@ -1,4 +1,4 @@ -
+
# Hi-C Library QC Report @@ -19,7 +19,7 @@ | Same strand high-quality* (HQ) read pairs (RPs) | {same_strand_hq_html} | > {same_strand_threshold}% | | Informative RPs** | {informative_read_pairs_html} | > {informative_read_pairs_threshold}% | -
+

*High-quality (HQ) read pairs have minimum mapping quality >= 20, maximum edit distance <= 5, and are not duplicates.
**Informative read pairs are read pairs which have MAPQ > 0, are not PCR duplicates, and map to different contigs or >10kb apart. @@ -34,7 +34,7 @@ | Fraction of HQ RPs Intercontig (CTGs >10kb)** | {intercontig_hq_contacts_html} | > {intercontig_hq_contacts_threshold}% | | Informative HQ RPs per-contig per-1M-RPs (CTGs >5kb)*** | {usable_hq_gt_5k_per_million_html} | > {min_usable_reads_per_contig_per_million_threshold} | -
+

*The proportion of read pairs that span at least 10kb, out of all read pairs that map (a) with high-quality, (b) to the same contig, (c) where that contig is at least 10kb long.
**The proportion of read pairs mapping to two different contigs each greater than 10kb, out of all read pairs that map with high-quality.
@@ -51,7 +51,7 @@ | Zero MAPQ reads | {many_zero_mapq_reads_html} | <= {many_zero_mapq_threshold}% | | Unmapped reads | {many_unmapped_reads_html} | <= {many_unmapped_threshold}% | -
+

*Note that the sum of informative and noninformative read pairs is not 100% because read pairs with mapping distance between 1b and 10kb are not classified as either informative or noninformative.
Because noninformative reads can belong to more than one category, these numbers may sum to a value larger than the overall noninformative read pair amount at the top of the report.
@@ -76,7 +76,7 @@ Because noninformative reads can belong to more than one category, these numbers ## Extended Library Statistics -
+
| Label | Library statistics | Expected values | | :----------- | --------------------:| --------------------:| @@ -94,7 +94,7 @@ Because noninformative reads can belong to more than one category, these numbers | Restriction Enzyme(s) | {lib_enzyme} | N/A
-
+

*The average number of usable high-quality read pairs per contig, for contigs greater than 5kb. Read pairs are "usable" if they map (a) with high-quality, (b) to different contigs, (c) where each of those contigs are greater than 5kb and (d) both mappings are high-quality.
diff --git a/sequana_pipelines/hic/src/style.css b/sequana_pipelines/hic/src/style.css index b6e67ad..942234f 100644 --- a/sequana_pipelines/hic/src/style.css +++ b/sequana_pipelines/hic/src/style.css @@ -50,6 +50,22 @@ block_img { clear: left; } +table { + border-collapse: collapse; + margin: 12px auto; + font-size: 13px; +} + +th, td { + border: 1px solid #cccccc; + padding: 4px 10px; +} + +th { + background-color: #34495e; + color: white; +} + tr:nth-child(even) { background-color: #DDDDDD; } diff --git a/test/test_main.py b/test/test_main.py index 0d1217a..91afae6 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -2,6 +2,7 @@ import subprocess import tempfile +import yaml from click.testing import CliRunner from sequana_pipelines.hic.main import main @@ -12,6 +13,23 @@ reference = f"{sharedir}/chr1.fa" +def _run(tmp_path, *extra): + runner = CliRunner() + return runner.invoke( + main, + [ + "--input-directory", + sharedir, + "--reference-file", + reference, + "--working-directory", + str(tmp_path / "wk"), + "--force", + *extra, + ], + ) + + def test_standalone_script(tmp_path): runner = CliRunner() results = runner.invoke( @@ -48,6 +66,60 @@ def test_standalone_script_bwa_split(tmp_path): assert results.exit_code == 0 +def test_standalone_script_chromap(tmp_path): + results = _run(tmp_path, "--aligner-choice", "chromap") + assert results.exit_code == 0 + + +def test_invalid_aligner_fails(tmp_path): + results = _run(tmp_path, "--aligner-choice", "notanaligner") + assert results.exit_code != 0 + + +def test_missing_reference_fails(tmp_path): + runner = CliRunner() + results = runner.invoke( + main, + [ + "--input-directory", + sharedir, + "--working-directory", + str(tmp_path / "wk"), + "--force", + ], + ) + assert results.exit_code != 0 + + +def test_setup_artifacts(tmp_path): + wk = tmp_path / "wk" + results = _run(tmp_path) + assert results.exit_code == 0 + # launcher and config produced + assert (wk / "hic.sh").exists() + assert (wk / "config.yaml").exists() + # custom files copied into .sequana by main() + for name in ("hic_qc.py", "hic_qc_style.css", "report_template.md"): + assert (wk / ".sequana" / name).exists() + + +def test_config_reflects_options(tmp_path): + wk = tmp_path / "wk" + results = _run(tmp_path, "--aligner-choice", "chromap", "--enable-fastp") + assert results.exit_code == 0 + cfg = yaml.safe_load((wk / "config.yaml").read_text()) + assert cfg["general"]["mapper"] == "chromap" + assert cfg["fastp"]["do"] is True + + +def test_config_fastp_disabled_by_default(tmp_path): + wk = tmp_path / "wk" + results = _run(tmp_path) + assert results.exit_code == 0 + cfg = yaml.safe_load((wk / "config.yaml").read_text()) + assert cfg["fastp"]["do"] is False + + def test_version(): runner = CliRunner() results = runner.invoke(main, ["--version"])