diff --git a/README.rst b/README.rst index cfbb2d7..65ca8c5 100644 --- a/README.rst +++ b/README.rst @@ -19,7 +19,7 @@ This is the **Hi-C** pipeline from the `Sequana {log} 2>&1 + # seqkit split2 (32-bit binary) crashes on large inputs; use sequana fastq-split instead + mkdir -p {wildcards.sample}/split + sequana fastq-split {input[0]} --by-size {params.nreads} --gzip --pattern {wildcards.sample}/split/{wildcards.sample}_R1.#.fastq.gz > {log} 2>&1 + sequana fastq-split {input[1]} --by-size {params.nreads} --gzip --pattern {wildcards.sample}/split/{wildcards.sample}_R2.#.fastq.gz >> {log} 2>&1 """ def get_bwa_input(): diff --git a/sequana_pipelines/hic/src/hic_qc.py b/sequana_pipelines/hic/src/hic_qc.py index 1189bac..930bafb 100644 --- a/sequana_pipelines/hic/src/hic_qc.py +++ b/sequana_pipelines/hic/src/hic_qc.py @@ -1067,7 +1067,7 @@ def write_report(self, quiet=False): sub_str = template_string.format(**self.out_stats) # splat the statistics and path into the markdown, render as html html = """""" - html += md.markdown(sub_str, extensions=['markdown.extensions.tables', 'markdown.extensions.nl2br']) + html += md.markdown(sub_str, extensions=['markdown.extensions.tables', 'markdown.extensions.nl2br', 'md_in_html']) html += "" # write out just html diff --git a/sequana_pipelines/hic/src/report_template.md b/sequana_pipelines/hic/src/report_template.md index 56557e3..cd97845 100644 --- a/sequana_pipelines/hic/src/report_template.md +++ b/sequana_pipelines/hic/src/report_template.md @@ -1,4 +1,4 @@ -
+
# Hi-C Library QC Report @@ -19,7 +19,7 @@ | Same strand high-quality* (HQ) read pairs (RPs) | {same_strand_hq_html} | > {same_strand_threshold}% | | Informative RPs** | {informative_read_pairs_html} | > {informative_read_pairs_threshold}% | -
+

*High-quality (HQ) read pairs have minimum mapping quality >= 20, maximum edit distance <= 5, and are not duplicates.
**Informative read pairs are read pairs which have MAPQ > 0, are not PCR duplicates, and map to different contigs or >10kb apart. @@ -34,7 +34,7 @@ | Fraction of HQ RPs Intercontig (CTGs >10kb)** | {intercontig_hq_contacts_html} | > {intercontig_hq_contacts_threshold}% | | Informative HQ RPs per-contig per-1M-RPs (CTGs >5kb)*** | {usable_hq_gt_5k_per_million_html} | > {min_usable_reads_per_contig_per_million_threshold} | -
+

*The proportion of read pairs that span at least 10kb, out of all read pairs that map (a) with high-quality, (b) to the same contig, (c) where that contig is at least 10kb long.
**The proportion of read pairs mapping to two different contigs each greater than 10kb, out of all read pairs that map with high-quality.
@@ -51,7 +51,7 @@ | Zero MAPQ reads | {many_zero_mapq_reads_html} | <= {many_zero_mapq_threshold}% | | Unmapped reads | {many_unmapped_reads_html} | <= {many_unmapped_threshold}% | -
+

*Note that the sum of informative and noninformative read pairs is not 100% because read pairs with mapping distance between 1b and 10kb are not classified as either informative or noninformative.
Because noninformative reads can belong to more than one category, these numbers may sum to a value larger than the overall noninformative read pair amount at the top of the report.
@@ -76,7 +76,7 @@ Because noninformative reads can belong to more than one category, these numbers ## Extended Library Statistics -
+
| Label | Library statistics | Expected values | | :----------- | --------------------:| --------------------:| @@ -94,7 +94,7 @@ Because noninformative reads can belong to more than one category, these numbers | Restriction Enzyme(s) | {lib_enzyme} | N/A
-
+

*The average number of usable high-quality read pairs per contig, for contigs greater than 5kb. Read pairs are "usable" if they map (a) with high-quality, (b) to different contigs, (c) where each of those contigs are greater than 5kb and (d) both mappings are high-quality.
diff --git a/sequana_pipelines/hic/src/style.css b/sequana_pipelines/hic/src/style.css index b6e67ad..942234f 100644 --- a/sequana_pipelines/hic/src/style.css +++ b/sequana_pipelines/hic/src/style.css @@ -50,6 +50,22 @@ block_img { clear: left; } +table { + border-collapse: collapse; + margin: 12px auto; + font-size: 13px; +} + +th, td { + border: 1px solid #cccccc; + padding: 4px 10px; +} + +th { + background-color: #34495e; + color: white; +} + tr:nth-child(even) { background-color: #DDDDDD; } diff --git a/test/test_main.py b/test/test_main.py index 0d1217a..91afae6 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -2,6 +2,7 @@ import subprocess import tempfile +import yaml from click.testing import CliRunner from sequana_pipelines.hic.main import main @@ -12,6 +13,23 @@ reference = f"{sharedir}/chr1.fa" +def _run(tmp_path, *extra): + runner = CliRunner() + return runner.invoke( + main, + [ + "--input-directory", + sharedir, + "--reference-file", + reference, + "--working-directory", + str(tmp_path / "wk"), + "--force", + *extra, + ], + ) + + def test_standalone_script(tmp_path): runner = CliRunner() results = runner.invoke( @@ -48,6 +66,60 @@ def test_standalone_script_bwa_split(tmp_path): assert results.exit_code == 0 +def test_standalone_script_chromap(tmp_path): + results = _run(tmp_path, "--aligner-choice", "chromap") + assert results.exit_code == 0 + + +def test_invalid_aligner_fails(tmp_path): + results = _run(tmp_path, "--aligner-choice", "notanaligner") + assert results.exit_code != 0 + + +def test_missing_reference_fails(tmp_path): + runner = CliRunner() + results = runner.invoke( + main, + [ + "--input-directory", + sharedir, + "--working-directory", + str(tmp_path / "wk"), + "--force", + ], + ) + assert results.exit_code != 0 + + +def test_setup_artifacts(tmp_path): + wk = tmp_path / "wk" + results = _run(tmp_path) + assert results.exit_code == 0 + # launcher and config produced + assert (wk / "hic.sh").exists() + assert (wk / "config.yaml").exists() + # custom files copied into .sequana by main() + for name in ("hic_qc.py", "hic_qc_style.css", "report_template.md"): + assert (wk / ".sequana" / name).exists() + + +def test_config_reflects_options(tmp_path): + wk = tmp_path / "wk" + results = _run(tmp_path, "--aligner-choice", "chromap", "--enable-fastp") + assert results.exit_code == 0 + cfg = yaml.safe_load((wk / "config.yaml").read_text()) + assert cfg["general"]["mapper"] == "chromap" + assert cfg["fastp"]["do"] is True + + +def test_config_fastp_disabled_by_default(tmp_path): + wk = tmp_path / "wk" + results = _run(tmp_path) + assert results.exit_code == 0 + cfg = yaml.safe_load((wk / "config.yaml").read_text()) + assert cfg["fastp"]["do"] is False + + def test_version(): runner = CliRunner() results = runner.invoke(main, ["--version"])