From 67f83e989554c425f2fde4a01d0c5cc54fefed1f Mon Sep 17 00:00:00 2001 From: "Mitchell R. Vollger" Date: Tue, 18 Mar 2025 08:11:57 -0700 Subject: [PATCH 01/16] update --- pixi.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pixi.toml b/pixi.toml index 83111f3d2..1fb30628d 100644 --- a/pixi.toml +++ b/pixi.toml @@ -20,7 +20,9 @@ test-data = { cmd = [ "s3", "--no-sign-request", "sync", - "s3://stergachis-public1/FIRE/test-data", + "--endpoint-url", + "https://s3.kopah.orci.washington.edu", + "s3://stergachis/public/FIRE/test-data", "fire-test-data/", ] } test = { cmd = [ From 197c52ebec1c3d8e8efc75c97239b26364893853 Mon Sep 17 00:00:00 2001 From: "Mitchell R. Vollger" Date: Tue, 18 Mar 2025 08:30:07 -0700 Subject: [PATCH 02/16] check for no reads passing filters --- pixi.toml | 4 ++-- workflow/rules/apply-model.smk | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pixi.toml b/pixi.toml index 1fb30628d..ac4bab560 100644 --- a/pixi.toml +++ b/pixi.toml @@ -35,7 +35,7 @@ test = { cmd = [ "--configfile", "test.yaml", "-k", -], depends_on = [ +], depends-on = [ "test-data", ], clean-env = true } @@ -64,7 +64,7 @@ snakemake = "==8.21" snakemake-executor-plugin-slurm = ">=0.11.2" snakefmt = "*" ruff = "*" -awscli = "2.22" +awscli = "2.22.*" taplo = "*" [pypi-dependencies] diff --git a/workflow/rules/apply-model.smk b/workflow/rules/apply-model.smk index 9e6ad9559..8a2a6bf7d 100644 --- a/workflow/rules/apply-model.smk +++ b/workflow/rules/apply-model.smk @@ -36,6 +36,13 @@ rule fire: --output-fmt-option embed_ref=1 \ --input-fmt-option required_fields=0x1bff \ --write-index -o {output.cram} + + # check if the cram file has zero reads + reads_in_header=$(samtools view {output.cram} | head | wc -l || true) + if [ $reads_in_header -eq 0 ]; then + printf "\nNo reads passed filters exiting...\n\nPlease review https://fiberseq.github.io/quick-start.html to make sure the input BAM has been correctly processed.\n\n" + exit 1 + fi """ 
From c6cb476ce42cbcc0946a5fec3844fdfc7af59b62 Mon Sep 17 00:00:00 2001 From: "Mitchell R. Vollger" Date: Tue, 18 Mar 2025 09:06:29 -0700 Subject: [PATCH 03/16] Fix odd too many files open error in bigtools by not reading from stdin --- workflow/rules/coverages.smk | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/workflow/rules/coverages.smk b/workflow/rules/coverages.smk index 05f537c45..c354c76cc 100644 --- a/workflow/rules/coverages.smk +++ b/workflow/rules/coverages.smk @@ -151,6 +151,7 @@ rule unreliable_coverage_regions: output: bed="results/{sm}/additional-outputs-{v}/coverage/unreliable-coverage-regions.bed.gz", bed_tbi="results/{sm}/additional-outputs-{v}/coverage/unreliable-coverage-regions.bed.gz.tbi", + tmp=temp("temp/{sm}/additional-outputs-{v}/unreliable-coverage-regions.bed"), bb="results/{sm}/trackHub-{v}/bb/unreliable-coverage-regions.bb", threads: 4 params: @@ -171,10 +172,11 @@ rule unreliable_coverage_regions: > {output.bed} # bigbed - bgzip -cd {output.bed} -@ {threads} \ - | bigtools bedtobigbed \ + # for some reason bigtools gives a too many files open error when reading from stdin + bedtools merge -i {output.bed} > {output.tmp} + bigtools bedtobigbed \ -s start -a {params.bed3_as} \ - - {input.fai} {output.bb} + {output.tmp} {input.fai} {output.bb} # index tabix -f -p bed {output.bed} From 0de1ee470a32332aab5a3ab1fc20585ba65a3466 Mon Sep 17 00:00:00 2001 From: "Mitchell R. Vollger" Date: Tue, 18 Mar 2025 09:10:39 -0700 Subject: [PATCH 04/16] bump version --- CHANGELOG.md | 6 ++++ pixi.toml | 80 ++++++++++++++++++++++++++-------------------------- 2 files changed, 46 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0713c5e75..5769cd383 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ All notable changes to this project will be documented in this file. 
+## v0.1.2 + +- fix #34 +- fix #33 +- Update to pull test data from a new s3 bucket + ## v0.1.1 Added more informative error messages if an FDR distribution cannot be made or there is not enough coverage. diff --git a/pixi.toml b/pixi.toml index ac4bab560..080eedf31 100644 --- a/pixi.toml +++ b/pixi.toml @@ -4,58 +4,58 @@ channels = ["conda-forge", "bioconda"] description = "Add a short description here" name = "FIRE" platforms = ["osx-64", "linux-64"] -version = "0.1.1" +version = "0.1.2" [tasks] fmt = "ruff format . && taplo format pixi.toml && snakefmt workflow/" test-data = { cmd = [ - "cd", - "$INIT_CWD", - "&&", - "mkdir", - "-p", - "fire-test-data", - "&&", - "aws", - "s3", - "--no-sign-request", - "sync", - "--endpoint-url", - "https://s3.kopah.orci.washington.edu", - "s3://stergachis/public/FIRE/test-data", - "fire-test-data/", + "cd", + "$INIT_CWD", + "&&", + "mkdir", + "-p", + "fire-test-data", + "&&", + "aws", + "s3", + "--no-sign-request", + "sync", + "--endpoint-url", + "https://s3.kopah.orci.washington.edu", + "s3://stergachis/public/FIRE/test-data", + "fire-test-data/", ] } test = { cmd = [ - "cd", - "$INIT_CWD/fire-test-data", - "&&", - "snakemake", - "-s", - "$PIXI_PROJECT_ROOT/workflow/Snakefile", - "--configfile", - "test.yaml", - "-k", + "cd", + "$INIT_CWD/fire-test-data", + "&&", + "snakemake", + "-s", + "$PIXI_PROJECT_ROOT/workflow/Snakefile", + "--configfile", + "test.yaml", + "-k", ], depends-on = [ - "test-data", + "test-data", ], clean-env = true } fire = { cmd = [ - "cd", - "$INIT_CWD", - "&&", - "snakemake", - "-s", - "$PIXI_PROJECT_ROOT/workflow/Snakefile", + "cd", + "$INIT_CWD", + "&&", + "snakemake", + "-s", + "$PIXI_PROJECT_ROOT/workflow/Snakefile", ] } slurm = { cmd = [ - "cd", - "$INIT_CWD", - "&&", - "snakemake", - "-s", - "$PIXI_PROJECT_ROOT/workflow/Snakefile", - "--profile", - "$PIXI_PROJECT_ROOT/profiles/slurm-executor", + "cd", + "$INIT_CWD", + "&&", + "snakemake", + "-s", + "$PIXI_PROJECT_ROOT/workflow/Snakefile", + 
"--profile", + "$PIXI_PROJECT_ROOT/profiles/slurm-executor", ] } [dependencies] From 3a8ed917d0a691139e71643580e97b3198ac2836 Mon Sep 17 00:00:00 2001 From: "Mitchell R. Vollger" Date: Tue, 1 Apr 2025 13:50:47 -0700 Subject: [PATCH 05/16] consistent param order --- workflow/rules/decorated-reads.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/rules/decorated-reads.smk b/workflow/rules/decorated-reads.smk index 344218596..20d877c79 100644 --- a/workflow/rules/decorated-reads.smk +++ b/workflow/rules/decorated-reads.smk @@ -99,7 +99,7 @@ rule decorate_fibers_2: --inmemory \ --block-size {params.block_size} --items-per-slot {params.items_per_slot} \ --nzooms {params.nzooms} \ - -a {params.dec_as} -s start \ + -s start -a {params.dec_as} \ - {input.fai} {output.bb} """ From d151d13aeef243b8063cd014e0f0c4f964570f9e Mon Sep 17 00:00:00 2001 From: "Mitchell R. Vollger" Date: Tue, 1 Apr 2025 14:32:41 -0700 Subject: [PATCH 06/16] explicit threads --- workflow/rules/decorated-reads.smk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/rules/decorated-reads.smk b/workflow/rules/decorated-reads.smk index 20d877c79..9ebba9598 100644 --- a/workflow/rules/decorated-reads.smk +++ b/workflow/rules/decorated-reads.smk @@ -55,7 +55,7 @@ rule decorate_fibers_1: cat {input.bed} \ | bgzip -cd -@ {threads} \ | bigtools bedtobigbed \ - --inmemory \ + --inmemory -t {threads} \ --block-size {params.block_size} --items-per-slot {params.items_per_slot} \ --nzooms {params.nzooms} \ -s start -a {params.bed_as} \ @@ -96,7 +96,7 @@ rule decorate_fibers_2: | rg -v '^#' \ | rg -vw 'NUC' \ | bigtools bedtobigbed \ - --inmemory \ + --inmemory -t {threads} \ --block-size {params.block_size} --items-per-slot {params.items_per_slot} \ --nzooms {params.nzooms} \ -s start -a {params.dec_as} \ From 345864123f16ccd058d2cd89877c8ff9415136b2 Mon Sep 17 00:00:00 2001 From: "Mitchell R. 
Vollger" Date: Tue, 1 Apr 2025 14:48:38 -0700 Subject: [PATCH 07/16] run decorate early --- workflow/rules/decorated-reads.smk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/workflow/rules/decorated-reads.smk b/workflow/rules/decorated-reads.smk index 9ebba9598..b39eb5f6f 100644 --- a/workflow/rules/decorated-reads.smk +++ b/workflow/rules/decorated-reads.smk @@ -14,6 +14,8 @@ rule decorate_fibers_chromosome: threads: 4 resources: mem_mb=get_mem_mb, + # the following steps can take a while so this helps the pipeline start this earlier. + priority: 10 conda: DEFAULT_ENV shell: @@ -42,6 +44,7 @@ rule decorate_fibers_1: threads: 8 resources: runtime=240, + priority: 10 conda: DEFAULT_ENV params: @@ -79,6 +82,7 @@ rule decorate_fibers_2: threads: 8 resources: runtime=60 * 16, + priority: 10 conda: DEFAULT_ENV params: From 7f0adbfccd7712bd85d3a9f905333d8cbed86e8e Mon Sep 17 00:00:00 2001 From: "Mitchell R. Vollger" Date: Tue, 1 Apr 2025 15:32:27 -0700 Subject: [PATCH 08/16] bump bigtools --- workflow/envs/env.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/envs/env.yaml b/workflow/envs/env.yaml index 04b2f46b2..07984df4c 100644 --- a/workflow/envs/env.yaml +++ b/workflow/envs/env.yaml @@ -13,4 +13,4 @@ dependencies: - ripgrep - csvtk - mosdepth==0.3.7 - - bioconda::bigtools==0.5.4 + - bioconda::bigtools==0.5.5 From 5dff725256f9be073365d167f5552fed16708f09 Mon Sep 17 00:00:00 2001 From: "Mitchell R. 
Vollger" Date: Tue, 15 Apr 2025 07:44:41 -0700 Subject: [PATCH 09/16] only use major and minor version --- workflow/Snakefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 3322c700a..14903e0b2 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -19,7 +19,9 @@ include: "rules/common.smk" # load the version from the environment -VERSION = f"v{os.environ.get("PIXI_PROJECT_VERSION", "UNK")}" +FULL_VERSION = f"v{os.environ.get("PIXI_PROJECT_VERSION", "UNK")}" +# trim the FULL_VERSION to just the major and minor version +VERSION = ".".join(FULL_VERSION.split(".")[:2]) # thread options MAX_THREADS = config.get("max_threads", 4) From 909178c121c66743a79b647015a744f3fe3d2d00 Mon Sep 17 00:00:00 2001 From: "Mitchell R. Vollger" Date: Tue, 1 Apr 2025 13:50:47 -0700 Subject: [PATCH 10/16] consistent param order --- workflow/rules/decorated-reads.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/rules/decorated-reads.smk b/workflow/rules/decorated-reads.smk index 344218596..20d877c79 100644 --- a/workflow/rules/decorated-reads.smk +++ b/workflow/rules/decorated-reads.smk @@ -99,7 +99,7 @@ rule decorate_fibers_2: --inmemory \ --block-size {params.block_size} --items-per-slot {params.items_per_slot} \ --nzooms {params.nzooms} \ -a {params.dec_as} -s start \ + -s start -a {params.dec_as} \ - {input.fai} {output.bb} """ From 5949fb1f63c412422db97d140f9f119bdc931f44 Mon Sep 17 00:00:00 2001 From: "Mitchell R. 
Vollger" Date: Tue, 1 Apr 2025 14:32:41 -0700 Subject: [PATCH 11/16] explicit threads --- workflow/rules/decorated-reads.smk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/rules/decorated-reads.smk b/workflow/rules/decorated-reads.smk index 20d877c79..9ebba9598 100644 --- a/workflow/rules/decorated-reads.smk +++ b/workflow/rules/decorated-reads.smk @@ -55,7 +55,7 @@ rule decorate_fibers_1: cat {input.bed} \ | bgzip -cd -@ {threads} \ | bigtools bedtobigbed \ - --inmemory \ + --inmemory -t {threads} \ --block-size {params.block_size} --items-per-slot {params.items_per_slot} \ --nzooms {params.nzooms} \ -s start -a {params.bed_as} \ @@ -96,7 +96,7 @@ rule decorate_fibers_2: | rg -v '^#' \ | rg -vw 'NUC' \ | bigtools bedtobigbed \ - --inmemory \ + --inmemory -t {threads} \ --block-size {params.block_size} --items-per-slot {params.items_per_slot} \ --nzooms {params.nzooms} \ -s start -a {params.dec_as} \ From 35a8a2c581a84fbfab93e8b21787432dcd1122bf Mon Sep 17 00:00:00 2001 From: "Mitchell R. Vollger" Date: Tue, 1 Apr 2025 14:48:38 -0700 Subject: [PATCH 12/16] run decorate early --- workflow/rules/decorated-reads.smk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/workflow/rules/decorated-reads.smk b/workflow/rules/decorated-reads.smk index 9ebba9598..b39eb5f6f 100644 --- a/workflow/rules/decorated-reads.smk +++ b/workflow/rules/decorated-reads.smk @@ -14,6 +14,8 @@ rule decorate_fibers_chromosome: threads: 4 resources: mem_mb=get_mem_mb, + # the following steps can take a while so this helps the pipeline start this earlier. + priority: 10 conda: DEFAULT_ENV shell: @@ -42,6 +44,7 @@ rule decorate_fibers_1: threads: 8 resources: runtime=240, + priority: 10 conda: DEFAULT_ENV params: @@ -79,6 +82,7 @@ rule decorate_fibers_2: threads: 8 resources: runtime=60 * 16, + priority: 10 conda: DEFAULT_ENV params: From 5fa36ab6d8ddad7d4ac81ca3d866d09ac0bda6e6 Mon Sep 17 00:00:00 2001 From: "Mitchell R. 
Vollger" Date: Tue, 1 Apr 2025 15:32:27 -0700 Subject: [PATCH 13/16] bump bigtools --- workflow/envs/env.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/envs/env.yaml b/workflow/envs/env.yaml index 04b2f46b2..07984df4c 100644 --- a/workflow/envs/env.yaml +++ b/workflow/envs/env.yaml @@ -13,4 +13,4 @@ dependencies: - ripgrep - csvtk - mosdepth==0.3.7 - - bioconda::bigtools==0.5.4 + - bioconda::bigtools==0.5.5 From 1f3cb475aea1e4b5951edcc5cf5150c05a1696e4 Mon Sep 17 00:00:00 2001 From: "Mitchell R. Vollger" Date: Tue, 15 Apr 2025 07:44:41 -0700 Subject: [PATCH 14/16] only use major and minor version --- workflow/Snakefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 3322c700a..14903e0b2 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -19,7 +19,9 @@ include: "rules/common.smk" # load the version from the environment -VERSION = f"v{os.environ.get("PIXI_PROJECT_VERSION", "UNK")}" +FULL_VERSION = f"v{os.environ.get("PIXI_PROJECT_VERSION", "UNK")}" +# trim the FULL_VERSION to just the major and minor version +VERSION = ".".join(FULL_VERSION.split(".")[:2]) # thread options MAX_THREADS = config.get("max_threads", 4) From 4c5b48d5c6e66d7b827bc45659b75e24d06610c8 Mon Sep 17 00:00:00 2001 From: "Mitchell R. Vollger" Date: Tue, 15 Apr 2025 07:50:18 -0700 Subject: [PATCH 15/16] fmt --- pixi.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pixi.toml b/pixi.toml index 080eedf31..431ab05e7 100644 --- a/pixi.toml +++ b/pixi.toml @@ -38,7 +38,6 @@ test = { cmd = [ ], depends-on = [ "test-data", ], clean-env = true } - fire = { cmd = [ "cd", "$INIT_CWD", From 18efcdaa3b5522aa49dcdfe2e2bcbeb7e3d4e640 Mon Sep 17 00:00:00 2001 From: "Mitchell R. 
Vollger" Date: Tue, 29 Apr 2025 07:25:20 -0700 Subject: [PATCH 16/16] more mem --- workflow/Snakefile | 5 +++++ workflow/rules/fire-peaks.smk | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 14903e0b2..f36e0da1c 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -23,6 +23,11 @@ FULL_VERSION = f"v{os.environ.get("PIXI_PROJECT_VERSION", "UNK")}" # trim the FULL_VERSION to just the major and minor version VERSION = ".".join(FULL_VERSION.split(".")[:2]) +# use the full version if requested +if config.get("full-version", False): + VERSION = FULL_VERSION + + # thread options MAX_THREADS = config.get("max_threads", 4) SORT_THREADS = config.get("sort_threads", 8) diff --git a/workflow/rules/fire-peaks.smk b/workflow/rules/fire-peaks.smk index 6d5f2428d..07d15e0f6 100644 --- a/workflow/rules/fire-peaks.smk +++ b/workflow/rules/fire-peaks.smk @@ -77,7 +77,7 @@ rule fdr_table: params: script=workflow.source_path("../scripts/fdr-table.py"), resources: - mem_mb=get_mem_mb, + mem_mb=get_mem_mb_xl, shell: """ MIN=$(cat {input.minimum})