diff --git a/conf/modules.config b/conf/modules.config index f378b3f9..36954ee2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -57,11 +57,6 @@ process { ] } - - - - - withName: FASTQC { ext.args = '--quiet' ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('fastqc')) } @@ -74,9 +69,9 @@ process { ] } - withName: FASTQC_AFTER_TRIMMING { + withName: 'FASTQC_AFTER_BBDUK|FASTQC_AFTER_CUTADAPT' { ext.args = '--quiet' - ext.when = { !(params.skip_tools && (params.skip_tools.split(',').contains('fastqc') || params.skip_tools.split(',').contains('cutadapt'))) } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('fastqc_after_trimming')) } publishDir = [ [ - path: { "${params.outdir}/fastqc_after_trimming/${meta.id}" }, + path: { "${params.outdir}/fastqc_after_trimming/${task.process.tokenize(':')[-1].toLowerCase()}/${meta.id}" }, @@ -98,6 +93,17 @@ process { ] } + withName: BBMAP_BBDUK { + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('bbduk')) } + publishDir = [ + [ + path: { "${params.outdir}/bbduk/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*gz" + ] + ] + } + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ diff --git a/conf/test_hackathon.config b/conf/test_hackathon.config index 1f29e568..5cbda22c 100644 --- a/conf/test_hackathon.config +++ b/conf/test_hackathon.config @@ -20,7 +20,7 @@ params { max_time = '6.h' input = "https://raw.githubusercontent.com/nf-core/test-datasets/dualrnaseq/spreadsheet/spreadsheet.csv" - + // input = "data/samplesheet_test_hackathon.csv" // Genome references fasta_host = "https://github.com/nf-core/test-datasets/raw/dualrnaseq/references/GRCh38.p13_sub.fasta" @@ -30,7 +30,11 @@ params { transcript_fasta_host = "data/Human_gencode.v33_sub_transcriptome.fasta" libtype = "" + adapters = "data/adapters.fa" + fasta_pathogen = "https://github.com/nf-core/test-datasets/raw/dualrnaseq/references/SL1344_sub.fasta" gff_pathogen = "https://github.com/nf-core/test-datasets/raw/dualrnaseq/references/SL1344_sub.gff3" + skip_tools = "cutadapt" + } diff 
--git a/data/adapters.fa b/data/adapters.fa new file mode 100644 index 00000000..376a85d8 --- /dev/null +++ b/data/adapters.fa @@ -0,0 +1,308 @@ +>Reverse_adapter +AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Universal_Adapter +AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT +>pcr_dimer +AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCTAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG +>PCR_Primers +AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCTCAAGCAGAAGACGGCATACGAGCTCTTCCGATCT +>TruSeq_Adapter_Index_1_6 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_2 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_3 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTAGGCATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_4 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_5 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACACAGTGATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_6 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGCCAATATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_7 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAGATCATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_8 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTTGAATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_9 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGATCAGATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_10 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTAGCTTATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_11 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGGCTACATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_12 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCTTGTAATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_13 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTCAACAATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_14 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTTCCGTATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_15 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACATGTCAGAATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_16 
+GATCGGAAGAGCACACGTCTGAACTCCAGTCACCCGTCCCGATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_18_7 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTCCGCACATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_19 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGAAACGATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_20 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGGCCTTATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_21 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTTTCGGAATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_22 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGTACGTAATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_23 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGAGTGGATATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_25 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTGATATATCTCGTATGCCGTCTTCTGCTTG +>TruSeq_Adapter_Index_27 +GATCGGAAGAGCACACGTCTGAACTCCAGTCACATTCCTTTATCTCGTATGCCGTCTTCTGCTTG +>I5_Nextera_Transposase_1 +CTGTCTCTTATACACATCTGACGCTGCCGACGA +>I7_Nextera_Transposase_1 +CTGTCTCTTATACACATCTCCGAGCCCACGAGAC +>I5_Nextera_Transposase_2 +CTGTCTCTTATACACATCTCTGATGGCGCGAGGGAGGC +>I7_Nextera_Transposase_2 +CTGTCTCTTATACACATCTCTGAGCGGGCTGGCAAGGC +>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]501 +GACGCTGCCGACGAGCGATCTAGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]502 +GACGCTGCCGACGAATAGAGAGGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]503 +GACGCTGCCGACGAAGAGGATAGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]504 +GACGCTGCCGACGATCTACTCTGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]505 +GACGCTGCCGACGACTCCTTACGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]506 +GACGCTGCCGACGATATGCAGTGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]507 +GACGCTGCCGACGATACTCCTTGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]508 +GACGCTGCCGACGAAGGCTTAGGTGTAGATCTCGGTGGTCGCCGTATCATT 
+>I5_Primer_Nextera_XT_and_Nextera_Enrichment_[N/S/E]517 +GACGCTGCCGACGATCTTACGCGTGTAGATCTCGGTGGTCGCCGTATCATT +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N701 +CCGAGCCCACGAGACTAAGGCGAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N702 +CCGAGCCCACGAGACCGTACTAGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N703 +CCGAGCCCACGAGACAGGCAGAAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N704 +CCGAGCCCACGAGACTCCTGAGCATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N705 +CCGAGCCCACGAGACGGACTCCTATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N706 +CCGAGCCCACGAGACTAGGCATGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N707 +CCGAGCCCACGAGACCTCTCTACATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N708 +CCGAGCCCACGAGACCAGAGAGGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N709 +CCGAGCCCACGAGACGCTACGCTATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N710 +CCGAGCCCACGAGACCGAGGCTGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N711 +CCGAGCCCACGAGACAAGAGGCAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_and_Nextera_Enrichment_N712 +CCGAGCCCACGAGACGTAGAGGAATCTCGTATGCCGTCTTCTGCTTG +>I5_Primer_Nextera_XT_Index_Kit_v2_S502 +GACGCTGCCGACGAATAGAGAGGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S503 +GACGCTGCCGACGAAGAGGATAGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S505 +GACGCTGCCGACGACTCCTTACGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S506 +GACGCTGCCGACGATATGCAGTGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S507 +GACGCTGCCGACGATACTCCTTGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S508 +GACGCTGCCGACGAAGGCTTAGGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S510 +GACGCTGCCGACGAATTAGACGGTGTAGATCTCGGTGGTCGCCGTATCATT 
+>I5_Primer_Nextera_XT_Index_Kit_v2_S511 +GACGCTGCCGACGACGGAGAGAGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S513 +GACGCTGCCGACGACTAGTCGAGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S515 +GACGCTGCCGACGAAGCTAGAAGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S516 +GACGCTGCCGACGAACTCTAGGGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S517 +GACGCTGCCGACGATCTTACGCGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S518 +GACGCTGCCGACGACTTAATAGGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S520 +GACGCTGCCGACGAATAGCCTTGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S521 +GACGCTGCCGACGATAAGGCTCGTGTAGATCTCGGTGGTCGCCGTATCATT +>I5_Primer_Nextera_XT_Index_Kit_v2_S522 +GACGCTGCCGACGATCGCATAAGTGTAGATCTCGGTGGTCGCCGTATCATT +>I7_Primer_Nextera_XT_Index_Kit_v2_N701 +CCGAGCCCACGAGACTAAGGCGAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N702 +CCGAGCCCACGAGACCGTACTAGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N703 +CCGAGCCCACGAGACAGGCAGAAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N704 +CCGAGCCCACGAGACTCCTGAGCATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N705 +CCGAGCCCACGAGACGGACTCCTATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N706 +CCGAGCCCACGAGACTAGGCATGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N707 +CCGAGCCCACGAGACCTCTCTACATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N710 +CCGAGCCCACGAGACCGAGGCTGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N711 +CCGAGCCCACGAGACAAGAGGCAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N712 +CCGAGCCCACGAGACGTAGAGGAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N714 +CCGAGCCCACGAGACGCTCATGAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N715 +CCGAGCCCACGAGACATCTCAGGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N716 
+CCGAGCCCACGAGACACTCGCTAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N718 +CCGAGCCCACGAGACGGAGCTACATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N719 +CCGAGCCCACGAGACGCGTAGTAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N720 +CCGAGCCCACGAGACCGGAGCCTATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N721 +CCGAGCCCACGAGACTACGCTGCATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N722 +CCGAGCCCACGAGACATGCGCAGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N723 +CCGAGCCCACGAGACTAGCGCTCATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N724 +CCGAGCCCACGAGACACTGAGCGATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N726 +CCGAGCCCACGAGACCCTAAGACATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N727 +CCGAGCCCACGAGACCGATCAGTATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N728 +CCGAGCCCACGAGACTGCAGCTAATCTCGTATGCCGTCTTCTGCTTG +>I7_Primer_Nextera_XT_Index_Kit_v2_N729 +CCGAGCCCACGAGACTCGACGTCATCTCGTATGCCGTCTTCTGCTTG +>I5_Adapter_Nextera +CTGATGGCGCGAGGGAGGCGTGTAGATCTCGGTGGTCGCCGTATCATT +>I7_Adapter_Nextera_No_Barcode +CTGAGCGGGCTGGCAAGGCAGACCGATCTCGTATGCCGTCTTCTGCTTG +>Nextera_LMP_Read1_External_Adapter +GATCGGAAGAGCACACGTCTGAACTCCAGTCAC +>Nextera_LMP_Read2_External_Adapter +GATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT +>RNA_Adapter_(RA5)_part_#_15013205 +GATCGTCGGACTGTAGAACTCTGAAC +>RNA_Adapter_(RA3)_part_#_15013207 +CCTTGGCACCCGAGAATTCCA +>Stop_Oligo_(STP)_8 +CCACGGGAACGTGGTGGAATTC +>RNA_RT_Primer_(RTP)_part_#_15013981 +TGGAATTCTCGGGTGCCAAGGC +>RNA_PCR_Primer_(RP1)_part_#_15013198 +TCGGACTGTAGAACTCTGAACGTGTAGATCTCGGTGGTCGCCGTATCATT +>RNA_PCR_Primer_Index_1_(RPI1)_2,9 +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_2_(RPI2) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_3_(RPI3) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTTAGGCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_4_(RPI4) 
+TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_5_(RPI5) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACACAGTGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_6_(RPI6) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGCCAATATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_7_(RPI7) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCAGATCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_8_(RPI8) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACACTTGAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_9_(RPI9) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGATCAGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_10_(RPI10) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTAGCTTATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_11_(RPI11) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGGCTACATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_12_(RPI12) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCTTGTAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_13_(RPI13) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACAGTCAAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_14_(RPI14) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACAGTTCCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_15_(RPI15) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACATGTCAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_16_(RPI16) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCCGTCCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_17_(RPI17) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGTAGAGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_18_(RPI18) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGTCCGCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_19_(RPI19) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGTGAAAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_20_(RPI20) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGTGGCCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_21_(RPI21) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGTTTCGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_22_(RPI22) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCGTACGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_23_(RPI23) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGAGTGGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_24_(RPI24) 
+TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGGTAGCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_25_(RPI25) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACACTGATATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_26_(RPI26) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACATGAGCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_27_(RPI27) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACATTCCTATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_28_(RPI28) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCAAAAGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_29_(RPI29) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCAACTAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_30_(RPI30) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCACCGGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_31_(RPI31) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCACGATATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_32_(RPI32) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCACTCAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_33_(RPI33) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCAGGCGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_34_(RPI34) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCATGGCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_35_(RPI35) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCATTTTATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_36_(RPI36) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCCAACAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_37_(RPI37) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCGGAATATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_38_(RPI38) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCTAGCTATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_39_(RPI39) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCTATACATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_40_(RPI40) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCTCAGAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_41_(RPI41) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACGACGACATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_42_(RPI42) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTAATCGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_43_(RPI43) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTACAGCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_44_(RPI44) 
+TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTATAATATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_45_(RPI45) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTCATTCATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_46_(RPI46) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTCCCGAATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_47_(RPI47) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTCGAAGATCTCGTATGCCGTCTTCTGCTTG +>RNA_PCR_Primer_Index_48_(RPI48) +TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACTCGGCAATCTCGTATGCCGTCTTCTGCTTG +>PhiX_read1_adapter +AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTGAAA +>PhiX_read2_adapter +AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATTAAAAAA +>Bisulfite_R1 +AGATCGGAAGAGCACACGTCTGAAC +>Bisulfite_R2 +AGATCGGAAGAGCGTCGTGTAGGGA diff --git a/modules.json b/modules.json index b6029734..69603819 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "bbmap/bbduk": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", diff --git a/modules/local/combine_quantification_results_hts.nf b/modules/local/combine_quantification_results_hts.nf index 16775dd5..a8c4bdee 100644 --- a/modules/local/combine_quantification_results_hts.nf +++ b/modules/local/combine_quantification_results_hts.nf @@ -1,10 +1,10 @@ process COMBINE_QUANTIFICATION_RESULTS_HTS { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'nfcore/dualrnaseq:dev' : - 'nfcore/dualrnaseq:dev' }" + 'docker.io/nfcore/dualrnaseq:dev' : + 'docker.io/nfcore/dualrnaseq:dev' }" - input: + input: path input_quantification val gene_attribute val organism @@ -17,4 +17,4 @@ process COMBINE_QUANTIFICATION_RESULTS_HTS { -a $gene_attribute \ -org $organism """ -} \ No newline at end of file +} diff --git a/modules/local/combine_quantification_results_salmon.nf b/modules/local/combine_quantification_results_salmon.nf index 840762f1..13ee7771 100644 --- a/modules/local/combine_quantification_results_salmon.nf +++ b/modules/local/combine_quantification_results_salmon.nf @@ -1,15 +1,15 @@ process COMBINE_QUANTIFICATION_RESULTS_SALMON { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'nfcore/dualrnaseq:dev' : - 'nfcore/dualrnaseq:dev' }" + 'docker.io/nfcore/dualrnaseq:dev' : + 'docker.io/nfcore/dualrnaseq:dev' }" - input: + input: path input_quantification val organism output: path "combined_${organism}.tsv", emit: combined_quant_data - + script: def args = task.ext.args ?: '' """ @@ -18,4 +18,4 @@ process COMBINE_QUANTIFICATION_RESULTS_SALMON { -a $params.gene_attribute_gff_to_create_transcriptome_host \ -org $organism """ -} \ No newline at end of file +} diff --git a/modules/local/create_transcriptome_fasta.nf b/modules/local/create_transcriptome_fasta.nf index 29c883f2..763528c6 100644 --- a/modules/local/create_transcriptome_fasta.nf +++ b/modules/local/create_transcriptome_fasta.nf @@ -1,12 +1,12 @@ process CREATE_TRANSCRIPTOME_FASTA { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'nfcore/dualrnaseq:dev' : - 'nfcore/dualrnaseq:dev' }" + 'docker.io/nfcore/dualrnaseq:dev' : + 'docker.io/nfcore/dualrnaseq:dev' }" input: path(fasta) - path(gff) + path(gff) tuple val(features), val(attribute) output: @@ -17,4 +17,4 @@ process CREATE_TRANSCRIPTOME_FASTA { """ python $workflow.projectDir/bin/gff_to_fasta_transcriptome.py -fasta $fasta -gff $gff -f $features -a $attribute -o $outfile_name """ -} \ No newline at end of file +} diff --git a/modules/local/create_transcriptome_fasta_gffread.nf b/modules/local/create_transcriptome_fasta_gffread.nf index 4b1468d7..9db864bb 100644 --- a/modules/local/create_transcriptome_fasta_gffread.nf +++ b/modules/local/create_transcriptome_fasta_gffread.nf @@ -3,8 +3,8 @@ process CREATE_TRANSCRIPTOME_FASTA_GFFREAD { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'nfcore/dualrnaseq:dev' : - 'nfcore/dualrnaseq:dev' }" + 'docker.io/nfcore/dualrnaseq:dev' : + 'docker.io/nfcore/dualrnaseq:dev' }" input: path(fasta) @@ -17,4 +17,4 @@ process CREATE_TRANSCRIPTOME_FASTA_GFFREAD { """ gffread -w $outfile_name -g $fasta $gff """ -} \ No newline at end of file +} diff --git a/modules/local/extract_annotations.nf b/modules/local/extract_annotations.nf index 6149156d..dc1eba37 100644 --- a/modules/local/extract_annotations.nf +++ b/modules/local/extract_annotations.nf @@ -4,8 +4,8 @@ process EXTRACT_ANNOTATIONS { conda "bioconda::conda-forge::python=3.8.3=3.11.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'nfcore/dualrnaseq:dev': - 'nfcore/dualrnaseq:dev' }" + 'docker.io/nfcore/dualrnaseq:dev': + 'docker.io/nfcore/dualrnaseq:dev' }" input: path(gff) diff --git a/modules/local/extract_processed_reads.nf b/modules/local/extract_processed_reads.nf index 799439fa..08b83b58 100644 --- a/modules/local/extract_processed_reads.nf +++ b/modules/local/extract_processed_reads.nf @@ -4,10 +4,10 @@ process EXTRACT_PROCESSED_READS { conda "python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'nfcore/dualrnaseq:dev' : - 'nfcore/dualrnaseq:dev' }" - - input: + 'docker.io/nfcore/dualrnaseq:dev' : + 'docker.io/nfcore/dualrnaseq:dev' }" + + input: tuple val(meta), file (json_file) val(process) @@ -18,10 +18,10 @@ process EXTRACT_PROCESSED_READS { def prefix = task.ext.prefix ?: "${meta.id}" """ if [ ${process} == "salmon" ]; then # for Salmon extract 'num_processed' from meta_info.json file - processed=\$(grep "num_processed" ${json_file} | sed 's/num_processed//g'| sed 's/[^a-zA-Z0-9]//g') + processed=\$(grep "num_processed" ${json_file} | sed 's/num_processed//g'| sed 's/[^a-zA-Z0-9]//g') echo -e "${prefix}\t\${processed}" > ${prefix}.txt elif [ ${process} == "salmon_alignment" ]; then # for Salmon alignment-based mode extract 'num_mapped' from meta_info.json file - processed=\$(grep "num_mapped" ${json_file} | sed 's/num_mapped//g'| sed 's/[^a-zA-Z0-9]//g') + processed=\$(grep "num_mapped" ${json_file} | sed 's/num_mapped//g'| sed 's/[^a-zA-Z0-9]//g') echo -e "${prefix}\t\${processed}" > ${prefix}.txt elif [ ${process} == "star" ]; then # for STAR extract "Number of input reads" from *Log.final.out file processed=\$(grep "Number of input reads" ${json_file} | sed 's/Number of input reads//g'| sed 's/[^a-zA-Z0-9]//g') diff --git a/modules/local/salmon_split_table.nf b/modules/local/salmon_split_table.nf index 82487927..b930b99e 100644 --- a/modules/local/salmon_split_table.nf +++ 
b/modules/local/salmon_split_table.nf @@ -3,7 +3,7 @@ process SALMON_SPLIT_TABLE { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'nfcore/dualrnaseq:dev' }" + 'docker.io/nfcore/dualrnaseq:dev' }" input: tuple val(meta), path(quant) diff --git a/modules/nf-core/bbmap/bbduk/main.nf b/modules/nf-core/bbmap/bbduk/main.nf new file mode 100644 index 00000000..e14a6fc0 --- /dev/null +++ b/modules/nf-core/bbmap/bbduk/main.nf @@ -0,0 +1,43 @@ +process BBMAP_BBDUK { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::bbmap=39.01" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bbmap:39.01--h92535d8_1': + 'biocontainers/bbmap:39.01--h92535d8_1' }" + + input: + tuple val(meta), path(reads) + path contaminants + + output: + tuple val(meta), path('*.fastq.gz'), emit: reads + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def raw = meta.single_end ? "in=${reads[0]}" : "in1=${reads[0]} in2=${reads[1]}" + def trimmed = meta.single_end ? "out=${prefix}.fastq.gz" : "out1=${prefix}_1.fastq.gz out2=${prefix}_2.fastq.gz" + def contaminants_fa = contaminants ? 
"ref=$contaminants" : '' + """ + maxmem=\$(echo \"$task.memory\"| sed 's/ GB/g/g') + bbduk.sh \\ + -Xmx\$maxmem \\ + $raw \\ + $trimmed \\ + threads=$task.cpus \\ + $args \\ + $contaminants_fa \\ + &> ${prefix}.bbduk.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bbmap/bbduk/meta.yml b/modules/nf-core/bbmap/bbduk/meta.yml new file mode 100644 index 00000000..c1719918 --- /dev/null +++ b/modules/nf-core/bbmap/bbduk/meta.yml @@ -0,0 +1,52 @@ +name: bbmap_bbduk +description: Adapter and quality trimming of sequencing reads +keywords: + - trimming + - adapter trimming + - quality trimming + - fastq +tools: + - bbmap: + description: BBMap is a short read aligner, as well as various other bioinformatic tools. + homepage: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/ + documentation: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/ + + licence: ["UC-LBL license (see package)"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - contaminants: + type: file + description: | + Reference files containing adapter and/or contaminant sequences for sequence kmer matching + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: The trimmed/modified fastq reads + pattern: "*fastq.gz" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - log: + type: file + description: Bbduk log file + pattern: "*bbduk.log" + +authors: + - "@MGordon09" diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 9ae58381..4a039e50 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -5,7 +5,7 @@ process FASTQC { conda "bioconda::fastqc=0.11.9" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" + 'biocontainers/fastqc:0.11.9--0' }" input: tuple val(meta), path(reads) diff --git a/nextflow.config b/nextflow.config index 62e2db26..8f248f09 100644 --- a/nextflow.config +++ b/nextflow.config @@ -208,6 +208,8 @@ env { R_ENVIRON_USER = "/.Renviron" JULIA_DEPOT_PATH = "/usr/local/share/julia" } +docker.registry = 'quay.io' +podman.registry = 'quay.io' // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] diff --git a/workflows/dualrnaseq.nf b/workflows/dualrnaseq.nf index d760b946..0ad5b41b 100644 --- a/workflows/dualrnaseq.nf +++ b/workflows/dualrnaseq.nf @@ -28,6 +28,15 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true // Check mandatory parameters if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } + + +ch_fasta_host = params.fasta_host ? file( params.fasta_host, checkIfExists: true ) : Channel.empty() +ch_fasta_pathogen = params.fasta_pathogen ? file( params.fasta_pathogen, checkIfExists: true) : Channel.empty() +ch_gff_host = params.gff_host ? 
file( params.gff_host, checkIfExists: true ) : Channel.empty() +ch_gff_host_tRNA = params.gff_host_tRNA ? file( params.gff_host_tRNA, checkIfExists: true ) : Channel.empty() +ch_gff_pathogen = params.gff_pathogen ? file( params.gff_pathogen, checkIfExists: true ) : Channel.empty() +ch_adapters = params.adapters ? file( params.adapters, checkIfExists: true ) : [] + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES @@ -63,8 +72,10 @@ include { SALMON_ALIGNMENT_BASED } from '../subworkflows/local/salmon_alignment_ // MODULE: Installed directly from nf-core/modules // include { FASTQC } from '../modules/nf-core/fastqc/main' -include { FASTQC as FASTQC_AFTER_TRIMMING } from '../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_AFTER_BBDUK } from '../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_AFTER_CUTADAPT } from '../modules/nf-core/fastqc/main' include { CUTADAPT } from '../modules/nf-core/cutadapt/main' +include { BBMAP_BBDUK } from '../modules/nf-core/bbmap/bbduk/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' @@ -88,25 +99,38 @@ workflow DUALRNASEQ { ch_input ) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + ch_reads = INPUT_CHECK.out.reads - if (!(params.skip_tools && params.skip_tools.split(',').contains('fastqc'))) { - FASTQC(INPUT_CHECK.out.reads) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - } + // + // Running FASTQC before trimming (can be skipped) + // - ch_reads = INPUT_CHECK.out.reads - if (!(params.skip_tools && params.skip_tools.split(',').contains('cutadapt'))) { - CUTADAPT(INPUT_CHECK.out.reads) - ch_reads = CUTADAPT.out.reads - ch_versions = ch_versions.mix(CUTADAPT.out.versions.first()) - } + FASTQC(INPUT_CHECK.out.reads) + ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - if (!(params.skip_tools && 
(params.skip_tools.split(',').contains('fastqc') || params.skip_tools.split(',').contains('cutadapt')))) { - FASTQC_AFTER_TRIMMING(ch_reads) - ch_versions = ch_versions.mix(FASTQC_AFTER_TRIMMING.out.versions.first()) - } + // + // Running CUTADAPT and/or BBDUK (can be skipped) + // + + CUTADAPT(INPUT_CHECK.out.reads) + ch_reads_cutadapt = CUTADAPT.out.reads + ch_versions = ch_versions.mix(CUTADAPT.out.versions.first()) + BBMAP_BBDUK(INPUT_CHECK.out.reads, ch_adapters) + ch_reads_bbduk = BBMAP_BBDUK.out.reads + ch_versions = ch_versions.mix(BBMAP_BBDUK.out.versions.first()) + // + // Running FASTQC on each trimmer's own output (can be skipped) + // + + if (ch_reads_bbduk) { + FASTQC_AFTER_BBDUK(ch_reads_bbduk) + } + + if (ch_reads_cutadapt) { + FASTQC_AFTER_CUTADAPT(ch_reads_cutadapt) + } PREPARE_REFERENCE_FILES( params.fasta_host,