Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
!/resources/README.txt
!/resources/decision_tree_*.json
/resources/vep/cache
resources/GTEx/GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_transcript_tpm.gct
!/resources/vep/plugins
!/resources/*.chain
!/resources/*.chain.gz
Expand All @@ -33,4 +34,4 @@ nextflow-*-all
/vip

# mkdocs
/site
/site
7 changes: 5 additions & 2 deletions config/nxf_vcf.config
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ params {
vep_plugin_inheritance = "${projectDir}/resources/inheritance_20240115.tsv"
vep_plugin_vkgl_mode = 1

vep_plugin_gtex = "${projectDir}/resources/GTEx/GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_transcript_tpm.gct"

GRCh38 {
capice_model = "${projectDir}/resources/GRCh38/capice_model_v5.1.2-v2.ubj"
expansionhunter_variant_catalog = "${projectDir}/resources/GRCh38/expansionhunter_variant_catalog.json"
Expand All @@ -83,7 +85,8 @@ params {
metadata = "${projectDir}/resources/field_metadata.json"

GRCh38 {
decision_tree = "${projectDir}/resources/decision_tree_GRCh38.json"
//decision_tree = "${projectDir}/resources/decision_tree_GRCh38.json"
decision_tree = "${projectDir}/resources/PoC/decision_tree_GRCh38_Tissues.json"
}
}

Expand All @@ -109,7 +112,7 @@ params {
include_crams = true
max_records = ""
max_samples = ""
template = ""
template = "${projectDir}/resources/PoC/index.html"
metadata = "${projectDir}/resources/field_metadata.json"

GRCh38 {
Expand Down
12 changes: 12 additions & 0 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ download_files() {
for ((i = 0; i < ${#urls[@]}; i += 2)); do
download_file "${base_url}" "${urls[i+1]}" "${urls[i+0]}" "${output_dir}" "${validate}"
done
download_file "https://ftp.ensembl.org/pub/release-111/variation/indexed_vep_cache" "homo_sapiens_vep_111_GRCh38.tar.gz" "FIXME" "${output_dir}/resources/vep/cache/" "false"
download_file "https://storage.googleapis.com/adult-gtex/bulk-gex/v8/rna-seq" "GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_transcript_tpm.gct.gz" "FIXME" "${output_dir}/resources/GTEx" "false"

}

extract_files() {
Expand All @@ -162,7 +165,16 @@ extract_files() {
echo -e "extracting ${vep_gz} ..."
tar -xzf "${vep_gz}" -C "${vep_dir}"
fi
if [ ! -d "${vep_dir}/homo_sapiens/111_GRCh38" ]; then
local -r vep_gz="${vep_dir}/homo_sapiens_vep_111_GRCh38.tar.gz"
echo -e "extracting ${vep_gz} ..."
tar -xzf "${vep_gz}" -C "${vep_dir}"
fi

if [ ! -f "${output_dir}/resources/GTEx/GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_transcript_tpm.gct" ]; then
gunzip ${output_dir}/resources/GTEx/GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_transcript_tpm.gct.gz
fi

local -r annotsv_dir="${output_dir}/resources/annotsv/v3.3.6"

local -r annotsv_human_dir="${annotsv_dir}/Annotations_Human"
Expand Down
4 changes: 4 additions & 0 deletions modules/sample_sheet.nf
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ def parseCommonSampleSheet(csvFilename, additionalCols) {
list: true,
regex: /HP:\d{7}/
],
tissues: [
type: "string",
list: true
],
sequencing_method: [
type: "string",
default: { 'WGS' },
Expand Down
4 changes: 3 additions & 1 deletion modules/vcf/annotate.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include { basename; areProbandHpoIdsIndentical } from './utils'
include { basename; areProbandHpoIdsIndentical; getTissues } from './utils'

process annotate {
label 'vcf_annotate'
Expand Down Expand Up @@ -32,8 +32,10 @@ process annotate {
capiceModelPath = params.vcf.annotate[assembly].capice_model
alphScorePath = params.vcf.annotate[assembly].vep_plugin_alphscore
strangerCatalog = params.vcf.annotate[assembly].stranger_catalog
gtexFile = params.vcf.annotate.vep_plugin_gtex

areProbandHpoIdsIndentical = areProbandHpoIdsIndentical(meta.project.samples)
tissues = getTissues(meta.project.samples)
gadoScores = meta.gado != null ? meta.gado : ""

template 'annotate.sh'
Expand Down
5 changes: 3 additions & 2 deletions modules/vcf/classify.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include { basename } from './utils'
include { basename; getTissues } from './utils'

process classify {
label 'vcf_classify'
Expand All @@ -18,7 +18,8 @@ process classify {
metadata = params.vcf.classify.metadata
decisionTree = params.vcf.classify[meta.project.assembly].decision_tree
annotatePath = params.vcf.classify.annotate_path

tissues = getTissues(meta.project.samples)

template 'classify.sh'

stub:
Expand Down
14 changes: 9 additions & 5 deletions modules/vcf/templates/annotate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ vep() {
args+=("--dir_cache" "!{params.vcf.annotate.vep_cache_dir}")
args+=("--species" "homo_sapiens")
args+=("--assembly" "!{assembly}")
args+=("--refseq")
#args+=("--refseq")
args+=("--exclude_predicted")
args+=("--use_given_ref")
args+=("--symbol")
Expand All @@ -160,20 +160,24 @@ vep() {
args+=("--dir_plugins" "!{params.vcf.annotate.vep_plugin_dir}")
args+=("--plugin" "Grantham")
args+=("--plugin" "SpliceAI,snv=!{vepPluginSpliceAiSnvPath},indel=!{vepPluginSpliceAiIndelPath}")
args+=("--plugin" "Capice,${capiceOutputPath}")
args+=("--plugin" "Capice,${capiceOutputPath},!{params.vcf.annotate.ensembl_gene_mapping}")
args+=("--plugin" "UTRannotator,!{vepPluginUtrAnnotatorPath}")
args+=("--custom" "!{vepCustomPhyloPPath},phyloP,bigwig,exact,0")
args+=("--safe")

if [ -n "!{hpoIds}" ]; then
args+=("--plugin" "Hpo,!{params.vcf.annotate.vep_plugin_hpo},!{hpoIds.replace(',', ';')}")
args+=("--plugin" "Hpo,!{params.vcf.annotate.vep_plugin_hpo},!{hpoIds.replace(',', ';')},!{params.vcf.annotate.ensembl_gene_mapping}")
fi
if [ -n "!{gadoScores}" ]; then
args+=("--plugin" "GADO,!{gadoScores},!{params.vcf.annotate.ensembl_gene_mapping}")
fi
args+=("--plugin" "Inheritance,!{params.vcf.annotate.vep_plugin_inheritance}")
if [ -n "!{gtexFile}" ]; then
echo !{tissues}
args+=("--plugin" "GTEx,!{gtexFile},!{tissues.replace(',', ';')}")
fi
args+=("--plugin" "Inheritance,!{params.vcf.annotate.vep_plugin_inheritance},,!{params.vcf.annotate.ensembl_gene_mapping}")
if [ -n "!{vepPluginVkglPath}" ] && [ -n "!{params.vcf.annotate.vep_plugin_vkgl_mode}" ]; then
args+=("--plugin" "VKGL,!{vepPluginVkglPath},!{params.vcf.annotate.vep_plugin_vkgl_mode}")
args+=("--plugin" "VKGL,!{vepPluginVkglPath},!{params.vcf.annotate.vep_plugin_vkgl_mode},!{params.vcf.annotate.ensembl_gene_mapping}")
fi
if [ -n "!{vepPluginGnomAdPath}" ]; then
args+=("--plugin" "gnomAD,!{vepPluginGnomAdPath}")
Expand Down
13 changes: 12 additions & 1 deletion modules/vcf/templates/classify.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ classify () {
args+=("-jar" "/opt/vcf-decision-tree/lib/vcf-decision-tree.jar")
args+=("--input" "!{vcf}")
args+=("--metadata" "!{metadata}")
args+=("--config" "!{decisionTree}")
args+=("--config" "decision_tree_updated.json")
if [ !{annotatePath} -eq 1 ]; then
args+=("--path")
fi
Expand Down Expand Up @@ -42,7 +42,18 @@ insert_alt(){
fi
}

# Write the tissues selected for this run to tissues.tsv in the current
# directory, one tissue per line.
#
# !{tissues} is substituted by Nextflow before the script runs and holds a
# comma-separated string; every comma is turned into a newline.
write_tissue_file(){
  # Quote the interpolated value so the shell never word-splits or globs it,
  # and use printf instead of echo so a value starting with '-' cannot be
  # mistaken for an option. An empty value still yields a single empty line,
  # as before.
  printf '%s\n' "!{tissues}" | tr ',' '\n' > tissues.tsv
}

# Produce decision_tree_updated.json in the current directory: a copy of the
# configured decision tree (!{decisionTree}, substituted by Nextflow) in which
# every occurrence of the TISSUE_FILE_PATH placeholder is replaced by the
# absolute path of tissues.tsv.
#
# Expects tissues.tsv to exist in the current directory.
update_tree(){
  # Declare separately from the assignment so a realpath failure is not
  # masked by the exit status of 'local'.
  local tissuePath
  tissuePath=$(realpath tissues.tsv)

  # '\' and '&' are special in a sed replacement and '|' is the delimiter
  # used below; escape them so an unusual work directory path cannot corrupt
  # the substitution.
  local escapedPath
  escapedPath=$(printf '%s' "${tissuePath}" | sed 's/[\\&|]/\\&/g')

  sed "s|TISSUE_FILE_PATH|${escapedPath}|g" "!{decisionTree}" > decision_tree_updated.json
}

main () {
write_tissue_file
update_tree
store_alt
classify
insert_alt
Expand Down
12 changes: 12 additions & 0 deletions modules/vcf/utils.nf
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,18 @@ def determineChunks(meta) {
return chunks
}

/**
 * Resolve the tissue keys given on the samples to the full tissue names known
 * to the pipeline.
 *
 * Each key is used as a regular-expression fragment: a known tissue is selected
 * when it matches ".*<key>.*", so a key such as "Kidney" selects every tissue
 * whose name contains "Kidney". Keys are not escaped, so regex metacharacters
 * in a key keep their regex meaning.
 *
 * @param samples list of samples; each sample may carry a 'tissues' list of keys.
 *                Samples without a 'tissues' value are ignored.
 *                NOTE(review): assumes 'tissues' is a list of strings as declared
 *                in the sample sheet schema - confirm against the parser.
 * @return comma-separated string of the matching tissue names without
 *         duplicates, ordered by key and then by position in the known-tissue
 *         list; an empty string when nothing matches.
 */
def getTissues(samples) {
  // Known tissue names; presumably the tissue columns of the GTEx expression
  // file consumed by the GTEx VEP plugin - TODO confirm.
  def allTissues = [
    "Adipose_Subcutaneous", "Adipose_Visceral", "AdrenalGland",
    "Artery_Aorta", "Artery_Coronary", "Artery_Tibial",
    "Bladder",
    "Brain_Amygdala", "Brain_Anteriorcingulatecortex", "Brain_Caudate",
    "Brain_CerebellarHemisphere", "Brain_Cerebellum", "Brain_Cortex",
    "Brain_FrontalCortex", "Brain_Hippocampus", "Brain_Hypothalamus",
    "Brain_Nucleusaccumbens", "Brain_Putamen", "Brain_Spinalcord",
    "Brain_Substantianigra",
    "Breast_MammaryTissue",
    "Cells_Culturedfibroblasts", "Cells_EBV_transformedlymphocytes",
    "Cervix_Ectocervix", "Cervix_Endocervix",
    "Colon_Sigmoid", "Colon_Transverse",
    "Esophagus_GastroesophagealJunction", "Esophagus_Mucosa", "Esophagus_Muscularis",
    "FallopianTube",
    "Heart_AtrialAppendage", "Heart_LeftVentricle",
    "Kidney_Cortex", "Kidney_Medulla",
    "Liver", "Lung", "MinorSalivaryGland", "Muscle_Skeletal", "Nerve_Tibial",
    "Ovary", "Pancreas", "Pituitary", "Prostate",
    "Skin_NotSunExposed", "Skin_SunExposed",
    "SmallIntestine_TerminalIleum",
    "Spleen", "Stomach", "Testis", "Thyroid", "Uterus", "Vagina", "WholeBlood"
  ]

  // '?: []' keeps collectMany from failing on a missing sample list or on a
  // sample that has no tissues value.
  def tissueKeys = (samples ?: []).collectMany { sample -> sample.tissues ?: [] }.unique()

  def tissues = tissueKeys.collectMany { tissueKey ->
    def pattern = ".*${tissueKey}.*".toString()
    allTissues.findAll { tissue -> tissue.matches(pattern) }
  }

  // Overlapping keys (e.g. "Kidney" and "Kidney_Cortex") select the same
  // tissue more than once; keep the first occurrence only.
  return tissues.unique().join(",")
}

def scatter(meta) {
def chunks = determineChunks(meta)
def index = 0
Expand Down
Loading