Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
d0e71eb
Added argparse to AssignToBins
ALuesink Aug 25, 2025
d5bac97
Removed unused arguments
ALuesink Aug 25, 2025
97d5626
Fixed linting issues
ALuesink Sep 1, 2025
8a25037
Added argparse to AverageTechReplicates and fixed linting issues
ALuesink Sep 1, 2025
4fc8385
Added argparse to CollectSumAdducts and fixed linting issues
ALuesink Sep 1, 2025
f956c9d
Added argparse to GenerateExcel and fixed linting issues
ALuesink Sep 1, 2025
47539c1
Added argparse to GenerateQCOutput and fixed linting issues
ALuesink Sep 1, 2025
c94c011
Changed container to dims:1.4
ALuesink Sep 1, 2025
6944b66
merge develop
ALuesink Nov 24, 2025
553da70
Added argparse to MakeInit
ALuesink Nov 24, 2025
dc3e80f
Added argparse to CollectFilled and fixed linting issues
ALuesink Nov 24, 2025
768517c
Added argparse to GenerateBreaks and fixed linting issues
ALuesink Nov 24, 2025
a29c1f9
Added argparse to HMDBparts_main and fixed linting issues
ALuesink Nov 24, 2025
d86f644
added argument parsing to DIMS/MakeInit step
mraves2 Mar 3, 2026
16b5e83
added argument parsing to DIMS/GenerateBreaks step
mraves2 Mar 3, 2026
26cb222
removed print statements
mraves2 Mar 3, 2026
e8681e6
added argument parsing for DIMS/HMDBparts_main step
mraves2 Mar 3, 2026
c763e04
added argument parsing for DIMS/HMDBparts step
mraves2 Mar 3, 2026
fed4e3b
fixed loading trim parameters from separate file
mraves2 Mar 5, 2026
ed7b595
added argument parsing to DIMS/EvaluateTics step
mraves2 Mar 5, 2026
f7ef463
added argument parsing for new PeakFinding step
mraves2 Mar 5, 2026
9915f55
added argument parsing to DIMS/AveragePeaks step
mraves2 Mar 5, 2026
b8ea927
fixed issue with sample_name in DIMS/AveragePeaks step
mraves2 Mar 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 29 additions & 23 deletions DIMS/AssignToBins.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
# load required packages
library("argparse")
suppressPackageStartupMessages(library("xcms"))

# define parameters
cmd_args <- commandArgs(trailingOnly = TRUE)
parser <- ArgumentParser(description = "AssignToBins")

mzml_filepath <- cmd_args[1]
breaks_filepath <- cmd_args[2]
resol <- as.numeric(cmd_args[3])
parser$add_argument("--mzML_filepath", dest = "mzml_filepath",
help = "File path for the mzML file", required = TRUE)
parser$add_argument("--breaks_filepath", dest = "breaks_filepath",
help = "File path for the breaks RData file", required = TRUE)
parser$add_argument("--trim_params_filepath", dest = "trim_params_filepath",
help = "File path for the trim parameters", required = TRUE)

# load breaks_file: contains breaks_fwhm, breaks_fwhm_avg,
# trim_left_neg, trim_left_pos, trim_right_neg & trim_right_pos
load(breaks_filepath)
args <- parser$parse_args()

# load breaks_file: contains breaks_fwhm and breaks_fwhm_avg
load(args$breaks_filepath)
# load trim parameters trim_left_neg, trim_left_pos, trim_right_neg and trim_right_pos
load(args$trim_params_filepath)

# get sample name
techrep_name <- sub("\\..*$", "", basename(mzml_filepath))
techrep_name <- sub("\\..*$", "", basename(args$mzml_filepath))

options(digits = 16)

Expand All @@ -26,7 +32,7 @@ neg_bins <- bins
dims_thresh <- 100

# read in the data for 1 sample
raw_data <- suppressMessages(xcms::xcmsRaw(mzml_filepath))
raw_data <- suppressMessages(xcms::xcmsRaw(args$mzml_filepath))

# Generate a matrix with retention times and intensities
raw_data_matrix <- xcms::rawMat(raw_data)
Expand All @@ -41,13 +47,13 @@ neg_times_trimmed <- neg_times[neg_times > trim_left_neg & neg_times < trim_righ
# get TIC intensities for areas between trim_left and trim_right
tic_intensity_persample <- cbind(raw_data@scantime, raw_data@tic)
colnames(tic_intensity_persample) <- c("retention_time", "tic_intensity")
tic_intensity_pos <- tic_intensity_persample[tic_intensity_persample[ , "retention_time"] > min(pos_times_trimmed) &
tic_intensity_persample[ , "retention_time"] < max(pos_times_trimmed), ]
tic_intensity_neg <- tic_intensity_persample[tic_intensity_persample[ , "retention_time"] > min(neg_times_trimmed) &
tic_intensity_persample[ , "retention_time"] < max(neg_times_trimmed), ]
tic_intensity_pos <- tic_intensity_persample[tic_intensity_persample[, "retention_time"] > min(pos_times_trimmed) &
tic_intensity_persample[, "retention_time"] < max(pos_times_trimmed), ]
tic_intensity_neg <- tic_intensity_persample[tic_intensity_persample[, "retention_time"] > min(neg_times_trimmed) &
tic_intensity_persample[, "retention_time"] < max(neg_times_trimmed), ]
# calculate weighted mean of intensities for pos and neg separately
mean_pos <- weighted.mean(tic_intensity_pos[ , "tic_intensity"], tic_intensity_pos[ , "tic_intensity"])
mean_neg <- weighted.mean(tic_intensity_neg[ , "tic_intensity"], tic_intensity_neg[ , "tic_intensity"])
mean_pos <- weighted.mean(tic_intensity_pos[, "tic_intensity"], tic_intensity_pos[, "tic_intensity"])
mean_neg <- weighted.mean(tic_intensity_neg[, "tic_intensity"], tic_intensity_neg[, "tic_intensity"])
# intensity per scan should be at least 80% of weighted mean
dims_thresh_pos <- 0.8 * mean_pos
dims_thresh_neg <- 0.8 * mean_neg
Expand All @@ -67,17 +73,17 @@ neg_raw_data_matrix <- raw_data_matrix[neg_index, ]

# Get index for binning intensity values
bin_indices_pos <- cut(
pos_raw_data_matrix[, "mz"],
pos_raw_data_matrix[, "mz"],
breaks_fwhm,
include.lowest = TRUE,
right = TRUE,
include.lowest = TRUE,
right = TRUE,
labels = FALSE
)
bin_indices_neg <- cut(
neg_raw_data_matrix[, "mz"],
breaks_fwhm,
include.lowest = TRUE,
right = TRUE,
neg_raw_data_matrix[, "mz"],
breaks_fwhm,
include.lowest = TRUE,
right = TRUE,
labels = FALSE
)

Expand Down
9 changes: 6 additions & 3 deletions DIMS/AssignToBins.nf
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
process AssignToBins {
tag "DIMS AssignToBins ${file_id}"
label 'AssignToBins'
container = 'docker://umcugenbioinf/dims:1.3'
container = 'docker://umcugenbioinf/dims:1.4'
shell = ['/bin/bash', '-euo', 'pipefail']

input:
tuple(val(file_id), path(mzML_file), path(breaks_file))
tuple(val(file_id), path(mzML_file), path(breaks_file), path(trim_params_file))

output:
path("${file_id}.RData"), emit: rdata_file
path("${file_id}_TIC.txt"), emit: tic_txt_file

script:
"""
Rscript ${baseDir}/CustomModules/DIMS/AssignToBins.R $mzML_file $breaks_file $params.resolution
Rscript ${baseDir}/CustomModules/DIMS/AssignToBins.R \
--mzML_filepath $mzML_file \
--breaks_filepath $breaks_file \
--trim_params_filepath $trim_params_file
"""
}

Expand Down
45 changes: 45 additions & 0 deletions DIMS/AveragePeaks.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# load required packages
library("dplyr")
library("argparse")

# AveragePeaks: combine the peak lists of all technical replicates belonging to
# one biological sample (for one scan mode), sort them on m/z and average them
# with average_peaks_per_sample() from average_peaks_functions.R.
# Output: AvgPeaks_<sample_name>_<scanmode>.RData in the working directory.
parser <- ArgumentParser(description = "AveragePeaks")

parser$add_argument("--sample_name", dest = "sample_name",
                    help = "Name of a biological sample", required = TRUE)
parser$add_argument("--tech_reps", dest = "tech_reps",
                    help = "Names of the technical replicates belonging to the biological sample", required = TRUE)
parser$add_argument("--scanmode", dest = "scanmode",
                    help = "Scan mode (either positive or negative)", required = TRUE)
parser$add_argument("--preprocessing_scripts_dir", dest = "preprocessing_scripts_dir",
                    help = "File path to the directory containing functions used", required = TRUE)

args <- parser$parse_args()

# define parameters
# tech_reps arrives as a single ";"-separated string; drop empty entries that a
# doubled or leading separator would otherwise produce
tech_reps <- strsplit(args$tech_reps, ";")[[1]]
tech_reps <- tech_reps[nzchar(tech_reps)]
if (length(tech_reps) == 0) {
  stop("No technical replicates given for sample ", args$sample_name, call. = FALSE)
}

# load in function scripts
# file.path() inserts the separator, so the directory may be given with or
# without a trailing slash
source(file.path(args$preprocessing_scripts_dir, "average_peaks_functions.R"))

# Initialize per sample
# NOTE(review): nr_repl_persample and the empty averaged_peaks matrix are not
# read anywhere in this script (averaged_peaks is overwritten below); they are
# kept in case the sourced helper functions look them up - TODO confirm, then remove
nr_repl_persample <- 0
averaged_peaks <- matrix(0, nrow = 0, ncol = 6)
colnames(averaged_peaks) <- c("samplenr", "mzmed.pkt", "fq", "mzmin.pkt", "mzmax.pkt", "height.pkt")

# load RData files of technical replicates belonging to biological sample
# load() runs inside the anonymous function, so the objects stored in the RData
# file land in that function's frame and cannot overwrite variables of this
# script (such as args or tech_reps)
peaklist_per_repl <- lapply(tech_reps, function(tech_rep) {
  tech_repl_file <- paste0(tech_rep, "_", args$scanmode, ".RData")
  if (!file.exists(tech_repl_file)) {
    stop("RData file of technical replicate not found: ", tech_repl_file, call. = FALSE)
  }
  get(load(tech_repl_file))
})
# combine data for all technical replicates in one go instead of growing the
# object with rbind() inside a loop
peaklist_allrepl <- do.call(rbind, peaklist_per_repl)

# sort on mass
peaklist_allrepl_df <- as.data.frame(peaklist_allrepl)
# convert to numeric so that sorting is numeric, not alphabetical
peaklist_allrepl_df$mzmed.pkt <- as.numeric(peaklist_allrepl_df$mzmed.pkt)
peaklist_allrepl_df$height.pkt <- as.numeric(peaklist_allrepl_df$height.pkt)
peaklist_allrepl_sorted <- peaklist_allrepl_df %>% arrange(mzmed.pkt)

# average over technical replicates
averaged_peaks <- average_peaks_per_sample(peaklist_allrepl_sorted, args$sample_name)
save(averaged_peaks, file = paste0("AvgPeaks_", args$sample_name, "_", args$scanmode, ".RData"))
22 changes: 22 additions & 0 deletions DIMS/AveragePeaks.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Nextflow process that runs DIMS/AveragePeaks.R once per
// (sample_id, tech_reps, scanmode) tuple inside the dims:1.4 container.
process AveragePeaks {
tag "DIMS AveragePeaks"
label 'AveragePeaks'
container = 'docker://umcugenbioinf/dims:1.4'
shell = ['/bin/bash', '-euo', 'pipefail']

input:
// RData files staged into the task directory; AveragePeaks.R loads
// "<tech_rep>_<scanmode>.RData" by relative path, so presumably these are the
// per-replicate peak files - TODO confirm against the calling workflow
path(rdata_files)
// sample_id -> --sample_name, tech_reps -> --tech_reps, scanmode -> --scanmode
tuple val(sample_id), val(tech_reps), val(scanmode)

output:
// matches the "AvgPeaks_<sample_name>_<scanmode>.RData" file saved by AveragePeaks.R
path 'AvgPeaks_*.RData'

script:
// NOTE(review): AveragePeaks.R splits --tech_reps on ";". The values below are
// interpolated into the bash script without quotes, so a bare ";" (or a space in
// sample_id) would be interpreted by the shell unless the upstream channel already
// delivers shell-quoted values - confirm, and quote here if it does not.
"""
Rscript ${baseDir}/CustomModules/DIMS/AveragePeaks.R \
--sample_name $sample_id \
--tech_reps $tech_reps \
--scanmode $scanmode \
--preprocessing_scripts_dir $params.preprocessing_scripts_dir
"""
}
77 changes: 43 additions & 34 deletions DIMS/AverageTechReplicates.R
Original file line number Diff line number Diff line change
@@ -1,28 +1,37 @@
# adapted from 3-AverageTechReplicates.R

# load packages
library("argparse")
library("ggplot2")
library("gridExtra")

# define parameters
cmd_args <- commandArgs(trailingOnly = TRUE)
parser <- ArgumentParser(description = "AverageTechReplicates")

init_file <- cmd_args[1]
nr_replicates <- as.numeric(cmd_args[2])
run_name <- cmd_args[3]
dims_matrix <- cmd_args[4]
highest_mz_file <- cmd_args[5]
highest_mz <- get(load(highest_mz_file))
breaks_filepath <- cmd_args[6]
thresh2remove <- as.numeric(cmd_args[7])
parser$add_argument("--init_filepath", dest = "init_file",
help = "File path for the init RData file", required = TRUE)
parser$add_argument("-n", "--nr_replicates", dest = "nr_replicates", type = "integer",
help = "Number of replicates", required = TRUE)
parser$add_argument("--run_name", dest = "run_name",
help = "The run name/analysis ID", required = TRUE)
parser$add_argument("--matrix", dest = "dims_matrix",
help = "The matrix used, e.g. Plasma, Research, ...")
parser$add_argument("--highest_mz_file", dest = "highest_mz_file",
help = "File path for the highest Mz RData file", required = TRUE)
parser$add_argument("--breaks_filepath", dest = "breaks_filepath",
help = "File path for the breaks RData file", required = TRUE)

args <- parser$parse_args()

highest_mz <- get(load(args$highest_mz_file))
thresh2remove <- 1000000000

remove_from_repl_pattern <- function(bad_samples, repl_pattern, nr_replicates) {
# collect list of samples to remove from replication pattern
remove_from_group <- NULL
for (sample_nr in 1:length(repl_pattern)){
for (sample_nr in seq_along(repl_pattern)){
repl_pattern_1sample <- repl_pattern[[sample_nr]]
remove <- NULL
for (file_nr in 1:length(repl_pattern_1sample)) {
for (file_nr in seq_along(repl_pattern_1sample)) {
if (repl_pattern_1sample[file_nr] %in% bad_samples) {
remove <- c(remove, file_nr)
}
Expand All @@ -41,11 +50,11 @@ remove_from_repl_pattern <- function(bad_samples, repl_pattern, nr_replicates) {
}

# load init_file: contains repl_pattern
load(init_file)
load(args$init_file)

# load breaks_file: contains breaks_fwhm, breaks_fwhm_avg,
# trim_left_neg, trim_left_pos, trim_right_neg & trim_right_pos
load(breaks_filepath)
load(args$breaks_filepath)

# lower the threshold for non Plasma matrices
if (dims_matrix != "Plasma") {
Expand All @@ -64,15 +73,15 @@ if (highest_mz > 700) {
remove_neg <- NULL
remove_pos <- NULL
cat("Pklist sum threshold to remove technical replicate:", thresh2remove, "\n")
for (sample_nr in 1:length(repl_pattern)) {
for (sample_nr in seq_along(repl_pattern)) {
tech_reps <- as.vector(unlist(repl_pattern[sample_nr]))
tech_reps_array_pos <- NULL
tech_reps_array_neg <- NULL
sum_neg <- 0
sum_pos <- 0
nr_pos <- 0
nr_neg <- 0
for (file_nr in 1:length(tech_reps)) {
for (file_nr in seq_along(tech_reps)) {
load(paste(tech_reps[file_nr], ".RData", sep = ""))
cat("\n\nParsing", tech_reps[file_nr])
# negative scanmode
Expand All @@ -96,7 +105,7 @@ for (sample_nr in 1:length(repl_pattern)) {
}
tech_reps_array_pos <- cbind(tech_reps_array_pos, peak_list$pos)
}
# save to file
# save to file
if (nr_neg != 0) {
sum_neg[, 1] <- sum_neg[, 1] / nr_neg
colnames(sum_neg) <- names(repl_pattern)[sample_nr]
Expand All @@ -109,25 +118,25 @@ for (sample_nr in 1:length(repl_pattern)) {
}
}

pattern_list <- remove_from_repl_pattern(remove_neg, repl_pattern, nr_replicates)
pattern_list <- remove_from_repl_pattern(remove_neg, repl_pattern, args$nr_replicates)
repl_pattern_filtered <- pattern_list$pattern
save(repl_pattern_filtered, file = "negative_repl_pattern.RData")
write.table(
remove_neg,
file = "miss_infusions_negative.txt",
row.names = FALSE,
col.names = FALSE,
remove_neg,
file = "miss_infusions_negative.txt",
row.names = FALSE,
col.names = FALSE,
sep = "\t"
)

pattern_list <- remove_from_repl_pattern(remove_pos, repl_pattern, nr_replicates)
pattern_list <- remove_from_repl_pattern(remove_pos, repl_pattern, args$nr_replicates)
repl_pattern_filtered <- pattern_list$pattern
save(repl_pattern_filtered, file = "positive_repl_pattern.RData")
write.table(
remove_pos,
file = "miss_infusions_positive.txt",
row.names = FALSE,
col.names = FALSE,
remove_pos,
file = "miss_infusions_positive.txt",
row.names = FALSE,
col.names = FALSE,
sep = "\t"
)

Expand All @@ -150,10 +159,10 @@ for (file in tic_files) {
# create a list with information for all TIC plots
tic_plot_list <- list()
plot_nr <- 0
for (sample_nr in c(1:length(repl_pattern))) {
for (sample_nr in seq_along(repl_pattern)) {
tech_reps <- as.vector(unlist(repl_pattern[sample_nr]))
sample_name <- names(repl_pattern)[sample_nr]
for (file_nr in 1:length(tech_reps)) {
for (file_nr in seq_along(tech_reps)) {
plot_nr <- plot_nr + 1
# read file with retention time, intensity and dims_threshold values
repl1_nr <- read.table(paste0(tech_reps[file_nr], "_TIC.txt"))
Expand All @@ -163,7 +172,7 @@ for (sample_nr in c(1:length(repl_pattern))) {
# for replicates with bad TIC, determine what color the border of the plot should be
bad_color_pos <- tech_reps[file_nr] %in% remove_pos
bad_color_neg <- tech_reps[file_nr] %in% remove_neg
if (bad_color_neg & bad_color_pos) {
if (bad_color_neg && bad_color_pos) {
plot_color <- "#F8766D"
} else if (bad_color_pos) {
plot_color <- "#ED8141"
Expand Down Expand Up @@ -191,19 +200,19 @@ for (sample_nr in c(1:length(repl_pattern))) {
}

# create a layout matrix dependent on number of replicates
layout <- matrix(1:(10 * nr_replicates), 10, nr_replicates, TRUE)
layout <- matrix(1:(10 * args$nr_replicates), 10, args$nr_replicates, TRUE)
# put TIC plots in matrix
tic_plot_pdf <- marrangeGrob(
grobs = tic_plot_list,
nrow = 10, ncol = nr_replicates,
nrow = 10, ncol = args$nr_replicates,
layout_matrix = layout,
top = quote(paste(
"TICs of run", run_name,
"TICs of run", args$run_name,
" \n colors: red = both modes misinjection, orange = pos mode misinjection, purple = neg mode misinjection \n ",
g, "/", npages
))
)

# save to file
ggsave(filename = paste0(run_name, "_TICplots.pdf"),
ggsave(filename = paste0(args$run_name, "_TICplots.pdf"),
tic_plot_pdf, width = 21, height = 29.7, units = "cm")
16 changes: 8 additions & 8 deletions DIMS/AverageTechReplicates.nf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
process AverageTechReplicates {
tag "DIMS AverageTechReplicates"
label 'AverageTechReplicates'
container = 'docker://umcugenbioinf/dims:1.3'
container = 'docker://umcugenbioinf/dims:1.4'
shell = ['/bin/bash', '-euo', 'pipefail']

input:
Expand All @@ -23,13 +23,13 @@ process AverageTechReplicates {

script:
"""
Rscript ${baseDir}/CustomModules/DIMS/AverageTechReplicates.R $init_file \
$params.nr_replicates \
$analysis_id \
$matrix \
$highest_mz_file \
$breaks_file \
$params.threshold_tics
Rscript ${baseDir}/CustomModules/DIMS/AverageTechReplicates.R \\
--init_filepath $init_file \\
--nr_replicates $params.nr_replicates \\
--run_name $analysis_id \\
--matrix $matrix \\
--highest_mz_file $highest_mz_file \\
--breaks_filepath $breaks_file
"""
}

Expand Down
Loading