-
Notifications
You must be signed in to change notification settings - Fork 0
Feature/refactor_DIMS_GenerateBreaks #95
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
4c937de
093a4e9
1c98907
76e4a56
aa127cd
9c2d41c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,61 +1,23 @@ | ||
| ## adapted from 1-generateBreaksFwhm.HPC.R ## | ||
|
|
||
| # load required package | ||
| suppressPackageStartupMessages(library("xcms")) | ||
|
|
||
| # define parameters | ||
| cmd_args <- commandArgs(trailingOnly = TRUE) | ||
|
|
||
| filepath <- cmd_args[1] | ||
| outdir <- cmd_args[2] | ||
| trim <- as.numeric(cmd_args[3]) | ||
| resol <- as.numeric(cmd_args[4]) | ||
|
|
||
| # initialize | ||
| trim_left_pos <- NULL | ||
| trim_right_pos <- NULL | ||
| trim_left_neg <- NULL | ||
| trim_right_neg <- NULL | ||
| breaks_fwhm <- NULL | ||
| breaks_fwhm_avg <- NULL | ||
| bins <- NULL | ||
| trim <- as.numeric(cmd_args[2]) | ||
| resol <- as.numeric(cmd_args[3]) | ||
|
|
||
| # read in mzML file | ||
| raw_data <- suppressMessages(xcms::xcmsRaw(filepath)) | ||
|
|
||
| # Get time values for positive and negative scans | ||
| pos_times <- raw_data@scantime[raw_data@polarity == "positive"] | ||
| neg_times <- raw_data@scantime[raw_data@polarity == "negative"] | ||
| # get trim parameters and save them to file | ||
| get_trim_parameters(raw_data@scantime, raw_data@polarity, trim) | ||
|
|
||
| # trim (remove) scans at the start and end for positive | ||
| trim_left_pos <- round(pos_times[length(pos_times) * (trim * 1.5)]) # 15% aan het begin | ||
| trim_right_pos <- round(pos_times[length(pos_times) * (1 - (trim * 0.5))]) # 5% aan het eind | ||
| # create breaks of bins for intensities. Bin size is a function of fwhm which is a function of m/z | ||
| get_breaks_for_bins(raw_data$mzrange, resol) | ||
|
|
||
| # trim (remove) scans at the start and end for negative | ||
| trim_left_neg <- round(neg_times[length(neg_times) * trim]) | ||
| trim_right_neg <- round(neg_times[length(neg_times) * (1 - trim)]) | ||
|
|
||
| # Mass range m/z | ||
| low_mz <- raw_data@mzrange[1] | ||
| # Determine maximum m/z and save to file | ||
| high_mz <- raw_data@mzrange[2] | ||
|
|
||
| # determine number of segments (bins) | ||
| nr_segments <- 2 * (high_mz - low_mz) | ||
| segment <- seq(from = low_mz, to = high_mz, length.out = nr_segments + 1) | ||
|
|
||
| # determine start and end of each bin. | ||
| for (i in 1:nr_segments) { | ||
| start_segment <- segment[i] | ||
| end_segment <- segment[i+1] | ||
| resol_mz <- resol * (1 / sqrt(2) ^ (log2(start_segment / 200))) | ||
| fwhm_segment <- start_segment / resol_mz | ||
| breaks_fwhm <- c(breaks_fwhm, seq(from = (start_segment + fwhm_segment), to = end_segment, by = 0.2 * fwhm_segment)) | ||
| # average the m/z instead of start value | ||
| range <- seq(from = (start_segment + fwhm_segment), to = end_segment, by = 0.2 * fwhm_segment) | ||
| delta_mz <- range[2] - range[1] | ||
| breaks_fwhm_avg <- c(breaks_fwhm_avg, range + 0.5 * delta_mz) | ||
| } | ||
|
|
||
| # generate output file | ||
| save(breaks_fwhm, breaks_fwhm_avg, trim_left_pos, trim_right_pos, trim_left_neg, trim_right_neg, file = "breaks.fwhm.RData") | ||
| save(high_mz, file = "highest_mz.RData") | ||
|
|
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
| @@ -0,0 +1,58 @@ | ||||
| # GenerateBreaks functions | ||||
| get_trim_parameters <- function(scantimes, polarities, trim = 0.1) { | ||||
| #' determine the scans per scanmode which are trimmed off; save trim parameters to file | ||||
| #' | ||||
| #' @param scantimes: vector of scan times in seconds | ||||
| #' @param polarities: vector of polarities (positive or negative) | ||||
|
Comment on lines
+5
to
+6
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am missing a description of the trim param |
||||
|
|
||||
| # Get time values for positive and negative scans | ||||
| pos_times <- scantimes[polarities == "positive"] | ||||
| neg_times <- scantimes[polarities == "negative"] | ||||
|
|
||||
| # trim (remove) scans at the start (15%) and end (5%) for positive | ||||
| trim_left_pos <- round(pos_times[length(pos_times) * (trim * 1.5)]) | ||||
| trim_right_pos <- round(pos_times[length(pos_times) * (1 - (trim * 0.5))]) | ||||
|
Comment on lines
+13
to
+14
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am a bit confused what trim is exactly? |
||||
|
|
||||
| # trim (remove) scans at the start and end (10%) for negative | ||||
| trim_left_neg <- round(neg_times[length(neg_times) * trim]) | ||||
| trim_right_neg <- round(neg_times[length(neg_times) * (1 - trim)]) | ||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No percentages here? |
||||
|
|
||||
| # save trim parameters to file | ||||
| save(trim_left_pos, trim_right_pos, trim_left_neg, trim_right_neg, file = "trim_params.RData") | ||||
| } | ||||
|
|
||||
| get_breaks_for_bins <- function(mzrange, resol = 140000) { | ||||
| #' create a vector with the breaks in m/z of bins for intensities | ||||
| #' | ||||
| #' @param mzrange: vector of minimum and maximum m/z values (integeers) | ||||
| #' @param resol: value for resolution (integer) | ||||
|
|
||||
| # initialize | ||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||
| breaks_fwhm <- NULL | ||||
| breaks_fwhm_avg <- NULL | ||||
|
|
||||
| # determine number of segments used to create bins | ||||
| nr_segments <- 2 * (mzrange[2] - mzrange[1]) | ||||
| segments <- seq(from = mzrange[1], to = mzrange[2], length.out = nr_segments + 1) | ||||
|
|
||||
| # determine start and end of each bin. fwhm (width of peaks) is assumed to be constant within a segment | ||||
| for (segment_index in 1:nr_segments) { | ||||
| start_segment <- segments[segment_index] | ||||
| end_segment <- segments[segment_index + 1] | ||||
| # determine resolution at given m/z value | ||||
| resol_mz <- resol * (1 / sqrt(2) ^ (log2(start_segment / 200))) | ||||
| # determine fwhm (full width at half maximum) of the peaks in this segment | ||||
| fwhm_segment <- start_segment / resol_mz | ||||
| # determine the breaks within this segment | ||||
| breaks_segment <- seq(from = (start_segment + fwhm_segment), to = end_segment, by = 0.2 * fwhm_segment) | ||||
| # add breaks for this segment to vector with all breaks | ||||
| breaks_fwhm <- c(breaks_fwhm, seq(from = (start_segment + fwhm_segment), to = end_segment, by = 0.2 * fwhm_segment)) | ||||
| # get a vector of average m/z instead of start value | ||||
| delta_mz <- breaks_segment[2] - breaks_segment[1] | ||||
| avg_breaks_segment <- breaks_segment + 0.5 * delta_mz | ||||
| breaks_fwhm_avg <- c(breaks_fwhm_avg, avg_breaks_segment) | ||||
| } | ||||
|
|
||||
| # save breaks to file | ||||
| save(breaks_fwhm, breaks_fwhm_avg, file = "breaks.fwhm.RData") | ||||
| } | ||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in Would it make sense to add a test to see what happens if that is not the case? |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| # unit tests for GenerateBreaks | ||
| # functions: get_trim_parameters and get_breaks_for_bins | ||
|
|
||
| # source all functions for GenerateBreaks | ||
| source("../../preprocessing/generate_breaks_functions.R") | ||
|
|
||
| # test get_trim_parameters | ||
| testthat::test_that("trim parameters are correctly calculated", { | ||
| # create list of scan times to test on: | ||
| test_scantimes <- seq(0, 100, length = 168) | ||
| test_polarities <- c(rep("positive", 84), rep("negative", 84)) | ||
| test_trim <- 0.1 | ||
|
|
||
| # test that the function produces no output except trim_params.RData file | ||
| expect_silent(get_trim_parameters(test_scantimes, test_polarities, test_trim)) | ||
| expect_true(file.exists("./trim_params.RData")) | ||
|
|
||
| # test the parameters in the output RData file | ||
| load("./trim_params.RData") | ||
| test_trim_left_pos <- trim_left_pos | ||
| test_trim_left_neg <- trim_left_neg | ||
| test_trim_right_pos <- trim_right_pos | ||
| test_trim_right_neg <- trim_right_neg | ||
| rm(trim_left_pos, trim_left_neg, trim_right_pos, trim_right_neg) | ||
|
|
||
| # load previously stored values from fixtures | ||
| load("fixtures/test_trim_params.RData") | ||
| expect_equal(test_trim_left_pos, trim_left_pos) | ||
| expect_equal(test_trim_left_neg, trim_left_neg) | ||
| expect_equal(test_trim_right_pos, trim_right_pos) | ||
| expect_equal(test_trim_right_neg, trim_right_neg) | ||
| }) | ||
|
|
||
| # test get_breaks_for_bins | ||
| testthat::test_that("breaks are correctly calculated", { | ||
| # create list of scan times to test on: | ||
| test_mzrange <- c(300, 400) | ||
| test_resol <- 140000 | ||
|
|
||
| # test that the function produces no output except breaks.fwhm.RData file | ||
| expect_silent(get_breaks_for_bins(test_mzrange, test_resol)) | ||
| expect_true(file.exists("./breaks.fwhm.RData")) | ||
|
|
||
| # test the vectors in the output RData file | ||
| load("./breaks.fwhm.RData") | ||
| test_breaks_fwhm <- breaks_fwhm | ||
| test_breaks_fwhm_avg <- breaks_fwhm_avg | ||
| rm(breaks_fwhm, breaks_fwhm_avg) | ||
|
|
||
| # load breaks from fixtures and compare vectors | ||
| load("fixtures/breaks.fwhm.RData") | ||
| expect_equal(test_breaks_fwhm, breaks_fwhm) | ||
| expect_equal(test_breaks_fwhm_avg, breaks_fwhm_avg) | ||
| }) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please refrain from referencing params and baseDir in the script block.
See #94 (comment) and #94 (comment)