Skip to content

Commit de7cfde

Browse files
authored
Merge pull request #255 from mwang87/bug-fixes
adding tests and filters for mass defect on MS2
2 parents e6a5182 + 4eeb776 commit de7cfde

File tree

2 files changed

+49
-2
lines changed

2 files changed

+49
-2
lines changed

massql/msql_engine_filters.py

Lines changed: 29 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -248,11 +248,20 @@ def ms2prod_condition(condition, ms1_df, ms2_df, reference_conditions_register):
248248
mz_min = mz - mz_tol
249249
mz_max = mz + mz_tol
250250

251+
massdefect_min, massdefect_max = _get_massdefect_min(condition.get("qualifiers", None))
251252
intensity_mask = _get_intensity_mask(ms2_df, condition.get("qualifiers", None))
252253

253254
ms2_filtered_df = ms2_df[(ms2_df["mz"] > mz_min) &
254255
(ms2_df["mz"] < mz_max) &
255256
intensity_mask]
257+
258+
if massdefect_min > 0 or massdefect_max < 1:
259+
ms2_filtered_df["mz_defect"] = ms2_filtered_df["mz"] - ms2_filtered_df["mz"].astype(int)
260+
261+
ms2_filtered_df = ms2_filtered_df[
262+
(ms2_filtered_df["mz_defect"] > massdefect_min) &
263+
(ms2_filtered_df["mz_defect"] < massdefect_max)
264+
]
256265

257266
# Setting the intensity match register
258267
_set_intensity_register(ms2_filtered_df, reference_conditions_register, condition)
@@ -328,6 +337,7 @@ def ms2nl_condition(condition, ms1_df, ms2_df, reference_conditions_register):
328337
nl_min = mz - mz_tol
329338
nl_max = mz + mz_tol
330339

340+
massdefect_min, massdefect_max = _get_massdefect_min(condition.get("qualifiers", None))
331341
intensity_mask = _get_intensity_mask(ms2_df, condition.get("qualifiers", None))
332342

333343
ms2_filtered_df = ms2_df[
@@ -336,6 +346,14 @@ def ms2nl_condition(condition, ms1_df, ms2_df, reference_conditions_register):
336346
intensity_mask
337347
]
338348

349+
if massdefect_min > 0 or massdefect_max < 1:
350+
ms2_filtered_df["mz_defect"] = (ms2_filtered_df["precmz"] - ms2_filtered_df["mz"]) - (ms2_filtered_df["precmz"] - ms2_filtered_df["mz"]).astype(int)
351+
352+
ms2_filtered_df = ms2_filtered_df[
353+
(ms2_filtered_df["mz_defect"] > massdefect_min) &
354+
(ms2_filtered_df["mz_defect"] < massdefect_max)
355+
]
356+
339357
# Setting the intensity match register
340358
_set_intensity_register(ms2_filtered_df, reference_conditions_register, condition)
341359

@@ -402,6 +420,7 @@ def ms2prec_condition(condition, ms1_df, ms2_df, reference_conditions_register):
402420
(ms2_filtered_df["precmz_defect"] < massdefect_max)
403421
]
404422
else:
423+
massdefect_min, massdefect_max = _get_massdefect_min(condition.get("qualifiers", None))
405424
mz_tol = _get_mz_tolerance(condition.get("qualifiers", None), mz)
406425
mz_min = mz - mz_tol
407426
mz_max = mz + mz_tol
@@ -411,6 +430,14 @@ def ms2prec_condition(condition, ms1_df, ms2_df, reference_conditions_register):
411430
(ms2_df["precmz"] < mz_max)
412431
]
413432

433+
if massdefect_min > 0 or massdefect_max < 1:
434+
ms2_filtered_df["precmz_defect"] = ms2_filtered_df["precmz"] - ms2_filtered_df["precmz"].astype(int)
435+
436+
ms2_filtered_df = ms2_filtered_df[
437+
(ms2_filtered_df["precmz_defect"] > massdefect_min) &
438+
(ms2_filtered_df["precmz_defect"] < massdefect_max)
439+
]
440+
414441
ms2_list.append(ms2_filtered_df)
415442

416443
if len(ms2_list) == 1:
@@ -494,7 +521,7 @@ def ms1_condition(condition, ms1_df, ms2_df, reference_conditions_register, ms1_
494521
(ms1_df["mz"] < mz_max) &
495522
intensity_mask]
496523

497-
if massdefect_min > 0 or massdefect_max < 1:
524+
if massdefect_min != 0 or massdefect_max != 1:
498525
ms1_filtered_df["mz_defect"] = ms1_filtered_df["mz"] - ms1_filtered_df["mz"].astype(int)
499526

500527
ms1_filtered_df = ms1_filtered_df[
@@ -608,7 +635,7 @@ def ms1_filter(condition, ms1_df):
608635
(ms1_df["mz"] < mz_max) &
609636
intensity_mask]
610637

611-
if massdefect_min > 0 or massdefect_max < 1:
638+
if massdefect_min != 0 or massdefect_max != 1:
612639
ms1_filtered_df["mz_defect"] = ms1_filtered_df["mz"] - ms1_filtered_df["mz"].astype(int)
613640

614641
ms1_filtered_df = ms1_filtered_df[

tests/test_query.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -615,6 +615,26 @@ def test_massdefect_ANY_query():
615615
results_df = msql_engine.process_query(query, "tests/data/GNPS00002_A3_p.mzML")
616616
assert(len(results_df) == 77)
617617

618+
def test_massdefect_bug_reproduction():
    """Regression test for the MASSDEFECT qualifier on MS2PROD and MS1MZ.

    Runs three scenarios against the same mzML file:

    1. MS2PROD=226.18 with a mass-defect window of (0.8, 0.9) is expected
       to return no rows.
    2. MS1MZ=226.18 with the full mass-defect window (0.0, 1.0) is expected
       to return as many rows as the same query without the qualifier.
    3. MS1MZ=226.18 with a mass-defect window of (0.8, 0.9) is expected
       to return no rows.
    """
    mzml_path = "tests/data/GNPS00002_A3_p.mzML"

    # Scenario 1: MS2 product ion with an excluding mass-defect window.
    ms2prod_hits = msql_engine.process_query(
        "QUERY MS2DATA WHERE MS2PROD=226.18:MASSDEFECT=massdefect(min=0.8,max=0.9)",
        mzml_path,
    )
    assert len(ms2prod_hits) == 0

    # Scenario 2: a full-range window must match the unqualified baseline.
    ms1_full_range_hits = msql_engine.process_query(
        "QUERY MS1DATA WHERE MS1MZ=226.18:MASSDEFECT=massdefect(min=0.0,max=1.0)",
        mzml_path,
    )
    ms1_baseline_hits = msql_engine.process_query(
        "QUERY MS1DATA WHERE MS1MZ=226.18",
        mzml_path,
    )
    assert len(ms1_full_range_hits) == len(ms1_baseline_hits)

    # Scenario 3: MS1 peak with an excluding mass-defect window.
    ms1_excluded_hits = msql_engine.process_query(
        "QUERY MS1DATA WHERE MS1MZ=226.18:MASSDEFECT=massdefect(min=0.8,max=0.9)",
        mzml_path,
    )
    assert len(ms1_excluded_hits) == 0
637+
618638
def test_advanced_filters():
619639
query = """
620640
QUERY scansum(MS1DATA) FILTER MS1MZ=ANY:TOLERANCEMZ=35:MASSDEFECT=massdefect(min=0.1332, max=0.2112)

0 commit comments

Comments
 (0)