11"""
22 ----------------------------------------
3- IDC Radiomics use case (Colab Demo )
3+ IDC Radiomics use case (Colab)
44
5- useful functions for data handling/proc
5+ Functions for data handling/processing
66 ----------------------------------------
77
88 ----------------------------------------
99 Author: Dennis Bontempi
10- Email: dennis_bontempi@dfci.harvard.edu
11- Modified: 07 OCT 20
10+ Email: dbontempi@bwh.harvard.edu
1211 ----------------------------------------
1312
1413"""
1514
1615import os
1716import json
18- import pydicom
1917import numpy as np
2018import SimpleITK as sitk
2119
2220import subprocess
2321
2422## ----------------------------------------
2523
26- # normalise the values of the volume between new_min_val and new_max_val
2724def normalise_volume (input_volume , new_min_val , new_max_val , old_min_val = None , old_max_val = None ):
2825
2926 """
30- Normalise a numpy volume intensity in a range between two given values
31-
32- @params:
33- input_volume - required : numpy volume to rescale (intensity-wise) in the new range.
34- new_min_val - required : the lower bound of the new intensity range.
35- new_max_val - required : the upper bound of the new intensity range.
36- old_min_val - optional : the lower bound of the old intensity range. Defaults to input_volume's np.min()
37- old_max_val - optional : the lower bound of the old intensity range. Defaults to input_volume's np.max()
38-
27+ This function normalizes the volume of an image between
28+ a new minimum and maximum value, given the old minimum and maximum values.
29+
30+ Parameters:
31+ - input_volume: a 3D numpy array representing the image to be normalized.
32+ - new_min_val: a float representing the new minimum value for the normalized image.
33+ - new_max_val: a float representing the new maximum value for the normalized image.
34+ - old_min_val: a float representing the old minimum value of the image.
35+ If None, the minimum value of the input_volume will be used.
36+ - old_max_val: a float representing the old maximum value of the image.
37+ If None, the maximum value of the input_volume will be used.
38+
39+ Returns:
40+ - a 3D numpy array representing the normalized image
3941 """
4042
4143 # make sure the input volume is treated as a float volume
@@ -46,7 +48,6 @@ def normalise_volume(input_volume, new_min_val, new_max_val, old_min_val = None,
4648 curr_min = np .min (input_volume ) if old_min_val == None else old_min_val
4749 curr_max = np .max (input_volume ) if old_max_val == None else old_max_val
4850
49-
5051 # normalise the values of each voxel between zero and one
5152 zero_to_one_norm = (input_volume - curr_min )/ (curr_max - curr_min )
5253
@@ -59,26 +60,20 @@ def normalise_volume(input_volume, new_min_val, new_max_val, old_min_val = None,
5960def compute_center_of_mass (input_mask ):
6061
6162 """
62- Compute the center of mass (CoM) of a given binary 3D numpy segmask (fast version, numpy based).
63- The idea is the following:
64- - create a 4D vector starting from the 3D, populating each channel as follows:
65- (x pos, y pos, z pos, mask_value);
66- - keep only the nonzero values;
67- - compute an average of the position (CoM) using such triplets (x, y, z).
68-
69- @params:
70- input_mask - required : the 3D numpy vector (binary mask) to compute the center of mass of.
71-
72- @returns:
73- ctr_mass - a list of three elements storing the coordinates of the CoM
74- (the axes are the same as the input mask)
75-
76- """
63+ This function computes the center of mass (CoM) of a binary 3D mask.
64+
65+ Parameters:
66+ - input_mask: a 3D numpy array representing the binary mask.
67+
68+ Returns:
69+ - a 3D numpy array representing the CoM in (x, y, z) coordinates
7770
78- # sanity check: mask should be binary (multi-class RT is taken care of during in the export function)
71+ """
72+
73+ # sanity check: the mask should be binary
7974 assert (len (np .unique (input_mask )) <= 2 )
8075
81- # display and log warning
76+ # display a warning if the mask is empty
8277 if len (np .unique (input_mask )) == 1 :
8378 print ('WARNING: DICOM RTSTRUCT is empty.' )
8479 return [- 1 , - 1 , - 1 ]
@@ -103,32 +98,35 @@ def compute_center_of_mass(input_mask):
10398 nonzero_voxels = segmask_4d [np .nonzero (segmask_4d [:, :, :, 3 ])]
10499
105100 # average the (x, y, z) triplets
106- ctr_mass = np .average (nonzero_voxels [:, :3 ], axis = 0 )
101+ com = np .average (nonzero_voxels [:, :3 ], axis = 0 )
107102
108- return ctr_mass
109-
103+ return com
110104
111105## ----------------------------------------
112106## ----------------------------------------
113107
114108def get_bbox_dict (int_center_of_mass , seg_mask_shape , bbox_size , z_first = True ):
115109
110+
116111 """
117- Compute the crop bounding box indices starting from the CoM and the BBox size.
112+ Computes the bounding box of a segmented mask centered around a center of mass, given a specified box size.
118113
119- @params:
120- int_center_of_mass - required : a list of integers (int(CoM)) where CoM is the output
121- of the function "compute_center_of_mass()"
122- bbox_size - required : the size of the bounding box along each axis.
123- z_first - optional : if True, returns the dict as (lon, cor, sag), i.e., (z, y, x),
124- as opposed to (sag, cor, lon), i.e., (x, y, z) (defaults to True).
125-
126- @returns:
127- bbox : a dictionary formatted as follows:
114+ Parameters:
115+ - int_center_of_mass: a tuple or list of three integers representing the center of mass.
116+ - seg_mask_shape: a tuple or list of three integers representing the shape of the segmented mask.
117+ - bbox_size: a tuple or list of three integers representing the size of the bounding box
118+ in each direction (coronal, sagittal, and longitudinal).
119+ - z_first: a boolean indicating whether the longitudinal direction is the first dimension (True)
120+ or the third dimension (False). Defaults to True.
121+
122+ Returns:
123+ - a dictionary with the indices of the first and last voxels in each direction (coronal, sagittal, and longitudinal)
124+ that define the bounding box, formatted as follows:
125+
128126 {
129- 'lon': {'first': 89, 'last': 139}
130- 'cor': {'first': 241, 'last': 291},
131- 'sag': {'first': 150, 'last': 200}
127+ 'lon': {'first': 89, 'last': 139}
128+ 'cor': {'first': 241, 'last': 291},
129+ 'sag': {'first': 150, 'last': 200}
132130 }
133131 """
134132
@@ -146,8 +144,7 @@ def get_bbox_dict(int_center_of_mass, seg_mask_shape, bbox_size, z_first = True)
146144 lon_idx = 0 if z_first else 2
147145 sag_idx = 2 if z_first else 0
148146
149-
150- # bounding box if no exception is found
147+ # bounding box if no exception is raised
151148 sag_first = int (int_center_of_mass [sag_idx ] - np .round (bbox_size [sag_idx ]/ 2 ))
152149 sag_last = int (int_center_of_mass [sag_idx ] + np .round (bbox_size [sag_idx ]/ 2 )) - 1
153150
@@ -156,7 +153,6 @@ def get_bbox_dict(int_center_of_mass, seg_mask_shape, bbox_size, z_first = True)
156153
157154 lon_first = int (int_center_of_mass [lon_idx ] - np .round (bbox_size [lon_idx ]/ 2 ))
158155 lon_last = int (int_center_of_mass [lon_idx ] + np .round (bbox_size [lon_idx ]/ 2 )) - 1
159-
160156
161157 # print out exceptions
162158 if sag_last > seg_mask_shape [sag_idx ] - 1 or sag_first < 0 :
@@ -171,7 +167,6 @@ def get_bbox_dict(int_center_of_mass, seg_mask_shape, bbox_size, z_first = True)
171167 print ('WARNING: the bounding box size exceeds volume dimensions (lon axis)' )
172168 print ('Cropping will be performed ignoring the "bbox_size" parameter' )
173169
174-
175170 # take care of exceptions where bbox is bigger than the actual volume
176171 sag_first = int (np .max ([0 , sag_first ]))
177172 sag_last = int (np .min ([seg_mask_shape [sag_idx ] - 1 , sag_last ]))
@@ -182,7 +177,6 @@ def get_bbox_dict(int_center_of_mass, seg_mask_shape, bbox_size, z_first = True)
182177 lon_first = int (np .max ([0 , lon_first ]))
183178 lon_last = int (np .min ([seg_mask_shape [lon_idx ] - 1 , lon_last ]))
184179
185-
186180 # populate the dictionary and return it
187181 bbox ['sag' ]['first' ] = sag_first
188182 bbox ['sag' ]['last' ] = sag_last
@@ -195,68 +189,27 @@ def get_bbox_dict(int_center_of_mass, seg_mask_shape, bbox_size, z_first = True)
195189
196190 return bbox
197191
198-
199192## ----------------------------------------
200193## ----------------------------------------
201194
202- def get_input_volume (input_ct_nrrd_path ):
203-
204- """
205- Should accept path to (ideally 150x150x150) NRRD volumes only, obtained
206- exploting the pipeline in "lung1_preprocessing.ipynb" using the functions above.
207-
208- FIXME: add as params also "com_int" and "final_crop_size = (50, 50, 50)"
209- (handling of volumes that are not 150x150x150?)
210-
211- """
212-
213- sitk_ct_nrdd = sitk .ReadImage (input_ct_nrrd_path )
214- ct_nrdd = sitk .GetArrayFromImage (sitk_ct_nrdd )
215-
216- # volume intensity normalisation, as seen in:
217- # https://github.com/modelhub-ai/deep-prognosis/blob/master/contrib_src/processing.py
218- ct_nrdd_norm = normalise_volume (input_volume = ct_nrdd ,
219- new_min_val = 0 ,
220- new_max_val = 1 ,
221- old_min_val = - 1024 ,
222- old_max_val = 3071 )
223-
224- # FIXME: handle exceptions for volumes that are not 150x150x150
225- ct_nrdd_norm_crop = ct_nrdd_norm [50 :100 , 50 :100 , 50 :100 ]
195+ def export_res_nrrd_from_dicom (dicom_ct_path , dicom_rt_path , output_dir , pat_id ,
196+ ct_interpolation = 'linear' , output_dtype = "int" ):
226197
227198 """
228- # FIXME: debug
229- print(ct_nrdd.shape)
230- print(ct_nrdd_norm.shape)
231- print(min_x, max_x)
232- print(min_y, max_y)
233- print(min_z, max_z)
234- print(ct_nrdd_norm_crop.shape)
235- """
236-
237- return ct_nrdd_norm_crop
238199
200+ This function exports a resampled CT scan and a resampled RT structure set from a DICOM CT and a DICOM RT structure set.
239201
240- ## ----------------------------------------
241- ## ----------------------------------------
202+ Parameters:
203+ - dicom_ct_path: a string representing the path of the DICOM CT folder.
204+ - dicom_rt_path: a string representing the path of the DICOM RT structure set folder.
205+ - output_dir: a string representing the path of the output directory.
206+ - pat_id: a string representing the patient ID.
207+ - ct_interpolation: a string representing the interpolation method to be used for the CT scan resampling.
208+ - output_dtype: a string representing the data type of the exported nrrd files.
209+
210+ Returns:
211+ - a dictionary containing the paths of the exported nrrd files.
242212
243- def export_res_nrrd_from_dicom (dicom_ct_path , dicom_rt_path , output_dir , pat_id ,
244- ct_interpolation = 'linear' , output_dtype = "int" ):
245-
246- """
247- Convert DICOM CT and RTSTRUCT sequences to NRRD files and resample to 1-mm isotropic
248- exploiting plastimatch (direct call, bash-like).
249-
250- @params:
251- dicom_ct_path - required :
252- dicom_rt_path - required :
253- output_dir - required :
254- pat_id - required :
255- output_dtype - optional :
256-
257- @returns:
258- out_log :
259-
260213 """
261214
262215 out_log = dict ()
@@ -268,7 +221,7 @@ def export_res_nrrd_from_dicom(dicom_ct_path, dicom_rt_path, output_dir, pat_id,
268221 # log the labels of the exported segmasks
269222 rt_struct_list_path = os .path .join (output_dir , pat_id + '_rt_list.txt' )
270223
271- # convert DICOM CT to NRRD file - no resampling
224+ # convert DICOM CT to NRRD file without resampling
272225 bash_command = list ()
273226 bash_command += ["plastimatch" , "convert" ]
274227 bash_command += ["--input" , dicom_ct_path ]
@@ -279,8 +232,7 @@ def export_res_nrrd_from_dicom(dicom_ct_path, dicom_rt_path, output_dir, pat_id,
279232 out_log ['dcm_ct_to_nrrd' ] = subprocess .call (bash_command )
280233 print ("Done." )
281234
282-
283- # convert DICOM RTSTRUCT to NRRD file - no resampling
235+ # convert DICOM RTSTRUCT to NRRD file without resampling
284236 bash_command = list ()
285237 bash_command += ["plastimatch" , "convert" ]
286238 bash_command += ["--input" , dicom_rt_path ]
@@ -318,10 +270,6 @@ def export_res_nrrd_from_dicom(dicom_ct_path, dicom_rt_path, output_dir, pat_id,
318270 out_log ['dcm_nrrd_ct_resampling' ] = subprocess .call (bash_command )
319271 print ("Done." )
320272
321- # FIXME: log informations about the native volume
322- #out_log["shape_original"] = list(tmp.)
323-
324-
325273 # resample the NRRD RTSTRUCT file to 1mm isotropic
326274 bash_command = list ()
327275 bash_command += ["plastimatch" , "resample" ]
@@ -335,29 +283,34 @@ def export_res_nrrd_from_dicom(dicom_ct_path, dicom_rt_path, output_dir, pat_id,
335283 out_log ['dcm_nrrd_rt_resampling' ] = subprocess .call (bash_command )
336284 print ("Done." )
337285
338-
339286 # clean up
340287 print ("\n Removing temporary files (DICOM to NRRD, non-resampled)... " , end = '' )
341288 os .remove (ct_nrrd_path )
342- # FIXME: keep the RTSTRUCTs (latest LUNG1 has multiple structures --> additional checks afterwards)?
343- #os.remove(rt_nrrd_path)
344289 print ("Done." )
345290
346-
347291 return out_log
348292
349293
350294## ----------------------------------------
351295## ----------------------------------------
352296
353- def export_com_subvolume (ct_nrrd_path , rt_nrrd_path , crop_size , output_dir , pat_id ,
297+ def export_com_subvolume (ct_nrrd_path , rt_nrrd_path , output_dir , pat_id , crop_size = ( 150 , 150 , 150 ) ,
354298 z_first = True , rm_orig = False ):
355299
356300 """
357- Main reason: save space
358-
359- Use plastimatch so that we don't need to load and then write NRRD through python (pynrrd)
360-
301+ This function exports a subvolume centered on the CoM of the GTV and of the same size as the crop_size parameter.
302+
303+ Parameters:
304+ - ct_nrrd_path: a string representing the path to the NRRD CT file.
305+ - rt_nrrd_path: a string representing the path to the NRRD RTSTRUCT file.
306+ - output_dir: a string representing the path of the output directory.
307+ - pat_id: a string representing the patient ID.
308+ - crop_size: a tuple representing the size of the subvolume to be exported. Defaults to 150x150x150.
309+ - z_first: a boolean indicating whether the z-axis is the first or the last in the NRRD files. Defaults to True.
310+ - rm_orig: a boolean indicating whether the original CT and RTSTRUCT files should be removed. Defaults to False.
311+
312+ Returns:
313+ - a dictionary containing the log of the operations performed.
361314 """
362315
363316 # sanity check
@@ -449,4 +402,37 @@ def export_com_subvolume(ct_nrrd_path, rt_nrrd_path, crop_size, output_dir, pat_
449402
450403 return out_log
451404
405+
406+ ## ----------------------------------------
407+ ## ----------------------------------------
408+
409+ def get_input_volume (input_ct_nrrd_path ):
410+
411+ """
412+ This function prepares the data to be ingested by the model.
413+ It reads a CT scan nrrd file, normalizes the volume intensity, and crops the volume to a size of 50x50x50.
414+ Here, the input volume is assumed to be a 150x150x150 volume (see the export_com_subvolume function).
415+
416+ Parameters:
417+ - input_ct_nrrd_path: a string representing the file path of the CT scan nrrd file.
418+
419+ Returns:
420+ - a numpy array of shape (50,50,50) representing the cropped and normalized volume.
421+
422+ """
423+ sitk_ct_nrdd = sitk .ReadImage (input_ct_nrrd_path )
424+ ct_nrdd = sitk .GetArrayFromImage (sitk_ct_nrdd )
425+
426+ # volume intensity normalisation, should follow the same procedure as in the original code:
427+ # https://github.com/modelhub-ai/deep-prognosis/blob/master/contrib_src/processing.py
428+ ct_nrdd_norm = normalise_volume (input_volume = ct_nrdd ,
429+ new_min_val = 0 ,
430+ new_max_val = 1 ,
431+ old_min_val = - 1024 ,
432+ old_max_val = 3071 )
433+
434+ ct_nrdd_norm_crop = ct_nrdd_norm [50 :100 , 50 :100 , 50 :100 ]
435+
436+ return ct_nrdd_norm_crop
437+
452438
0 commit comments