Skip to content

Commit a93f541

Browse files
author
Den
committed
Update python scripts (minor), fix comments
1 parent 3278e63 commit a93f541

File tree

2 files changed

+131
-131
lines changed

2 files changed

+131
-131
lines changed

notebooks/nsclc-radiomics/demo/data_utils.py

Lines changed: 110 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,43 @@
11
"""
22
----------------------------------------
3-
IDC Radiomics use case (Colab Demo)
3+
IDC Radiomics use case (Colab)
44
5-
useful functions for data handling/proc
5+
Functions for data handling/processing
66
----------------------------------------
77
88
----------------------------------------
99
Author: Dennis Bontempi
10-
Email: dennis_bontempi@dfci.harvard.edu
11-
Modified: 07 OCT 20
10+
Email: dbontempi@bwh.harvard.edu
1211
----------------------------------------
1312
1413
"""
1514

1615
import os
1716
import json
18-
import pydicom
1917
import numpy as np
2018
import SimpleITK as sitk
2119

2220
import subprocess
2321

2422
## ----------------------------------------
2523

26-
# normalise the values of the volume between new_min_val and new_max_val
2724
def normalise_volume(input_volume, new_min_val, new_max_val, old_min_val = None, old_max_val = None):
2825

2926
"""
30-
Normalise a numpy volume intensity in a range between two given values
31-
32-
@params:
33-
input_volume - required : numpy volume to rescale (intensity-wise) in the new range.
34-
new_min_val - required : the lower bound of the new intensity range.
35-
new_max_val - required : the upper bound of the new intensity range.
36-
old_min_val - optional : the lower bound of the old intensity range. Defaults to input_volume's np.min()
37-
old_max_val - optional : the lower bound of the old intensity range. Defaults to input_volume's np.max()
38-
27+
This function normalizes the volume of an image between
28+
a new minimum and maximum value, given the old minimum and maximum values.
29+
30+
Parameters:
31+
- input_volume: a 3D numpy array representing the image to be normalized.
32+
- new_min_val: a float representing the new minimum value for the normalized image.
33+
- new_max_val: a float representing the new maximum value for the normalized image.
34+
- old_min_val: a float representing the old minimum value of the image.
35+
If None, the minimum value of the input_volume will be used.
36+
- old_max_val: a float representing the old maximum value of the image.
37+
If None, the maximum value of the input_volume will be used.
38+
39+
Returns:
40+
- a 3D numpy array representing the normalized image
3941
"""
4042

4143
# make sure the input volume is treated as a float volume
@@ -46,7 +48,6 @@ def normalise_volume(input_volume, new_min_val, new_max_val, old_min_val = None,
4648
curr_min = np.min(input_volume) if old_min_val == None else old_min_val
4749
curr_max = np.max(input_volume) if old_max_val == None else old_max_val
4850

49-
5051
# normalise the values of each voxel between zero and one
5152
zero_to_one_norm = (input_volume - curr_min)/(curr_max - curr_min)
5253

@@ -59,26 +60,20 @@ def normalise_volume(input_volume, new_min_val, new_max_val, old_min_val = None,
5960
def compute_center_of_mass(input_mask):
6061

6162
"""
62-
Compute the center of mass (CoM) of a given binary 3D numpy segmask (fast version, numpy based).
63-
The idea is the following:
64-
- create a 4D vector starting from the 3D, populating each channel as follows:
65-
(x pos, y pos, z pos, mask_value);
66-
- keep only the nonzero values;
67-
- compute an average of the position (CoM) using such triplets (x, y, z).
68-
69-
@params:
70-
input_mask - required : the 3D numpy vector (binary mask) to compute the center of mass of.
71-
72-
@returns:
73-
ctr_mass - a list of three elements storing the coordinates of the CoM
74-
(the axes are the same as the input mask)
75-
76-
"""
63+
This function computes the center of mass (CoM) of a binary 3D mask.
64+
65+
Parameters:
66+
- input_mask: a 3D numpy array representing the binary mask.
67+
68+
Returns:
69+
- a 3D numpy array representing the CoM in (x, y, z) coordinates
7770
78-
# sanity check: mask should be binary (multi-class RT is taken care of during in the export function)
71+
"""
72+
73+
# sanity check: the mask should be binary
7974
assert(len(np.unique(input_mask)) <= 2)
8075

81-
# display and log warning
76+
# display a warning if the mask is empty
8277
if len(np.unique(input_mask)) == 1:
8378
print('WARNING: DICOM RTSTRUCT is empty.')
8479
return [-1, -1, -1]
@@ -103,32 +98,35 @@ def compute_center_of_mass(input_mask):
10398
nonzero_voxels = segmask_4d[np.nonzero(segmask_4d[:, :, :, 3])]
10499

105100
# average the (x, y, z) triplets
106-
ctr_mass = np.average(nonzero_voxels[:, :3], axis = 0)
101+
com = np.average(nonzero_voxels[:, :3], axis = 0)
107102

108-
return ctr_mass
109-
103+
return com
110104

111105
## ----------------------------------------
112106
## ----------------------------------------
113107

114108
def get_bbox_dict(int_center_of_mass, seg_mask_shape, bbox_size, z_first = True):
115109

110+
116111
"""
117-
Compute the crop bounding box indices starting from the CoM and the BBox size.
112+
Computes the bounding box of a segmented mask centered around a center of mass, given a specified box size.
118113
119-
@params:
120-
int_center_of_mass - required : a list of integers (int(CoM)) where CoM is the output
121-
of the function "compute_center_of_mass()"
122-
bbox_size - required : the size of the bounding box along each axis.
123-
z_first - optional : if True, returns the dict as (lon, cor, sag), i.e., (z, y, x),
124-
as opposed to (sag, cor, lon), i.e., (x, y, z) (defaults to True).
125-
126-
@returns:
127-
bbox : a dictionary formatted as follows:
114+
Parameters:
115+
- int_center_of_mass: a tuple or list of three integers representing the center of mass.
116+
- seg_mask_shape: a tuple or list of three integers representing the shape of the segmented mask.
117+
- bbox_size: a tuple or list of three integers representing the size of the bounding box
118+
in each direction (coronal, sagittal, and longitudinal).
119+
- z_first: a boolean indicating whether the longitudinal direction is the first dimension (True)
120+
or the third dimension (False). Defaults to True.
121+
122+
Returns:
123+
- a dictionary with the indices of the first and last voxels in each direction (coronal, sagittal, and longitudinal)
124+
that define the bounding box, formatted as follows:
125+
128126
{
129-
'lon': {'first': 89, 'last': 139}
130-
'cor': {'first': 241, 'last': 291},
131-
'sag': {'first': 150, 'last': 200}
127+
'lon': {'first': 89, 'last': 139}
128+
'cor': {'first': 241, 'last': 291},
129+
'sag': {'first': 150, 'last': 200}
132130
}
133131
"""
134132

@@ -146,8 +144,7 @@ def get_bbox_dict(int_center_of_mass, seg_mask_shape, bbox_size, z_first = True)
146144
lon_idx = 0 if z_first else 2
147145
sag_idx = 2 if z_first else 0
148146

149-
150-
# bounding box if no exception is found
147+
# bounding box if no exception is raised
151148
sag_first = int(int_center_of_mass[sag_idx] - np.round(bbox_size[sag_idx]/2))
152149
sag_last = int(int_center_of_mass[sag_idx] + np.round(bbox_size[sag_idx]/2)) - 1
153150

@@ -156,7 +153,6 @@ def get_bbox_dict(int_center_of_mass, seg_mask_shape, bbox_size, z_first = True)
156153

157154
lon_first = int(int_center_of_mass[lon_idx] - np.round(bbox_size[lon_idx]/2))
158155
lon_last = int(int_center_of_mass[lon_idx] + np.round(bbox_size[lon_idx]/2)) - 1
159-
160156

161157
# print out exceptions
162158
if sag_last > seg_mask_shape[sag_idx] - 1 or sag_first < 0:
@@ -171,7 +167,6 @@ def get_bbox_dict(int_center_of_mass, seg_mask_shape, bbox_size, z_first = True)
171167
print('WARNING: the bounding box size exceeds volume dimensions (lon axis)')
172168
print('Cropping will be performed ignoring the "bbox_size" parameter')
173169

174-
175170
# take care of exceptions where bbox is bigger than the actual volume
176171
sag_first = int(np.max([0, sag_first]))
177172
sag_last = int(np.min([seg_mask_shape[sag_idx] - 1, sag_last]))
@@ -182,7 +177,6 @@ def get_bbox_dict(int_center_of_mass, seg_mask_shape, bbox_size, z_first = True)
182177
lon_first = int(np.max([0, lon_first]))
183178
lon_last = int(np.min([seg_mask_shape[lon_idx] - 1, lon_last]))
184179

185-
186180
# populate the dictionary and return it
187181
bbox['sag']['first'] = sag_first
188182
bbox['sag']['last'] = sag_last
@@ -195,68 +189,27 @@ def get_bbox_dict(int_center_of_mass, seg_mask_shape, bbox_size, z_first = True)
195189

196190
return bbox
197191

198-
199192
## ----------------------------------------
200193
## ----------------------------------------
201194

202-
def get_input_volume(input_ct_nrrd_path):
203-
204-
"""
205-
Should accept path to (ideally 150x150x150) NRRD volumes only, obtained
206-
exploting the pipeline in "lung1_preprocessing.ipynb" using the functions above.
207-
208-
FIXME: add as params also "com_int" and "final_crop_size = (50, 50, 50)"
209-
(handling of volumes that are not 150x150x150?)
210-
211-
"""
212-
213-
sitk_ct_nrdd = sitk.ReadImage(input_ct_nrrd_path)
214-
ct_nrdd = sitk.GetArrayFromImage(sitk_ct_nrdd)
215-
216-
# volume intensity normalisation, as seen in:
217-
# https://github.com/modelhub-ai/deep-prognosis/blob/master/contrib_src/processing.py
218-
ct_nrdd_norm = normalise_volume(input_volume = ct_nrdd,
219-
new_min_val = 0,
220-
new_max_val = 1,
221-
old_min_val = -1024,
222-
old_max_val = 3071)
223-
224-
# FIXME: handle exceptions for volumes that are not 150x150x150
225-
ct_nrdd_norm_crop = ct_nrdd_norm[50:100, 50:100, 50:100]
195+
def export_res_nrrd_from_dicom(dicom_ct_path, dicom_rt_path, output_dir, pat_id,
196+
ct_interpolation = 'linear', output_dtype = "int"):
226197

227198
"""
228-
# FIXME: debug
229-
print(ct_nrdd.shape)
230-
print(ct_nrdd_norm.shape)
231-
print(min_x, max_x)
232-
print(min_y, max_y)
233-
print(min_z, max_z)
234-
print(ct_nrdd_norm_crop.shape)
235-
"""
236-
237-
return ct_nrdd_norm_crop
238199
200+
This function exports a resampled CT scan and a resampled RT structure set from a DICOM CT and a DICOM RT structure set.
239201
240-
## ----------------------------------------
241-
## ----------------------------------------
202+
Parameters:
203+
- dicom_ct_path: a string representing the path of the DICOM CT folder.
204+
- dicom_rt_path: a string representing the path of the DICOM RT structure set folder.
205+
- output_dir: a string representing the path of the output directory.
206+
- pat_id: a string representing the patient ID.
207+
- ct_interpolation: a string representing the interpolation method to be used for the CT scan resampling.
208+
- output_dtype: a string representing the data type of the exported nrrd files.
209+
210+
Returns:
211+
- a dictionary containing the paths of the exported nrrd files.
242212
243-
def export_res_nrrd_from_dicom(dicom_ct_path, dicom_rt_path, output_dir, pat_id,
244-
ct_interpolation = 'linear', output_dtype = "int"):
245-
246-
"""
247-
Convert DICOM CT and RTSTRUCT sequences to NRRD files and resample to 1-mm isotropic
248-
exploiting plastimatch (direct call, bash-like).
249-
250-
@params:
251-
dicom_ct_path - required :
252-
dicom_rt_path - required :
253-
output_dir - required :
254-
pat_id - required :
255-
output_dtype - optional :
256-
257-
@returns:
258-
out_log :
259-
260213
"""
261214

262215
out_log = dict()
@@ -268,7 +221,7 @@ def export_res_nrrd_from_dicom(dicom_ct_path, dicom_rt_path, output_dir, pat_id,
268221
# log the labels of the exported segmasks
269222
rt_struct_list_path = os.path.join(output_dir, pat_id + '_rt_list.txt')
270223

271-
# convert DICOM CT to NRRD file - no resampling
224+
# convert DICOM CT to NRRD file without resampling
272225
bash_command = list()
273226
bash_command += ["plastimatch", "convert"]
274227
bash_command += ["--input", dicom_ct_path]
@@ -279,8 +232,7 @@ def export_res_nrrd_from_dicom(dicom_ct_path, dicom_rt_path, output_dir, pat_id,
279232
out_log['dcm_ct_to_nrrd'] = subprocess.call(bash_command)
280233
print("Done.")
281234

282-
283-
# convert DICOM RTSTRUCT to NRRD file - no resampling
235+
# convert DICOM RTSTRUCT to NRRD file without resampling
284236
bash_command = list()
285237
bash_command += ["plastimatch", "convert"]
286238
bash_command += ["--input", dicom_rt_path]
@@ -318,10 +270,6 @@ def export_res_nrrd_from_dicom(dicom_ct_path, dicom_rt_path, output_dir, pat_id,
318270
out_log['dcm_nrrd_ct_resampling'] = subprocess.call(bash_command)
319271
print("Done.")
320272

321-
# FIXME: log informations about the native volume
322-
#out_log["shape_original"] = list(tmp.)
323-
324-
325273
# resample the NRRD RTSTRUCT file to 1mm isotropic
326274
bash_command = list()
327275
bash_command += ["plastimatch", "resample"]
@@ -335,29 +283,34 @@ def export_res_nrrd_from_dicom(dicom_ct_path, dicom_rt_path, output_dir, pat_id,
335283
out_log['dcm_nrrd_rt_resampling'] = subprocess.call(bash_command)
336284
print("Done.")
337285

338-
339286
# clean up
340287
print("\nRemoving temporary files (DICOM to NRRD, non-resampled)... ", end = '')
341288
os.remove(ct_nrrd_path)
342-
# FIXME: keep the RTSTRUCTs (latest LUNG1 has multiple structures --> additional checks afterwards)?
343-
#os.remove(rt_nrrd_path)
344289
print("Done.")
345290

346-
347291
return out_log
348292

349293

350294
## ----------------------------------------
351295
## ----------------------------------------
352296

353-
def export_com_subvolume(ct_nrrd_path, rt_nrrd_path, crop_size, output_dir, pat_id,
297+
def export_com_subvolume(ct_nrrd_path, rt_nrrd_path, output_dir, pat_id, crop_size = (150, 150, 150),
354298
z_first = True, rm_orig = False):
355299

356300
"""
357-
Main reason: save space
358-
359-
Use plastimatch so that we don't need to load and then write NRRD through python (pynrrd)
360-
301+
This function exports a subvolume centered on the CoM of the GTV and of the same size as the crop_size parameter.
302+
303+
Parameters:
304+
- ct_nrrd_path: a string representing the path to the NRRD CT file.
305+
- rt_nrrd_path: a string representing the path to the NRRD RTSTRUCT file.
306+
- output_dir: a string representing the path of the output directory.
307+
- pat_id: a string representing the patient ID.
308+
- crop_size: a tuple representing the size of the subvolume to be exported. Defaults to 150x150x150.
309+
- z_first: a boolean indicating whether the z-axis is the first or the last in the NRRD files. Defaults to True.
310+
- rm_orig: a boolean indicating whether the original CT and RTSTRUCT files should be removed. Defaults to False.
311+
312+
Returns:
313+
- a dictionary containing the log of the operations performed.
361314
"""
362315

363316
# sanity check
@@ -449,4 +402,37 @@ def export_com_subvolume(ct_nrrd_path, rt_nrrd_path, crop_size, output_dir, pat_
449402

450403
return out_log
451404

405+
406+
## ----------------------------------------
407+
## ----------------------------------------
408+
409+
def get_input_volume(input_ct_nrrd_path):
410+
411+
"""
412+
This function prepares the data to be ingested by the model.
413+
It reads a CT scan nrrd file, normalizes the volume intensity, and crops the volume to a size of 50x50x50.
414+
Here, the input volume is assumed to be a 150x150x150 volume (see the export_com_subvolume function).
415+
416+
Parameters:
417+
- input_ct_nrrd_path: a string representing the file path of the CT scan nrrd file.
418+
419+
Returns:
420+
- a numpy array of shape (50,50,50) representing the cropped and normalized volume.
421+
422+
"""
423+
sitk_ct_nrdd = sitk.ReadImage(input_ct_nrrd_path)
424+
ct_nrdd = sitk.GetArrayFromImage(sitk_ct_nrdd)
425+
426+
# volume intensity normalisation, should follow the same procedure as in the original code:
427+
# https://github.com/modelhub-ai/deep-prognosis/blob/master/contrib_src/processing.py
428+
ct_nrdd_norm = normalise_volume(input_volume = ct_nrdd,
429+
new_min_val = 0,
430+
new_max_val = 1,
431+
old_min_val = -1024,
432+
old_max_val = 3071)
433+
434+
ct_nrdd_norm_crop = ct_nrdd_norm[50:100, 50:100, 50:100]
435+
436+
return ct_nrdd_norm_crop
437+
452438

0 commit comments

Comments
 (0)