Skip to content

Commit 1dbc7a9

Browse files
committed
add advanced topics folder
1 parent d7a2909 commit 1dbc7a9

File tree

9 files changed

+3271
-0
lines changed

9 files changed

+3271
-0
lines changed

notebooks/advanced_topics/clinical_data_intro.ipynb

Lines changed: 779 additions & 0 deletions
Large diffs are not rendered by default.

notebooks/advanced_topics/idc_api.ipynb

Lines changed: 1095 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
import array
2+
import pydicom
3+
from pydicom.sequence import Sequence
4+
from pydicom import Dataset , DataElement
5+
from pydicom.dataset import FileMetaDataset
6+
from pydicom.uid import UID
7+
import json
8+
import logging
9+
import importlib
10+
import boto3
11+
from openjpeg import decode
12+
import io
13+
import sys
14+
import time
15+
import os
16+
import gzip
17+
18+
# Configure the root logger at import time so the INFO-level timing messages
# emitted by MedicalImaging are visible; the DEBUG-level ones stay hidden.
logging.basicConfig(level="INFO")
19+
20+
class MedicalImaging:
    """Convenience wrapper around the boto3 ``medical-imaging`` client.

    Every service call is timed and the elapsed time is logged in
    milliseconds.  On top of the plain API wrappers, ``getDICOMdataset``
    rebuilds pydicom ``Dataset`` objects from an image set's JSON metadata
    and its decoded image frames.
    """

    # Metadata keys that getTags never copies into a dataset.
    _SKIPPED_KEYS = frozenset(
        ['PrivateCreatorID', 'FileMetaInformationVersion', '00291203']
    )

    # Values written into the file meta of every rebuilt dataset.
    _DEFAULT_SOP_CLASS_UID = '1.2.840.10008.5.1.4.1.1.1'
    _IMPLEMENTATION_CLASS_UID = '1.2.826.0.1.3680043.9.3811.2.0.1'

    def __init__(self):
        """Create the ``medical-imaging`` client from boto3's default session."""
        self.client = boto3.client('medical-imaging')

    def stopwatch(self, start_time, end_time):
        """Return the interval between two timestamps (seconds) in milliseconds."""
        return (end_time - start_time) * 1000

    def _log_elapsed(self, label, start_time, level=logging.INFO):
        """Log ``"<label> : <elapsed> ms"`` for the time elapsed since *start_time*.

        *start_time* must come from ``time.perf_counter()``.
        """
        elapsed = self.stopwatch(start_time, time.perf_counter())
        logging.log(level, f"{label} : {elapsed} ms")

    def getMetadata(self, datastoreId, imageSetId):
        """Fetch an image set's metadata and return it as parsed JSON.

        The service returns a gzip-compressed JSON blob; it is read fully,
        decompressed and decoded here.  The logged time covers both the
        request and the decoding.
        """
        start_time = time.perf_counter()
        response = self.client.get_image_set_metadata(
            datastoreId=datastoreId, imageSetId=imageSetId
        )
        compressed = response["imageSetMetadataBlob"].read()
        json_study_metadata = json.loads(gzip.decompress(compressed))
        self._log_elapsed("Metadata fetch", start_time)
        return json_study_metadata

    def listDatastores(self):
        """Return the raw ``list_datastores`` response."""
        start_time = time.perf_counter()
        response = self.client.list_datastores()
        self._log_elapsed("List Datastores", start_time)
        return response

    def createDatastore(self, datastoreName):
        """Create a datastore named *datastoreName* and return the raw response."""
        start_time = time.perf_counter()
        response = self.client.create_datastore(datastoreName=datastoreName)
        self._log_elapsed("Create Datastore", start_time)
        return response

    def getDatastore(self, datastoreId):
        """Return the raw ``get_datastore`` response for *datastoreId*."""
        start_time = time.perf_counter()
        response = self.client.get_datastore(datastoreId=datastoreId)
        self._log_elapsed("Get Datastore", start_time)
        return response

    def deleteDatastore(self, datastoreId):
        """Delete the datastore *datastoreId* and return the raw response."""
        start_time = time.perf_counter()
        response = self.client.delete_datastore(datastoreId=datastoreId)
        self._log_elapsed("Delete Datastore", start_time)
        return response

    def deleteImageSet(self, datastoreId, imageSetId):
        """Delete one image set from a datastore and return the raw response."""
        start_time = time.perf_counter()
        response = self.client.delete_image_set(
            datastoreId=datastoreId, imageSetId=imageSetId
        )
        self._log_elapsed("Delete ImageSet", start_time)
        return response

    def startImportJob(self, datastoreId, IamRoleArn, inputS3, outputS3,
                       clientToken="demoClient"):
        """Start a DICOM import job and return the raw response.

        Args:
            datastoreId: Target datastore.
            IamRoleArn: Passed to the service as ``dataAccessRoleArn``.
            inputS3: Passed as ``inputS3Uri``.
            outputS3: Passed as ``outputS3Uri``.
            clientToken: Token forwarded as ``clientToken``.  Defaults to the
                historical fixed value ``"demoClient"``; pass a distinct
                token per job when starting several imports.
        """
        start_time = time.perf_counter()
        response = self.client.start_dicom_import_job(
            datastoreId=datastoreId,
            dataAccessRoleArn=IamRoleArn,
            inputS3Uri=inputS3,
            outputS3Uri=outputS3,
            clientToken=clientToken,
        )
        self._log_elapsed("Start Import Job", start_time)
        return response

    def getImportJob(self, datastoreId, jobId):
        """Return the raw ``get_dicom_import_job`` response for *jobId*."""
        start_time = time.perf_counter()
        response = self.client.get_dicom_import_job(
            datastoreId=datastoreId, jobId=jobId
        )
        self._log_elapsed("Get Import Job", start_time)
        return response

    def getFramePixels(self, datastoreId, imageSetId, imageFrameId):
        """Download one image frame and return it decoded by ``openjpeg.decode``.

        Fetch and decode durations are logged separately at DEBUG level.
        """
        start_time = time.perf_counter()
        res = self.client.get_image_frame(
            datastoreId=datastoreId,
            imageSetId=imageSetId,
            imageFrameInformation={'imageFrameId': imageFrameId},
        )
        self._log_elapsed("Frame fetch", start_time, logging.DEBUG)

        start_time = time.perf_counter()
        # The decoder reads from a seekable in-memory stream positioned at 0.
        decoded = decode(io.BytesIO(res['imageFrameBlob'].read()))
        self._log_elapsed("Frame decode", start_time, logging.DEBUG)
        return decoded

    def _newFileMeta(self, sopInstanceUid):
        """Build a fresh file-meta header for one SOP instance.

        A new object is created for every dataset so that setting the
        per-instance UID cannot leak into previously built datasets.
        """
        file_meta = FileMetaDataset()
        file_meta.MediaStorageSOPClassUID = UID(self._DEFAULT_SOP_CLASS_UID)
        file_meta.MediaStorageSOPInstanceUID = UID(sopInstanceUid)
        file_meta.ImplementationClassUID = UID(self._IMPLEMENTATION_CLASS_UID)
        # Explicit VR Little Endian, matching the is_little_endian /
        # is_implicit_VR flags set on the dataset.
        file_meta.TransferSyntaxUID = pydicom.uid.ExplicitVRLittleEndian
        return file_meta

    def getDICOMdataset(self, datastoreId, imageSetId):
        """Rebuild one pydicom ``Dataset`` per SOP instance of an image set.

        Patient, study, series and instance level tags from the image set
        metadata are merged into each dataset, and the decoded pixels of the
        instance's FIRST image frame are stored as ``PixelData`` (further
        frames of the same instance are not fetched).

        Returns:
            list of ``Dataset``, in metadata iteration order.
        """
        logging.debug("Reading the JSON metadata file")
        json_dicom_header = self.getMetadata(datastoreId, imageSetId)

        vrlist = []
        sop_instances = []
        patient_tags = json_dicom_header["Patient"]["DICOM"]
        study_tags = json_dicom_header["Study"]["DICOM"]

        logging.debug("Reading the Pixels")
        for series in json_dicom_header["Study"]["Series"].values():
            for sop_instance_uid, instance in series["Instances"].items():
                ds = Dataset()
                ds.file_meta = self._newFileMeta(sop_instance_uid)

                self.getTags(patient_tags, ds, vrlist)
                self.getTags(study_tags, ds, vrlist)
                # Register this instance's explicit VRs before reading its
                # tags so tags unknown to pydicom's dictionary can resolve.
                self.getDICOMVRs(instance["DICOMVRs"], vrlist)
                self.getTags(instance["DICOM"], ds, vrlist)
                self.getTags(series["DICOM"], ds, vrlist)

                # Keep the file meta consistent with the data set when the
                # metadata carries the instance's SOP class.
                if "SOPClassUID" in ds:
                    ds.file_meta.MediaStorageSOPClassUID = ds.SOPClassUID
                ds.is_little_endian = True
                ds.is_implicit_VR = False

                frameId = instance["ImageFrames"][0]["ID"]
                pixels = self.getFramePixels(
                    datastoreId, json_dicom_header["ImageSetID"], frameId
                )

                start_time = time.perf_counter()
                ds.PixelData = pixels.tobytes()
                sop_instances.append(ds)
                # VR overrides are per instance; reset before the next one.
                vrlist.clear()
                self._log_elapsed("Outpout save", start_time, logging.DEBUG)
        return sop_instances

    def getDICOMVRs(self, taglevel, vrlist):
        """Append a ``[tag, vr]`` pair to *vrlist* for every entry of *taglevel*.

        *vrlist* is mutated in place; nothing is returned.
        """
        vrlist.extend([theKey, vr] for theKey, vr in taglevel.items())
        logging.debug(f"[getDICOMVRs] - List of private tags VRs: {vrlist}\r\n")

    def _lookupVR(self, theKey, vrlist):
        """Return the VR for *theKey*, or ``None`` when it cannot be resolved.

        pydicom's dictionary is consulted first; if that lookup fails the
        last matching ``[tag, vr]`` pair in *vrlist* is used.
        """
        try:
            return pydicom.datadict.dictionary_VR(theKey)
        except Exception:
            # Not in the pydicom dictionary: it may be a private tag whose VR
            # was listed in the instance's DICOMVRs section.
            tagvr = None
            for listedKey, listedVr in vrlist:
                if listedKey == theKey:
                    tagvr = listedVr
            return tagvr

    def getTags(self, tagLevel, ds, vrlist):
        """Copy the tags of one metadata level into the dataset *ds*.

        Args:
            tagLevel: Mapping of tag key to value for one metadata level.
            ds: Dataset that receives the elements (mutated in place).
            vrlist: ``[tag, vr]`` pairs used when pydicom does not know a tag.

        Group 0002 elements and odd-group (private) elements are not added.
        Sequence (``SQ``) values are converted recursively and added like any
        other element.  Any failure on a single tag is logged as a warning
        and that tag is skipped; the remaining tags are still processed.
        """
        for theKey, rawvalue in tagLevel.items():
            if theKey in self._SKIPPED_KEYS:
                continue
            try:
                tagvr = self._lookupVR(theKey, vrlist)
                datavalue = rawvalue

                if tagvr == 'SQ':
                    logging.debug(f"{theKey} : {rawvalue} , {vrlist}")
                    seqs = []
                    for underSeq in rawvalue:
                        seqds = Dataset()
                        self.getTags(underSeq, seqds, vrlist)
                        seqs.append(seqds)
                    datavalue = Sequence(seqs)
                elif tagvr == 'US or SS':
                    # A value above the signed 16-bit maximum can only be
                    # unsigned; otherwise the VR is left ambiguous.
                    if int(datavalue) > 32767:
                        tagvr = 'US'
                elif tagvr == 'OB':
                    datavalue = self.getOBVRTagValue(rawvalue)

                data_element = DataElement(theKey, tagvr, datavalue)
                group = int(data_element.tag.group)
                # Skip file-meta (group 0002) and all private (odd) groups.
                if group != 2 and group % 2 == 0:
                    ds.add(data_element)
            except Exception as err:
                logging.warning(f"[HLIDataDICOMizer][getTags] - {err} for Key: {theKey}")

    def getOBVRTagValue(self, datalist):
        """Convert an iterable of byte values (ints or numeric strings) to ``bytes``.

        Raises:
            ValueError: if a value is not an integer in ``range(256)``.
        """
        return bytes(int(byteval) for byteval in datalist)
218+

0 commit comments

Comments
 (0)