1
- # Copyright 2021-2023 MONAI Consortium
1
+ # Copyright 2021-2025 MONAI Consortium
2
2
# Licensed under the Apache License, Version 2.0 (the "License");
3
3
# you may not use this file except in compliance with the License.
4
4
# You may obtain a copy of the License at
12
12
import logging
13
13
import os
14
14
from pathlib import Path
15
- from typing import List
15
+ from typing import Dict , List , Optional , Tuple , cast
16
+
17
+ from pydicom .dataset import Dataset
16
18
17
19
from monai .deploy .core import ConditionType , Fragment , Operator , OperatorSpec
18
20
from monai .deploy .core .domain .dicom_series import DICOMSeries
@@ -163,6 +165,7 @@ def _load_data(self, files: List[str]):
163
165
study_dict = {}
164
166
series_dict = {}
165
167
sop_instances = []
168
+ sop_map : Dict [Tuple [str , str ], Dict [Optional [Tuple [float , float , float ]], List [Dataset ]]] = {}
166
169
167
170
for file in files :
168
171
try :
@@ -171,34 +174,94 @@ def _load_data(self, files: List[str]):
171
174
self ._logger .warn (f"Ignored { file } , reason being: { ex } " )
172
175
173
176
for sop_instance in sop_instances :
174
- study_instance_uid = sop_instance [0x0020 , 0x000D ].value .name # name is the UID as str
175
-
176
- # First need to eliminate the SOP instances whose SOP Class is to be ignored.
177
- if "SOPInstanceUID" not in sop_instance :
178
- self ._logger .warn ("Instance ignored due to missing SOP instance UID tag" )
179
- continue
180
- sop_instance_uid = sop_instance ["SOPInstanceUID" ].value
181
- if "SOPClassUID" not in sop_instance :
182
- self ._logger .warn (f"Instance ignored due to missing SOP Class UID tag, { sop_instance_uid } " )
183
- continue
184
- if sop_instance ["SOPClassUID" ].value in DICOMDataLoaderOperator .SOP_CLASSES_TO_IGNORE :
185
- self ._logger .warn (f"Instance ignored for being in the ignored class, { sop_instance_uid } " )
186
- continue
187
-
188
- if study_instance_uid not in study_dict :
189
- study = DICOMStudy (study_instance_uid )
190
- self .populate_study_attributes (study , sop_instance )
191
- study_dict [study_instance_uid ] = study
192
-
193
- series_instance_uid = sop_instance [0x0020 , 0x000E ].value .name # name is the UID as str
194
-
195
- if series_instance_uid not in series_dict :
196
- series = DICOMSeries (series_instance_uid )
197
- series_dict [series_instance_uid ] = series
198
- self .populate_series_attributes (series , sop_instance )
199
- study_dict [study_instance_uid ].add_series (series )
200
-
201
- series_dict [series_instance_uid ].add_sop_instance (sop_instance )
177
+ try :
178
+ study_instance_uid = sop_instance [0x0020 , 0x000D ].value .name # name is the UID as str
179
+
180
+ # First need to eliminate the SOP instances whose SOP Class is to be ignored.
181
+ if "SOPInstanceUID" not in sop_instance :
182
+ self ._logger .warn ("Instance ignored due to missing SOP instance UID tag" )
183
+ continue
184
+ sop_instance_uid = sop_instance ["SOPInstanceUID" ].value
185
+ if "SOPClassUID" not in sop_instance :
186
+ self ._logger .warn (f"Instance ignored due to missing SOP Class UID tag, { sop_instance_uid } " )
187
+ continue
188
+ if sop_instance ["SOPClassUID" ].value in DICOMDataLoaderOperator .SOP_CLASSES_TO_IGNORE :
189
+ self ._logger .warn (f"Instance ignored for being in the ignored class, { sop_instance_uid } " )
190
+ continue
191
+
192
+ if study_instance_uid not in study_dict :
193
+ study = DICOMStudy (study_instance_uid )
194
+ self .populate_study_attributes (study , sop_instance )
195
+ study_dict [study_instance_uid ] = study
196
+
197
+ series_instance_uid = sop_instance [0x0020 , 0x000E ].value .name # name is the UID as str
198
+
199
+ if series_instance_uid not in series_dict :
200
+ series = DICOMSeries (series_instance_uid )
201
+ series_dict [series_instance_uid ] = series
202
+ self .populate_series_attributes (series , sop_instance )
203
+ study_dict [study_instance_uid ].add_series (series )
204
+
205
+ # Prepare sop_map entry
206
+ series_key = (study_instance_uid , series_instance_uid )
207
+ sop_map .setdefault (series_key , {})
208
+ ipp = sop_instance .get ("ImagePositionPatient" , None )
209
+ if ipp is not None :
210
+ # Convert IPP to tuple
211
+ ipp_tuple = cast (Tuple [float , float , float ], tuple (float (v ) for v in ipp ))
212
+ else :
213
+ # Non-image files will be missing IPP; store SOP instance under "None" key, move on to next SOP instance
214
+ sop_map [series_key ].setdefault (ipp , []).append (sop_instance )
215
+ continue
216
+
217
+ sop_list = sop_map [series_key ].setdefault (ipp_tuple , [])
218
+
219
+ if not sop_list :
220
+ # First occurrence of this spatial position — store the SOP instance
221
+ sop_list .append (sop_instance )
222
+ else :
223
+ # Duplicate spatial location found — compare AcquisitionNumbers (if absent, set to -1)
224
+ exist = sop_list [0 ]
225
+ exist_acq_num = int (exist .get ("AcquisitionNumber" , - 1 ))
226
+ curr_acq_num = int (sop_instance .get ("AcquisitionNumber" , - 1 ))
227
+ if curr_acq_num > exist_acq_num :
228
+ # Current SOP instance AcquisitionNumber is greater - replace existing SOP instance
229
+ self ._logger .info (
230
+ f"Duplicate spatial coordinates detected; removing duplicate SOP at IPP { ipp_tuple } "
231
+ f"in Series { series_instance_uid } ; removed SOP instance with lower AcquisitionNumber "
232
+ f"({ curr_acq_num } < { exist_acq_num } )"
233
+ )
234
+ sop_list [0 ] = sop_instance
235
+ elif curr_acq_num < exist_acq_num :
236
+ # Existing SOP instance AcquisitionNumber is greater - don't store current SOP instance
237
+ self ._logger .info (
238
+ f"Duplicate spatial coordinates detected; removing duplicate SOP at IPP { ipp_tuple } "
239
+ f"in Series { series_instance_uid } ; kept SOP instance with higher AcquisitionNumber "
240
+ f"({ exist_acq_num } > { curr_acq_num } )"
241
+ )
242
+ elif curr_acq_num == - 1 :
243
+ # AcquisitionNumber tag is absent for compared SOP instances - don't store current SOP instance
244
+ self ._logger .info (
245
+ f"Duplicate spatial coordinates detected; removing duplicate SOP at IPP { ipp_tuple } "
246
+ f"in Series { series_instance_uid } ; AcquisitionNumber tags are absent"
247
+ )
248
+ else :
249
+ # AcquisitionNumber tag values are equal for compared SOP instances - don't store current SOP instance
250
+ self ._logger .info (
251
+ f"Duplicate spatial coordinates detected; removing duplicate SOP at IPP { ipp_tuple } "
252
+ f"in Series { series_instance_uid } ; AcquisitionNumber tag values are equal "
253
+ f"({ exist_acq_num } = { curr_acq_num } )"
254
+ )
255
+
256
+ except Exception as ex :
257
+ self ._logger .warn (f"Error parsing SOP Instance: { ex } " )
258
+
259
+ # Add unique SOPs to series_dict following potential duplication removal
260
+ for (_ , series_uid ), ipp_dict in sop_map .items ():
261
+ for _ , sop_list in ipp_dict .items ():
262
+ for sop_instance in sop_list :
263
+ series_dict [series_uid ].add_sop_instance (sop_instance )
264
+
202
265
return list (study_dict .values ())
203
266
204
267
def populate_study_attributes (self , study , sop_instance ):
0 commit comments