Skip to content

Commit ac6ec7b

Browse files
bluna301 authored and
MMelQin committed
duplicate ipps non-loaded (#535)
Signed-off-by: bluna301 <luna.bryanr@gmail.com>
1 parent 9129771 commit ac6ec7b

File tree

1 file changed

+93
-30
lines changed

1 file changed

+93
-30
lines changed

monai/deploy/operators/dicom_data_loader_operator.py

Lines changed: 93 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2021-2023 MONAI Consortium
1+
# Copyright 2021-2025 MONAI Consortium
22
# Licensed under the Apache License, Version 2.0 (the "License");
33
# you may not use this file except in compliance with the License.
44
# You may obtain a copy of the License at
@@ -12,7 +12,9 @@
1212
import logging
1313
import os
1414
from pathlib import Path
15-
from typing import List
15+
from typing import Dict, List, Optional, Tuple, cast
16+
17+
from pydicom.dataset import Dataset
1618

1719
from monai.deploy.core import ConditionType, Fragment, Operator, OperatorSpec
1820
from monai.deploy.core.domain.dicom_series import DICOMSeries
@@ -163,6 +165,7 @@ def _load_data(self, files: List[str]):
163165
study_dict = {}
164166
series_dict = {}
165167
sop_instances = []
168+
sop_map: Dict[Tuple[str, str], Dict[Optional[Tuple[float, float, float]], List[Dataset]]] = {}
166169

167170
for file in files:
168171
try:
@@ -171,34 +174,94 @@ def _load_data(self, files: List[str]):
171174
self._logger.warn(f"Ignored {file}, reason being: {ex}")
172175

173176
for sop_instance in sop_instances:
174-
study_instance_uid = sop_instance[0x0020, 0x000D].value.name # name is the UID as str
175-
176-
# First need to eliminate the SOP instances whose SOP Class is to be ignored.
177-
if "SOPInstanceUID" not in sop_instance:
178-
self._logger.warn("Instance ignored due to missing SOP instance UID tag")
179-
continue
180-
sop_instance_uid = sop_instance["SOPInstanceUID"].value
181-
if "SOPClassUID" not in sop_instance:
182-
self._logger.warn(f"Instance ignored due to missing SOP Class UID tag, {sop_instance_uid}")
183-
continue
184-
if sop_instance["SOPClassUID"].value in DICOMDataLoaderOperator.SOP_CLASSES_TO_IGNORE:
185-
self._logger.warn(f"Instance ignored for being in the ignored class, {sop_instance_uid}")
186-
continue
187-
188-
if study_instance_uid not in study_dict:
189-
study = DICOMStudy(study_instance_uid)
190-
self.populate_study_attributes(study, sop_instance)
191-
study_dict[study_instance_uid] = study
192-
193-
series_instance_uid = sop_instance[0x0020, 0x000E].value.name # name is the UID as str
194-
195-
if series_instance_uid not in series_dict:
196-
series = DICOMSeries(series_instance_uid)
197-
series_dict[series_instance_uid] = series
198-
self.populate_series_attributes(series, sop_instance)
199-
study_dict[study_instance_uid].add_series(series)
200-
201-
series_dict[series_instance_uid].add_sop_instance(sop_instance)
177+
try:
178+
study_instance_uid = sop_instance[0x0020, 0x000D].value.name # name is the UID as str
179+
180+
# First need to eliminate the SOP instances whose SOP Class is to be ignored.
181+
if "SOPInstanceUID" not in sop_instance:
182+
self._logger.warn("Instance ignored due to missing SOP instance UID tag")
183+
continue
184+
sop_instance_uid = sop_instance["SOPInstanceUID"].value
185+
if "SOPClassUID" not in sop_instance:
186+
self._logger.warn(f"Instance ignored due to missing SOP Class UID tag, {sop_instance_uid}")
187+
continue
188+
if sop_instance["SOPClassUID"].value in DICOMDataLoaderOperator.SOP_CLASSES_TO_IGNORE:
189+
self._logger.warn(f"Instance ignored for being in the ignored class, {sop_instance_uid}")
190+
continue
191+
192+
if study_instance_uid not in study_dict:
193+
study = DICOMStudy(study_instance_uid)
194+
self.populate_study_attributes(study, sop_instance)
195+
study_dict[study_instance_uid] = study
196+
197+
series_instance_uid = sop_instance[0x0020, 0x000E].value.name # name is the UID as str
198+
199+
if series_instance_uid not in series_dict:
200+
series = DICOMSeries(series_instance_uid)
201+
series_dict[series_instance_uid] = series
202+
self.populate_series_attributes(series, sop_instance)
203+
study_dict[study_instance_uid].add_series(series)
204+
205+
# Prepare sop_map entry
206+
series_key = (study_instance_uid, series_instance_uid)
207+
sop_map.setdefault(series_key, {})
208+
ipp = sop_instance.get("ImagePositionPatient", None)
209+
if ipp is not None:
210+
# Convert IPP to tuple
211+
ipp_tuple = cast(Tuple[float, float, float], tuple(float(v) for v in ipp))
212+
else:
213+
# Non-image files will be missing IPP; store SOP instance under "None" key, move on to next SOP instance
214+
sop_map[series_key].setdefault(ipp, []).append(sop_instance)
215+
continue
216+
217+
sop_list = sop_map[series_key].setdefault(ipp_tuple, [])
218+
219+
if not sop_list:
220+
# First occurrence of this spatial position — store the SOP instance
221+
sop_list.append(sop_instance)
222+
else:
223+
# Duplicate spatial location found — compare AcquisitionNumbers (if absent, set to -1)
224+
exist = sop_list[0]
225+
exist_acq_num = int(exist.get("AcquisitionNumber", -1))
226+
curr_acq_num = int(sop_instance.get("AcquisitionNumber", -1))
227+
if curr_acq_num > exist_acq_num:
228+
# Current SOP instance AcquisitionNumber is greater - replace existing SOP instance
229+
self._logger.info(
230+
f"Duplicate spatial coordinates detected; removing duplicate SOP at IPP {ipp_tuple} "
231+
f"in Series {series_instance_uid}; removed SOP instance with lower AcquisitionNumber "
232+
f"({curr_acq_num} < {exist_acq_num})"
233+
)
234+
sop_list[0] = sop_instance
235+
elif curr_acq_num < exist_acq_num:
236+
# Existing SOP instance AcquisitionNumber is greater - don't store current SOP instance
237+
self._logger.info(
238+
f"Duplicate spatial coordinates detected; removing duplicate SOP at IPP {ipp_tuple} "
239+
f"in Series {series_instance_uid}; kept SOP instance with higher AcquisitionNumber "
240+
f"({exist_acq_num} > {curr_acq_num})"
241+
)
242+
elif curr_acq_num == -1:
243+
# AcquisitionNumber tag is absent for compared SOP instances - don't store current SOP instance
244+
self._logger.info(
245+
f"Duplicate spatial coordinates detected; removing duplicate SOP at IPP {ipp_tuple} "
246+
f"in Series {series_instance_uid}; AcquisitionNumber tags are absent"
247+
)
248+
else:
249+
# AcquisitionNumber tag values are equal for compared SOP instances - don't store current SOP instance
250+
self._logger.info(
251+
f"Duplicate spatial coordinates detected; removing duplicate SOP at IPP {ipp_tuple} "
252+
f"in Series {series_instance_uid}; AcquisitionNumber tag values are equal "
253+
f"({exist_acq_num} = {curr_acq_num})"
254+
)
255+
256+
except Exception as ex:
257+
self._logger.warn(f"Error parsing SOP Instance: {ex}")
258+
259+
# Add unique SOPs to series_dict following potential duplication removal
260+
for (_, series_uid), ipp_dict in sop_map.items():
261+
for _, sop_list in ipp_dict.items():
262+
for sop_instance in sop_list:
263+
series_dict[series_uid].add_sop_instance(sop_instance)
264+
202265
return list(study_dict.values())
203266

204267
def populate_study_attributes(self, study, sop_instance):

0 commit comments

Comments
 (0)