From 66ae578572f86f9e34a6be669001a27fc7858cf9 Mon Sep 17 00:00:00 2001
From: Can-Zhao <volcanofly@gmail.com>
Date: Sat, 17 Aug 2024 00:58:17 +0000
Subject: [PATCH 01/23] add quality check

Signed-off-by: Can-Zhao <volcanofly@gmail.com>
---
 generation/maisi/scripts/quality_check.py | 136 ++++++++++++++++++++++
 generation/maisi/scripts/sample.py        |  33 +++---
 2 files changed, 153 insertions(+), 16 deletions(-)
 create mode 100644 generation/maisi/scripts/quality_check.py

diff --git a/generation/maisi/scripts/quality_check.py b/generation/maisi/scripts/quality_check.py
new file mode 100644
index 0000000000..7cc0aaaee7
--- /dev/null
+++ b/generation/maisi/scripts/quality_check.py
@@ -0,0 +1,136 @@
+import os
+import json
+import time
+import nibabel as nib
+import numpy as np
+import matplotlib.pyplot as plt
+from scipy.stats import entropy
+
+def get_masked_data(label_data, image_data, labels):
+    """
+    Extracts and returns the image data corresponding to specified labels within a 3D volume.
+
+    This function efficiently masks the `image_data` array based on the provided `labels` in the `label_data` array. 
+    The function handles cases with both a large and small number of labels, optimizing performance accordingly.
+
+    Args:
+        label_data (np.ndarray): A NumPy array containing label data, representing different anatomical 
+                                 regions or classes in a 3D medical image.
+        image_data (np.ndarray): A NumPy array containing the image data from which the relevant regions 
+                                 will be extracted.
+        labels (list of int): A list of integers representing the label values to be used for masking.
+
+    Returns:
+        np.ndarray: A NumPy array containing the elements of `image_data` that correspond to the specified 
+                    labels in `label_data`. If no labels are provided, an empty array is returned.
+
+    Raises:
+        ValueError: If `image_data` and `label_data` do not have the same shape.
+
+    Example:
+        label_int_dict = {"liver": [1], "kidney": [5, 14]}
+        masked_data = get_masked_data(label_data, image_data, label_int_dict["kidney"])
+    """
+    
+    # Check if the shapes of image_data and label_data match
+    if image_data.shape != label_data.shape:
+        raise ValueError(f"Shape mismatch: image_data has shape {image_data.shape}, "
+                         f"but label_data has shape {label_data.shape}. They must be the same.")
+    
+    if not labels:
+        return np.array([])  # Return an empty array if no labels are provided
+
+    # Optimize performance based on the number of labels
+    if len(labels) >= 3:
+        label_set = set(labels)  # Convert labels to a set for faster membership testing
+        mask = np.isin(label_data, list(label_set))
+    else:
+        # Use logical OR to combine masks if the number of labels is small
+        mask = np.zeros_like(label_data, dtype=bool)
+        for label in labels:
+            mask = np.logical_or(mask, label_data == label)
+
+    # Retrieve the masked data
+    masked_data = image_data[mask.astype(bool)]
+
+    return masked_data
+
+    
+def is_outlier(statistics, image_data, label_data, label_int_dict):
+    """
+    Perform a quality check on the generated image by comparing its statistics with precomputed thresholds.
+
+    Args:
+        statistics (dict): Dictionary containing precomputed statistics including mean +/- 3sigma ranges.
+        image_data (np.ndarray): The image data to be checked, typically a 3D NumPy array.
+        label_data (np.ndarray): The label data corresponding to the image, used for masking regions of interest.
+        label_int_dict (dict): Dictionary mapping label names to their corresponding integer lists.
+            e.g., label_int_dict = {"liver": [1], "kidney": [5, 14]}
+
+    Returns:
+        dict: A dictionary with labels as keys, each containing the quality check result,
+              including whether it's an outlier, the median value, and the thresholds used.
+              If no data is found for a label, the median value will be `None` and `is_outlier` will be `False`.
+
+    Example:
+        # Example input data
+        statistics = {
+            "liver": {
+                "sigma_6_low": -21.596463547885904,
+                "sigma_6_high": 156.27881534763367
+            },
+            "kidney": {
+                "sigma_6_low": -15.0,
+                "sigma_6_high": 120.0
+            }
+        }
+        label_int_dict = {
+            "liver": [1],
+            "kidney": [5, 14]
+        }
+        image_data = np.random.rand(100, 100, 100)  # Replace with actual image data
+        label_data = np.zeros((100, 100, 100))  # Replace with actual label data
+        label_data[40:60, 40:60, 40:60] = 1  # Example region for liver
+        label_data[70:90, 70:90, 70:90] = 5  # Example region for kidney
+        result = is_outlier(statistics, image_data, label_data, label_int_dict)
+    """
+    outlier_results = {}
+
+    for label_name, stats in statistics.items():
+        # Get the thresholds from the statistics
+        low_thresh = stats["sigma_6_low"]  # or "sigma_12_low" depending on your needs
+        high_thresh = stats["sigma_6_high"]  # or "sigma_12_high" depending on your needs
+        
+        # Retrieve the corresponding label integers
+        labels = label_int_dict.get(label_name, [])
+        masked_data = get_masked_data(label_data, image_data, labels)
+        masked_data = masked_data[~np.isnan(masked_data)]
+        
+        if len(masked_data) == 0 or masked_data.size == 0:
+            outlier_results[label_name] = {
+                "is_outlier": False,
+                "median_value": None,
+                "low_thresh": low_thresh,
+                "high_thresh": high_thresh
+            }
+            continue
+
+
+        # Compute the median of the masked region
+        median_value = np.median(masked_data)
+        
+        if np.isnan(median_value):
+            median_value = None
+            is_outlier = False
+        else:
+            # Determine if the median value is an outlier
+            is_outlier = median_value < low_thresh or median_value > high_thresh
+        
+        outlier_results[label_name] = {
+            "is_outlier": is_outlier,
+            "median_value": median_value,
+            "low_thresh": low_thresh,
+            "high_thresh": high_thresh
+        }
+
+    return outlier_results
\ No newline at end of file
diff --git a/generation/maisi/scripts/sample.py b/generation/maisi/scripts/sample.py
index 2257b341e7..9a4d9f25de 100644
--- a/generation/maisi/scripts/sample.py
+++ b/generation/maisi/scripts/sample.py
@@ -29,6 +29,7 @@
 from .augmentation import augmentation
 from .find_masks import find_masks
 from .utils import binarize_labels, general_mask_generation_post_process, get_body_region_index_from_mask, remap_labels
+from .quality_check import is_outlier
 
 
 class ReconModel(torch.nn.Module):
@@ -497,7 +498,7 @@ def __init__(
         controllable_anatomy_size,
         image_output_ext=".nii.gz",
         label_output_ext=".nii.gz",
-        quality_check_args=None,
+        real_img_median_statistics="./configs/image_median_statistics.json",
         spacing=[1, 1, 1],
         num_inference_steps=None,
         mask_generation_num_inference_steps=None,
@@ -563,9 +564,15 @@ def __init__(
         self.autoencoder_sliding_window_infer_size = autoencoder_sliding_window_infer_size
         self.autoencoder_sliding_window_infer_overlap = autoencoder_sliding_window_infer_overlap
 
-        # quality check disabled for this version
-        self.quality_check_args = quality_check_args
+        # quality check args
+        self.max_try_time = 5 # if not pass quality check, will try self.max_try_time times
+        with open(real_img_median_statistics, 'r') as json_file:
+            self.median_statistics = json.load(json_file)        
+        self.label_int_dict = {"liver":[1], "spleen":[3], "pancreas":[4], "kidney":[5,14], "lung":[28,29,30,31,31], "brain":[22],
+             "hepatic tumor": [26], "bone lesion":[128], "lung tumor": [23], "colon cancer primaries":[27],"pancreatic tumor":[24],
+             "bone":list(range(33,57))+list(range(63,98))+[120,122,127]}
 
+        # networks
         self.autoencoder.eval()
         self.diffusion_unet.eval()
         self.controlnet.eval()
@@ -669,8 +676,8 @@ def sample_multiple_images(self, num_img):
                     spacing_tensor,
                 )
                 # current quality always return True
-                pass_quality_check = self.quality_check(synthetic_images)
-                if pass_quality_check or try_time > 3:
+                pass_quality_check = self.quality_check(synthetic_images.cpu().detach().numpy(), comebine_label_or.cpu().detach().numpy())
+                if pass_quality_check or try_time > self.max_try_time:
                     # save image/label pairs
                     output_postfix = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
                     synthetic_labels.meta["filename_or_obj"] = "sample.nii.gz"
@@ -1006,15 +1013,9 @@ def find_closest_masks(self, num_img):
             raise ValueError("Cannot find body region with given organ list.")
         return final_candidates
 
-    def quality_check(self, image):
-        """
-        Perform a quality check on the generated image. This version disabled quality check and always return True.
-
-        Args:
-            image (torch.Tensor): The generated image.
-
-        Returns:
-            bool: True if the image passes the quality check, False otherwise.
-        """
-        # This version disabled quality check
+    def quality_check(self, image_data, label_data):
+        outlier_results = is_outlier(self.median_statistics, image_data, label_data, self.label_int_dict)
+        for label, result in outlier_results.items():
+            if result.get("is_outlier", False):
+                return False
         return True

From 28f5c96c8d204206eae9b00a9408a8bf5eb4d55a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 17 Aug 2024 01:00:52 +0000
Subject: [PATCH 02/23] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 generation/maisi/scripts/quality_check.py | 36 ++++++++++++-----------
 generation/maisi/scripts/sample.py        | 27 ++++++++++++-----
 2 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/generation/maisi/scripts/quality_check.py b/generation/maisi/scripts/quality_check.py
index 7cc0aaaee7..a4f600a06a 100644
--- a/generation/maisi/scripts/quality_check.py
+++ b/generation/maisi/scripts/quality_check.py
@@ -6,22 +6,23 @@
 import matplotlib.pyplot as plt
 from scipy.stats import entropy
 
+
 def get_masked_data(label_data, image_data, labels):
     """
     Extracts and returns the image data corresponding to specified labels within a 3D volume.
 
-    This function efficiently masks the `image_data` array based on the provided `labels` in the `label_data` array. 
+    This function efficiently masks the `image_data` array based on the provided `labels` in the `label_data` array.
     The function handles cases with both a large and small number of labels, optimizing performance accordingly.
 
     Args:
-        label_data (np.ndarray): A NumPy array containing label data, representing different anatomical 
+        label_data (np.ndarray): A NumPy array containing label data, representing different anatomical
                                  regions or classes in a 3D medical image.
-        image_data (np.ndarray): A NumPy array containing the image data from which the relevant regions 
+        image_data (np.ndarray): A NumPy array containing the image data from which the relevant regions
                                  will be extracted.
         labels (list of int): A list of integers representing the label values to be used for masking.
 
     Returns:
-        np.ndarray: A NumPy array containing the elements of `image_data` that correspond to the specified 
+        np.ndarray: A NumPy array containing the elements of `image_data` that correspond to the specified
                     labels in `label_data`. If no labels are provided, an empty array is returned.
 
     Raises:
@@ -31,12 +32,14 @@ def get_masked_data(label_data, image_data, labels):
         label_int_dict = {"liver": [1], "kidney": [5, 14]}
         masked_data = get_masked_data(label_data, image_data, label_int_dict["kidney"])
     """
-    
+
     # Check if the shapes of image_data and label_data match
     if image_data.shape != label_data.shape:
-        raise ValueError(f"Shape mismatch: image_data has shape {image_data.shape}, "
-                         f"but label_data has shape {label_data.shape}. They must be the same.")
-    
+        raise ValueError(
+            f"Shape mismatch: image_data has shape {image_data.shape}, "
+            f"but label_data has shape {label_data.shape}. They must be the same."
+        )
+
     if not labels:
         return np.array([])  # Return an empty array if no labels are provided
 
@@ -55,7 +58,7 @@ def get_masked_data(label_data, image_data, labels):
 
     return masked_data
 
-    
+
 def is_outlier(statistics, image_data, label_data, label_int_dict):
     """
     Perform a quality check on the generated image by comparing its statistics with precomputed thresholds.
@@ -100,37 +103,36 @@ def is_outlier(statistics, image_data, label_data, label_int_dict):
         # Get the thresholds from the statistics
         low_thresh = stats["sigma_6_low"]  # or "sigma_12_low" depending on your needs
         high_thresh = stats["sigma_6_high"]  # or "sigma_12_high" depending on your needs
-        
+
         # Retrieve the corresponding label integers
         labels = label_int_dict.get(label_name, [])
         masked_data = get_masked_data(label_data, image_data, labels)
         masked_data = masked_data[~np.isnan(masked_data)]
-        
+
         if len(masked_data) == 0 or masked_data.size == 0:
             outlier_results[label_name] = {
                 "is_outlier": False,
                 "median_value": None,
                 "low_thresh": low_thresh,
-                "high_thresh": high_thresh
+                "high_thresh": high_thresh,
             }
             continue
 
-
         # Compute the median of the masked region
         median_value = np.median(masked_data)
-        
+
         if np.isnan(median_value):
             median_value = None
             is_outlier = False
         else:
             # Determine if the median value is an outlier
             is_outlier = median_value < low_thresh or median_value > high_thresh
-        
+
         outlier_results[label_name] = {
             "is_outlier": is_outlier,
             "median_value": median_value,
             "low_thresh": low_thresh,
-            "high_thresh": high_thresh
+            "high_thresh": high_thresh,
         }
 
-    return outlier_results
\ No newline at end of file
+    return outlier_results
diff --git a/generation/maisi/scripts/sample.py b/generation/maisi/scripts/sample.py
index 9a4d9f25de..0bb6695b78 100644
--- a/generation/maisi/scripts/sample.py
+++ b/generation/maisi/scripts/sample.py
@@ -565,12 +565,23 @@ def __init__(
         self.autoencoder_sliding_window_infer_overlap = autoencoder_sliding_window_infer_overlap
 
         # quality check args
-        self.max_try_time = 5 # if not pass quality check, will try self.max_try_time times
-        with open(real_img_median_statistics, 'r') as json_file:
-            self.median_statistics = json.load(json_file)        
-        self.label_int_dict = {"liver":[1], "spleen":[3], "pancreas":[4], "kidney":[5,14], "lung":[28,29,30,31,31], "brain":[22],
-             "hepatic tumor": [26], "bone lesion":[128], "lung tumor": [23], "colon cancer primaries":[27],"pancreatic tumor":[24],
-             "bone":list(range(33,57))+list(range(63,98))+[120,122,127]}
+        self.max_try_time = 5  # if not pass quality check, will try self.max_try_time times
+        with open(real_img_median_statistics, "r") as json_file:
+            self.median_statistics = json.load(json_file)
+        self.label_int_dict = {
+            "liver": [1],
+            "spleen": [3],
+            "pancreas": [4],
+            "kidney": [5, 14],
+            "lung": [28, 29, 30, 31, 31],
+            "brain": [22],
+            "hepatic tumor": [26],
+            "bone lesion": [128],
+            "lung tumor": [23],
+            "colon cancer primaries": [27],
+            "pancreatic tumor": [24],
+            "bone": list(range(33, 57)) + list(range(63, 98)) + [120, 122, 127],
+        }
 
         # networks
         self.autoencoder.eval()
@@ -676,7 +687,9 @@ def sample_multiple_images(self, num_img):
                     spacing_tensor,
                 )
                 # current quality always return True
-                pass_quality_check = self.quality_check(synthetic_images.cpu().detach().numpy(), comebine_label_or.cpu().detach().numpy())
+                pass_quality_check = self.quality_check(
+                    synthetic_images.cpu().detach().numpy(), comebine_label_or.cpu().detach().numpy()
+                )
                 if pass_quality_check or try_time > self.max_try_time:
                     # save image/label pairs
                     output_postfix = datetime.now().strftime("%Y%m%d_%H%M%S_%f")

From 7c0bd1195e1b528a0734383a9227e8a6619ef9d8 Mon Sep 17 00:00:00 2001
From: Can-Zhao <volcanofly@gmail.com>
Date: Sat, 17 Aug 2024 01:15:14 +0000
Subject: [PATCH 03/23] add quality check

Signed-off-by: Can-Zhao <volcanofly@gmail.com>
---
 generation/maisi/scripts/quality_check.py | 47 ++++++++++++++---------
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/generation/maisi/scripts/quality_check.py b/generation/maisi/scripts/quality_check.py
index a4f600a06a..76d1498e8b 100644
--- a/generation/maisi/scripts/quality_check.py
+++ b/generation/maisi/scripts/quality_check.py
@@ -1,3 +1,14 @@
+# Copyright (c) MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os
 import json
 import time
@@ -6,23 +17,22 @@
 import matplotlib.pyplot as plt
 from scipy.stats import entropy
 
-
 def get_masked_data(label_data, image_data, labels):
     """
     Extracts and returns the image data corresponding to specified labels within a 3D volume.
 
-    This function efficiently masks the `image_data` array based on the provided `labels` in the `label_data` array.
+    This function efficiently masks the `image_data` array based on the provided `labels` in the `label_data` array. 
     The function handles cases with both a large and small number of labels, optimizing performance accordingly.
 
     Args:
-        label_data (np.ndarray): A NumPy array containing label data, representing different anatomical
+        label_data (np.ndarray): A NumPy array containing label data, representing different anatomical 
                                  regions or classes in a 3D medical image.
-        image_data (np.ndarray): A NumPy array containing the image data from which the relevant regions
+        image_data (np.ndarray): A NumPy array containing the image data from which the relevant regions 
                                  will be extracted.
         labels (list of int): A list of integers representing the label values to be used for masking.
 
     Returns:
-        np.ndarray: A NumPy array containing the elements of `image_data` that correspond to the specified
+        np.ndarray: A NumPy array containing the elements of `image_data` that correspond to the specified 
                     labels in `label_data`. If no labels are provided, an empty array is returned.
 
     Raises:
@@ -32,14 +42,12 @@ def get_masked_data(label_data, image_data, labels):
         label_int_dict = {"liver": [1], "kidney": [5, 14]}
         masked_data = get_masked_data(label_data, image_data, label_int_dict["kidney"])
     """
-
+    
     # Check if the shapes of image_data and label_data match
     if image_data.shape != label_data.shape:
-        raise ValueError(
-            f"Shape mismatch: image_data has shape {image_data.shape}, "
-            f"but label_data has shape {label_data.shape}. They must be the same."
-        )
-
+        raise ValueError(f"Shape mismatch: image_data has shape {image_data.shape}, "
+                         f"but label_data has shape {label_data.shape}. They must be the same.")
+    
     if not labels:
         return np.array([])  # Return an empty array if no labels are provided
 
@@ -58,7 +66,7 @@ def get_masked_data(label_data, image_data, labels):
 
     return masked_data
 
-
+    
 def is_outlier(statistics, image_data, label_data, label_int_dict):
     """
     Perform a quality check on the generated image by comparing its statistics with precomputed thresholds.
@@ -103,36 +111,37 @@ def is_outlier(statistics, image_data, label_data, label_int_dict):
         # Get the thresholds from the statistics
         low_thresh = stats["sigma_6_low"]  # or "sigma_12_low" depending on your needs
         high_thresh = stats["sigma_6_high"]  # or "sigma_12_high" depending on your needs
-
+        
         # Retrieve the corresponding label integers
         labels = label_int_dict.get(label_name, [])
         masked_data = get_masked_data(label_data, image_data, labels)
         masked_data = masked_data[~np.isnan(masked_data)]
-
+        
         if len(masked_data) == 0 or masked_data.size == 0:
             outlier_results[label_name] = {
                 "is_outlier": False,
                 "median_value": None,
                 "low_thresh": low_thresh,
-                "high_thresh": high_thresh,
+                "high_thresh": high_thresh
             }
             continue
 
+
         # Compute the median of the masked region
         median_value = np.median(masked_data)
-
+        
         if np.isnan(median_value):
             median_value = None
             is_outlier = False
         else:
             # Determine if the median value is an outlier
             is_outlier = median_value < low_thresh or median_value > high_thresh
-
+        
         outlier_results[label_name] = {
             "is_outlier": is_outlier,
             "median_value": median_value,
             "low_thresh": low_thresh,
-            "high_thresh": high_thresh,
+            "high_thresh": high_thresh
         }
 
-    return outlier_results
+    return outlier_results
\ No newline at end of file

From 91ad0680e6bb083c7db81d827e38007959ecdf5e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 17 Aug 2024 01:16:32 +0000
Subject: [PATCH 04/23] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 generation/maisi/scripts/quality_check.py | 36 ++++++++++++-----------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/generation/maisi/scripts/quality_check.py b/generation/maisi/scripts/quality_check.py
index 76d1498e8b..247c410dfb 100644
--- a/generation/maisi/scripts/quality_check.py
+++ b/generation/maisi/scripts/quality_check.py
@@ -17,22 +17,23 @@
 import matplotlib.pyplot as plt
 from scipy.stats import entropy
 
+
 def get_masked_data(label_data, image_data, labels):
     """
     Extracts and returns the image data corresponding to specified labels within a 3D volume.
 
-    This function efficiently masks the `image_data` array based on the provided `labels` in the `label_data` array. 
+    This function efficiently masks the `image_data` array based on the provided `labels` in the `label_data` array.
     The function handles cases with both a large and small number of labels, optimizing performance accordingly.
 
     Args:
-        label_data (np.ndarray): A NumPy array containing label data, representing different anatomical 
+        label_data (np.ndarray): A NumPy array containing label data, representing different anatomical
                                  regions or classes in a 3D medical image.
-        image_data (np.ndarray): A NumPy array containing the image data from which the relevant regions 
+        image_data (np.ndarray): A NumPy array containing the image data from which the relevant regions
                                  will be extracted.
         labels (list of int): A list of integers representing the label values to be used for masking.
 
     Returns:
-        np.ndarray: A NumPy array containing the elements of `image_data` that correspond to the specified 
+        np.ndarray: A NumPy array containing the elements of `image_data` that correspond to the specified
                     labels in `label_data`. If no labels are provided, an empty array is returned.
 
     Raises:
@@ -42,12 +43,14 @@ def get_masked_data(label_data, image_data, labels):
         label_int_dict = {"liver": [1], "kidney": [5, 14]}
         masked_data = get_masked_data(label_data, image_data, label_int_dict["kidney"])
     """
-    
+
     # Check if the shapes of image_data and label_data match
     if image_data.shape != label_data.shape:
-        raise ValueError(f"Shape mismatch: image_data has shape {image_data.shape}, "
-                         f"but label_data has shape {label_data.shape}. They must be the same.")
-    
+        raise ValueError(
+            f"Shape mismatch: image_data has shape {image_data.shape}, "
+            f"but label_data has shape {label_data.shape}. They must be the same."
+        )
+
     if not labels:
         return np.array([])  # Return an empty array if no labels are provided
 
@@ -66,7 +69,7 @@ def get_masked_data(label_data, image_data, labels):
 
     return masked_data
 
-    
+
 def is_outlier(statistics, image_data, label_data, label_int_dict):
     """
     Perform a quality check on the generated image by comparing its statistics with precomputed thresholds.
@@ -111,37 +114,36 @@ def is_outlier(statistics, image_data, label_data, label_int_dict):
         # Get the thresholds from the statistics
         low_thresh = stats["sigma_6_low"]  # or "sigma_12_low" depending on your needs
         high_thresh = stats["sigma_6_high"]  # or "sigma_12_high" depending on your needs
-        
+
         # Retrieve the corresponding label integers
         labels = label_int_dict.get(label_name, [])
         masked_data = get_masked_data(label_data, image_data, labels)
         masked_data = masked_data[~np.isnan(masked_data)]
-        
+
         if len(masked_data) == 0 or masked_data.size == 0:
             outlier_results[label_name] = {
                 "is_outlier": False,
                 "median_value": None,
                 "low_thresh": low_thresh,
-                "high_thresh": high_thresh
+                "high_thresh": high_thresh,
             }
             continue
 
-
         # Compute the median of the masked region
         median_value = np.median(masked_data)
-        
+
         if np.isnan(median_value):
             median_value = None
             is_outlier = False
         else:
             # Determine if the median value is an outlier
             is_outlier = median_value < low_thresh or median_value > high_thresh
-        
+
         outlier_results[label_name] = {
             "is_outlier": is_outlier,
             "median_value": median_value,
             "low_thresh": low_thresh,
-            "high_thresh": high_thresh
+            "high_thresh": high_thresh,
         }
 
-    return outlier_results
\ No newline at end of file
+    return outlier_results

From 494ed3633f46b6d0365c7a8580abd4813db406e4 Mon Sep 17 00:00:00 2001
From: Can-Zhao <volcanofly@gmail.com>
Date: Sat, 17 Aug 2024 01:58:52 +0000
Subject: [PATCH 05/23] refactor

Signed-off-by: Can-Zhao <volcanofly@gmail.com>
---
 generation/maisi/scripts/quality_check.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/generation/maisi/scripts/quality_check.py b/generation/maisi/scripts/quality_check.py
index 247c410dfb..0a523f0c95 100644
--- a/generation/maisi/scripts/quality_check.py
+++ b/generation/maisi/scripts/quality_check.py
@@ -9,13 +9,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
-import json
-import time
 import nibabel as nib
 import numpy as np
-import matplotlib.pyplot as plt
-from scipy.stats import entropy
 
 
 def get_masked_data(label_data, image_data, labels):

From ad5afba9600b0cab710f40c027238e31e367c3e4 Mon Sep 17 00:00:00 2001
From: Can Zhao <69829124+Can-Zhao@users.noreply.github.com>
Date: Sun, 18 Aug 2024 16:57:30 -0700
Subject: [PATCH 06/23] add docstring

Signed-off-by: Can Zhao <69829124+Can-Zhao@users.noreply.github.com>
---
 generation/maisi/scripts/sample.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/generation/maisi/scripts/sample.py b/generation/maisi/scripts/sample.py
index 0bb6695b78..3841250e25 100644
--- a/generation/maisi/scripts/sample.py
+++ b/generation/maisi/scripts/sample.py
@@ -1027,6 +1027,14 @@ def find_closest_masks(self, num_img):
         return final_candidates
 
     def quality_check(self, image_data, label_data):
+        """
+        Perform a quality check on the generated image.
+        Args:
+            image_data (np.ndarray): The generated image.
+            label_data (np.ndarray): The corresponding whole body mask.
+        Returns:
+            bool: True if the image passes the quality check, False otherwise.
+        """
         outlier_results = is_outlier(self.median_statistics, image_data, label_data, self.label_int_dict)
         for label, result in outlier_results.items():
             if result.get("is_outlier", False):

From 74d4e8741b1e547e2bc2b3f9de93461078768b27 Mon Sep 17 00:00:00 2001
From: Can-Zhao <volcanofly@gmail.com>
Date: Tue, 20 Aug 2024 15:23:38 +0000
Subject: [PATCH 07/23] rm unused import, correct typo, add statistics file

Signed-off-by: Can-Zhao <volcanofly@gmail.com>
---
 .../configs/image_median_statistics.json      | 72 +++++++++++++++++++
 generation/maisi/scripts/diff_model_infer.py  |  2 +-
 generation/maisi/scripts/infer_controlnet.py  |  2 +-
 generation/maisi/scripts/sample.py            | 70 +++++++++---------
 4 files changed, 109 insertions(+), 37 deletions(-)
 create mode 100644 generation/maisi/configs/image_median_statistics.json

diff --git a/generation/maisi/configs/image_median_statistics.json b/generation/maisi/configs/image_median_statistics.json
new file mode 100644
index 0000000000..3bea6b161c
--- /dev/null
+++ b/generation/maisi/configs/image_median_statistics.json
@@ -0,0 +1,72 @@
+{
+    "liver": {
+        "min_median": -14.0,
+        "max_median": 1000.0,
+        "percentile_0_5": 9.530000000000001,
+        "percentile_99_5": 162.0,
+        "sigma_6_low": -21.596463547885904,
+        "sigma_6_high": 156.27881534763367,
+        "sigma_12_low": -110.53410299564568,
+        "sigma_12_high": 245.21645479539342
+    },
+    "spleen": {
+        "min_median": -69.0,
+        "max_median": 1000.0,
+        "percentile_0_5": 16.925000000000004,
+        "percentile_99_5": 184.07500000000073,
+        "sigma_6_low": -43.133891656525165,
+        "sigma_6_high": 177.40494997185993,
+        "sigma_12_low": -153.4033124707177,
+        "sigma_12_high": 287.6743707860525
+    },
+    "pancreas": {
+        "min_median": -124.0,
+        "max_median": 1000.0,
+        "percentile_0_5": -29.0,
+        "percentile_99_5": 145.92000000000007,
+        "sigma_6_low": -56.59382515620725,
+        "sigma_6_high": 149.50627399318438,
+        "sigma_12_low": -159.64387473090306,
+        "sigma_12_high": 252.5563235678802
+    },
+    "kidney": {
+        "min_median": -165.5,
+        "max_median": 819.0,
+        "percentile_0_5": -40.0,
+        "percentile_99_5": 254.61999999999898,
+        "sigma_6_low": -130.56375604853028,
+        "sigma_6_high": 267.28163511081016,
+        "sigma_12_low": -329.4864516282005,
+        "sigma_12_high": 466.20433069048045
+    },
+    "lung": {
+        "min_median": -1000.0,
+        "max_median": 65.0,
+        "percentile_0_5": -937.0,
+        "percentile_99_5": -366.9500000000007,
+        "sigma_6_low": -1088.5583843889117,
+        "sigma_6_high": -551.8503346949108,
+        "sigma_12_low": -1356.912409235912,
+        "sigma_12_high": -283.4963098479103
+    },
+    "bone": {
+        "min_median": 77.5,
+        "max_median": 1000.0,
+        "percentile_0_5": 136.45499999999998,
+        "percentile_99_5": 551.6350000000002,
+        "sigma_6_low": 71.39901958080469,
+        "sigma_6_high": 471.9957615639765,
+        "sigma_12_low": -128.8993514107812,
+        "sigma_12_high": 672.2941325555623
+    },
+    "brain": {
+        "min_median": -1000.0,
+        "max_median": 238.0,
+        "percentile_0_5": -951.0,
+        "percentile_99_5": 126.25,
+        "sigma_6_low": -304.8208236135867,
+        "sigma_6_high": 369.5118535139189,
+        "sigma_12_low": -641.9871621773394,
+        "sigma_12_high": 706.6781920776717
+    }
+}
\ No newline at end of file
diff --git a/generation/maisi/scripts/diff_model_infer.py b/generation/maisi/scripts/diff_model_infer.py
index 93dbf8c22f..2de5faa259 100644
--- a/generation/maisi/scripts/diff_model_infer.py
+++ b/generation/maisi/scripts/diff_model_infer.py
@@ -27,7 +27,7 @@
 
 from .diff_model_setting import initialize_distributed, load_config, setup_logging
 from .sample import ReconModel
-from .utils import define_instance, load_autoencoder_ckpt
+from .utils import define_instance
 
 
 def set_random_seed(seed: int) -> int:
diff --git a/generation/maisi/scripts/infer_controlnet.py b/generation/maisi/scripts/infer_controlnet.py
index 6931c31e51..cb4d3c9fc0 100644
--- a/generation/maisi/scripts/infer_controlnet.py
+++ b/generation/maisi/scripts/infer_controlnet.py
@@ -24,7 +24,7 @@
 from monai.utils import RankFilter
 
 from .sample import ldm_conditional_sample_one_image
-from .utils import define_instance, load_autoencoder_ckpt, prepare_maisi_controlnet_json_dataloader, setup_ddp
+from .utils import define_instance, prepare_maisi_controlnet_json_dataloader, setup_ddp
 
 
 @torch.inference_mode()
diff --git a/generation/maisi/scripts/sample.py b/generation/maisi/scripts/sample.py
index 3841250e25..47ea8782cb 100644
--- a/generation/maisi/scripts/sample.py
+++ b/generation/maisi/scripts/sample.py
@@ -182,7 +182,7 @@ def ldm_conditional_sample_one_image(
     noise_scheduler,
     scale_factor,
     device,
-    comebine_label_or,
+    combine_label_or,
     top_region_index_tensor,
     bottom_region_index_tensor,
     spacing_tensor,
@@ -203,7 +203,7 @@ def ldm_conditional_sample_one_image(
         noise_scheduler: The noise scheduler for the diffusion process.
         scale_factor (float): Scaling factor for the latent space.
         device (torch.device): The device to run the computation on.
-        comebine_label_or (torch.Tensor): The combined label tensor.
+        combine_label_or (torch.Tensor): The combined label tensor.
         top_region_index_tensor (torch.Tensor): Tensor specifying the top region index.
         bottom_region_index_tensor (torch.Tensor): Tensor specifying the bottom region index.
         spacing_tensor (torch.Tensor): Tensor specifying the spacing.
@@ -230,18 +230,18 @@ def ldm_conditional_sample_one_image(
         logging.info("---- Start generating latent features... ----")
         start_time = time.time()
         # generate segmentation mask
-        comebine_label = comebine_label_or.to(device)
+        combine_label = combine_label_or.to(device)
         if (
-            output_size[0] != comebine_label.shape[2]
-            or output_size[1] != comebine_label.shape[3]
-            or output_size[2] != comebine_label.shape[4]
+            output_size[0] != combine_label.shape[2]
+            or output_size[1] != combine_label.shape[3]
+            or output_size[2] != combine_label.shape[4]
         ):
             logging.info(
                 "output_size is not a desired value. Need to interpolate the mask to match with output_size. The result image will be very low quality."
             )
-            comebine_label = torch.nn.functional.interpolate(comebine_label, size=output_size, mode="nearest")
+            combine_label = torch.nn.functional.interpolate(combine_label, size=output_size, mode="nearest")
 
-        controlnet_cond_vis = binarize_labels(comebine_label.as_tensor().long()).half()
+        controlnet_cond_vis = binarize_labels(combine_label.as_tensor().long()).half()
 
         # Generate random noise
         latents = initialize_noise_latents(latent_shape, device) * noise_factor
@@ -302,18 +302,18 @@ def ldm_conditional_sample_one_image(
         # project output to [-1000, 1000]
         synthetic_images = synthetic_images * (a_max - a_min) + a_min
         # regularize background intensities
-        synthetic_images = crop_img_body_mask(synthetic_images, comebine_label)
+        synthetic_images = crop_img_body_mask(synthetic_images, combine_label)
         torch.cuda.empty_cache()
 
-    return synthetic_images, comebine_label
+    return synthetic_images, combine_label
 
 
-def filter_mask_with_organs(comebine_label, anatomy_list):
+def filter_mask_with_organs(combine_label, anatomy_list):
     """
     Filter a mask to only include specified organs.
 
     Args:
-        comebine_label (torch.Tensor): The input mask.
+        combine_label (torch.Tensor): The input mask.
         anatomy_list (list): List of organ labels to keep.
 
     Returns:
@@ -321,31 +321,31 @@ def filter_mask_with_organs(comebine_label, anatomy_list):
     """
     # final output mask file has shape of output_size, contains labels in anatomy_list
     # it is already interpolated to target size
-    comebine_label = comebine_label.long()
+    combine_label = combine_label.long()
     # filter out the organs that are not in anatomy_list
     for i in range(len(anatomy_list)):
         organ = anatomy_list[i]
         # replace it with a negative value so it will get mixed
-        comebine_label[comebine_label == organ] = -(i + 1)
+        combine_label[combine_label == organ] = -(i + 1)
     # zero-out voxels with value not in anatomy_list
-    comebine_label[comebine_label > 0] = 0
+    combine_label[combine_label > 0] = 0
     # output positive values
-    comebine_label = -comebine_label
-    return comebine_label
+    combine_label = -combine_label
+    return combine_label
 
 
-def crop_img_body_mask(synthetic_images, comebine_label):
+def crop_img_body_mask(synthetic_images, combine_label):
     """
     Crop the synthetic image using a body mask.
 
     Args:
         synthetic_images (torch.Tensor): The synthetic images.
-        comebine_label (torch.Tensor): The body mask.
+        combine_label (torch.Tensor): The body mask.
 
     Returns:
         torch.Tensor: The cropped synthetic images.
     """
-    synthetic_images[comebine_label == 0] = -1000
+    synthetic_images[combine_label == 0] = -1000
     return synthetic_images
 
 
@@ -653,7 +653,7 @@ def sample_multiple_images(self, num_img):
             if len(self.controllable_anatomy_size) > 0:
                 # generate a synthetic mask
                 (
-                    comebine_label_or,
+                    combine_label_or,
                     top_region_index_tensor,
                     bottom_region_index_tensor,
                     spacing_tensor,
@@ -663,16 +663,16 @@ def sample_multiple_images(self, num_img):
                 mask_file = item["mask_file"]
                 if_aug = item["if_aug"]
                 (
-                    comebine_label_or,
+                    combine_label_or,
                     top_region_index_tensor,
                     bottom_region_index_tensor,
                     spacing_tensor,
                 ) = self.read_mask_information(mask_file)
                 if need_resample:
-                    comebine_label_or = self.ensure_output_size_and_spacing(comebine_label_or)
+                    combine_label_or = self.ensure_output_size_and_spacing(combine_label_or)
                 # mask augmentation
                 if if_aug:
-                    comebine_label_or = augmentation(comebine_label_or, self.output_size)
+                    combine_label_or = augmentation(combine_label_or, self.output_size)
             end_time = time.time()
             logging.info(f"---- Mask preparation time: {end_time - start_time} seconds ----")
             torch.cuda.empty_cache()
@@ -681,14 +681,14 @@ def sample_multiple_images(self, num_img):
             try_time = 0
             while to_generate:
                 synthetic_images, synthetic_labels = self.sample_one_pair(
-                    comebine_label_or,
+                    combine_label_or,
                     top_region_index_tensor,
                     bottom_region_index_tensor,
                     spacing_tensor,
                 )
-                # current quality always return True
+                # synthetic image quality check
                 pass_quality_check = self.quality_check(
-                    synthetic_images.cpu().detach().numpy(), comebine_label_or.cpu().detach().numpy()
+                    synthetic_images.cpu().detach().numpy(), combine_label_or.cpu().detach().numpy()
                 )
                 if pass_quality_check or try_time > self.max_try_time:
                     # save image/label pairs
@@ -747,7 +747,7 @@ def select_mask(self, candidate_mask_files, num_img):
 
     def sample_one_pair(
         self,
-        comebine_label_or_aug,
+        combine_label_or_aug,
         top_region_index_tensor,
         bottom_region_index_tensor,
         spacing_tensor,
@@ -756,7 +756,7 @@ def sample_one_pair(
         Generate a single pair of synthetic image and mask.
 
         Args:
-            comebine_label_or_aug (torch.Tensor): Combined label tensor or augmented label.
+            combine_label_or_aug (torch.Tensor): Combined label tensor or augmented label.
             top_region_index_tensor (torch.Tensor): Tensor specifying the top region index.
             bottom_region_index_tensor (torch.Tensor): Tensor specifying the bottom region index.
             spacing_tensor (torch.Tensor): Tensor specifying the spacing.
@@ -772,7 +772,7 @@ def sample_one_pair(
             noise_scheduler=self.noise_scheduler,
             scale_factor=self.scale_factor,
             device=self.device,
-            comebine_label_or=comebine_label_or_aug,
+            combine_label_or=combine_label_or_aug,
             top_region_index_tensor=top_region_index_tensor,
             bottom_region_index_tensor=bottom_region_index_tensor,
             spacing_tensor=spacing_tensor,
@@ -848,23 +848,23 @@ def prepare_one_mask_and_meta_info(self, anatomy_size_condtion):
         Returns:
             tuple: A tuple containing the prepared mask and associated tensors.
         """
-        comebine_label_or = self.sample_one_mask(anatomy_size=anatomy_size_condtion)
+        combine_label_or = self.sample_one_mask(anatomy_size=anatomy_size_condtion)
         # TODO: current mask generation model only can generate 256^3 volumes with 1.5 mm spacing.
         affine = torch.zeros((4, 4))
         affine[0, 0] = 1.5
         affine[1, 1] = 1.5
         affine[2, 2] = 1.5
         affine[3, 3] = 1.0  # dummy
-        comebine_label_or = MetaTensor(comebine_label_or, affine=affine)
-        comebine_label_or = self.ensure_output_size_and_spacing(comebine_label_or)
+        combine_label_or = MetaTensor(combine_label_or, affine=affine)
+        combine_label_or = self.ensure_output_size_and_spacing(combine_label_or)
 
-        top_region_index, bottom_region_index = get_body_region_index_from_mask(comebine_label_or)
+        top_region_index, bottom_region_index = get_body_region_index_from_mask(combine_label_or)
 
         spacing_tensor = torch.FloatTensor(self.spacing).unsqueeze(0).half().to(self.device) * 1e2
         top_region_index_tensor = torch.FloatTensor(top_region_index).unsqueeze(0).half().to(self.device) * 1e2
         bottom_region_index_tensor = torch.FloatTensor(bottom_region_index).unsqueeze(0).half().to(self.device) * 1e2
 
-        return comebine_label_or, top_region_index_tensor, bottom_region_index_tensor, spacing_tensor
+        return combine_label_or, top_region_index_tensor, bottom_region_index_tensor, spacing_tensor
 
     def sample_one_mask(self, anatomy_size):
         """

From 38daf8abc463fdd3ca993495af3e9dfa4e1cba00 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 20 Aug 2024 15:24:47 +0000
Subject: [PATCH 08/23] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 generation/maisi/configs/image_median_statistics.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generation/maisi/configs/image_median_statistics.json b/generation/maisi/configs/image_median_statistics.json
index 3bea6b161c..df9665386c 100644
--- a/generation/maisi/configs/image_median_statistics.json
+++ b/generation/maisi/configs/image_median_statistics.json
@@ -69,4 +69,4 @@
         "sigma_12_low": -641.9871621773394,
         "sigma_12_high": 706.6781920776717
     }
-}
\ No newline at end of file
+}

From 38aacc3fe4ddac0da38bb283df6577588e91a928 Mon Sep 17 00:00:00 2001
From: Can-Zhao <volcanofly@gmail.com>
Date: Tue, 20 Aug 2024 15:29:53 +0000
Subject: [PATCH 09/23] np.nanmedian

Signed-off-by: Can-Zhao <volcanofly@gmail.com>
---
 generation/maisi/scripts/quality_check.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generation/maisi/scripts/quality_check.py b/generation/maisi/scripts/quality_check.py
index 0a523f0c95..d73b0aca92 100644
--- a/generation/maisi/scripts/quality_check.py
+++ b/generation/maisi/scripts/quality_check.py
@@ -125,7 +125,7 @@ def is_outlier(statistics, image_data, label_data, label_int_dict):
             continue
 
         # Compute the median of the masked region
-        median_value = np.median(masked_data)
+        median_value = np.nanmedian(masked_data)
 
         if np.isnan(median_value):
             median_value = None

From d1e096c1a98108b2fb6d007658f812c1c038e34d Mon Sep 17 00:00:00 2001
From: Can-Zhao <volcanofly@gmail.com>
Date: Tue, 20 Aug 2024 15:52:38 +0000
Subject: [PATCH 10/23] add logging

Signed-off-by: Can-Zhao <volcanofly@gmail.com>
---
 generation/maisi/scripts/sample.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/generation/maisi/scripts/sample.py b/generation/maisi/scripts/sample.py
index 47ea8782cb..05cbbff995 100644
--- a/generation/maisi/scripts/sample.py
+++ b/generation/maisi/scripts/sample.py
@@ -1038,5 +1038,6 @@ def quality_check(self, image_data, label_data):
         outlier_results = is_outlier(self.median_statistics, image_data, label_data, self.label_int_dict)
         for label, result in outlier_results.items():
             if result.get("is_outlier", False):
+                logging.info(f"Generated image quality check for label '{label}' failed: median value {result['median_value']} is outside the acceptable range ({result['low_thresh']} - {result['high_thresh']}).")
                 return False
         return True

From 0db8d902bef0cf14353d2529472292c4efa718f5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 20 Aug 2024 15:53:47 +0000
Subject: [PATCH 11/23] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 generation/maisi/scripts/sample.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/generation/maisi/scripts/sample.py b/generation/maisi/scripts/sample.py
index 05cbbff995..aafeaa63f1 100644
--- a/generation/maisi/scripts/sample.py
+++ b/generation/maisi/scripts/sample.py
@@ -1038,6 +1038,8 @@ def quality_check(self, image_data, label_data):
         outlier_results = is_outlier(self.median_statistics, image_data, label_data, self.label_int_dict)
         for label, result in outlier_results.items():
             if result.get("is_outlier", False):
-                logging.info(f"Generated image quality check for label '{label}' failed: median value {result['median_value']} is outside the acceptable range ({result['low_thresh']} - {result['high_thresh']}).")
+                logging.info(
+                    f"Generated image quality check for label '{label}' failed: median value {result['median_value']} is outside the acceptable range ({result['low_thresh']} - {result['high_thresh']})."
+                )
                 return False
         return True

From 4e0c77e71c619614321e15e2a4071730e604ce81 Mon Sep 17 00:00:00 2001
From: Can-Zhao <volcanofly@gmail.com>
Date: Tue, 20 Aug 2024 15:57:36 +0000
Subject: [PATCH 12/23] add description on num_label_acceleration_thresh

Signed-off-by: Can-Zhao <volcanofly@gmail.com>
---
 generation/maisi/scripts/quality_check.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/generation/maisi/scripts/quality_check.py b/generation/maisi/scripts/quality_check.py
index d73b0aca92..223732761a 100644
--- a/generation/maisi/scripts/quality_check.py
+++ b/generation/maisi/scripts/quality_check.py
@@ -49,10 +49,13 @@ def get_masked_data(label_data, image_data, labels):
     if not labels:
         return np.array([])  # Return an empty array if no labels are provided
 
+    labels = list(set(labels))  # remove duplicate items
+
     # Optimize performance based on the number of labels
-    if len(labels) >= 3:
-        label_set = set(labels)  # Convert labels to a set for faster membership testing
-        mask = np.isin(label_data, list(label_set))
+    num_label_acceleration_thresh = 3
+    if len(labels) >= num_label_acceleration_thresh:
+        # if many labels, np.isin is faster
+        mask = np.isin(label_data, labels)
     else:
         # Use logical OR to combine masks if the number of labels is small
         mask = np.zeros_like(label_data, dtype=bool)

From 36b3f0e822dcf4791928589e6d34322af73a3126 Mon Sep 17 00:00:00 2001
From: Can-Zhao <volcanofly@gmail.com>
Date: Tue, 20 Aug 2024 19:35:24 +0000
Subject: [PATCH 13/23] add description on quality check

Signed-off-by: Can-Zhao <volcanofly@gmail.com>
---
 generation/maisi/README.md | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/generation/maisi/README.md b/generation/maisi/README.md
index 6709e4a84f..cd81bd2ea7 100644
--- a/generation/maisi/README.md
+++ b/generation/maisi/README.md
@@ -54,20 +54,18 @@ MAISI is based on the following papers:
 Network definition is stored in [./configs/config_maisi.json](./configs/config_maisi.json). Training and inference should use the same [./configs/config_maisi.json](./configs/config_maisi.json).
 
 ### 2. Model Inference
+#### Inference parameters:
 The information for the inference input, like body region and anatomy to generate, is stored in [./configs/config_infer.json](./configs/config_infer.json). Please feel free to play with it. Here are the details of the parameters.
 
 - `"num_output_samples"`: int, the number of output image/mask pairs it will generate.
 - `"spacing"`: voxel size of generated images. E.g., if set to `[1.5, 1.5, 2.0]`, it will generate images with a resolution of 1.5x1.5x2.0 mm.
-- `"output_size"`: volume size of generated images. E.g., if set to `[512, 512, 256]`, it will generate images with size of 512x512x256. They need to be divisible by 16. If you have a small GPU memory size, you should adjust it to small numbers.
+- `"output_size"`: volume size of generated images. E.g., if set to `[512, 512, 256]`, it will generate images with size of 512x512x256. They need to be divisible by 16. If you have a small GPU memory size, you should adjust it to small numbers. Note that `"spacing"` and `"output_size"` together decide the output field of view (FOV). For eample, if set to `[1.5, 1.5, 2.0]`mm and `[512, 512, 256]`, the FOV is 768x768x512 mm. We suggest that when using a small `"output_size"` in xy-plane, a large `"spacing"` in xy-plane is desired to ensure that the FOV that covers the whole body in xy-plane. There is no such restriction for z-axis.
 - `"controllable_anatomy_size"`: a list of controllable anatomy and its size scale (0--1). E.g., if set to `[["liver", 0.5],["hepatic tumor", 0.3]]`, the generated image will contain liver that have a median size, with size around 50% percentile, and hepatic tumor that is relatively small, with around 30% percentile. The output will contain paired image and segmentation mask for the controllable anatomy.
 - `"body_region"`: If "controllable_anatomy_size" is not specified, "body_region" will be used to constrain the region of generated images. It needs to be chosen from "head", "chest", "thorax", "abdomen", "pelvis", "lower".
 - `"anatomy_list"`: If "controllable_anatomy_size" is not specified, the output will contain paired image and segmentation mask for the anatomy in "./configs/label_dict.json".
 - `"autoencoder_sliding_window_infer_size"`: in order to save GPU memory, we use sliding window inference when decoding latents to image when `"output_size"` is large. This is the patch size of the sliding window. Small value will reduce GPU memory but increase time cost. They need to be divisible by 16.
 - `"autoencoder_sliding_window_infer_overlap"`: float between 0 and 1. Large value will reduce the stitching artifacts when stitching patches during sliding window inference, but increase time cost. If you do not observe seam lines in the generated image result, you can use a smaller value to save inference time.
 
-
-Please refer to [maisi_inference_tutorial.ipynb](maisi_inference_tutorial.ipynb) for the tutorial for MAISI model inference.
-
 #### Execute Inference:
 To run the inference script, please run:
 ```bash
@@ -75,6 +73,11 @@ export MONAI_DATA_DIRECTORY=<dir_you_will_download_data>
 python -m scripts.inference -c ./configs/config_maisi.json -i ./configs/config_infer.json -e ./configs/environment.json --random-seed 0
 ```
 
+Please refer to [maisi_inference_tutorial.ipynb](maisi_inference_tutorial.ipynb) for the tutorial for MAISI model inference.
+
+#### Quality Check:
+We have implemented a quality check function for the generated CT images. The main idea behind this function is to ensure that the Hounsfield unit (HU) intensity of each organ in the CT images remains within a defined range. For each training image used in the Diffusion network, we computed the median value for a few major organs. We then summarized the statistics of these median values and saved them to [./configs/image_median_statistics.json](./configs/image_median_statistics.json). During inference, for each generated image, we compute the median HU values for the major organs and check whether they fall within the normal range.
+
 ### 3. Model Training
 Training data preparation can be found in [./data/README.md](./data/README.md)
 

From 83d8811e514aaba32e00007b6bb943ac00662084 Mon Sep 17 00:00:00 2001
From: Can-Zhao <volcanofly@gmail.com>
Date: Tue, 20 Aug 2024 19:43:46 +0000
Subject: [PATCH 14/23] add description on input FOV

Signed-off-by: Can-Zhao <volcanofly@gmail.com>
---
 generation/maisi/README.md         | 2 +-
 generation/maisi/scripts/sample.py | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/generation/maisi/README.md b/generation/maisi/README.md
index cd81bd2ea7..a376942b8b 100644
--- a/generation/maisi/README.md
+++ b/generation/maisi/README.md
@@ -59,7 +59,7 @@ The information for the inference input, like body region and anatomy to generat
 
 - `"num_output_samples"`: int, the number of output image/mask pairs it will generate.
 - `"spacing"`: voxel size of generated images. E.g., if set to `[1.5, 1.5, 2.0]`, it will generate images with a resolution of 1.5x1.5x2.0 mm.
-- `"output_size"`: volume size of generated images. E.g., if set to `[512, 512, 256]`, it will generate images with size of 512x512x256. They need to be divisible by 16. If you have a small GPU memory size, you should adjust it to small numbers. Note that `"spacing"` and `"output_size"` together decide the output field of view (FOV). For eample, if set to `[1.5, 1.5, 2.0]`mm and `[512, 512, 256]`, the FOV is 768x768x512 mm. We suggest that when using a small `"output_size"` in xy-plane, a large `"spacing"` in xy-plane is desired to ensure that the FOV that covers the whole body in xy-plane. There is no such restriction for z-axis.
+- `"output_size"`: volume size of generated images. E.g., if set to `[512, 512, 256]`, it will generate images with size of 512x512x256. They need to be divisible by 16. If you have a small GPU memory size, you should adjust it to small numbers. Note that `"spacing"` and `"output_size"` together decide the output field of view (FOV). For eample, if set to `[1.5, 1.5, 2.0]`mm and `[512, 512, 256]`, the FOV is 768x768x512 mm. We suggest that when using a small `"output_size"` in xy-plane, a large `"spacing"` in xy-plane is desired to ensure that the FOV that covers the whole body in xy-plane. For chest and abdomen region, a recommended FOV for xy-plane will be around 512mm. There is no such restriction for z-axis.
 - `"controllable_anatomy_size"`: a list of controllable anatomy and its size scale (0--1). E.g., if set to `[["liver", 0.5],["hepatic tumor", 0.3]]`, the generated image will contain liver that have a median size, with size around 50% percentile, and hepatic tumor that is relatively small, with around 30% percentile. The output will contain paired image and segmentation mask for the controllable anatomy.
 - `"body_region"`: If "controllable_anatomy_size" is not specified, "body_region" will be used to constrain the region of generated images. It needs to be chosen from "head", "chest", "thorax", "abdomen", "pelvis", "lower".
 - `"anatomy_list"`: If "controllable_anatomy_size" is not specified, the output will contain paired image and segmentation mask for the anatomy in "./configs/label_dict.json".
diff --git a/generation/maisi/scripts/sample.py b/generation/maisi/scripts/sample.py
index aafeaa63f1..edd1ad3653 100644
--- a/generation/maisi/scripts/sample.py
+++ b/generation/maisi/scripts/sample.py
@@ -386,6 +386,12 @@ def check_input(
             f"spacing[0] have to be between 0.5 and 3.0 mm, spacing[2] have to be between 0.5 and 5.0 mm, yet got {spacing}."
         )
 
+    if output_size[0]*spacing[0] < 128:
+        FOV = [output_size[axis]*spacing[axis] for axis in range(3)]
+        raise ValueError(
+            f"`'spacing'({spacing}mm) and 'output_size'({output_size}) together decide the output field of view (FOV). The FOV will be {FOV}mm. We require the FOV of xy-plane to be at least 128mm. "
+        )
+
     # check controllable_anatomy_size format
     if len(controllable_anatomy_size) > 10:
         raise ValueError(

From 4e80b50ed586da1ad0e84af38656d66bcfcf2f1e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 20 Aug 2024 19:44:49 +0000
Subject: [PATCH 15/23] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 generation/maisi/scripts/sample.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/generation/maisi/scripts/sample.py b/generation/maisi/scripts/sample.py
index edd1ad3653..b0e99dcf99 100644
--- a/generation/maisi/scripts/sample.py
+++ b/generation/maisi/scripts/sample.py
@@ -386,8 +386,8 @@ def check_input(
             f"spacing[0] have to be between 0.5 and 3.0 mm, spacing[2] have to be between 0.5 and 5.0 mm, yet got {spacing}."
         )
 
-    if output_size[0]*spacing[0] < 128:
-        FOV = [output_size[axis]*spacing[axis] for axis in range(3)]
+    if output_size[0] * spacing[0] < 128:
+        FOV = [output_size[axis] * spacing[axis] for axis in range(3)]
         raise ValueError(
             f"`'spacing'({spacing}mm) and 'output_size'({output_size}) together decide the output field of view (FOV). The FOV will be {FOV}mm. We require the FOV of xy-plane to be at least 128mm. "
         )

From 2fdedb0741c07fa40079a75a4645e191774373d9 Mon Sep 17 00:00:00 2001
From: Can-Zhao <volcanofly@gmail.com>
Date: Tue, 20 Aug 2024 19:47:32 +0000
Subject: [PATCH 16/23] add description on input FOV

Signed-off-by: Can-Zhao <volcanofly@gmail.com>
---
 generation/maisi/README.md         | 2 +-
 generation/maisi/scripts/sample.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/generation/maisi/README.md b/generation/maisi/README.md
index a376942b8b..692f085165 100644
--- a/generation/maisi/README.md
+++ b/generation/maisi/README.md
@@ -59,7 +59,7 @@ The information for the inference input, like body region and anatomy to generat
 
 - `"num_output_samples"`: int, the number of output image/mask pairs it will generate.
 - `"spacing"`: voxel size of generated images. E.g., if set to `[1.5, 1.5, 2.0]`, it will generate images with a resolution of 1.5x1.5x2.0 mm.
-- `"output_size"`: volume size of generated images. E.g., if set to `[512, 512, 256]`, it will generate images with size of 512x512x256. They need to be divisible by 16. If you have a small GPU memory size, you should adjust it to small numbers. Note that `"spacing"` and `"output_size"` together decide the output field of view (FOV). For eample, if set to `[1.5, 1.5, 2.0]`mm and `[512, 512, 256]`, the FOV is 768x768x512 mm. We suggest that when using a small `"output_size"` in xy-plane, a large `"spacing"` in xy-plane is desired to ensure that the FOV that covers the whole body in xy-plane. For chest and abdomen region, a recommended FOV for xy-plane will be around 512mm. There is no such restriction for z-axis.
+- `"output_size"`: volume size of generated images. E.g., if set to `[512, 512, 256]`, it will generate images with size of 512x512x256. They need to be divisible by 16. If you have a small GPU memory size, you should adjust it to small numbers. Note that `"spacing"` and `"output_size"` together decide the output field of view (FOV). For example, if they are set to `[1.5, 1.5, 2.0]`mm and `[512, 512, 256]`, the FOV is 768x768x512 mm. We recommend the FOV along the x and y axes to be around 512mm for the chest and abdomen regions, and at least 256mm for the head. There is no such restriction for the z-axis.
 - `"controllable_anatomy_size"`: a list of controllable anatomy and its size scale (0--1). E.g., if set to `[["liver", 0.5],["hepatic tumor", 0.3]]`, the generated image will contain liver that have a median size, with size around 50% percentile, and hepatic tumor that is relatively small, with around 30% percentile. The output will contain paired image and segmentation mask for the controllable anatomy.
 - `"body_region"`: If "controllable_anatomy_size" is not specified, "body_region" will be used to constrain the region of generated images. It needs to be chosen from "head", "chest", "thorax", "abdomen", "pelvis", "lower".
 - `"anatomy_list"`: If "controllable_anatomy_size" is not specified, the output will contain paired image and segmentation mask for the anatomy in "./configs/label_dict.json".
diff --git a/generation/maisi/scripts/sample.py b/generation/maisi/scripts/sample.py
index edd1ad3653..31fecd907f 100644
--- a/generation/maisi/scripts/sample.py
+++ b/generation/maisi/scripts/sample.py
@@ -389,7 +389,7 @@ def check_input(
     if output_size[0]*spacing[0] < 128:
         FOV = [output_size[axis]*spacing[axis] for axis in range(3)]
         raise ValueError(
-            f"`'spacing'({spacing}mm) and 'output_size'({output_size}) together decide the output field of view (FOV). The FOV will be {FOV}mm. We require the FOV of xy-plane to be at least 128mm. "
+            f"`'spacing'({spacing}mm) and 'output_size'({output_size}) together decide the output field of view (FOV). The FOV will be {FOV}mm. We require the FOV of xy-plane to be at least 128mm. We recommand it to be around 512mm for chest and abdomen region, and around 256mm for head."
         )
 
     # check controllable_anatomy_size format

From e1d082152336174c9da148ddcf1d053e052c5688 Mon Sep 17 00:00:00 2001
From: Can-Zhao <volcanofly@gmail.com>
Date: Tue, 20 Aug 2024 19:48:10 +0000
Subject: [PATCH 17/23] add description on input FOV

Signed-off-by: Can-Zhao <volcanofly@gmail.com>
---
 generation/maisi/scripts/sample.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generation/maisi/scripts/sample.py b/generation/maisi/scripts/sample.py
index 31fecd907f..4345667de8 100644
--- a/generation/maisi/scripts/sample.py
+++ b/generation/maisi/scripts/sample.py
@@ -389,7 +389,7 @@ def check_input(
     if output_size[0]*spacing[0] < 128:
         FOV = [output_size[axis]*spacing[axis] for axis in range(3)]
         raise ValueError(
-            f"`'spacing'({spacing}mm) and 'output_size'({output_size}) together decide the output field of view (FOV). The FOV will be {FOV}mm. We require the FOV of xy-plane to be at least 128mm. We recommand it to be around 512mm for chest and abdomen region, and around 256mm for head."
+            f"`'spacing'({spacing}mm) and 'output_size'({output_size}) together decide the output field of view (FOV). The FOV will be {FOV}mm. We require the FOV of xy-plane to be at least 128mm. We recommand it to be around 512mm for chest and abdomen region, and at least 256mm for head."
         )
 
     # check controllable_anatomy_size format

From c38039d3e130928c163975d2ce1795a53442c61f Mon Sep 17 00:00:00 2001
From: Can-Zhao <volcanofly@gmail.com>
Date: Tue, 20 Aug 2024 19:52:11 +0000
Subject: [PATCH 18/23] typo

Signed-off-by: Can-Zhao <volcanofly@gmail.com>
---
 generation/maisi/README.md         | 2 +-
 generation/maisi/scripts/sample.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/generation/maisi/README.md b/generation/maisi/README.md
index 692f085165..af762528e4 100644
--- a/generation/maisi/README.md
+++ b/generation/maisi/README.md
@@ -59,7 +59,7 @@ The information for the inference input, like body region and anatomy to generat
 
 - `"num_output_samples"`: int, the number of output image/mask pairs it will generate.
 - `"spacing"`: voxel size of generated images. E.g., if set to `[1.5, 1.5, 2.0]`, it will generate images with a resolution of 1.5x1.5x2.0 mm.
-- `"output_size"`: volume size of generated images. E.g., if set to `[512, 512, 256]`, it will generate images with size of 512x512x256. They need to be divisible by 16. If you have a small GPU memory size, you should adjust it to small numbers. Note that `"spacing"` and `"output_size"` together decide the output field of view (FOV). For eample, if set them to `[1.5, 1.5, 2.0]`mm and `[512, 512, 256]`, the FOV is 768x768x512 mm. We recommand the FOV in x and y axis to be around 512mm for chest and abdomen region, and at least 256mm for head. There is no such restriction for z-axis.
+- `"output_size"`: volume size of generated images. E.g., if set to `[512, 512, 256]`, it will generate images with size of 512x512x256. They need to be divisible by 16. If you have a small GPU memory size, you should adjust it to small numbers. Note that `"spacing"` and `"output_size"` together decide the output field of view (FOV). For eample, if set them to `[1.5, 1.5, 2.0]`mm and `[512, 512, 256]`, the FOV is 768x768x512 mm. We recommend the FOV in x and y axis to be around 512mm for chest and abdomen region, and at least 256mm for head. There is no such restriction for z-axis.
 - `"controllable_anatomy_size"`: a list of controllable anatomy and its size scale (0--1). E.g., if set to `[["liver", 0.5],["hepatic tumor", 0.3]]`, the generated image will contain liver that have a median size, with size around 50% percentile, and hepatic tumor that is relatively small, with around 30% percentile. The output will contain paired image and segmentation mask for the controllable anatomy.
 - `"body_region"`: If "controllable_anatomy_size" is not specified, "body_region" will be used to constrain the region of generated images. It needs to be chosen from "head", "chest", "thorax", "abdomen", "pelvis", "lower".
 - `"anatomy_list"`: If "controllable_anatomy_size" is not specified, the output will contain paired image and segmentation mask for the anatomy in "./configs/label_dict.json".
diff --git a/generation/maisi/scripts/sample.py b/generation/maisi/scripts/sample.py
index e121556d91..898e432cc0 100644
--- a/generation/maisi/scripts/sample.py
+++ b/generation/maisi/scripts/sample.py
@@ -386,10 +386,10 @@ def check_input(
             f"spacing[0] have to be between 0.5 and 3.0 mm, spacing[2] have to be between 0.5 and 5.0 mm, yet got {spacing}."
         )
 
-    if output_size[0] * spacing[0] < 128:
-        FOV = [output_size[axis] * spacing[axis] for axis in range(3)]
+    if output_size[0]*spacing[0] < 128:
+        FOV = [output_size[axis]*spacing[axis] for axis in range(3)]
         raise ValueError(
-            f"`'spacing'({spacing}mm) and 'output_size'({output_size}) together decide the output field of view (FOV). The FOV will be {FOV}mm. We require the FOV of xy-plane to be at least 128mm. We recommand it to be around 512mm for chest and abdomen region, and at least 256mm for head."
+            f"`'spacing'({spacing}mm) and 'output_size'({output_size}) together decide the output field of view (FOV). The FOV will be {FOV}mm. We require the FOV of xy-plane to be at least 128mm. We recommend it to be around 512mm for chest and abdomen region, and at least 256mm for head."
         )
 
     # check controllable_anatomy_size format

From 828c4e3a232b829af50a5f7bf8bfada7d109e0c2 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 20 Aug 2024 19:53:27 +0000
Subject: [PATCH 19/23] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 generation/maisi/scripts/sample.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/generation/maisi/scripts/sample.py b/generation/maisi/scripts/sample.py
index 898e432cc0..1f72a969cc 100644
--- a/generation/maisi/scripts/sample.py
+++ b/generation/maisi/scripts/sample.py
@@ -386,8 +386,8 @@ def check_input(
             f"spacing[0] have to be between 0.5 and 3.0 mm, spacing[2] have to be between 0.5 and 5.0 mm, yet got {spacing}."
         )
 
-    if output_size[0]*spacing[0] < 128:
-        FOV = [output_size[axis]*spacing[axis] for axis in range(3)]
+    if output_size[0] * spacing[0] < 128:
+        FOV = [output_size[axis] * spacing[axis] for axis in range(3)]
         raise ValueError(
             f"`'spacing'({spacing}mm) and 'output_size'({output_size}) together decide the output field of view (FOV). The FOV will be {FOV}mm. We require the FOV of xy-plane to be at least 128mm. We recommend it to be around 512mm for chest and abdomen region, and at least 256mm for head."
         )

From b4b2159ee10a7b15737e91637bc055c7d3493d74 Mon Sep 17 00:00:00 2001
From: Can-Zhao <volcanofly@gmail.com>
Date: Tue, 20 Aug 2024 20:01:36 +0000
Subject: [PATCH 20/23] typo

Signed-off-by: Can-Zhao <volcanofly@gmail.com>
---
 generation/maisi/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generation/maisi/README.md b/generation/maisi/README.md
index af762528e4..e8f68d7683 100644
--- a/generation/maisi/README.md
+++ b/generation/maisi/README.md
@@ -76,7 +76,7 @@ python -m scripts.inference -c ./configs/config_maisi.json -i ./configs/config_i
 Please refer to [maisi_inference_tutorial.ipynb](maisi_inference_tutorial.ipynb) for the tutorial for MAISI model inference.
 
 #### Quality Check:
-We have implemented a quality check function for the generated CT images. The main idea behind this function is to ensure that the Hounsfield units (HU) intensity for each organ in the CT images remains within a defined range. For each training image used in the Diffusion network, we computed the median value for a few major organs. Then we summarize the statistics of these median values and save it to [./config/image_median_statistics.json](./config/image_median_statistics.json). During inference, for each generated image, we compute the median HU values for the major organs and check whether they fall within the normal range.
+We have implemented a quality check function for the generated CT images. The main idea behind this function is to ensure that the Hounsfield unit (HU) intensity for each organ in the CT images remains within a defined range. For each training image used in the Diffusion network, we computed the median value for a few major organs. Then we summarize the statistics of these median values and save them to [./configs/image_median_statistics.json](./configs/image_median_statistics.json). During inference, for each generated image, we compute the median HU values for the major organs and check whether they fall within the normal range.
 
 ### 3. Model Training
 Training data preparation can be found in [./data/README.md](./data/README.md)

From 6caf7ad3625763c924bff412b23eb66ad9569f69 Mon Sep 17 00:00:00 2001
From: Can-Zhao <volcanofly@gmail.com>
Date: Tue, 20 Aug 2024 20:06:57 +0000
Subject: [PATCH 21/23] add description on input FOV

Signed-off-by: Can-Zhao <volcanofly@gmail.com>
---
 generation/maisi/README.md         | 2 +-
 generation/maisi/scripts/sample.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/generation/maisi/README.md b/generation/maisi/README.md
index e8f68d7683..6a2ddc23c7 100644
--- a/generation/maisi/README.md
+++ b/generation/maisi/README.md
@@ -59,7 +59,7 @@ The information for the inference input, like body region and anatomy to generat
 
 - `"num_output_samples"`: int, the number of output image/mask pairs it will generate.
 - `"spacing"`: voxel size of generated images. E.g., if set to `[1.5, 1.5, 2.0]`, it will generate images with a resolution of 1.5x1.5x2.0 mm.
-- `"output_size"`: volume size of generated images. E.g., if set to `[512, 512, 256]`, it will generate images with size of 512x512x256. They need to be divisible by 16. If you have a small GPU memory size, you should adjust it to small numbers. Note that `"spacing"` and `"output_size"` together decide the output field of view (FOV). For eample, if set them to `[1.5, 1.5, 2.0]`mm and `[512, 512, 256]`, the FOV is 768x768x512 mm. We recommend the FOV in x and y axis to be around 512mm for chest and abdomen region, and at least 256mm for head. There is no such restriction for z-axis.
+- `"output_size"`: volume size of generated images. E.g., if set to `[512, 512, 256]`, it will generate images with size of 512x512x256. They need to be divisible by 16. If you have a small GPU memory size, you should adjust it to small numbers. Note that `"spacing"` and `"output_size"` together decide the output field of view (FOV). For eample, if set them to `[1.5, 1.5, 2.0]`mm and `[512, 512, 256]`, the FOV is 768x768x512 mm. We recommend the FOV in x and y axis to be at least 256mm for head, and around 512mm for other body regions like abdomen. There is no such restriction for z-axis.
 - `"controllable_anatomy_size"`: a list of controllable anatomy and its size scale (0--1). E.g., if set to `[["liver", 0.5],["hepatic tumor", 0.3]]`, the generated image will contain liver that have a median size, with size around 50% percentile, and hepatic tumor that is relatively small, with around 30% percentile. The output will contain paired image and segmentation mask for the controllable anatomy.
 - `"body_region"`: If "controllable_anatomy_size" is not specified, "body_region" will be used to constrain the region of generated images. It needs to be chosen from "head", "chest", "thorax", "abdomen", "pelvis", "lower".
 - `"anatomy_list"`: If "controllable_anatomy_size" is not specified, the output will contain paired image and segmentation mask for the anatomy in "./configs/label_dict.json".
diff --git a/generation/maisi/scripts/sample.py b/generation/maisi/scripts/sample.py
index 1f72a969cc..e0cba473f8 100644
--- a/generation/maisi/scripts/sample.py
+++ b/generation/maisi/scripts/sample.py
@@ -389,7 +389,7 @@ def check_input(
     if output_size[0] * spacing[0] < 128:
         FOV = [output_size[axis] * spacing[axis] for axis in range(3)]
         raise ValueError(
-            f"`'spacing'({spacing}mm) and 'output_size'({output_size}) together decide the output field of view (FOV). The FOV will be {FOV}mm. We require the FOV of xy-plane to be at least 128mm. We recommend it to be around 512mm for chest and abdomen region, and at least 256mm for head."
+            f"`'spacing'({spacing}mm) and 'output_size'({output_size}) together decide the output field of view (FOV). The FOV will be {FOV}mm. We recommend the FOV in x and y axis to be at least 256mm for head, and around 512mm for other body regions like abdomen. There is no such restriction for z-axis."
         )
 
     # check controllable_anatomy_size format

From 7622ad6dfbbf187e433cc89b511209201cf4e655 Mon Sep 17 00:00:00 2001
From: Can-Zhao <volcanofly@gmail.com>
Date: Tue, 20 Aug 2024 22:03:11 +0000
Subject: [PATCH 22/23] add description on input FOV

Signed-off-by: Can-Zhao <volcanofly@gmail.com>
---
 generation/maisi/README.md         | 2 +-
 generation/maisi/scripts/sample.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/generation/maisi/README.md b/generation/maisi/README.md
index 6a2ddc23c7..1d0a9b5d2e 100644
--- a/generation/maisi/README.md
+++ b/generation/maisi/README.md
@@ -59,7 +59,7 @@ The information for the inference input, like body region and anatomy to generat
 
 - `"num_output_samples"`: int, the number of output image/mask pairs it will generate.
 - `"spacing"`: voxel size of generated images. E.g., if set to `[1.5, 1.5, 2.0]`, it will generate images with a resolution of 1.5x1.5x2.0 mm.
-- `"output_size"`: volume size of generated images. E.g., if set to `[512, 512, 256]`, it will generate images with size of 512x512x256. They need to be divisible by 16. If you have a small GPU memory size, you should adjust it to small numbers. Note that `"spacing"` and `"output_size"` together decide the output field of view (FOV). For eample, if set them to `[1.5, 1.5, 2.0]`mm and `[512, 512, 256]`, the FOV is 768x768x512 mm. We recommend the FOV in x and y axis to be at least 256mm for head, and around 512mm for other body regions like abdomen. There is no such restriction for z-axis.
+- `"output_size"`: volume size of generated images. E.g., if set to `[512, 512, 256]`, it will generate images with size of 512x512x256. They need to be divisible by 16. If you have a small GPU memory size, you should adjust it to small numbers. Note that `"spacing"` and `"output_size"` together decide the output field of view (FOV). For example, if they are set to `[1.5, 1.5, 2.0]`mm and `[512, 512, 256]`, the FOV is 768x768x512 mm. We recommend the FOV along the x and y axes to be at least 256mm for head, and at least 384mm for other body regions like abdomen. There is no such restriction for the z-axis.
 - `"controllable_anatomy_size"`: a list of controllable anatomy and its size scale (0--1). E.g., if set to `[["liver", 0.5],["hepatic tumor", 0.3]]`, the generated image will contain liver that have a median size, with size around 50% percentile, and hepatic tumor that is relatively small, with around 30% percentile. The output will contain paired image and segmentation mask for the controllable anatomy.
 - `"body_region"`: If "controllable_anatomy_size" is not specified, "body_region" will be used to constrain the region of generated images. It needs to be chosen from "head", "chest", "thorax", "abdomen", "pelvis", "lower".
 - `"anatomy_list"`: If "controllable_anatomy_size" is not specified, the output will contain paired image and segmentation mask for the anatomy in "./configs/label_dict.json".
diff --git a/generation/maisi/scripts/sample.py b/generation/maisi/scripts/sample.py
index e0cba473f8..736cfb15a3 100644
--- a/generation/maisi/scripts/sample.py
+++ b/generation/maisi/scripts/sample.py
@@ -389,7 +389,7 @@ def check_input(
     if output_size[0] * spacing[0] < 128:
         FOV = [output_size[axis] * spacing[axis] for axis in range(3)]
         raise ValueError(
-            f"`'spacing'({spacing}mm) and 'output_size'({output_size}) together decide the output field of view (FOV). The FOV will be {FOV}mm. We recommend the FOV in x and y axis to be at least 256mm for head, and around 512mm for other body regions like abdomen. There is no such restriction for z-axis."
+            f"`'spacing'({spacing}mm) and 'output_size'({output_size}) together decide the output field of view (FOV). The FOV will be {FOV}mm. We recommend the FOV in x and y axis to be at least 256mm for head, and at least 384mm for other body regions like abdomen. There is no such restriction for z-axis."
         )
 
     # check controllable_anatomy_size format

From 8266e9d5f16ef4fa939edbc65b94c3d17465b93b Mon Sep 17 00:00:00 2001
From: Can-Zhao <volcanofly@gmail.com>
Date: Tue, 20 Aug 2024 23:48:26 +0000
Subject: [PATCH 23/23] update checking on input FOV

Signed-off-by: Can-Zhao <volcanofly@gmail.com>
---
 generation/maisi/scripts/sample.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generation/maisi/scripts/sample.py b/generation/maisi/scripts/sample.py
index 736cfb15a3..69a7869a43 100644
--- a/generation/maisi/scripts/sample.py
+++ b/generation/maisi/scripts/sample.py
@@ -386,7 +386,7 @@ def check_input(
             f"spacing[0] have to be between 0.5 and 3.0 mm, spacing[2] have to be between 0.5 and 5.0 mm, yet got {spacing}."
         )
 
-    if output_size[0] * spacing[0] < 128:
+    if output_size[0] * spacing[0] < 256:
         FOV = [output_size[axis] * spacing[axis] for axis in range(3)]
         raise ValueError(
             f"`'spacing'({spacing}mm) and 'output_size'({output_size}) together decide the output field of view (FOV). The FOV will be {FOV}mm. We recommend the FOV in x and y axis to be at least 256mm for head, and at least 384mm for other body regions like abdomen. There is no such restriction for z-axis."