Commit 7f397e5
update maisi ckpt link, update load functions (#1792)
Fixes #1772.

### Description

Update the MAISI checkpoint links, and update the load functions: the new checkpoints match the MAISI network definitions directly, so they are loaded with a plain `torch.load`, and the key-remapping helpers `load_autoencoder_ckpt` and `load_diffusion_ckpt` are removed.

### Checks

- [ ] Avoid including large-size files in the PR.
- [ ] Clean up long text outputs from code cells in the notebook.
- [ ] For security purposes, please check the contents and remove any sensitive info such as user names and private keys.
- [ ] Ensure that (1) hyperlinks and markdown anchors work, (2) tutorial repo files are referenced with relative paths, and (3) figures and graphs are placed in the `./figure` folder.
- [ ] Notebook runs automatically via `./runner.sh -t <path to .ipynb file>`.

Signed-off-by: Can-Zhao <volcanofly@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Parent: 88d22e8 · Commit: 7f397e5

6 files changed: +89, -172 lines
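The change common to the three Python scripts below is the same in each place: the updated checkpoints match the MAISI network definitions key-for-key, so a plain `torch.load` feeds `load_state_dict` directly, and the remapping helpers `load_autoencoder_ckpt` / `load_diffusion_ckpt` (removed from `scripts/utils.py`) are no longer involved. A minimal sketch of the new pattern; `load_autoencoder` is a hypothetical wrapper name, and the `map_location` argument is an added assumption for machines without the GPU the scripts default to:

```python
import torch

from scripts.utils import define_instance  # tutorial helper: builds a network from the config


def load_autoencoder(args, device):
    """Sketch of the post-commit loading path: no key remapping required."""
    autoencoder = define_instance(args, "autoencoder_def").to(device)
    # The updated checkpoint's keys already match AutoencoderKlMaisi,
    # so torch.load + load_state_dict replaces load_autoencoder_ckpt.
    checkpoint = torch.load(args.trained_autoencoder_path, map_location=device)
    autoencoder.load_state_dict(checkpoint)
    return autoencoder
```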

generation/maisi/configs/config_maisi.json
Lines changed: 1 addition & 1 deletion

```diff
@@ -32,7 +32,7 @@
         "use_checkpointing": false,
         "use_convtranspose": false,
         "norm_float16": true,
-        "num_splits": 16,
+        "num_splits": 8,
         "dim_split": 1
     },
     "diffusion_unet_def": {
```

generation/maisi/maisi_inference_tutorial.ipynb
Lines changed: 74 additions & 65 deletions

(Large diffs are not rendered by default.)

generation/maisi/scripts/diff_model_infer.py
Lines changed: 1 addition & 1 deletion

```diff
@@ -59,7 +59,7 @@ def load_models(args: argparse.Namespace, device: torch.device, logger: logging.
     """
     autoencoder = define_instance(args, "autoencoder_def").to(device)
     try:
-        checkpoint_autoencoder = load_autoencoder_ckpt(args.trained_autoencoder_path)
+        checkpoint_autoencoder = torch.load(args.trained_autoencoder_path)
         autoencoder.load_state_dict(checkpoint_autoencoder)
     except Exception:
         logger.error("The trained_autoencoder_path does not exist!")
```

generation/maisi/scripts/infer_controlnet.py
Lines changed: 1 addition & 1 deletion

```diff
@@ -98,7 +98,7 @@ def main():
     if args.trained_autoencoder_path is not None:
         if not os.path.exists(args.trained_autoencoder_path):
             raise ValueError("Please download the autoencoder checkpoint.")
-        autoencoder_ckpt = load_autoencoder_ckpt(args.trained_autoencoder_path)
+        autoencoder_ckpt = torch.load(args.trained_autoencoder_path)
         autoencoder.load_state_dict(autoencoder_ckpt)
         logger.info(f"Load trained diffusion model from {args.trained_autoencoder_path}.")
     else:
```

generation/maisi/scripts/inference.py
Lines changed: 12 additions & 14 deletions

```diff
@@ -24,7 +24,7 @@
 from monai.transforms import LoadImage, Orientation
 from monai.utils import set_determinism
 from scripts.sample import LDMSampler, check_input
-from scripts.utils import define_instance, load_autoencoder_ckpt, load_diffusion_ckpt
+from scripts.utils import define_instance
 from scripts.utils_plot import find_label_center_loc, get_xyz_plot, show_image


@@ -76,23 +76,23 @@ def main():
     files = [
         {
             "path": "models/autoencoder_epoch273.pt",
-            "url": "https://drive.google.com/file/d/1jQefG0yJPzSvTG5rIJVHNqDReBTvVmZ0/view?usp=drive_link",
+            "url": "https://drive.google.com/file/d/1Ojw25lFO8QbHkxazdK4CgZTyp3GFNZGz/view?usp=sharing",
         },
         {
             "path": "models/input_unet3d_data-all_steps1000size512ddpm_random_current_inputx_v1.pt",
-            "url": "https://drive.google.com/file/d/1FtOHBGUF5dLZNHtiuhf5EH448EQGGs-_/view?usp=sharing",
+            "url": "https://drive.google.com/file/d/1lklNv4MTdI_9bwFRMd98QQ7JLerR5gC_/view?usp=drive_link",
         },
         {
             "path": "models/controlnet-20datasets-e20wl100fold0bc_noi_dia_fsize_current.pt",
-            "url": "https://drive.google.com/file/d/1izr52Whkk56OevNTk2QzI86eJV9TTaLk/view?usp=sharing",
+            "url": "https://drive.google.com/file/d/1mLYeqeZ819_WpZPlAInhcWuCIHgn3QNT/view?usp=drive_link",
         },
         {
             "path": "models/mask_generation_autoencoder.pt",
-            "url": "https://drive.google.com/file/d/1FzWrpv6ornYUaPiAWGOOxhRx2P9Wnynm/view?usp=drive_link",
+            "url": "https://drive.google.com/file/d/19JnX-C6QAg4RfghTwpPnj4KEWhtawpCy/view?usp=drive_link",
         },
         {
             "path": "models/mask_generation_diffusion_unet.pt",
-            "url": "https://drive.google.com/file/d/11SA9RUZ6XmCOJr5v6w6UW1kDzr6hlymw/view?usp=drive_link",
+            "url": "https://drive.google.com/file/d/1yOQvlhXFGY1ZYavADM3N34vgg5AEitda/view?usp=drive_link",
         },
         {
             "path": "configs/candidate_masks_flexible_size_and_spacing_3000.json",
@@ -155,29 +155,27 @@ def main():
     device = torch.device("cuda")

     autoencoder = define_instance(args, "autoencoder_def").to(device)
-    checkpoint_autoencoder = load_autoencoder_ckpt(args.trained_autoencoder_path)
+    checkpoint_autoencoder = torch.load(args.trained_autoencoder_path)
     autoencoder.load_state_dict(checkpoint_autoencoder)

     diffusion_unet = define_instance(args, "diffusion_unet_def").to(device)
     checkpoint_diffusion_unet = torch.load(args.trained_diffusion_path)
-    new_dict = load_diffusion_ckpt(diffusion_unet.state_dict(), checkpoint_diffusion_unet["unet_state_dict"])
-    diffusion_unet.load_state_dict(new_dict, strict=True)
+    diffusion_unet.load_state_dict(checkpoint_diffusion_unet["unet_state_dict"], strict=True)
     scale_factor = checkpoint_diffusion_unet["scale_factor"].to(device)

     controlnet = define_instance(args, "controlnet_def").to(device)
     checkpoint_controlnet = torch.load(args.trained_controlnet_path)
-    new_dict = load_diffusion_ckpt(controlnet.state_dict(), checkpoint_controlnet["controlnet_state_dict"])
     monai.networks.utils.copy_model_state(controlnet, diffusion_unet.state_dict())
-    controlnet.load_state_dict(new_dict, strict=True)
+    controlnet.load_state_dict(checkpoint_controlnet["controlnet_state_dict"], strict=True)

     mask_generation_autoencoder = define_instance(args, "mask_generation_autoencoder_def").to(device)
-    checkpoint_mask_generation_autoencoder = load_autoencoder_ckpt(args.trained_mask_generation_autoencoder_path)
+    checkpoint_mask_generation_autoencoder = torch.load(args.trained_mask_generation_autoencoder_path)
     mask_generation_autoencoder.load_state_dict(checkpoint_mask_generation_autoencoder)

     mask_generation_diffusion_unet = define_instance(args, "mask_generation_diffusion_def").to(device)
     checkpoint_mask_generation_diffusion_unet = torch.load(args.trained_mask_generation_diffusion_path)
-    mask_generation_diffusion_unet.load_old_state_dict(checkpoint_mask_generation_diffusion_unet)
-    mask_generation_scale_factor = args.mask_generation_scale_factor
+    mask_generation_diffusion_unet.load_state_dict(checkpoint_mask_generation_diffusion_unet["unet_state_dict"])
+    mask_generation_scale_factor = checkpoint_mask_generation_diffusion_unet["scale_factor"]

     print("All the trained model weights have been loaded.")
```
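Because `inference.py` now reads both the weights and the scale factor straight from the diffusion checkpoints, a small sanity check (not part of the commit; the path is the one listed in the download table above) can verify a downloaded file before running inference:

```python
import torch

# Diffusion UNet checkpoint from the updated download list above.
ckpt = torch.load(
    "models/input_unet3d_data-all_steps1000size512ddpm_random_current_inputx_v1.pt",
    map_location="cpu",
)
# The updated inference.py indexes both of these keys directly.
assert "unet_state_dict" in ckpt, "missing model weights"
assert "scale_factor" in ckpt, "missing scale factor"
```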

generation/maisi/scripts/utils.py
Lines changed: 0 additions & 90 deletions

```diff
@@ -669,96 +669,6 @@ def __call__(self, img: NdarrayOrTensor):
         return out


-def load_autoencoder_ckpt(load_autoencoder_path):
-    """
-    Load a state dict from an autoencoder checkpoint trained with
-    [MONAI Generative](https://github.com/Project-MONAI/GenerativeModels).
-
-    The loaded state dict is for
-    monai.apps.generation.maisi.networks.autoencoderkl_maisi.AutoencoderKlMaisi.
-
-    Args:
-        load_autoencoder_path (str): Path to the autoencoder checkpoint file.
-
-    Returns:
-        dict: Processed state dictionary for the autoencoder.
-    """
-    checkpoint_autoencoder = torch.load(load_autoencoder_path)
-    new_state_dict = {}
-    for k, v in checkpoint_autoencoder.items():
-        if "decoder" in k and "conv" in k:
-            new_key = (
-                k.replace("conv.weight", "conv.conv.weight")
-                if "conv.weight" in k
-                else k.replace("conv.bias", "conv.conv.bias")
-            )
-            new_state_dict[new_key] = v
-        elif "encoder" in k and "conv" in k:
-            new_key = (
-                k.replace("conv.weight", "conv.conv.weight")
-                if "conv.weight" in k
-                else k.replace("conv.bias", "conv.conv.bias")
-            )
-            new_state_dict[new_key] = v
-        else:
-            new_state_dict[k] = v
-    checkpoint_autoencoder = new_state_dict
-    return checkpoint_autoencoder
-
-
-def load_diffusion_ckpt(new_state_dict: dict, old_state_dict: dict, verbose=False) -> dict:
-    """
-    Load a state dict from a DiffusionModelUNet trained with
-    [MONAI Generative](https://github.com/Project-MONAI/GenerativeModels).
-
-    The loaded state dict is for
-    monai.apps.generation.maisi.networks.diffusion_model_unet_maisi.DiffusionModelUNetMaisi.
-
-    Args:
-        new_state_dict: state dict from the new model.
-        old_state_dict: state dict from the old model.
-    """
-    if verbose:
-        # print all new_state_dict keys that are not in old_state_dict
-        for k in new_state_dict:
-            if k not in old_state_dict:
-                logging.info(f"New key {k} not found in old state dict")
-        # and vice versa
-        for k in old_state_dict:
-            if k not in new_state_dict:
-                logging.info(f"Old key {k} not found in new state dict")
-
-    # copy over all matching keys
-    for k in new_state_dict:
-        if k in old_state_dict:
-            new_state_dict[k] = old_state_dict.pop(k)
-
-    # fix the attention blocks
-    # attention_blocks = [k.replace(".attn1.qkv.weight", "") for k in new_state_dict if "attn1.qkv.weight" in k]
-    attention_blocks = [k.replace(".attn.to_k.weight", "") for k in new_state_dict if "attn.to_k.weight" in k]
-    for block in attention_blocks:
-        new_state_dict[f"{block}.attn.to_q.weight"] = old_state_dict.pop(f"{block}.to_q.weight")
-        new_state_dict[f"{block}.attn.to_k.weight"] = old_state_dict.pop(f"{block}.to_k.weight")
-        new_state_dict[f"{block}.attn.to_v.weight"] = old_state_dict.pop(f"{block}.to_v.weight")
-        new_state_dict[f"{block}.attn.to_q.bias"] = old_state_dict.pop(f"{block}.to_q.bias")
-        new_state_dict[f"{block}.attn.to_k.bias"] = old_state_dict.pop(f"{block}.to_k.bias")
-        new_state_dict[f"{block}.attn.to_v.bias"] = old_state_dict.pop(f"{block}.to_v.bias")
-
-        # projection
-        new_state_dict[f"{block}.attn.out_proj.weight"] = old_state_dict.pop(f"{block}.proj_attn.weight")
-        new_state_dict[f"{block}.attn.out_proj.bias"] = old_state_dict.pop(f"{block}.proj_attn.bias")
-
-    # fix the upsample conv blocks which were renamed postconv
-    for k in new_state_dict:
-        if "postconv" in k:
-            old_name = k.replace("postconv", "conv")
-            # new_state_dict[k] = old_state_dict[old_name]
-            new_state_dict[k] = old_state_dict.pop(old_name)
-    if len(old_state_dict.keys()) > 0:
-        logging.info(f"{old_state_dict.keys()} remaining***********")
-    return new_state_dict
-
-
 def KL_loss(z_mu, z_sigma):
     """
     Compute the Kullback-Leibler (KL) divergence loss for a variational autoencoder (VAE).
```
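The two deleted helpers existed only to remap checkpoints trained with the older MONAI GenerativeModels layout. Anyone still holding such a legacy autoencoder checkpoint can convert it once, up front, by inlining the renaming that `load_autoencoder_ckpt` performed; a one-off sketch, with `old_ckpt.pt` / `new_ckpt.pt` as placeholder paths:

```python
import torch

# One-time conversion of a legacy (MONAI GenerativeModels) autoencoder
# checkpoint so it loads through the simplified torch.load path above.
old_sd = torch.load("old_ckpt.pt", map_location="cpu")  # placeholder path
new_sd = {}
for k, v in old_sd.items():
    if ("encoder" in k or "decoder" in k) and "conv" in k:
        # The same renaming the removed load_autoencoder_ckpt applied.
        k = k.replace("conv.weight", "conv.conv.weight").replace("conv.bias", "conv.conv.bias")
    new_sd[k] = v
torch.save(new_sd, "new_ckpt.pt")  # placeholder path
```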
