Post-refactor fixes

semjon00 · semjon00 · commit 88aa86f3f305 · 2023-07-11T17:22:26.000+03:00
diff --git a/README.md b/README.md
@@ -21,10 +21,10 @@ video by [@graemeniedermayer](https://github.com/graemeniedermayer), more exampl
 images generated by [@semjon00](https://github.com/semjon00) from CC0 photos, more examples [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/56#issuecomment-1367596463).
 
 ## Changelog
-* v0.3.13 
-    * Large code refactor
-    * Improved interface
-    * Slightly changed the behaviour of various options
+* v0.4.0 large code refactor
+    * UI improvements
+    * slightly changed the behaviour of various options
+    * extension may partially work even if some of the dependencies are unmet
 * v0.3.12
     * Fixed stereo image generation
     * Other bugfixes
diff --git a/scripts/core.py b/scripts/core.py
@@ -98,7 +98,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
     stereo_separation = inp["stereo_separation"]
 
     # TODO: ideally, run_depthmap should not save meshes - that makes the function not pure
-    print(f"\n{SCRIPT_NAME} {SCRIPT_VERSION} ({get_commit_hash()})")
+    print(f"{SCRIPT_NAME} {SCRIPT_VERSION} ({get_commit_hash()})")
 
     unload_sd_model()
 
@@ -230,7 +230,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
 
             if show_heat:
                 from dzoedepth.utils.misc import colorize
-                heatmap = colorize(img_output, cmap='inferno')
+                heatmap = Image.fromarray(colorize(img_output, cmap='inferno'))
                 generated_images[count]['heatmap'] = heatmap
 
             if gen_stereo:
@@ -325,7 +325,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
             print(f'{str(e)}, some issue with generating inpainted mesh')
 
     reload_sd_model()
-    print("All done.")
+    print("All done.\n")
     return generated_images, mesh_fi, meshsimple_fi
 
 
diff --git a/scripts/depthmap_generation.py b/scripts/depthmap_generation.py
@@ -3,6 +3,7 @@
 from PIL import Image
 from torchvision.transforms import Compose, transforms
 
+# TODO: depthmap_generation should not depend on WebUI
 from modules import shared, devices
 from modules.shared import opts, cmd_opts
 
@@ -29,7 +30,6 @@
 from pix2pix.options.test_options import TestOptions
 from pix2pix.models.pix2pix4depth_model import Pix2Pix4DepthModel
 
-
 # zoedepth
 from dzoedepth.models.builder import build_model
 from dzoedepth.utils.config import get_config
@@ -59,9 +59,6 @@ def ensure_models(self, model_type, device: torch.device, boost: bool):
 
     def load_models(self, model_type, device: torch.device, boost: bool):
         """Ensure that the depth model is loaded"""
-        # TODO: supply correct values for zoedepth
-        net_width = 512
-        net_height = 512
 
         # model path and name
         model_dir = "./models/midas"
@@ -171,22 +168,21 @@ def load_models(self, model_type, device: torch.device, boost: bool):
                 mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
             )
 
+        # When loading, zoedepth models will report the default net size.
+        # It will be overridden by the generation settings.
         elif model_type == 7:  # zoedepth_n
             print("zoedepth_n\n")
             conf = get_config("zoedepth", "infer")
-            conf.img_size = [net_width, net_height]
             model = build_model(conf)
 
         elif model_type == 8:  # zoedepth_k
             print("zoedepth_k\n")
             conf = get_config("zoedepth", "infer", config_version="kitti")
-            conf.img_size = [net_width, net_height]
             model = build_model(conf)
 
         elif model_type == 9:  # zoedepth_nk
             print("zoedepth_nk\n")
             conf = get_config("zoedepth_nk", "infer")
-            conf.img_size = [net_width, net_height]
             model = build_model(conf)
 
         model.eval()  # prepare for evaluation
@@ -221,15 +217,20 @@ def load_models(self, model_type, device: torch.device, boost: bool):
 
         devices.torch_gc()
 
-    def get_default_net_size(self, model_type):
+    @staticmethod
+    def get_default_net_size(model_type):
         # TODO: fill in, use in the GUI
         sizes = {
+            0: [448, 448],
             1: [512, 512],
             2: [384, 384],
             3: [384, 384],
             4: [384, 384],
             5: [384, 384],
             6: [256, 256],
+            7: [384, 512],
+            8: [384, 768],
+            9: [384, 512]
         }
         if model_type in sizes:
             return sizes[model_type]
@@ -254,8 +255,9 @@ def unload_models(self):
         self.device = None
 
     def get_raw_prediction(self, input, net_width, net_height):
-        """Get prediction from the model currently loaded by the class.
+        """Get prediction from the model currently loaded by the ModelHolder object.
         If boost is enabled, net_width and net_height will be ignored."""
+        # TODO: supply net size for zoedepth
         global device
         device = self.device
         # input image
@@ -264,17 +266,14 @@ def get_raw_prediction(self, input, net_width, net_height):
         if self.pix2pix_model is None:
             if self.depth_model_type == 0:
                 raw_prediction = estimateleres(img, self.depth_model, net_width, net_height)
-                raw_prediction_invert = True
             elif self.depth_model_type in [7, 8, 9]:
                 raw_prediction = estimatezoedepth(input, self.depth_model, net_width, net_height)
-                raw_prediction_invert = True
             else:
                 raw_prediction = estimatemidas(img, self.depth_model, net_width, net_height,
                                                self.resize_mode, self.normalization)
-                raw_prediction_invert = False
         else:
             raw_prediction = estimateboost(img, self.depth_model, self.depth_model_type, self.pix2pix_model)
-            raw_prediction_invert = False
+        raw_prediction_invert = self.depth_model_type in [0, 7, 8, 9]
         return raw_prediction, raw_prediction_invert
 
 
diff --git a/scripts/interface_webui.py b/scripts/interface_webui.py
@@ -8,11 +8,12 @@
 from modules.shared import opts
 from modules.ui import plaintext_to_html
 from pathlib import Path
+from PIL import Image
 
 from scripts.gradio_args_transport import GradioComponentBundle
 from scripts.main import *
 from scripts.core import core_generation_funnel, unload_models, run_makevideo
-from PIL import Image
+from scripts.depthmap_generation import ModelHolder
 
 
 # Ugly workaround to fix gradio tempfile issue
@@ -102,19 +103,19 @@ def main_ui_panel(is_depth_tab):
         with gr.Group():
             with gr.Row():
                 inp += "gen_mesh", gr.Checkbox(
-                    label="Generate simple 3D mesh. "
-                          "(Fast, accurate only with ZoeDepth models and no boost, no custom maps)",
-                    value=False, visible=True)
+                    label="Generate simple 3D mesh", value=False, visible=True)
             with gr.Row(visible=False) as mesh_options_row_0:
+                gr.Label(value="Generates fast, accurate only with ZoeDepth models and no boost, no custom maps")
                 inp += "mesh_occlude", gr.Checkbox(label="Remove occluded edges", value=True, visible=True)
                 inp += "mesh_spherical", gr.Checkbox(label="Equirectangular projection", value=False, visible=True)
 
         if is_depth_tab:
             with gr.Group():
                 with gr.Row():
                     inp += "inpaint", gr.Checkbox(
-                        label="Generate 3D inpainted mesh. (Sloooow, required for generating videos)", value=False)
+                        label="Generate 3D inpainted mesh", value=False)
                 with gr.Group(visible=False) as inpaint_options_row_0:
+                    gr.Label("Generation is sloooow, required for generating videos")
                     inp += "inpaint_vids", gr.Checkbox(
                         label="Generate 4 demo videos with 3D inpainted mesh.", value=False)
                     gr.HTML("More options for generating video can be found in the Generate video tab")
@@ -139,6 +140,15 @@ def main_ui_panel(is_depth_tab):
 
         inp += "gen_normal", gr.Checkbox(label="Generate Normalmap (hidden! api only)", value=False, visible=False)
 
+        def update_default_net_size(model_type):  # push the selected model's default net size into the UI
+            w, h = ModelHolder.get_default_net_size(model_type)  # NOTE(review): assumes [width, height] order - confirm
+            return inp['net_width'].update(value=w), inp['net_height'].update(value=h)
+        inp['model_type'].change(
+            fn=update_default_net_size,
+            inputs=inp['model_type'],
+            outputs=[inp['net_width'], inp['net_height']]
+        )
+
         inp['boost'].change(
             fn=lambda a, b: (options_depend_on_boost.update(visible=not a),
                              options_depend_on_match_size.update(visible=not a and not b)),
@@ -309,6 +319,7 @@ def on_ui_tabs():
                             inp += gr.Image(label="Source", source="upload", interactive=True, type="pil",
                                             elem_id="depthmap_input_image")
                             with gr.Group(visible=False) as custom_depthmap_row_0:
+                                # TODO: depthmap generation settings should disappear when using this
                                 inp += gr.File(label="Custom DepthMap", file_count="single", interactive=True,
                                                type="file", elem_id='custom_depthmap_img')
                         inp += gr.Checkbox(elem_id="custom_depthmap", label="Use custom DepthMap", value=False)
@@ -471,13 +482,12 @@ def run_generate(*inputs):
         inputnames.append(None)
         if custom_depthmap:
             if custom_depthmap_img is None:
-                return [], None, None, "Custom depthmap is not specified. " \
-                                       "Please either supply it or disable this option.", ""
-            inputdepthmaps.append(custom_depthmap_img)
+                return [], None, None,\
+                    "Custom depthmap is not specified. Please either supply it or disable this option.", ""
+            inputdepthmaps.append(Image.open(os.path.abspath(custom_depthmap_img.name)))
         else:
             inputdepthmaps.append(None)
     if depthmap_mode == '1':  # Batch Process
-        # convert files to pillow images
         for img in image_batch:
             image = Image.open(os.path.abspath(img.name))
             inputimages.append(image)
diff --git a/scripts/main.py b/scripts/main.py
@@ -4,7 +4,7 @@
 import torch
 
 SCRIPT_NAME = "DepthMap"
-SCRIPT_VERSION = "v0.3.13"
+SCRIPT_VERSION = "v0.4.0"
 
 commit_hash = None  # TODO: understand why it would spam to stderr if changed to ... = get_commit_hash()
 def get_commit_hash():
diff --git a/scripts/stereoimage_generation.py b/scripts/stereoimage_generation.py
@@ -1,4 +1,11 @@
-from numba import njit, prange
+try:
+    from numba import njit, prange
+except Exception as e:  # numba is optional: degrade to plain (slow) Python instead of failing the import
+    print(f"WARNING! Numba failed to import! Stereoimage generation will be much slower! ({str(e)})")
+    from builtins import range as prange
+    def njit(*args, **kwargs):  # no-op stand-in that works both as @njit and as @njit(parallel=...)
+        if len(args) == 1 and callable(args[0]) and not kwargs: return args[0]  # bare @njit usage
+        return lambda func: func  # @njit(...) usage: options are ignored, function is returned unchanged
 import numpy as np
 from PIL import Image
 
@@ -73,7 +80,7 @@ def apply_stereo_divergence(original_image, depth, divergence, separation, fill_
         )
 
 
-@njit
+@njit(parallel=False)
 def apply_stereo_divergence_naive(
         original_image, normalized_depth, divergence_px: float, separation_px: float, fill_technique):
     h, w, c = original_image.shape