From 27f3ff64418e293a12a9173b29d0be8b0f9e8341 Mon Sep 17 00:00:00 2001
From: semjon00 <semjon.00@gmail.com>
Date: Tue, 18 Jul 2023 14:48:39 +0300
Subject: [PATCH 01/10] Remove ops from depthmap_generation

---
 scripts/depthmap.py        | 82 ++++++++++++++++++++++----------------
 src/backbone.py            |  2 +
 src/core.py                |  4 +-
 src/depthmap_generation.py | 40 ++++++++++++-------
 src/standalone.py          |  9 +++++
 5 files changed, 86 insertions(+), 51 deletions(-)
 create mode 100644 src/backbone.py
 create mode 100644 src/standalone.py

diff --git a/scripts/depthmap.py b/scripts/depthmap.py
index a0e85a1..260e60f 100644
--- a/scripts/depthmap.py
+++ b/scripts/depthmap.py
@@ -1,12 +1,9 @@
 import gradio as gr
 import traceback
-import modules.scripts as scripts
-from modules import processing, images, shared
-from modules import script_callbacks
+
+from modules import shared
+from modules.images import save_image
 from modules.call_queue import wrap_gradio_gpu_call
-from modules.processing import create_infotext
-from modules.shared import opts
-from modules.ui import plaintext_to_html
 from pathlib import Path
 from PIL import Image
 
@@ -25,9 +22,20 @@ def ensure_gradio_temp_directory():
             os.mkdir(path)
     except Exception as e:
         traceback.print_exc()
+
+
 ensure_gradio_temp_directory()
 
 
+def gather_ops():
+    from modules.shared import opts, cmd_opts
+    ops = {}
+    if hasattr(opts, 'depthmap_script_boost_rmax'):
+        ops['boost_whole_size_threshold'] = opts.depthmap_script_boost_rmax
+    ops['precision'] = cmd_opts.precision
+    ops['no_half'] = cmd_opts.no_half
+    return ops
+
 def main_ui_panel(is_depth_tab):
     inp = GradioComponentBundle()
     # TODO: Greater visual separation
@@ -146,6 +154,7 @@ def main_ui_panel(is_depth_tab):
         def update_delault_net_size(model_type):
             w, h = ModelHolder.get_default_net_size(model_type)
             return inp['net_width'].update(value=w), inp['net_height'].update(value=h)
+
         inp['model_type'].change(
             fn=update_delault_net_size,
             inputs=inp['model_type'],
@@ -230,6 +239,7 @@ def background_removal_options_visibility(v):
     return inp
 
 
+import modules.scripts as scripts
 class Script(scripts.Script):
     def title(self):
         return SCRIPT_NAME
@@ -247,6 +257,9 @@ def ui(self, is_img2img):
 
     # run from script in txt2img or img2img
     def run(self, p, *inputs):
+        from modules import processing
+        from modules.processing import create_infotext
+
         inputs = GradioComponentBundle.enkey_to_dict(inputs)
 
         # sd process
@@ -256,15 +269,15 @@ def run(self, p, *inputs):
         inputimages = []
         for count in range(0, len(processed.images)):
             # skip first grid image
-            if count == 0 and len(processed.images) > 1 and opts.return_grid:
+            if count == 0 and len(processed.images) > 1 and shared.opts.return_grid:
                 continue
             inputimages.append(processed.images[count])
 
-        outputs, mesh_fi, meshsimple_fi = core_generation_funnel(p.outpath_samples, inputimages, None, None, inputs)
+        outputs, mesh_fi, meshsimple_fi = core_generation_funnel(p.outpath_samples, inputimages, None, None, inputs, gather_ops())
 
         for input_i, imgs in enumerate(outputs):
             # get generation parameters
-            if hasattr(processed, 'all_prompts') and opts.enable_pnginfo:
+            if hasattr(processed, 'all_prompts') and shared.opts.enable_pnginfo:
                 info = create_infotext(processed, processed.all_prompts, processed.all_seeds, processed.all_subseeds,
                                        "", 0, input_i)
             else:
@@ -274,11 +287,11 @@ def run(self, p, *inputs):
                 if inputs["save_outputs"]:
                     try:
                         suffix = "" if image_type == "depth" else f"_{image_type}"
-                        images.save_image(image, path=p.outpath_samples, basename="", seed=processed.all_seeds[input_i],
-                                          prompt=processed.all_prompts[input_i], extension=opts.samples_format,
-                                          info=info,
-                                          p=processed,
-                                          suffix=suffix)
+                        save_image(image, path=p.outpath_samples, basename="", seed=processed.all_seeds[input_i],
+                                   prompt=processed.all_prompts[input_i], extension=shared.opts.samples_format,
+                                   info=info,
+                                   p=processed,
+                                   suffix=suffix)
                     except Exception as e:
                         if not ('image has wrong mode' in str(e) or 'I;16' in str(e)):
                             raise e
@@ -352,7 +365,6 @@ def on_ui_tabs():
                             result_images = gr.Gallery(label='Output', show_label=False,
                                                        elem_id=f"depthmap_gallery").style(grid=4)
                         with gr.Column():
-                            html_info_x = gr.HTML()
                             html_info = gr.HTML()
 
                     with gr.TabItem('3D Mesh'):
@@ -429,7 +441,6 @@ def custom_depthmap_visibility(v):
                 result_images,
                 fn_mesh,
                 result_depthmesh,
-                html_info_x,
                 html_info
             ]
         )
@@ -454,7 +465,7 @@ def custom_depthmap_visibility(v):
             ]
         )
 
-    return (depthmap_interface, "Depth", "depthmap_interface"),
+    return depthmap_interface
 
 
 # called from depth tab
@@ -478,17 +489,17 @@ def run_generate(*inputs):
     if depthmap_mode == '2' and depthmap_batch_output_dir != '':
         outpath = depthmap_batch_output_dir
     else:
-        outpath = opts.outdir_samples or opts.outdir_extras_samples
+        outpath = shared.opts.outdir_samples or shared.opts.outdir_extras_samples
 
     if depthmap_mode == '0':  # Single image
         if depthmap_input_image is None:
-            return [], None, None, "Please select an input image!", ""
+            return [], None, None, "Please select an input image!"
         inputimages.append(depthmap_input_image)
         inputnames.append(None)
         if custom_depthmap:
             if custom_depthmap_img is None:
-                return [], None, None,\
-                    "Custom depthmap is not specified. Please either supply it or disable this option.", ""
+                return [], None, None, \
+                    "Custom depthmap is not specified. Please either supply it or disable this option."
             inputdepthmaps.append(Image.open(os.path.abspath(custom_depthmap_img.name)))
         else:
             inputdepthmaps.append(None)
@@ -502,9 +513,9 @@ def run_generate(*inputs):
     elif depthmap_mode == '2':  # Batch from Directory
         assert not shared.cmd_opts.hide_ui_dir_config, '--hide-ui-dir-config option must be disabled'
         if depthmap_batch_input_dir == '':
-            return [], None, None, "Please select an input directory.", ""
+            return [], None, None, "Please select an input directory."
         if depthmap_batch_input_dir == depthmap_batch_output_dir:
-            return [], None, None, "Please pick different directories for batch processing.", ""
+            return [], None, None, "Please pick different directories for batch processing."
         image_list = shared.listfiles(depthmap_batch_input_dir)
         for path in image_list:
             try:
@@ -515,9 +526,9 @@ def run_generate(*inputs):
                 if depthmap_batch_reuse:
                     basename = Path(path).stem
                     # Custom names are not used in samples directory
-                    if outpath != opts.outdir_extras_samples:
+                    if outpath != shared.opts.outdir_extras_samples:
                         # Possible filenames that the custom depthmaps may have
-                        name_candidates = [f'{basename}-0000.{opts.samples_format}',  # current format
+                        name_candidates = [f'{basename}-0000.{shared.opts.samples_format}',  # current format
                                            f'{basename}.png',  # human-intuitive format
                                            f'{Path(path).name}']  # human-intuitive format (worse)
                         for fn_cand in name_candidates:
@@ -531,13 +542,13 @@ def run_generate(*inputs):
         inputdepthmaps_n = len([1 for x in inputdepthmaps if x is not None])
         print(f'{len(inputimages)} images will be processed, {inputdepthmaps_n} existing depthmaps will be reused')
 
-    outputs, mesh_fi, meshsimple_fi = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs)
+    outputs, mesh_fi, meshsimple_fi = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs, gather_ops())
     show_images = []
 
     # Saving images
     for input_i, imgs in enumerate(outputs):
         basename = 'depthmap'
-        if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != opts.outdir_extras_samples:
+        if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != shared.opts.outdir_extras_samples:
             basename = Path(inputnames[input_i]).stem
 
         for image_type, image in list(imgs.items()):
@@ -545,10 +556,10 @@ def run_generate(*inputs):
             if inputs["save_outputs"]:
                 try:
                     suffix = "" if image_type == "depth" else f"_{image_type}"
-                    images.save_image(image, path=outpath, basename=basename, seed=None,
-                                      prompt=None, extension=opts.samples_format, short_filename=True,
-                                      no_prompt=True, grid=False, pnginfo_section_name="extras",
-                                      suffix=suffix)
+                    save_image(image, path=outpath, basename=basename, seed=None,
+                               prompt=None, extension=shared.opts.samples_format, short_filename=True,
+                               no_prompt=True, grid=False, pnginfo_section_name="extras",
+                               suffix=suffix)
                 except Exception as e:
                     if not ('image has wrong mode' in str(e) or 'I;16' in str(e)):
                         raise e
@@ -556,15 +567,16 @@ def run_generate(*inputs):
                     traceback.print_exc()
 
     # use inpainted 3d mesh to show in 3d model output when enabled in settings
-    if hasattr(opts, 'depthmap_script_show_3d_inpaint') and opts.depthmap_script_show_3d_inpaint \
+    if hasattr(shared.opts, 'depthmap_script_show_3d_inpaint') and shared.opts.depthmap_script_show_3d_inpaint \
             and mesh_fi is not None and len(mesh_fi) > 0:
         meshsimple_fi = mesh_fi
     # however, don't show 3dmodel when disabled in settings
-    if hasattr(opts, 'depthmap_script_show_3d') and not opts.depthmap_script_show_3d:
+    if hasattr(shared.opts, 'depthmap_script_show_3d') and not shared.opts.depthmap_script_show_3d:
         meshsimple_fi = None
     # TODO: return more info
-    return show_images, mesh_fi, meshsimple_fi, plaintext_to_html('info'), ''
+    return show_images, mesh_fi, meshsimple_fi, 'Generated!'
 
 
+from modules import script_callbacks
 script_callbacks.on_ui_settings(on_ui_settings)
-script_callbacks.on_ui_tabs(on_ui_tabs)
+script_callbacks.on_ui_tabs(lambda: [(on_ui_tabs(), "Depth", "depthmap_interface")])
diff --git a/src/backbone.py b/src/backbone.py
new file mode 100644
index 0000000..6f41979
--- /dev/null
+++ b/src/backbone.py
@@ -0,0 +1,2 @@
+# This file contains stable-diffusion-webui stuff that the plugin relies on.
+# Eventually, when we have a standalone interface, this will load either the standalone backbone or the webui backbone.
diff --git a/src/core.py b/src/core.py
index ccc3e6e..831d789 100644
--- a/src/core.py
+++ b/src/core.py
@@ -59,7 +59,7 @@ def reload_sd_model():
         shared.sd_model.first_stage_model.to(devices.device)
 
 
-def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp):
+def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp, ops=None):
     if len(inputimages) == 0 or inputimages[0] is None:
         return [], '', ''
     if inputdepthmaps is None or len(inputdepthmaps) == 0:
@@ -97,6 +97,8 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
     stereo_modes = inp["stereo_modes"]
     stereo_separation = inp["stereo_separation"]
 
+    model_holder.update_settings(**ops)
+
     # TODO: ideally, run_depthmap should not save meshes - that makes the function not pure
     print(SCRIPT_FULL_NAME)
 
diff --git a/src/depthmap_generation.py b/src/depthmap_generation.py
index 6812d81..ec85770 100644
--- a/src/depthmap_generation.py
+++ b/src/depthmap_generation.py
@@ -4,8 +4,7 @@
 from torchvision.transforms import Compose, transforms
 
 # TODO: depthmap_generation should not depend on WebUI
-from modules import shared, devices
-from modules.shared import opts, cmd_opts
+from modules import devices
 
 import torch, gc
 import cv2
@@ -48,6 +47,20 @@ def __init__(self):
         self.resize_mode = None
         self.normalization = None
 
+        # Settings (initialized to sensible values, should be updated)
+        self.boost_whole_size_threshold = 1600  # R_max from the paper by default
+        self.no_half = False
+        self.precision = "autocast"
+
+    def update_settings(self, boost_whole_size_threshold=None, no_half=None, precision=None):
+        if boost_whole_size_threshold is not None:
+            self.boost_whole_size_threshold = boost_whole_size_threshold
+        if no_half is not None:
+            self.no_half = no_half
+        if precision is not None:
+            self.precision = precision
+
+
     def ensure_models(self, model_type, device: torch.device, boost: bool):
         # TODO: could make it more granular
         if model_type == -1 or model_type is None:
@@ -191,7 +204,7 @@ def load_models(self, model_type, device: torch.device, boost: bool):
         # optimize
         if device == torch.device("cuda") and model_type in [0, 1, 2, 3, 4, 5, 6]:
             model = model.to(memory_format=torch.channels_last)
-            if not cmd_opts.no_half and model_type != 0 and not boost:  # TODO: zoedepth, too?
+            if not self.no_half and model_type != 0 and not boost:  # TODO: zoedepth, too?
                 model = model.half()
         model.to(device)  # to correct device
 
@@ -221,7 +234,6 @@ def load_models(self, model_type, device: torch.device, boost: bool):
 
     @staticmethod
     def get_default_net_size(model_type):
-        # TODO: fill in, use in the GUI
         sizes = {
             0: [448, 448],
             1: [512, 512],
@@ -285,9 +297,11 @@ def get_raw_prediction(self, input, net_width, net_height):
                 raw_prediction = estimatezoedepth(input, self.depth_model, net_width, net_height)
             else:
                 raw_prediction = estimatemidas(img, self.depth_model, net_width, net_height,
-                                               self.resize_mode, self.normalization)
+                                               self.resize_mode, self.normalization, self.no_half,
+                                               self.precision == "autocast")
         else:
-            raw_prediction = estimateboost(img, self.depth_model, self.depth_model_type, self.pix2pix_model)
+            raw_prediction = estimateboost(img, self.depth_model, self.depth_model_type, self.pix2pix_model,
+                                           self.boost_whole_size_threshold)
         raw_prediction_invert = self.depth_model_type in [0, 7, 8, 9]
         return raw_prediction, raw_prediction_invert
 
@@ -341,7 +355,7 @@ def estimatezoedepth(img, model, w, h):
     return prediction
 
 
-def estimatemidas(img, model, w, h, resize_mode, normalization):
+def estimatemidas(img, model, w, h, resize_mode, normalization, no_half, precision_is_autocast):
     import contextlib
     # init transform
     transform = Compose(
@@ -364,13 +378,13 @@ def estimatemidas(img, model, w, h, resize_mode, normalization):
     img_input = transform({"image": img})["image"]
 
     # compute
-    precision_scope = torch.autocast if shared.cmd_opts.precision == "autocast" and device == torch.device(
+    precision_scope = torch.autocast if precision_is_autocast and device == torch.device(
         "cuda") else contextlib.nullcontext
     with torch.no_grad(), precision_scope("cuda"):
         sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
         if device == torch.device("cuda"):
             sample = sample.to(memory_format=torch.channels_last)
-            if not cmd_opts.no_half:
+            if not no_half:
                 sample = sample.half()
         prediction = model.forward(sample)
         prediction = (
@@ -600,12 +614,8 @@ def parse(self):
         return self.opt
 
 
-def estimateboost(img, model, model_type, pix2pixmodel):
-    pix2pixsize = 1024  # TODO: to setting?
-    whole_size_threshold = 1600  # R_max from the paper  # TODO: to setting?
-    # get settings
-    if hasattr(opts, 'depthmap_script_boost_rmax'):
-        whole_size_threshold = opts.depthmap_script_boost_rmax
+def estimateboost(img, model, model_type, pix2pixmodel, whole_size_threshold):
+    pix2pixsize = 1024  # TODO: make pix2pixsize a setting? (whole_size_threshold is now supplied by the caller)
 
     if model_type == 0:  # leres
         net_receptive_field_size = 448
diff --git a/src/standalone.py b/src/standalone.py
new file mode 100644
index 0000000..a20ee99
--- /dev/null
+++ b/src/standalone.py
@@ -0,0 +1,9 @@
+# This launches the Depth tab without AUTOMATIC1111/stable-diffusion-webui.
+# Does not work yet.
+
+import gradio as gr
+import scripts.depthmap
+
+demo = gr.Interface(fn=scripts.depthmap.on_ui_tabs, inputs="text", outputs="text")
+
+demo.launch()

From 7ec7167218e02c1d2e652d7a95e63c249c0fa488 Mon Sep 17 00:00:00 2001
From: semjon00 <semjon.00@gmail.com>
Date: Tue, 18 Jul 2023 16:47:07 +0300
Subject: [PATCH 02/10] Moved code

Broken
---
 src/common_ui.py | 465 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 465 insertions(+)
 create mode 100644 src/common_ui.py

diff --git a/src/common_ui.py b/src/common_ui.py
new file mode 100644
index 0000000..c5e76b7
--- /dev/null
+++ b/src/common_ui.py
@@ -0,0 +1,465 @@
+import traceback
+from pathlib import Path
+import gradio as gr
+from PIL import Image
+
+from src import backbone
+from src.core import core_generation_funnel, unload_models, run_makevideo
+from src.depthmap_generation import ModelHolder
+from src.gradio_args_transport import GradioComponentBundle
+from src.main import *
+
+
+def main_ui_panel(is_depth_tab):
+    inp = GradioComponentBundle()
+    # TODO: Greater visual separation
+    with gr.Blocks():
+        with gr.Row():
+            inp += 'compute_device', gr.Radio(label="Compute on", choices=['GPU', 'CPU'], value='GPU')
+            # TODO: Should return value instead of index. Maybe Enum should be used?
+            inp += 'model_type', gr.Dropdown(label="Model",
+                                             choices=['res101', 'dpt_beit_large_512 (midas 3.1)',
+                                                      'dpt_beit_large_384 (midas 3.1)', 'dpt_large_384 (midas 3.0)',
+                                                      'dpt_hybrid_384 (midas 3.0)',
+                                                      'midas_v21', 'midas_v21_small',
+                                                      'zoedepth_n (indoor)', 'zoedepth_k (outdoor)', 'zoedepth_nk'],
+                                             value='res101',
+                                             type="index")
+        with gr.Group():
+            with gr.Row():
+                inp += 'boost', gr.Checkbox(label="BOOST (multi-resolution merging)", value=True)
+                with gr.Group(visible=False) as options_depend_on_boost:
+                    inp += 'match_size', gr.Checkbox(label="Match net size to input size", value=False)
+            with gr.Row(visible=False) as options_depend_on_match_size:
+                inp += 'net_width', gr.Slider(minimum=64, maximum=2048, step=64, label='Net width', value=448)
+                inp += 'net_height', gr.Slider(minimum=64, maximum=2048, step=64, label='Net height', value=448)
+
+        with gr.Group():
+            with gr.Row():
+                inp += "save_outputs", gr.Checkbox(label="Save Outputs", value=True)  # 50% of width
+                with gr.Group():  # 50% of width
+                    inp += "output_depth", gr.Checkbox(label="Output DepthMap", value=True)
+                    inp += "invert_depth", gr.Checkbox(label="Invert (black=near, white=far)", value=False)
+            with gr.Row() as options_depend_on_output_depth_1:
+                inp += "combine_output", gr.Checkbox(
+                    label="Combine input and depthmap into one image", value=False)
+                inp += "combine_output_axis", gr.Radio(label="Combine axis", choices=['Vertical', 'Horizontal'],
+                                                       value='Horizontal', type="index", visible=False)
+        with gr.Group():
+            with gr.Row():
+                inp += 'clipdepth', gr.Checkbox(label="Clip and renormalize DepthMap", value=False)
+            with gr.Row(visible=False) as clip_options_row_1:
+                inp += "clipthreshold_far", gr.Slider(minimum=0, maximum=1, step=0.001, label='Far clip', value=0)
+                inp += "clipthreshold_near", gr.Slider(minimum=0, maximum=1, step=0.001, label='Near clip', value=1)
+
+        with gr.Group():
+            with gr.Row():
+                inp += "show_heat", gr.Checkbox(label="Generate HeatMap", value=False)
+                # gr.Checkbox(label="Generate NormalMap", value=False)  # TODO: this is a fake door
+
+        with gr.Group():
+            with gr.Row():
+                inp += "gen_stereo", gr.Checkbox(label="Generate stereoscopic image(s)", value=False)
+            with gr.Group(visible=False) as stereo_options:
+                with gr.Row():
+                    with gr.Row():
+                        inp += "stereo_modes", gr.CheckboxGroup(
+                            ["left-right", "right-left", "top-bottom", "bottom-top", "red-cyan-anaglyph"],
+                            label="Output", value=["left-right", "red-cyan-anaglyph"])
+                with gr.Row():
+                    inp += "stereo_divergence", gr.Slider(minimum=0.05, maximum=10.005, step=0.01,
+                                                          label='Divergence (3D effect)',
+                                                          value=2.5)
+                    inp += "stereo_separation", gr.Slider(minimum=-5.0, maximum=5.0, step=0.01,
+                                                          label='Separation (moves images apart)',
+                                                          value=0.0)
+                with gr.Row():
+                    inp += "stereo_fill", gr.Dropdown(label="Gap fill technique",
+                                                      choices=['none', 'naive', 'naive_interpolating', 'polylines_soft',
+                                                               'polylines_sharp'], value='polylines_sharp',
+                                                      type="value")
+                    inp += "stereo_balance", gr.Slider(minimum=-1.0, maximum=1.0, step=0.05,
+                                                       label='Balance between eyes',
+                                                       value=0.0)
+
+        with gr.Group():
+            with gr.Row():
+                inp += "gen_mesh", gr.Checkbox(
+                    label="Generate simple 3D mesh", value=False, visible=True)
+            with gr.Group(visible=False) as mesh_options:
+                with gr.Row():
+                    gr.HTML(value="Generates fast, accurate only with ZoeDepth models and no boost, no custom maps")
+                with gr.Row():
+                    inp += "mesh_occlude", gr.Checkbox(label="Remove occluded edges", value=True, visible=True)
+                    inp += "mesh_spherical", gr.Checkbox(label="Equirectangular projection", value=False, visible=True)
+
+        if is_depth_tab:
+            with gr.Group():
+                with gr.Row():
+                    inp += "inpaint", gr.Checkbox(
+                        label="Generate 3D inpainted mesh", value=False)
+                with gr.Group(visible=False) as inpaint_options_row_0:
+                    gr.HTML("Generation is sloooow, required for generating videos")
+                    inp += "inpaint_vids", gr.Checkbox(
+                        label="Generate 4 demo videos with 3D inpainted mesh.", value=False)
+                    gr.HTML("More options for generating video can be found in the Generate video tab")
+
+        with gr.Group():
+            # TODO: it should be clear from the UI that the background removal does not use the model selected above
+            with gr.Row():
+                inp += "background_removal", gr.Checkbox(label="Remove background", value=False)
+            with gr.Row(visible=False) as bgrem_options_row_1:
+                inp += "save_background_removal_masks", gr.Checkbox(label="Save the foreground masks", value=False)
+                inp += "pre_depth_background_removal", gr.Checkbox(label="Pre-depth background removal", value=False)
+            with gr.Row(visible=False) as bgrem_options_row_2:
+                inp += "background_removal_model", gr.Dropdown(label="Rembg Model",
+                                                               choices=['u2net', 'u2netp', 'u2net_human_seg',
+                                                                        'silueta'],
+                                                               value='u2net', type="value")
+
+        with gr.Box():
+            gr.HTML(f"{SCRIPT_FULL_NAME}<br/>")
+            gr.HTML("Information, comment and share @ <a "
+                    "href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>"
+                    "https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>")
+
+        inp += "gen_normal", gr.Checkbox(label="Generate Normalmap (hidden! api only)", value=False, visible=False)
+
+        def update_delault_net_size(model_type):
+            w, h = ModelHolder.get_default_net_size(model_type)
+            return inp['net_width'].update(value=w), inp['net_height'].update(value=h)
+
+        inp['model_type'].change(
+            fn=update_delault_net_size,
+            inputs=inp['model_type'],
+            outputs=[inp['net_width'], inp['net_height']]
+        )
+
+        inp['boost'].change(
+            fn=lambda a, b: (options_depend_on_boost.update(visible=not a),
+                             options_depend_on_match_size.update(visible=not a and not b)),
+            inputs=[inp['boost'], inp['match_size']],
+            outputs=[options_depend_on_boost, options_depend_on_match_size]
+        )
+        inp['match_size'].change(
+            fn=lambda a, b: options_depend_on_match_size.update(visible=not a and not b),
+            inputs=[inp['boost'], inp['match_size']],
+            outputs=[options_depend_on_match_size]
+        )
+
+        inp['output_depth'].change(
+            fn=lambda a: (inp['invert_depth'].update(visible=a), options_depend_on_output_depth_1.update(visible=a)),
+            inputs=[inp['output_depth']],
+            outputs=[inp['invert_depth'], options_depend_on_output_depth_1]
+        )
+
+        inp['combine_output'].change(
+            fn=lambda v: inp['combine_output_axis'].update(visible=v),
+            inputs=[inp['combine_output']],
+            outputs=[inp['combine_output_axis']]
+        )
+
+        inp['clipdepth'].change(
+            fn=lambda v: clip_options_row_1.update(visible=v),
+            inputs=[inp['clipdepth']],
+            outputs=[clip_options_row_1]
+        )
+        inp['clipthreshold_far'].change(
+            fn=lambda a, b: a if b < a else b,
+            inputs=[inp['clipthreshold_far'], inp['clipthreshold_near']],
+            outputs=[inp['clipthreshold_near']]
+        )
+        inp['clipthreshold_near'].change(
+            fn=lambda a, b: a if b > a else b,
+            inputs=[inp['clipthreshold_near'], inp['clipthreshold_far']],
+            outputs=[inp['clipthreshold_far']]
+        )
+
+        def stereo_options_visibility(v):
+            return stereo_options.update(visible=v)
+
+        inp['gen_stereo'].change(
+            fn=stereo_options_visibility,
+            inputs=[inp['gen_stereo']],
+            outputs=[stereo_options]
+        )
+
+        inp['gen_mesh'].change(
+            fn=lambda v: mesh_options.update(visible=v),
+            inputs=[inp['gen_mesh']],
+            outputs=[mesh_options]
+        )
+
+        def inpaint_options_visibility(v):
+            return inpaint_options_row_0.update(visible=v)
+
+        if is_depth_tab:
+            inp['inpaint'].change(
+                fn=inpaint_options_visibility,
+                inputs=[inp['inpaint']],
+                outputs=[inpaint_options_row_0]
+            )
+
+        def background_removal_options_visibility(v):
+            return bgrem_options_row_1.update(visible=v), \
+                bgrem_options_row_2.update(visible=v)
+
+        inp['background_removal'].change(
+            fn=background_removal_options_visibility,
+            inputs=[inp['background_removal']],
+            outputs=[bgrem_options_row_1, bgrem_options_row_2]
+        )
+
+    return inp
+
+def on_ui_tabs():
+    """Build the DepthMap tab (inputs, outputs, event wiring) and return the gr.Blocks interface."""
+    inp = GradioComponentBundle()
+    with gr.Blocks(analytics_enabled=False) as depthmap_interface:
+        with gr.Row().style(equal_height=False):
+            with gr.Column(variant='panel'):
+                inp += 'depthmap_mode', gr.HTML(visible=False, value='0')
+                with gr.Tabs():
+                    with gr.TabItem('Single Image') as depthmap_mode_0:
+                        with gr.Row():
+                            inp += gr.Image(label="Source", source="upload", interactive=True, type="pil",
+                                            elem_id="depthmap_input_image")
+                            with gr.Group(visible=False) as custom_depthmap_row_0:
+                                # TODO: depthmap generation settings should disappear when using this
+                                inp += gr.File(label="Custom DepthMap", file_count="single", interactive=True,
+                                               type="file", elem_id='custom_depthmap_img')
+                        inp += gr.Checkbox(elem_id="custom_depthmap", label="Use custom DepthMap", value=False)
+                    with gr.TabItem('Batch Process') as depthmap_mode_1:
+                        inp += gr.File(elem_id='image_batch', label="Batch Process", file_count="multiple",
+                                       interactive=True, type="file")
+                    with gr.TabItem('Batch from Directory') as depthmap_mode_2:
+                        inp += gr.Textbox(elem_id="depthmap_batch_input_dir", label="Input directory",
+                                          **backbone.get_hide_dirs(),
+                                          placeholder="A directory on the same machine where the server is running.")
+                        inp += gr.Textbox(elem_id="depthmap_batch_output_dir", label="Output directory",
+                                          **backbone.get_hide_dirs(),  # must be called: ** needs a mapping
+                                          placeholder="Leave blank to save images to the default path.")
+                        gr.HTML("Files in the output directory may be overwritten.")
+                        inp += gr.Checkbox(elem_id="depthmap_batch_reuse",
+                                           label="Skip generation and use (edited/custom) depthmaps "
+                                                 "in output directory when a file already exists.",
+                                           value=True)
+                submit = gr.Button('Generate', elem_id="depthmap_generate", variant='primary')
+                inp += main_ui_panel(True)  # Main panel is inserted here
+                unloadmodels = gr.Button('Unload models', elem_id="depthmap_unloadmodels")
+
+            with gr.Column(variant='panel'):
+                with gr.Tabs(elem_id="mode_depthmap_output"):
+                    with gr.TabItem('Depth Output'):
+                        with gr.Group():
+                            result_images = gr.Gallery(label='Output', show_label=False,
+                                                       elem_id=f"depthmap_gallery").style(grid=4)
+                        with gr.Column():
+                            html_info = gr.HTML()
+
+                    with gr.TabItem('3D Mesh'):
+                        with gr.Group():
+                            result_depthmesh = gr.Model3D(label="3d Mesh", clear_color=[1.0, 1.0, 1.0, 1.0])
+                            with gr.Row():
+                                # loadmesh = gr.Button('Load')
+                                clearmesh = gr.Button('Clear')
+
+                    with gr.TabItem('Generate video'):
+                        # generate video
+                        with gr.Group():
+                            with gr.Row():
+                                gr.Markdown("Generate video from inpainted(!) mesh.")
+                            with gr.Row():
+                                depth_vid = gr.Video(interactive=False)
+                            with gr.Column():
+                                vid_html_info_x = gr.HTML()
+                                vid_html_info = gr.HTML()
+                                fn_mesh = gr.Textbox(label="Input Mesh (.ply | .obj)", **backbone.get_hide_dirs(),
+                                                     placeholder="A file on the same machine where "
+                                                                 "the server is running.")
+                            with gr.Row():
+                                vid_numframes = gr.Textbox(label="Number of frames", value="300")
+                                vid_fps = gr.Textbox(label="Framerate", value="40")
+                                vid_format = gr.Dropdown(label="Format", choices=['mp4', 'webm'], value='mp4',
+                                                         type="value", elem_id="video_format")
+                                vid_ssaa = gr.Dropdown(label="SSAA", choices=['1', '2', '3', '4'], value='3',
+                                                       type="value", elem_id="video_ssaa")
+                            with gr.Row():
+                                vid_traj = gr.Dropdown(label="Trajectory",
+                                                       choices=['straight-line', 'double-straight-line', 'circle'],
+                                                       value='double-straight-line', type="index",
+                                                       elem_id="video_trajectory")
+                                vid_shift = gr.Textbox(label="Translate: x, y, z", value="-0.015, 0.0, -0.05")
+                                vid_border = gr.Textbox(label="Crop: top, left, bottom, right",
+                                                        value="0.03, 0.03, 0.05, 0.03")
+                                vid_dolly = gr.Checkbox(label="Dolly", value=False, elem_classes="smalltxt")
+                            with gr.Row():
+                                submit_vid = gr.Button('Generate Video', elem_id="depthmap_generatevideo",
+                                                       variant='primary')
+
+        inp += inp.enkey_tail()
+        depthmap_mode_0.select(lambda: '0', None, inp['depthmap_mode'])
+        depthmap_mode_1.select(lambda: '1', None, inp['depthmap_mode'])
+        depthmap_mode_2.select(lambda: '2', None, inp['depthmap_mode'])
+
+        def custom_depthmap_visibility(v):
+            return custom_depthmap_row_0.update(visible=v)
+
+        inp['custom_depthmap'].change(
+            fn=custom_depthmap_visibility,
+            inputs=[inp['custom_depthmap']],
+            outputs=[custom_depthmap_row_0]
+        )
+
+        unloadmodels.click(
+            fn=unload_models,
+            inputs=[],
+            outputs=[]
+        )
+
+        clearmesh.click(
+            fn=lambda: None,
+            inputs=[],
+            outputs=[result_depthmesh]
+        )
+
+        submit.click(
+            fn=backbone.wrap_gradio_gpu_call(run_generate),
+            inputs=inp.enkey_body(),
+            outputs=[
+                result_images,
+                fn_mesh,
+                result_depthmesh,
+                html_info
+            ]
+        )
+
+        submit_vid.click(
+            fn=backbone.wrap_gradio_gpu_call(run_makevideo),
+            inputs=[
+                fn_mesh,
+                vid_numframes,
+                vid_fps,
+                vid_traj,
+                vid_shift,
+                vid_border,
+                vid_dolly,
+                vid_format,
+                vid_ssaa
+            ],
+            outputs=[
+                depth_vid,
+                vid_html_info_x,
+                vid_html_info
+            ]
+        )
+
+    return depthmap_interface
+
+
+def run_generate(*inputs):
+    """Collect input images for the chosen mode, run depthmap generation and save/return the results."""
+    inputs = GradioComponentBundle.enkey_to_dict(inputs)
+    depthmap_mode = inputs['depthmap_mode']
+    depthmap_batch_input_dir = inputs['depthmap_batch_input_dir']
+    image_batch = inputs['image_batch']
+    depthmap_input_image = inputs['depthmap_input_image']
+    depthmap_batch_output_dir = inputs['depthmap_batch_output_dir']
+    depthmap_batch_reuse = inputs['depthmap_batch_reuse']
+    custom_depthmap = inputs['custom_depthmap']
+    custom_depthmap_img = inputs['custom_depthmap_img']
+
+    inputimages = []
+    # Allow supplying custom depthmaps
+    inputdepthmaps = []
+    # Also keep track of original file names
+    inputnames = []
+
+    if depthmap_mode == '2' and depthmap_batch_output_dir != '':
+        outpath = depthmap_batch_output_dir
+    else:
+        outpath = backbone.opts.outdir_samples or backbone.opts.outdir_extras_samples
+
+    if depthmap_mode == '0':  # Single image
+        if depthmap_input_image is None:
+            return [], None, None, "Please select an input image!"
+        inputimages.append(depthmap_input_image)
+        inputnames.append(None)
+        if custom_depthmap:
+            if custom_depthmap_img is None:
+                return [], None, None, \
+                    "Custom depthmap is not specified. Please either supply it or disable this option."
+            inputdepthmaps.append(Image.open(os.path.abspath(custom_depthmap_img.name)))
+        else:
+            inputdepthmaps.append(None)
+    if depthmap_mode == '1':  # Batch Process
+        if image_batch is None:
+            return [], None, None, "Please select input images!"  # four values, like every other return here
+        for img in image_batch:
+            image = Image.open(os.path.abspath(img.name))
+            inputimages.append(image)
+            inputnames.append(os.path.splitext(img.orig_name)[0])
+    elif depthmap_mode == '2':  # Batch from Directory
+        assert not backbone.cmd_opts.hide_ui_dir_config, '--hide-ui-dir-config option must be disabled'
+        if depthmap_batch_input_dir == '':
+            return [], None, None, "Please select an input directory."
+        if depthmap_batch_input_dir == depthmap_batch_output_dir:
+            return [], None, None, "Please pick different directories for batch processing."
+        image_list = backbone.listfiles(depthmap_batch_input_dir)
+        for path in image_list:
+            try:
+                inputimages.append(Image.open(path))
+                inputnames.append(path)
+                custom_depthmap = None
+                if depthmap_batch_reuse:
+                    basename = Path(path).stem
+                    # Custom names are not used in samples directory
+                    if outpath != backbone.opts.outdir_extras_samples:
+                        # Possible filenames that the custom depthmaps may have
+                        name_candidates = [f'{basename}-0000.{backbone.opts.samples_format}',  # current format
+                                           f'{basename}.png',  # human-intuitive format
+                                           f'{Path(path).name}']  # human-intuitive format (worse)
+                        for fn_cand in name_candidates:
+                            path_cand = os.path.join(outpath, fn_cand)
+                            if os.path.isfile(path_cand):
+                                custom_depthmap = Image.open(os.path.abspath(path_cand))
+                                break
+                inputdepthmaps.append(custom_depthmap)
+            except Exception as e:
+                print(f'Failed to load {path}, ignoring. Exception: {str(e)}')
+        inputdepthmaps_n = len([1 for x in inputdepthmaps if x is not None])
+        print(f'{len(inputimages)} images will be processed, {inputdepthmaps_n} existing depthmaps will be reused')
+
+    outputs, mesh_fi, meshsimple_fi = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs, backbone.gather_ops())
+
+    # Saving images
+    show_images = []
+    for input_i, imgs in enumerate(outputs):
+        basename = 'depthmap'
+        if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != backbone.opts.outdir_extras_samples:
+            basename = Path(inputnames[input_i]).stem
+
+        for image_type, image in list(imgs.items()):
+            show_images += [image]
+            if inputs["save_outputs"]:
+                try:
+                    suffix = "" if image_type == "depth" else f"_{image_type}"
+                    backbone.save_image(image, path=outpath, basename=basename, seed=None,
+                               prompt=None, extension=backbone.opts.samples_format, short_filename=True,
+                               no_prompt=True, grid=False, pnginfo_section_name="extras",
+                               suffix=suffix)
+                except Exception as e:
+                    if not ('image has wrong mode' in str(e) or 'I;16' in str(e)):
+                        raise e
+                    print('Catched exception: image has wrong mode!')
+                    traceback.print_exc()
+
+    # use inpainted 3d mesh to show in 3d model output when enabled in settings
+    if hasattr(backbone.opts, 'depthmap_script_show_3d_inpaint') and backbone.opts.depthmap_script_show_3d_inpaint \
+            and mesh_fi is not None and len(mesh_fi) > 0:
+        meshsimple_fi = mesh_fi
+    # however, don't show 3dmodel when disabled in settings
+    if hasattr(backbone.opts, 'depthmap_script_show_3d') and not backbone.opts.depthmap_script_show_3d:
+        meshsimple_fi = None
+    # TODO: return more info
+    return show_images, mesh_fi, meshsimple_fi, 'Generated!'

From b345e78dae0eee89ba88d5124854c8f7ed5bffbc Mon Sep 17 00:00:00 2001
From: semjon00 <semjon.00@gmail.com>
Date: Tue, 18 Jul 2023 19:29:12 +0300
Subject: [PATCH 03/10] Standalone interface (barely works)

---
 main.py                    |  10 +
 scripts/depthmap.py        | 490 +------------------------------------
 src/backbone.py            |  94 +++++++
 src/common_ui.py           |  35 +--
 src/core.py                |  47 ++--
 src/depthmap_generation.py |  62 ++---
 src/standalone.py          |   9 -
 7 files changed, 173 insertions(+), 574 deletions(-)
 create mode 100644 main.py
 delete mode 100644 src/standalone.py

diff --git a/main.py b/main.py
new file mode 100644
index 0000000..8e72043
--- /dev/null
+++ b/main.py
@@ -0,0 +1,10 @@
+# This launches DepthMap without the AUTOMATIC1111/stable-diffusion-webui
+import argparse
+import src.common_ui
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--listen", action="store_true", help="Create public link")  # plain on/off flag
+    args = parser.parse_args()
+
+    src.common_ui.on_ui_tabs().launch(share=args.listen)
diff --git a/scripts/depthmap.py b/scripts/depthmap.py
index 260e60f..2a81fd7 100644
--- a/scripts/depthmap.py
+++ b/scripts/depthmap.py
@@ -1,16 +1,13 @@
-import gradio as gr
 import traceback
-
+import gradio as gr
 from modules import shared
-from modules.images import save_image
-from modules.call_queue import wrap_gradio_gpu_call
-from pathlib import Path
-from PIL import Image
+import modules.scripts as scripts
 
+from src import backbone
+from src import common_ui
+from src.core import core_generation_funnel
 from src.gradio_args_transport import GradioComponentBundle
 from src.main import *
-from src.core import core_generation_funnel, unload_models, run_makevideo
-from src.depthmap_generation import ModelHolder
 
 
 # Ugly workaround to fix gradio tempfile issue
@@ -27,219 +24,6 @@ def ensure_gradio_temp_directory():
 ensure_gradio_temp_directory()
 
 
-def gather_ops():
-    from modules.shared import opts, cmd_opts
-    ops = {}
-    if hasattr(opts, 'depthmap_script_boost_rmax'):
-        ops['boost_whole_size_threshold'] = opts.depthmap_script_boost_rmax
-    ops['precision'] = cmd_opts.precision
-    ops['no_half'] = cmd_opts.no_half
-    return ops
-
-def main_ui_panel(is_depth_tab):
-    inp = GradioComponentBundle()
-    # TODO: Greater visual separation
-    with gr.Blocks():
-        with gr.Row():
-            inp += 'compute_device', gr.Radio(label="Compute on", choices=['GPU', 'CPU'], value='GPU')
-            # TODO: Should return value instead of index. Maybe Enum should be used?
-            inp += 'model_type', gr.Dropdown(label="Model",
-                                             choices=['res101', 'dpt_beit_large_512 (midas 3.1)',
-                                                      'dpt_beit_large_384 (midas 3.1)', 'dpt_large_384 (midas 3.0)',
-                                                      'dpt_hybrid_384 (midas 3.0)',
-                                                      'midas_v21', 'midas_v21_small',
-                                                      'zoedepth_n (indoor)', 'zoedepth_k (outdoor)', 'zoedepth_nk'],
-                                             value='res101',
-                                             type="index")
-        with gr.Group():
-            with gr.Row():
-                inp += 'boost', gr.Checkbox(label="BOOST (multi-resolution merging)", value=True)
-                with gr.Group(visible=False) as options_depend_on_boost:
-                    inp += 'match_size', gr.Checkbox(label="Match net size to input size", value=False)
-            with gr.Row(visible=False) as options_depend_on_match_size:
-                inp += 'net_width', gr.Slider(minimum=64, maximum=2048, step=64, label='Net width', value=448)
-                inp += 'net_height', gr.Slider(minimum=64, maximum=2048, step=64, label='Net height', value=448)
-
-        with gr.Group():
-            with gr.Row():
-                inp += "save_outputs", gr.Checkbox(label="Save Outputs", value=True)  # 50% of width
-                with gr.Group():  # 50% of width
-                    inp += "output_depth", gr.Checkbox(label="Output DepthMap", value=True)
-                    inp += "invert_depth", gr.Checkbox(label="Invert (black=near, white=far)", value=False)
-            with gr.Row() as options_depend_on_output_depth_1:
-                inp += "combine_output", gr.Checkbox(
-                    label="Combine input and depthmap into one image", value=False)
-                inp += "combine_output_axis", gr.Radio(label="Combine axis", choices=['Vertical', 'Horizontal'],
-                                                       value='Horizontal', type="index", visible=False)
-        with gr.Group():
-            with gr.Row():
-                inp += 'clipdepth', gr.Checkbox(label="Clip and renormalize DepthMap", value=False)
-            with gr.Row(visible=False) as clip_options_row_1:
-                inp += "clipthreshold_far", gr.Slider(minimum=0, maximum=1, step=0.001, label='Far clip', value=0)
-                inp += "clipthreshold_near", gr.Slider(minimum=0, maximum=1, step=0.001, label='Near clip', value=1)
-
-        with gr.Group():
-            with gr.Row():
-                inp += "show_heat", gr.Checkbox(label="Generate HeatMap", value=False)
-                # gr.Checkbox(label="Generate NormalMap", value=False)  # TODO: this is a fake door
-
-        with gr.Group():
-            with gr.Row():
-                inp += "gen_stereo", gr.Checkbox(label="Generate stereoscopic image(s)", value=False)
-            with gr.Group(visible=False) as stereo_options:
-                with gr.Row():
-                    with gr.Row():
-                        inp += "stereo_modes", gr.CheckboxGroup(
-                            ["left-right", "right-left", "top-bottom", "bottom-top", "red-cyan-anaglyph"],
-                            label="Output", value=["left-right", "red-cyan-anaglyph"])
-                with gr.Row():
-                    inp += "stereo_divergence", gr.Slider(minimum=0.05, maximum=10.005, step=0.01,
-                                                          label='Divergence (3D effect)',
-                                                          value=2.5)
-                    inp += "stereo_separation", gr.Slider(minimum=-5.0, maximum=5.0, step=0.01,
-                                                          label='Separation (moves images apart)',
-                                                          value=0.0)
-                with gr.Row():
-                    inp += "stereo_fill", gr.Dropdown(label="Gap fill technique",
-                                                      choices=['none', 'naive', 'naive_interpolating', 'polylines_soft',
-                                                               'polylines_sharp'], value='polylines_sharp',
-                                                      type="value")
-                    inp += "stereo_balance", gr.Slider(minimum=-1.0, maximum=1.0, step=0.05,
-                                                       label='Balance between eyes',
-                                                       value=0.0)
-
-        with gr.Group():
-            with gr.Row():
-                inp += "gen_mesh", gr.Checkbox(
-                    label="Generate simple 3D mesh", value=False, visible=True)
-            with gr.Group(visible=False) as mesh_options:
-                with gr.Row():
-                    gr.HTML(value="Generates fast, accurate only with ZoeDepth models and no boost, no custom maps")
-                with gr.Row():
-                    inp += "mesh_occlude", gr.Checkbox(label="Remove occluded edges", value=True, visible=True)
-                    inp += "mesh_spherical", gr.Checkbox(label="Equirectangular projection", value=False, visible=True)
-
-        if is_depth_tab:
-            with gr.Group():
-                with gr.Row():
-                    inp += "inpaint", gr.Checkbox(
-                        label="Generate 3D inpainted mesh", value=False)
-                with gr.Group(visible=False) as inpaint_options_row_0:
-                    gr.HTML("Generation is sloooow, required for generating videos")
-                    inp += "inpaint_vids", gr.Checkbox(
-                        label="Generate 4 demo videos with 3D inpainted mesh.", value=False)
-                    gr.HTML("More options for generating video can be found in the Generate video tab")
-
-        with gr.Group():
-            # TODO: it should be clear from the UI that the background removal does not use the model selected above
-            with gr.Row():
-                inp += "background_removal", gr.Checkbox(label="Remove background", value=False)
-            with gr.Row(visible=False) as bgrem_options_row_1:
-                inp += "save_background_removal_masks", gr.Checkbox(label="Save the foreground masks", value=False)
-                inp += "pre_depth_background_removal", gr.Checkbox(label="Pre-depth background removal", value=False)
-            with gr.Row(visible=False) as bgrem_options_row_2:
-                inp += "background_removal_model", gr.Dropdown(label="Rembg Model",
-                                                               choices=['u2net', 'u2netp', 'u2net_human_seg',
-                                                                        'silueta'],
-                                                               value='u2net', type="value")
-
-        with gr.Box():
-            gr.HTML(f"{SCRIPT_FULL_NAME}<br/>")
-            gr.HTML("Information, comment and share @ <a "
-                    "href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>"
-                    "https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>")
-
-        inp += "gen_normal", gr.Checkbox(label="Generate Normalmap (hidden! api only)", value=False, visible=False)
-
-        def update_delault_net_size(model_type):
-            w, h = ModelHolder.get_default_net_size(model_type)
-            return inp['net_width'].update(value=w), inp['net_height'].update(value=h)
-
-        inp['model_type'].change(
-            fn=update_delault_net_size,
-            inputs=inp['model_type'],
-            outputs=[inp['net_width'], inp['net_height']]
-        )
-
-        inp['boost'].change(
-            fn=lambda a, b: (options_depend_on_boost.update(visible=not a),
-                             options_depend_on_match_size.update(visible=not a and not b)),
-            inputs=[inp['boost'], inp['match_size']],
-            outputs=[options_depend_on_boost, options_depend_on_match_size]
-        )
-        inp['match_size'].change(
-            fn=lambda a, b: options_depend_on_match_size.update(visible=not a and not b),
-            inputs=[inp['boost'], inp['match_size']],
-            outputs=[options_depend_on_match_size]
-        )
-
-        inp['output_depth'].change(
-            fn=lambda a: (inp['invert_depth'].update(visible=a), options_depend_on_output_depth_1.update(visible=a)),
-            inputs=[inp['output_depth']],
-            outputs=[inp['invert_depth'], options_depend_on_output_depth_1]
-        )
-
-        inp['combine_output'].change(
-            fn=lambda v: inp['combine_output_axis'].update(visible=v),
-            inputs=[inp['combine_output']],
-            outputs=[inp['combine_output_axis']]
-        )
-
-        inp['clipdepth'].change(
-            fn=lambda v: clip_options_row_1.update(visible=v),
-            inputs=[inp['clipdepth']],
-            outputs=[clip_options_row_1]
-        )
-        inp['clipthreshold_far'].change(
-            fn=lambda a, b: a if b < a else b,
-            inputs=[inp['clipthreshold_far'], inp['clipthreshold_near']],
-            outputs=[inp['clipthreshold_near']]
-        )
-        inp['clipthreshold_near'].change(
-            fn=lambda a, b: a if b > a else b,
-            inputs=[inp['clipthreshold_near'], inp['clipthreshold_far']],
-            outputs=[inp['clipthreshold_far']]
-        )
-
-        def stereo_options_visibility(v):
-            return stereo_options.update(visible=v)
-
-        inp['gen_stereo'].change(
-            fn=stereo_options_visibility,
-            inputs=[inp['gen_stereo']],
-            outputs=[stereo_options]
-        )
-
-        inp['gen_mesh'].change(
-            fn=lambda v: mesh_options.update(visible=v),
-            inputs=[inp['gen_mesh']],
-            outputs=[mesh_options]
-        )
-
-        def inpaint_options_visibility(v):
-            return inpaint_options_row_0.update(visible=v)
-
-        if is_depth_tab:
-            inp['inpaint'].change(
-                fn=inpaint_options_visibility,
-                inputs=[inp['inpaint']],
-                outputs=[inpaint_options_row_0]
-            )
-
-        def background_removal_options_visibility(v):
-            return bgrem_options_row_1.update(visible=v), \
-                bgrem_options_row_2.update(visible=v)
-
-        inp['background_removal'].change(
-            fn=background_removal_options_visibility,
-            inputs=[inp['background_removal']],
-            outputs=[bgrem_options_row_1, bgrem_options_row_2]
-        )
-
-    return inp
-
-
-import modules.scripts as scripts
 class Script(scripts.Script):
     def title(self):
         return SCRIPT_NAME
@@ -251,7 +35,7 @@ def ui(self, is_img2img):
         gr.HTML()  # Work around a Gradio bug
         with gr.Column(variant='panel'):
             gr.HTML()  # Work around a Gradio bug
-            ret = main_ui_panel(False)
+            ret = common_ui.main_ui_panel(False)
             ret += ret.enkey_tail()
         return ret.enkey_body()
 
@@ -273,7 +57,7 @@ def run(self, p, *inputs):
                 continue
             inputimages.append(processed.images[count])
 
-        outputs, mesh_fi, meshsimple_fi = core_generation_funnel(p.outpath_samples, inputimages, None, None, inputs, gather_ops())
+        outputs, mesh_fi, meshsimple_fi = core_generation_funnel(p.outpath_samples, inputimages, None, None, inputs, backbone.gather_ops())
 
         for input_i, imgs in enumerate(outputs):
             # get generation parameters
@@ -286,8 +70,8 @@ def run(self, p, *inputs):
                 processed.images.append(image)
                 if inputs["save_outputs"]:
                     try:
-                        suffix = "" if image_type == "depth" else f"_{image_type}"
-                        save_image(image, path=p.outpath_samples, basename="", seed=processed.all_seeds[input_i],
+                        suffix = "" if image_type == "depth" else f"{image_type}"
+                        backbone.save_image(image, path=p.outpath_samples, basename="", seed=processed.all_seeds[input_i],
                                    prompt=processed.all_prompts[input_i], extension=shared.opts.samples_format,
                                    info=info,
                                    p=processed,
@@ -323,260 +107,6 @@ def on_ui_settings():
                                              section=section))
 
 
-def on_ui_tabs():
-    inp = GradioComponentBundle()
-    with gr.Blocks(analytics_enabled=False) as depthmap_interface:
-        with gr.Row().style(equal_height=False):
-            with gr.Column(variant='panel'):
-                inp += 'depthmap_mode', gr.HTML(visible=False, value='0')
-                with gr.Tabs():
-                    with gr.TabItem('Single Image') as depthmap_mode_0:
-                        with gr.Row():
-                            inp += gr.Image(label="Source", source="upload", interactive=True, type="pil",
-                                            elem_id="depthmap_input_image")
-                            with gr.Group(visible=False) as custom_depthmap_row_0:
-                                # TODO: depthmap generation settings should disappear when using this
-                                inp += gr.File(label="Custom DepthMap", file_count="single", interactive=True,
-                                               type="file", elem_id='custom_depthmap_img')
-                        inp += gr.Checkbox(elem_id="custom_depthmap", label="Use custom DepthMap", value=False)
-                    with gr.TabItem('Batch Process') as depthmap_mode_1:
-                        inp += gr.File(elem_id='image_batch', label="Batch Process", file_count="multiple",
-                                       interactive=True, type="file")
-                    with gr.TabItem('Batch from Directory') as depthmap_mode_2:
-                        inp += gr.Textbox(elem_id="depthmap_batch_input_dir", label="Input directory",
-                                          **shared.hide_dirs,
-                                          placeholder="A directory on the same machine where the server is running.")
-                        inp += gr.Textbox(elem_id="depthmap_batch_output_dir", label="Output directory",
-                                          **shared.hide_dirs,
-                                          placeholder="Leave blank to save images to the default path.")
-                        gr.HTML("Files in the output directory may be overwritten.")
-                        inp += gr.Checkbox(elem_id="depthmap_batch_reuse",
-                                           label="Skip generation and use (edited/custom) depthmaps "
-                                                 "in output directory when a file already exists.",
-                                           value=True)
-                submit = gr.Button('Generate', elem_id="depthmap_generate", variant='primary')
-                inp += main_ui_panel(True)  # Main panel is inserted here
-                unloadmodels = gr.Button('Unload models', elem_id="depthmap_unloadmodels")
-
-            with gr.Column(variant='panel'):
-                with gr.Tabs(elem_id="mode_depthmap_output"):
-                    with gr.TabItem('Depth Output'):
-                        with gr.Group():
-                            result_images = gr.Gallery(label='Output', show_label=False,
-                                                       elem_id=f"depthmap_gallery").style(grid=4)
-                        with gr.Column():
-                            html_info = gr.HTML()
-
-                    with gr.TabItem('3D Mesh'):
-                        with gr.Group():
-                            result_depthmesh = gr.Model3D(label="3d Mesh", clear_color=[1.0, 1.0, 1.0, 1.0])
-                            with gr.Row():
-                                # loadmesh = gr.Button('Load')
-                                clearmesh = gr.Button('Clear')
-
-                    with gr.TabItem('Generate video'):
-                        # generate video
-                        with gr.Group():
-                            with gr.Row():
-                                gr.Markdown("Generate video from inpainted(!) mesh.")
-                            with gr.Row():
-                                depth_vid = gr.Video(interactive=False)
-                            with gr.Column():
-                                vid_html_info_x = gr.HTML()
-                                vid_html_info = gr.HTML()
-                                fn_mesh = gr.Textbox(label="Input Mesh (.ply | .obj)", **shared.hide_dirs,
-                                                     placeholder="A file on the same machine where "
-                                                                 "the server is running.")
-                            with gr.Row():
-                                vid_numframes = gr.Textbox(label="Number of frames", value="300")
-                                vid_fps = gr.Textbox(label="Framerate", value="40")
-                                vid_format = gr.Dropdown(label="Format", choices=['mp4', 'webm'], value='mp4',
-                                                         type="value", elem_id="video_format")
-                                vid_ssaa = gr.Dropdown(label="SSAA", choices=['1', '2', '3', '4'], value='3',
-                                                       type="value", elem_id="video_ssaa")
-                            with gr.Row():
-                                vid_traj = gr.Dropdown(label="Trajectory",
-                                                       choices=['straight-line', 'double-straight-line', 'circle'],
-                                                       value='double-straight-line', type="index",
-                                                       elem_id="video_trajectory")
-                                vid_shift = gr.Textbox(label="Translate: x, y, z", value="-0.015, 0.0, -0.05")
-                                vid_border = gr.Textbox(label="Crop: top, left, bottom, right",
-                                                        value="0.03, 0.03, 0.05, 0.03")
-                                vid_dolly = gr.Checkbox(label="Dolly", value=False, elem_classes="smalltxt")
-                            with gr.Row():
-                                submit_vid = gr.Button('Generate Video', elem_id="depthmap_generatevideo",
-                                                       variant='primary')
-
-        inp += inp.enkey_tail()
-
-        depthmap_mode_0.select(lambda: '0', None, inp['depthmap_mode'])
-        depthmap_mode_1.select(lambda: '1', None, inp['depthmap_mode'])
-        depthmap_mode_2.select(lambda: '2', None, inp['depthmap_mode'])
-
-        def custom_depthmap_visibility(v):
-            return custom_depthmap_row_0.update(visible=v)
-
-        inp['custom_depthmap'].change(
-            fn=custom_depthmap_visibility,
-            inputs=[inp['custom_depthmap']],
-            outputs=[custom_depthmap_row_0]
-        )
-
-        unloadmodels.click(
-            fn=unload_models,
-            inputs=[],
-            outputs=[]
-        )
-
-        clearmesh.click(
-            fn=lambda: None,
-            inputs=[],
-            outputs=[result_depthmesh]
-        )
-
-        submit.click(
-            fn=wrap_gradio_gpu_call(run_generate),
-            inputs=inp.enkey_body(),
-            outputs=[
-                result_images,
-                fn_mesh,
-                result_depthmesh,
-                html_info
-            ]
-        )
-
-        submit_vid.click(
-            fn=wrap_gradio_gpu_call(run_makevideo),
-            inputs=[
-                fn_mesh,
-                vid_numframes,
-                vid_fps,
-                vid_traj,
-                vid_shift,
-                vid_border,
-                vid_dolly,
-                vid_format,
-                vid_ssaa
-            ],
-            outputs=[
-                depth_vid,
-                vid_html_info_x,
-                vid_html_info
-            ]
-        )
-
-    return depthmap_interface
-
-
-# called from depth tab
-def run_generate(*inputs):
-    inputs = GradioComponentBundle.enkey_to_dict(inputs)
-    depthmap_mode = inputs['depthmap_mode']
-    depthmap_batch_input_dir = inputs['depthmap_batch_input_dir']
-    image_batch = inputs['image_batch']
-    depthmap_input_image = inputs['depthmap_input_image']
-    depthmap_batch_output_dir = inputs['depthmap_batch_output_dir']
-    depthmap_batch_reuse = inputs['depthmap_batch_reuse']
-    custom_depthmap = inputs['custom_depthmap']
-    custom_depthmap_img = inputs['custom_depthmap_img']
-
-    inputimages = []
-    # Allow supplying custom depthmaps
-    inputdepthmaps = []
-    # Also keep track of original file names
-    inputnames = []
-
-    if depthmap_mode == '2' and depthmap_batch_output_dir != '':
-        outpath = depthmap_batch_output_dir
-    else:
-        outpath = shared.opts.outdir_samples or shared.opts.outdir_extras_samples
-
-    if depthmap_mode == '0':  # Single image
-        if depthmap_input_image is None:
-            return [], None, None, "Please select an input image!"
-        inputimages.append(depthmap_input_image)
-        inputnames.append(None)
-        if custom_depthmap:
-            if custom_depthmap_img is None:
-                return [], None, None, \
-                    "Custom depthmap is not specified. Please either supply it or disable this option."
-            inputdepthmaps.append(Image.open(os.path.abspath(custom_depthmap_img.name)))
-        else:
-            inputdepthmaps.append(None)
-    if depthmap_mode == '1':  # Batch Process
-        if image_batch is None:
-            return [], None, None, "Please select input images!", ""
-        for img in image_batch:
-            image = Image.open(os.path.abspath(img.name))
-            inputimages.append(image)
-            inputnames.append(os.path.splitext(img.orig_name)[0])
-    elif depthmap_mode == '2':  # Batch from Directory
-        assert not shared.cmd_opts.hide_ui_dir_config, '--hide-ui-dir-config option must be disabled'
-        if depthmap_batch_input_dir == '':
-            return [], None, None, "Please select an input directory."
-        if depthmap_batch_input_dir == depthmap_batch_output_dir:
-            return [], None, None, "Please pick different directories for batch processing."
-        image_list = shared.listfiles(depthmap_batch_input_dir)
-        for path in image_list:
-            try:
-                inputimages.append(Image.open(path))
-                inputnames.append(path)
-
-                custom_depthmap = None
-                if depthmap_batch_reuse:
-                    basename = Path(path).stem
-                    # Custom names are not used in samples directory
-                    if outpath != shared.opts.outdir_extras_samples:
-                        # Possible filenames that the custom depthmaps may have
-                        name_candidates = [f'{basename}-0000.{shared.opts.samples_format}',  # current format
-                                           f'{basename}.png',  # human-intuitive format
-                                           f'{Path(path).name}']  # human-intuitive format (worse)
-                        for fn_cand in name_candidates:
-                            path_cand = os.path.join(outpath, fn_cand)
-                            if os.path.isfile(path_cand):
-                                custom_depthmap = Image.open(os.path.abspath(path_cand))
-                                break
-                inputdepthmaps.append(custom_depthmap)
-            except Exception as e:
-                print(f'Failed to load {path}, ignoring. Exception: {str(e)}')
-        inputdepthmaps_n = len([1 for x in inputdepthmaps if x is not None])
-        print(f'{len(inputimages)} images will be processed, {inputdepthmaps_n} existing depthmaps will be reused')
-
-    outputs, mesh_fi, meshsimple_fi = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs, gather_ops())
-    show_images = []
-
-    # Saving images
-    for input_i, imgs in enumerate(outputs):
-        basename = 'depthmap'
-        if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != shared.opts.outdir_extras_samples:
-            basename = Path(inputnames[input_i]).stem
-
-        for image_type, image in list(imgs.items()):
-            show_images += [image]
-            if inputs["save_outputs"]:
-                try:
-                    suffix = "" if image_type == "depth" else f"_{image_type}"
-                    save_image(image, path=outpath, basename=basename, seed=None,
-                               prompt=None, extension=shared.opts.samples_format, short_filename=True,
-                               no_prompt=True, grid=False, pnginfo_section_name="extras",
-                               suffix=suffix)
-                except Exception as e:
-                    if not ('image has wrong mode' in str(e) or 'I;16' in str(e)):
-                        raise e
-                    print('Catched exception: image has wrong mode!')
-                    traceback.print_exc()
-
-    # use inpainted 3d mesh to show in 3d model output when enabled in settings
-    if hasattr(shared.opts, 'depthmap_script_show_3d_inpaint') and shared.opts.depthmap_script_show_3d_inpaint \
-            and mesh_fi is not None and len(mesh_fi) > 0:
-        meshsimple_fi = mesh_fi
-    # however, don't show 3dmodel when disabled in settings
-    if hasattr(shared.opts, 'depthmap_script_show_3d') and not shared.opts.depthmap_script_show_3d:
-        meshsimple_fi = None
-    # TODO: return more info
-    return show_images, mesh_fi, meshsimple_fi, 'Generated!'
-
-
 from modules import script_callbacks
 script_callbacks.on_ui_settings(on_ui_settings)
-script_callbacks.on_ui_tabs(lambda: [(on_ui_tabs(), "Depth", "depthmap_interface")])
+script_callbacks.on_ui_tabs(lambda: [(common_ui.on_ui_tabs(), "Depth", "depthmap_interface")])
diff --git a/src/backbone.py b/src/backbone.py
index 6f41979..44fc3fd 100644
--- a/src/backbone.py
+++ b/src/backbone.py
@@ -1,2 +1,96 @@
 # This file contains stable-duiffusion-webui stuff that the plugin relies on.
 # Eventually, when we have a standalone interface, this will load either standalone backbone or webui backbone.
+try:
+    # stable-duiffusion-webui backbone
+    from modules.images import save_image  # Should fail if not on stable-duiffusion-webui
+    from modules.devices import torch_gc  # TODO: is this really sufficient?
+    from modules.images import get_next_sequence_number
+    from modules.call_queue import wrap_gradio_gpu_call
+    from modules.shared import listfiles
+
+    def get_opt(name, default):
+        from modules.shared import opts
+
+        if hasattr(opts, name):
+            return opts.__getattr__(name)
+        return default
+
+
+    def gather_ops():
+        from modules.shared import cmd_opts
+        ops = {}
+        if get_opt('depthmap_script_boost_rmax', None) is not None:
+            ops['boost_whole_size_threshold'] = get_opt('depthmap_script_boost_rmax', None)
+        ops['precision'] = cmd_opts.precision
+        ops['no_half'] = cmd_opts.no_half
+        return ops
+
+
+    def get_outpath():
+        path = get_opt('outdir_samples', None)
+        if path is None or len(path) == 0:
+            path = get_opt('outdir_extras_samples', None)
+        assert path is not None and len(path) > 0
+        return path
+
+
+    def unload_sd_model():
+        from modules import shared, devices
+        if shared.sd_model is not None:
+            shared.sd_model.cond_stage_model.to(devices.cpu)
+            shared.sd_model.first_stage_model.to(devices.cpu)
+
+
+    def reload_sd_model():
+        from modules import shared, devices
+        if shared.sd_model is not None:
+            shared.sd_model.cond_stage_model.to(devices.device)
+            shared.sd_model.first_stage_model.to(devices.device)
+
+    def get_hide_dirs():
+        import modules.shared
+        return modules.shared.hide_dirs
+except:
+    # Standalone backbone
+    print("DepthMap did not detect stable-duiffusion-webui; launching with the standalone backbone.\n"
+          "The standalone backbone is not on par with the stable-duiffusion-webui backbone.\n"
+          "Some features may be missing or work differently.\n")
+
+    def save_image(image, path, basename, **kwargs):
+        import os
+        os.makedirs(path, exist_ok=True)
+        fullfn = os.path.join(path, f"{get_next_sequence_number()}-{basename}.{kwargs['extension']}")
+        image.save(fullfn, format=get_opt('samples_format', 'png'))
+
+    def torch_gc():
+        # TODO: is this really sufficient?
+        import torch
+        if torch.cuda.is_available():
+            with torch.cuda.device('cuda'):
+                torch.cuda.empty_cache()
+                torch.cuda.ipc_collect()
+
+    def get_next_sequence_number():
+        # Don't really care what the number will be... As long as it is unique.
+        from datetime import datetime, timezone
+        import random
+        return f"{int(datetime.now(timezone.utc).timestamp())}-{random.randint(1000,9999)}"
+
+    def wrap_gradio_gpu_call(f): return f  # Displaying various stats is not supported
+
+    def listfiles(dirname):
+        import os
+        filenames = [os.path.join(dirname, x) for x in sorted(os.listdir(dirname)) if not x.startswith(".")]
+        return [file for file in filenames if os.path.isfile(file)]
+
+    def get_opt(name, default): return default  # Configuring is not supported
+
+    def gather_ops(): return {}  # Configuring is not supported
+
+    def get_outpath(): return '.'
+
+    def unload_sd_model(): pass  # Not needed
+
+    def reload_sd_model(): pass  # Not needed
+
+    def get_hide_dirs(): return {}  # Directories will not be hidden from traversal
diff --git a/src/common_ui.py b/src/common_ui.py
index c5e76b7..3210c74 100644
--- a/src/common_ui.py
+++ b/src/common_ui.py
@@ -105,7 +105,8 @@ def main_ui_panel(is_depth_tab):
                     gr.HTML("More options for generating video can be found in the Generate video tab")
 
         with gr.Group():
-            # TODO: it should be clear from the UI that the background removal does not use the model selected above
+            # TODO: it should be clear from the UI that there is an option of the background removal
+            #  that does not use the model selected above
             with gr.Row():
                 inp += "background_removal", gr.Checkbox(label="Remove background", value=False)
             with gr.Row(visible=False) as bgrem_options_row_1:
@@ -236,7 +237,7 @@ def on_ui_tabs():
                                           **backbone.get_hide_dirs(),
                                           placeholder="A directory on the same machine where the server is running.")
                         inp += gr.Textbox(elem_id="depthmap_batch_output_dir", label="Output directory",
-                                          **backbone.get_hide_dirs,
+                                          **backbone.get_hide_dirs(),
                                           placeholder="Leave blank to save images to the default path.")
                         gr.HTML("Files in the output directory may be overwritten.")
                         inp += gr.Checkbox(elem_id="depthmap_batch_reuse",
@@ -273,7 +274,7 @@ def on_ui_tabs():
                             with gr.Column():
                                 vid_html_info_x = gr.HTML()
                                 vid_html_info = gr.HTML()
-                                fn_mesh = gr.Textbox(label="Input Mesh (.ply | .obj)", **shared.hide_dirs,
+                                fn_mesh = gr.Textbox(label="Input Mesh (.ply | .obj)", **backbone.get_hide_dirs(),
                                                      placeholder="A file on the same machine where "
                                                                  "the server is running.")
                             with gr.Row():
@@ -377,7 +378,7 @@ def run_generate(*inputs):
     if depthmap_mode == '2' and depthmap_batch_output_dir != '':
         outpath = depthmap_batch_output_dir
     else:
-        outpath = backbone.opts.outdir_samples or backbone.opts.outdir_extras_samples
+        outpath = backbone.get_outpath()
 
     if depthmap_mode == '0':  # Single image
         if depthmap_input_image is None:
@@ -399,7 +400,7 @@ def run_generate(*inputs):
             inputimages.append(image)
             inputnames.append(os.path.splitext(img.orig_name)[0])
     elif depthmap_mode == '2':  # Batch from Directory
-        assert not backbone.cmd_opts.hide_ui_dir_config, '--hide-ui-dir-config option must be disabled'
+        assert not backbone.get_opt('hide_ui_dir_config', False), '--hide-ui-dir-config option must be disabled'
         if depthmap_batch_input_dir == '':
             return [], None, None, "Please select an input directory."
         if depthmap_batch_input_dir == depthmap_batch_output_dir:
@@ -414,9 +415,9 @@ def run_generate(*inputs):
                 if depthmap_batch_reuse:
                     basename = Path(path).stem
                     # Custom names are not used in samples directory
-                    if outpath != backbone.opts.outdir_extras_samples:
+                    if outpath != backbone.get_opt('outdir_extras_samples', None):
                         # Possible filenames that the custom depthmaps may have
-                        name_candidates = [f'{basename}-0000.{backbone.opts.samples_format}',  # current format
+                        name_candidates = [f'{basename}-0000.{backbone.get_opt("samples_format", "png")}',  # current format
                                            f'{basename}.png',  # human-intuitive format
                                            f'{Path(path).name}']  # human-intuitive format (worse)
                         for fn_cand in name_candidates:
@@ -430,22 +431,22 @@ def run_generate(*inputs):
         inputdepthmaps_n = len([1 for x in inputdepthmaps if x is not None])
         print(f'{len(inputimages)} images will be processed, {inputdepthmaps_n} existing depthmaps will be reused')
 
-    outputs, mesh_fi, meshsimple_fi = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs, backbone.gather_ops())
+    outputs, fn_mesh, display_mesh = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs, backbone.gather_ops())
 
     # Saving images
     show_images = []
     for input_i, imgs in enumerate(outputs):
         basename = 'depthmap'
-        if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != backbone.opts.outdir_extras_samples:
+        if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != backbone.get_opt('outdir_extras_samples', None):
             basename = Path(inputnames[input_i]).stem
 
         for image_type, image in list(imgs.items()):
             show_images += [image]
             if inputs["save_outputs"]:
                 try:
-                    suffix = "" if image_type == "depth" else f"_{image_type}"
+                    suffix = "" if image_type == "depth" else f"{image_type}"
                     backbone.save_image(image, path=outpath, basename=basename, seed=None,
-                               prompt=None, extension=backbone.opts.samples_format, short_filename=True,
+                               prompt=None, extension=backbone.get_opt('samples_format', 'png'), short_filename=True,
                                no_prompt=True, grid=False, pnginfo_section_name="extras",
                                suffix=suffix)
                 except Exception as e:
@@ -454,12 +455,12 @@ def run_generate(*inputs):
                     print('Catched exception: image has wrong mode!')
                     traceback.print_exc()
 
+    display_mesh = None
     # use inpainted 3d mesh to show in 3d model output when enabled in settings
-    if hasattr(backbone.opts, 'depthmap_script_show_3d_inpaint') and backbone.opts.depthmap_script_show_3d_inpaint \
-            and mesh_fi is not None and len(mesh_fi) > 0:
-        meshsimple_fi = mesh_fi
+    if backbone.get_opt('depthmap_script_show_3d_inpaint', True) and fn_mesh is not None and len(fn_mesh) > 0:
+        display_mesh = fn_mesh
     # however, don't show 3dmodel when disabled in settings
-    if hasattr(backbone.opts, 'depthmap_script_show_3d') and not backbone.opts.depthmap_script_show_3d:
-        meshsimple_fi = None
+    if not backbone.get_opt('depthmap_script_show_3d', True):
+        display_mesh = None
     # TODO: return more info
-    return show_images, mesh_fi, meshsimple_fi, 'Generated!'
+    return show_images, fn_mesh, display_mesh, 'Generated!'
diff --git a/src/core.py b/src/core.py
index 831d789..f9d65db 100644
--- a/src/core.py
+++ b/src/core.py
@@ -1,11 +1,6 @@
 from pathlib import Path
-
 from PIL import Image
 
-from modules import shared, devices
-from modules.images import get_next_sequence_number
-from modules.shared import opts, cmd_opts
-
 try:
     from tqdm import trange
 except:
@@ -24,6 +19,7 @@
 from src.main import *
 from src.stereoimage_generation import create_stereoimages
 from src.depthmap_generation import ModelHolder
+from src import backbone
 
 # 3d-photo-inpainting imports
 from inpaint.mesh import write_mesh, read_mesh, output_3d_photo
@@ -47,18 +43,6 @@ def convert_i16_to_rgb(image, like):
     return output
 
 
-def unload_sd_model():
-    if shared.sd_model is not None:
-        shared.sd_model.cond_stage_model.to(devices.cpu)
-        shared.sd_model.first_stage_model.to(devices.cpu)
-
-
-def reload_sd_model():
-    if shared.sd_model is not None:
-        shared.sd_model.cond_stage_model.to(devices.device)
-        shared.sd_model.first_stage_model.to(devices.device)
-
-
 def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp, ops=None):
     if len(inputimages) == 0 or inputimages[0] is None:
         return [], '', ''
@@ -97,12 +81,14 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
     stereo_modes = inp["stereo_modes"]
     stereo_separation = inp["stereo_separation"]
 
+    if ops is None:
+        ops = {}
     model_holder.update_settings(**ops)
 
     # TODO: ideally, run_depthmap should not save meshes - that makes the function not pure
     print(SCRIPT_FULL_NAME)
 
-    unload_sd_model()
+    backbone.unload_sd_model()
 
     # TODO: this still should not be here
     background_removed_images = []
@@ -308,7 +294,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
         else:
             raise e
     finally:
-        if hasattr(opts, 'depthmap_script_keepmodels') and opts.depthmap_script_keepmodels:
+        if backbone.get_opt('depthmap_script_keepmodels', False):
             model_holder.offload()  # Swap to CPU memory
         else:
             if 'model' in locals():
@@ -318,7 +304,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
             model_holder.unload_models()
 
         gc.collect()
-        devices.torch_gc()
+        backbone.torch_gc()
 
     # TODO: This should not be here
     mesh_fi = None
@@ -328,14 +314,14 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
         except Exception as e:
             print(f'{str(e)}, some issue with generating inpainted mesh')
 
-    reload_sd_model()
+    backbone.reload_sd_model()
     print("All done.\n")
     return generated_images, mesh_fi, meshsimple_fi
 
 
 def get_uniquefn(outpath, basename, ext):
     # Inefficient and may fail, maybe use unbounded binary search?
-    basecount = get_next_sequence_number(outpath, basename)
+    basecount = backbone.get_next_sequence_number(outpath, basename)
     if basecount > 0: basecount = basecount - 1
     fullfn = None
     for i in range(500):
@@ -403,10 +389,7 @@ def run_3dphoto(device, img_rgb, img_depth, inputnames, outpath, inpaint_vids, v
         config['repeat_inpaint_edge'] = True
         config['ply_fmt'] = "bin"
 
-        config['save_ply'] = False
-        if hasattr(opts, 'depthmap_script_save_ply') and opts.depthmap_script_save_ply:
-            config['save_ply'] = True
-
+        config['save_ply'] = backbone.get_opt('depthmap_script_save_ply', False)
         config['save_obj'] = True
 
         if device == torch.device("cpu"):
@@ -473,7 +456,7 @@ def run_3dphoto(device, img_rgb, img_depth, inputnames, outpath, inpaint_vids, v
                                    [-0.05, -0.05, -0.05, -0.05],
                                    ['dolly-zoom-in', 'zoom-in', 'circle', 'swing'], False, vid_format, vid_ssaa)
 
-            devices.torch_gc()
+            backbone.torch_gc()
 
     finally:
         del rgb_model
@@ -482,7 +465,7 @@ def run_3dphoto(device, img_rgb, img_depth, inputnames, outpath, inpaint_vids, v
         depth_edge_model = None
         del depth_feat_model
         depth_feat_model = None
-        devices.torch_gc()
+        backbone.torch_gc()
 
     return mesh_fi
 
@@ -604,9 +587,9 @@ def run_makevideo(fn_mesh, vid_numframes, vid_fps, vid_traj, vid_shift, vid_bord
 
     # output path and filename mess ..
     basename = Path(fn_mesh).stem
-    outpath = opts.outdir_samples or opts.outdir_extras_samples
+    outpath = backbone.get_outpath()
     # unique filename
-    basecount = get_next_sequence_number(outpath, basename)
+    basecount = backbone.get_next_sequence_number(outpath, basename)
     if basecount > 0: basecount = basecount - 1
     fullfn = None
     for i in range(500):
@@ -699,9 +682,7 @@ def depth_edges_mask(depth):
 def create_mesh(image, depth, keep_edges=False, spherical=False):
     import trimesh
     from dzoedepth.utils.geometry import depth_to_points, create_triangles
-    maxsize = 1024
-    if hasattr(opts, 'depthmap_script_mesh_maxsize'):
-        maxsize = opts.depthmap_script_mesh_maxsize
+    maxsize = backbone.get_opt('depthmap_script_mesh_maxsize', 2048)
 
     # limit the size of the input image
     image.thumbnail((maxsize, maxsize))
diff --git a/src/depthmap_generation.py b/src/depthmap_generation.py
index ec85770..ada3cf4 100644
--- a/src/depthmap_generation.py
+++ b/src/depthmap_generation.py
@@ -1,41 +1,35 @@
+import gc
+import os.path
 from operator import getitem
 
-from PIL import Image
-from torchvision.transforms import Compose, transforms
-
-# TODO: depthmap_generation should not depend on WebUI
-from modules import devices
-
-import torch, gc
 import cv2
-import os.path
 import numpy as np
 import skimage.measure
-
-# Our code
-from src.main import *
+from PIL import Image
+from torchvision.transforms import Compose, transforms
 
 # midas imports
 from dmidas.dpt_depth import DPTDepthModel
 from dmidas.midas_net import MidasNet
 from dmidas.midas_net_custom import MidasNet_small
 from dmidas.transforms import Resize, NormalizeImage, PrepareForNet
-
+# zoedepth
+from dzoedepth.models.builder import build_model
+from dzoedepth.utils.config import get_config
 # AdelaiDepth/LeReS imports
 from lib.multi_depth_model_woauxi import RelDepthModel
 from lib.net_tools import strip_prefix_if_present
-
+from pix2pix.models.pix2pix4depth_model import Pix2Pix4DepthModel
 # pix2pix/merge net imports
 from pix2pix.options.test_options import TestOptions
-from pix2pix.models.pix2pix4depth_model import Pix2Pix4DepthModel
 
-# zoedepth
-from dzoedepth.models.builder import build_model
-from dzoedepth.utils.config import get_config
+# Our code
+from src.main import *
+from src import backbone
 
-global device
+global depthmap_device
 
-class ModelHolder():
+class ModelHolder:
     def __init__(self):
         self.depth_model = None
         self.pix2pix_model = None
@@ -88,7 +82,6 @@ def load_models(self, model_type, device: torch.device, boost: bool):
         resize_mode = "minimal"
         normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
 
-        # TODO: net_w, net_h
         model = None
         if model_type == 0:  # "res101"
             model_path = f"{model_dir}/res101.pth"
@@ -106,7 +99,7 @@ def load_models(self, model_type, device: torch.device, boost: bool):
             model = RelDepthModel(backbone='resnext101')
             model.load_state_dict(strip_prefix_if_present(checkpoint['depth_model'], "module."), strict=True)
             del checkpoint
-            devices.torch_gc()
+            backbone.torch_gc()
 
         if model_type == 1:  # "dpt_beit_large_512" midas 3.1
             model_path = f"{model_dir}/dpt_beit_large_512.pt"
@@ -203,7 +196,7 @@ def load_models(self, model_type, device: torch.device, boost: bool):
         model.eval()  # prepare for evaluation
         # optimize
         if device == torch.device("cuda") and model_type in [0, 1, 2, 3, 4, 5, 6]:
-            model = model.to(memory_format=torch.channels_last)
+            model = model.to(memory_format=torch.channels_last)  # TODO: weird
             if not self.no_half and model_type != 0 and not boost:  # TODO: zoedepth, too?
                 model = model.half()
         model.to(device)  # to correct device
@@ -230,7 +223,7 @@ def load_models(self, model_type, device: torch.device, boost: bool):
             self.pix2pix_model.load_networks('latest')
             self.pix2pix_model.eval()
 
-        devices.torch_gc()
+        backbone.torch_gc()
 
     @staticmethod
     def get_default_net_size(model_type):
@@ -276,7 +269,7 @@ def unload_models(self):
             del self.pix2pix_model
             self.pix2pix_model = None
             gc.collect()
-            devices.torch_gc()
+            backbone.torch_gc()
 
         self.depth_model_type = None
         self.device = None
@@ -284,9 +277,8 @@ def unload_models(self):
     def get_raw_prediction(self, input, net_width, net_height):
         """Get prediction from the model currently loaded by the ModelHolder object.
         If boost is enabled, net_width and net_height will be ignored."""
-        # TODO: supply net size for zoedepth
-        global device
-        device = self.device
+        global depthmap_device
+        depthmap_device = self.device
         # input image
         img = cv2.cvtColor(np.asarray(input), cv2.COLOR_BGR2RGB) / 255.0
         # compute depthmap
@@ -314,7 +306,7 @@ def estimateleres(img, model, w, h):
 
     # compute
     with torch.no_grad():
-        if device == torch.device("cuda"):
+        if depthmap_device == torch.device("cuda"):
             img_torch = img_torch.cuda()
         prediction = model.depth_model(img_torch)
 
@@ -346,7 +338,7 @@ def scale_torch(img):
 def estimatezoedepth(img, model, w, h):
     # x = transforms.ToTensor()(img).unsqueeze(0)
     # x = x.type(torch.float32)
-    # x.to(device)
+    # x.to(depthmap_device)
     # prediction = model.infer(x)
     model.core.prep.resizer._Resize__width = w
     model.core.prep.resizer._Resize__height = h
@@ -378,11 +370,11 @@ def estimatemidas(img, model, w, h, resize_mode, normalization, no_half, precisi
     img_input = transform({"image": img})["image"]
 
     # compute
-    precision_scope = torch.autocast if precision_is_autocast and device == torch.device(
+    precision_scope = torch.autocast if precision_is_autocast and depthmap_device == torch.device(
         "cuda") else contextlib.nullcontext
     with torch.no_grad(), precision_scope("cuda"):
-        sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
-        if device == torch.device("cuda"):
+        sample = torch.from_numpy(img_input).to(depthmap_device).unsqueeze(0)
+        if depthmap_device == torch.device("cuda"):
             sample = sample.to(memory_format=torch.channels_last)
             if not no_half:
                 sample = sample.half()
@@ -628,7 +620,7 @@ def estimateboost(img, model, model_type, pix2pixmodel, whole_size_threshold):
         patch_netsize = 2 * net_receptive_field_size
 
     gc.collect()
-    devices.torch_gc()
+    backbone.torch_gc()
 
     # Generate mask used to smoothly blend the local pathc estimations to the base estimate.
     # It is arbitrarily large to avoid artifacts during rescaling for each crop.
@@ -1034,8 +1026,8 @@ def estimatemidasBoost(img, model, w, h):
 
     # compute
     with torch.no_grad():
-        sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
-        if device == torch.device("cuda"):
+        sample = torch.from_numpy(img_input).to(depthmap_device).unsqueeze(0)
+        if depthmap_device == torch.device("cuda"):
             sample = sample.to(memory_format=torch.channels_last)
         prediction = model.forward(sample)
 
diff --git a/src/standalone.py b/src/standalone.py
deleted file mode 100644
index a20ee99..0000000
--- a/src/standalone.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# This launches Depth tab without the AUTOMATIC1111/stable-diffusion-webui
-# Does not work yet.
-
-import gradio as gr
-import scripts.depthmap
-
-demo = gr.Interface(fn=scripts.depthmap.on_ui_tabs, inputs="text", outputs="text")
-
-demo.launch()

From 5372a13d7d475ff81a8343d2c07a1a747cce4b73 Mon Sep 17 00:00:00 2001
From: semjon00 <semjon.00@gmail.com>
Date: Tue, 18 Jul 2023 19:58:43 +0300
Subject: [PATCH 04/10] Improvements for standalone interface

---
 src/common_ui.py | 36 ++++++++++++------------------------
 1 file changed, 12 insertions(+), 24 deletions(-)

diff --git a/src/common_ui.py b/src/common_ui.py
index 3210c74..6c01534 100644
--- a/src/common_ui.py
+++ b/src/common_ui.py
@@ -28,8 +28,7 @@ def main_ui_panel(is_depth_tab):
         with gr.Group():
             with gr.Row():
                 inp += 'boost', gr.Checkbox(label="BOOST (multi-resolution merging)", value=True)
-                with gr.Group(visible=False) as options_depend_on_boost:
-                    inp += 'match_size', gr.Checkbox(label="Match net size to input size", value=False)
+                inp += 'match_size', gr.Checkbox(label="Match net size to input size", value=False)
             with gr.Row(visible=False) as options_depend_on_match_size:
                 inp += 'net_width', gr.Slider(minimum=64, maximum=2048, step=64, label='Net width', value=448)
                 inp += 'net_height', gr.Slider(minimum=64, maximum=2048, step=64, label='Net height', value=448)
@@ -62,10 +61,9 @@ def main_ui_panel(is_depth_tab):
                 inp += "gen_stereo", gr.Checkbox(label="Generate stereoscopic image(s)", value=False)
             with gr.Group(visible=False) as stereo_options:
                 with gr.Row():
-                    with gr.Row():
-                        inp += "stereo_modes", gr.CheckboxGroup(
-                            ["left-right", "right-left", "top-bottom", "bottom-top", "red-cyan-anaglyph"],
-                            label="Output", value=["left-right", "red-cyan-anaglyph"])
+                    inp += "stereo_modes", gr.CheckboxGroup(
+                        ["left-right", "right-left", "top-bottom", "bottom-top", "red-cyan-anaglyph"],
+                        label="Output", value=["left-right", "red-cyan-anaglyph"])
                 with gr.Row():
                     inp += "stereo_divergence", gr.Slider(minimum=0.05, maximum=10.005, step=0.01,
                                                           label='Divergence (3D effect)',
@@ -137,10 +135,10 @@ def update_delault_net_size(model_type):
         )
 
         inp['boost'].change(
-            fn=lambda a, b: (options_depend_on_boost.update(visible=not a),
+            fn=lambda a, b: (inp['match_size'].update(visible=not a),
                              options_depend_on_match_size.update(visible=not a and not b)),
             inputs=[inp['boost'], inp['match_size']],
-            outputs=[options_depend_on_boost, options_depend_on_match_size]
+            outputs=[inp['match_size'], options_depend_on_match_size]
         )
         inp['match_size'].change(
             fn=lambda a, b: options_depend_on_match_size.update(visible=not a and not b),
@@ -176,11 +174,8 @@ def update_delault_net_size(model_type):
             outputs=[inp['clipthreshold_far']]
         )
 
-        def stereo_options_visibility(v):
-            return stereo_options.update(visible=v)
-
         inp['gen_stereo'].change(
-            fn=stereo_options_visibility,
+            fn=lambda v: stereo_options.update(visible=v),
             inputs=[inp['gen_stereo']],
             outputs=[stereo_options]
         )
@@ -191,22 +186,15 @@ def stereo_options_visibility(v):
             outputs=[mesh_options]
         )
 
-        def inpaint_options_visibility(v):
-            return inpaint_options_row_0.update(visible=v)
-
         if is_depth_tab:
             inp['inpaint'].change(
-                fn=inpaint_options_visibility,
+                fn=lambda v: inpaint_options_row_0.update(visible=v),
                 inputs=[inp['inpaint']],
                 outputs=[inpaint_options_row_0]
             )
 
-        def background_removal_options_visibility(v):
-            return bgrem_options_row_1.update(visible=v), \
-                bgrem_options_row_2.update(visible=v)
-
         inp['background_removal'].change(
-            fn=background_removal_options_visibility,
+            fn=lambda v: (bgrem_options_row_1.update(visible=v), bgrem_options_row_2.update(visible=v)),
             inputs=[inp['background_removal']],
             outputs=[bgrem_options_row_1, bgrem_options_row_2]
         )
@@ -215,7 +203,7 @@ def background_removal_options_visibility(v):
 
 def on_ui_tabs():
     inp = GradioComponentBundle()
-    with gr.Blocks(analytics_enabled=False) as depthmap_interface:
+    with gr.Blocks(analytics_enabled=False, title="DepthMap") as depthmap_interface:
         with gr.Row().style(equal_height=False):
             with gr.Column(variant='panel'):
                 inp += 'depthmap_mode', gr.HTML(visible=False, value='0')
@@ -382,7 +370,7 @@ def run_generate(*inputs):
 
     if depthmap_mode == '0':  # Single image
         if depthmap_input_image is None:
-            return [], None, None, "Please select an input image!"
+            return [], None, None, "Please select an input image"
         inputimages.append(depthmap_input_image)
         inputnames.append(None)
         if custom_depthmap:
@@ -394,7 +382,7 @@ def run_generate(*inputs):
             inputdepthmaps.append(None)
     if depthmap_mode == '1':  # Batch Process
         if image_batch is None:
-            return [], None, None, "Please select input images!", ""
+            return [], None, None, "Please select input images", ""
         for img in image_batch:
             image = Image.open(os.path.abspath(img.name))
             inputimages.append(image)

From 970d365c41778fec70ab98c0657c0cdf218f7eaf Mon Sep 17 00:00:00 2001
From: semjon00 <semjon.00@gmail.com>
Date: Tue, 18 Jul 2023 20:56:58 +0300
Subject: [PATCH 05/10] Questionable design decisions

Element hiding-unhiding now works in the standalone mode.
---
 src/common_ui.py | 67 ++++++++++++++++++++++++------------------------
 1 file changed, 33 insertions(+), 34 deletions(-)

diff --git a/src/common_ui.py b/src/common_ui.py
index 6c01534..6b45a3b 100644
--- a/src/common_ui.py
+++ b/src/common_ui.py
@@ -25,7 +25,7 @@ def main_ui_panel(is_depth_tab):
                                                       'zoedepth_n (indoor)', 'zoedepth_k (outdoor)', 'zoedepth_nk'],
                                              value='res101',
                                              type="index")
-        with gr.Group():
+        with gr.Box():
             with gr.Row():
                 inp += 'boost', gr.Checkbox(label="BOOST (multi-resolution merging)", value=True)
                 inp += 'match_size', gr.Checkbox(label="Match net size to input size", value=False)
@@ -33,9 +33,10 @@ def main_ui_panel(is_depth_tab):
                 inp += 'net_width', gr.Slider(minimum=64, maximum=2048, step=64, label='Net width', value=448)
                 inp += 'net_height', gr.Slider(minimum=64, maximum=2048, step=64, label='Net height', value=448)
 
-        with gr.Group():
+        with gr.Box():
             with gr.Row():
-                inp += "save_outputs", gr.Checkbox(label="Save Outputs", value=True)  # 50% of width
+                with gr.Group():
+                    inp += "save_outputs", gr.Checkbox(label="Save Outputs", value=True)  # 50% of width
                 with gr.Group():  # 50% of width
                     inp += "output_depth", gr.Checkbox(label="Output DepthMap", value=True)
                     inp += "invert_depth", gr.Checkbox(label="Invert (black=near, white=far)", value=False)
@@ -44,22 +45,22 @@ def main_ui_panel(is_depth_tab):
                     label="Combine input and depthmap into one image", value=False)
                 inp += "combine_output_axis", gr.Radio(label="Combine axis", choices=['Vertical', 'Horizontal'],
                                                        value='Horizontal', type="index", visible=False)
-        with gr.Group():
+        with gr.Box():
             with gr.Row():
                 inp += 'clipdepth', gr.Checkbox(label="Clip and renormalize DepthMap", value=False)
             with gr.Row(visible=False) as clip_options_row_1:
                 inp += "clipthreshold_far", gr.Slider(minimum=0, maximum=1, step=0.001, label='Far clip', value=0)
                 inp += "clipthreshold_near", gr.Slider(minimum=0, maximum=1, step=0.001, label='Near clip', value=1)
 
-        with gr.Group():
+        with gr.Box():
             with gr.Row():
                 inp += "show_heat", gr.Checkbox(label="Generate HeatMap", value=False)
                 # gr.Checkbox(label="Generate NormalMap", value=False)  # TODO: this is a fake door
 
-        with gr.Group():
+        with gr.Box():
             with gr.Row():
                 inp += "gen_stereo", gr.Checkbox(label="Generate stereoscopic image(s)", value=False)
-            with gr.Group(visible=False) as stereo_options:
+            with gr.Column(visible=False) as stereo_options:
                 with gr.Row():
                     inp += "stereo_modes", gr.CheckboxGroup(
                         ["left-right", "right-left", "top-bottom", "bottom-top", "red-cyan-anaglyph"],
@@ -80,11 +81,11 @@ def main_ui_panel(is_depth_tab):
                                                        label='Balance between eyes',
                                                        value=0.0)
 
-        with gr.Group():
-            with gr.Row():
+        with gr.Box():
+            with gr.Column():
                 inp += "gen_mesh", gr.Checkbox(
                     label="Generate simple 3D mesh", value=False, visible=True)
-            with gr.Group(visible=False) as mesh_options:
+            with gr.Column(visible=False) as mesh_options:
                 with gr.Row():
                     gr.HTML(value="Generates fast, accurate only with ZoeDepth models and no boost, no custom maps")
                 with gr.Row():
@@ -92,29 +93,30 @@ def main_ui_panel(is_depth_tab):
                     inp += "mesh_spherical", gr.Checkbox(label="Equirectangular projection", value=False, visible=True)
 
         if is_depth_tab:
-            with gr.Group():
-                with gr.Row():
+            with gr.Box():
+                with gr.Column():
                     inp += "inpaint", gr.Checkbox(
                         label="Generate 3D inpainted mesh", value=False)
-                with gr.Group(visible=False) as inpaint_options_row_0:
+                with gr.Column(visible=False) as inpaint_options_row_0:
                     gr.HTML("Generation is sloooow, required for generating videos")
                     inp += "inpaint_vids", gr.Checkbox(
                         label="Generate 4 demo videos with 3D inpainted mesh.", value=False)
                     gr.HTML("More options for generating video can be found in the Generate video tab")
 
-        with gr.Group():
+        with gr.Box():
             # TODO: it should be clear from the UI that there is an option of the background removal
             #  that does not use the model selected above
             with gr.Row():
                 inp += "background_removal", gr.Checkbox(label="Remove background", value=False)
-            with gr.Row(visible=False) as bgrem_options_row_1:
-                inp += "save_background_removal_masks", gr.Checkbox(label="Save the foreground masks", value=False)
-                inp += "pre_depth_background_removal", gr.Checkbox(label="Pre-depth background removal", value=False)
-            with gr.Row(visible=False) as bgrem_options_row_2:
-                inp += "background_removal_model", gr.Dropdown(label="Rembg Model",
-                                                               choices=['u2net', 'u2netp', 'u2net_human_seg',
-                                                                        'silueta'],
-                                                               value='u2net', type="value")
+            with gr.Column(visible=False) as bgrem_options:
+                with gr.Row():
+                    inp += "save_background_removal_masks", gr.Checkbox(label="Save the foreground masks", value=False)
+                    inp += "pre_depth_background_removal", gr.Checkbox(label="Pre-depth background removal", value=False)
+                with gr.Row():
+                    inp += "background_removal_model", gr.Dropdown(label="Rembg Model",
+                                                                   choices=['u2net', 'u2netp', 'u2net_human_seg',
+                                                                            'silueta'],
+                                                                   value='u2net', type="value")
 
         with gr.Box():
             gr.HTML(f"{SCRIPT_FULL_NAME}<br/>")
@@ -194,9 +196,9 @@ def update_delault_net_size(model_type):
             )
 
         inp['background_removal'].change(
-            fn=lambda v: (bgrem_options_row_1.update(visible=v), bgrem_options_row_2.update(visible=v)),
+            fn=lambda v: bgrem_options.update(visible=v),
             inputs=[inp['background_removal']],
-            outputs=[bgrem_options_row_1, bgrem_options_row_2]
+            outputs=[bgrem_options]
         )
 
     return inp
@@ -209,13 +211,13 @@ def on_ui_tabs():
                 inp += 'depthmap_mode', gr.HTML(visible=False, value='0')
                 with gr.Tabs():
                     with gr.TabItem('Single Image') as depthmap_mode_0:
-                        with gr.Row():
-                            inp += gr.Image(label="Source", source="upload", interactive=True, type="pil",
-                                            elem_id="depthmap_input_image")
-                            with gr.Group(visible=False) as custom_depthmap_row_0:
+                        with gr.Group():
+                            with gr.Row():
+                                inp += gr.Image(label="Source", source="upload", interactive=True, type="pil",
+                                                elem_id="depthmap_input_image")
                                 # TODO: depthmap generation settings should disappear when using this
                                 inp += gr.File(label="Custom DepthMap", file_count="single", interactive=True,
-                                               type="file", elem_id='custom_depthmap_img')
+                                               type="file", elem_id='custom_depthmap_img', visible=False)
                         inp += gr.Checkbox(elem_id="custom_depthmap", label="Use custom DepthMap", value=False)
                     with gr.TabItem('Batch Process') as depthmap_mode_1:
                         inp += gr.File(elem_id='image_batch', label="Batch Process", file_count="multiple",
@@ -291,13 +293,10 @@ def on_ui_tabs():
         depthmap_mode_1.select(lambda: '1', None, inp['depthmap_mode'])
         depthmap_mode_2.select(lambda: '2', None, inp['depthmap_mode'])
 
-        def custom_depthmap_visibility(v):
-            return custom_depthmap_row_0.update(visible=v)
-
         inp['custom_depthmap'].change(
-            fn=custom_depthmap_visibility,
+            fn=lambda v: inp['custom_depthmap_img'].update(visible=v),
             inputs=[inp['custom_depthmap']],
-            outputs=[custom_depthmap_row_0]
+            outputs=[inp['custom_depthmap_img']]
         )
 
         unloadmodels.click(

From 35f77f6a981ff055b4945582d48ef838a89c4ec8 Mon Sep 17 00:00:00 2001
From: semjon00 <semjon.00@gmail.com>
Date: Tue, 18 Jul 2023 21:00:25 +0300
Subject: [PATCH 06/10] Rename main to misc to avoid confusion

---
 main.py                    | 3 +++
 scripts/depthmap.py        | 2 +-
 src/backbone.py            | 8 +++++---
 src/common_ui.py           | 2 +-
 src/core.py                | 2 +-
 src/depthmap_generation.py | 2 +-
 src/{main.py => misc.py}   | 0
 7 files changed, 12 insertions(+), 7 deletions(-)
 rename src/{main.py => misc.py} (100%)

diff --git a/main.py b/main.py
index 8e72043..8421011 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,7 @@
 # This launches DepthMap without the AUTOMATIC1111/stable-diffusion-webui
+# If DepthMap is installed as an extension,
+# you may want to change the working directory to the stable-diffusion-webui root.
+
 import argparse
 import src.common_ui
 
diff --git a/scripts/depthmap.py b/scripts/depthmap.py
index 2a81fd7..82284b5 100644
--- a/scripts/depthmap.py
+++ b/scripts/depthmap.py
@@ -7,7 +7,7 @@
 from src import common_ui
 from src.core import core_generation_funnel
 from src.gradio_args_transport import GradioComponentBundle
-from src.main import *
+from src.misc import *
 
 
 # Ugly workaround to fix gradio tempfile issue
diff --git a/src/backbone.py b/src/backbone.py
index 44fc3fd..8be1265 100644
--- a/src/backbone.py
+++ b/src/backbone.py
@@ -1,5 +1,7 @@
-# This file contains stable-duiffusion-webui stuff that the plugin relies on.
-# Eventually, when we have a standalone interface, this will load either standalone backbone or webui backbone.
+# DepthMap can be run inside stable-duiffusion-webui, but also separately.
+# All the stable-duiffusion-webui stuff that the DepthMap relies on
+# must be resided in this file (or in the scripts folder).
+
 try:
     # stable-duiffusion-webui backbone
     from modules.images import save_image  # Should fail if not on stable-duiffusion-webui
@@ -93,4 +95,4 @@ def unload_sd_model(): pass  # Not needed
 
     def reload_sd_model(): pass  # Not needed
 
-    def get_hide_dirs(): return {}  # Directories will not be hidden from traversal
+    def get_hide_dirs(): return {}  # Directories will not be hidden from traversal (except when starts with the dot)
diff --git a/src/common_ui.py b/src/common_ui.py
index 6b45a3b..bb4b3c0 100644
--- a/src/common_ui.py
+++ b/src/common_ui.py
@@ -7,7 +7,7 @@
 from src.core import core_generation_funnel, unload_models, run_makevideo
 from src.depthmap_generation import ModelHolder
 from src.gradio_args_transport import GradioComponentBundle
-from src.main import *
+from src.misc import *
 
 
 def main_ui_panel(is_depth_tab):
diff --git a/src/core.py b/src/core.py
index f9d65db..142ac37 100644
--- a/src/core.py
+++ b/src/core.py
@@ -16,7 +16,7 @@
 import traceback
 
 # Our code
-from src.main import *
+from src.misc import *
 from src.stereoimage_generation import create_stereoimages
 from src.depthmap_generation import ModelHolder
 from src import backbone
diff --git a/src/depthmap_generation.py b/src/depthmap_generation.py
index ada3cf4..5ecc02b 100644
--- a/src/depthmap_generation.py
+++ b/src/depthmap_generation.py
@@ -24,7 +24,7 @@
 from pix2pix.options.test_options import TestOptions
 
 # Our code
-from src.main import *
+from src.misc import *
 from src import backbone
 
 global depthmap_device
diff --git a/src/main.py b/src/misc.py
similarity index 100%
rename from src/main.py
rename to src/misc.py

From f5cff471974c26c63a6076573a214ac7f64c440b Mon Sep 17 00:00:00 2001
From: semjon00 <semjon.00@gmail.com>
Date: Tue, 18 Jul 2023 21:14:45 +0300
Subject: [PATCH 07/10] Bump version

Also do not needlessly unload models in standalone mode.
---
 README.md           |  2 ++
 scripts/depthmap.py | 14 --------------
 src/common_ui.py    | 14 ++++++++++++++
 src/core.py         |  2 +-
 src/misc.py         |  2 +-
 5 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index ccb66cc..1ed85b9 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,8 @@ video by [@graemeniedermayer](https://github.com/graemeniedermayer), more exampl
 images generated by [@semjon00](https://github.com/semjon00) from CC0 photos, more examples [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/56#issuecomment-1367596463).
 
 ## Changelog
+* v0.4.1 standalone mode
+    * ability to run DepthMap without WebUI (Use main.py. Make sure all the dependencies are installed. The support is not feature-complete.)
 * v0.4.0 large code refactor
     * UI improvements
     * improved Batch from Directory, Clip and renormalize DepthMap
diff --git a/scripts/depthmap.py b/scripts/depthmap.py
index 82284b5..868add5 100644
--- a/scripts/depthmap.py
+++ b/scripts/depthmap.py
@@ -10,20 +10,6 @@
 from src.misc import *
 
 
-# Ugly workaround to fix gradio tempfile issue
-def ensure_gradio_temp_directory():
-    try:
-        import tempfile
-        path = os.path.join(tempfile.gettempdir(), 'gradio')
-        if not (os.path.exists(path)):
-            os.mkdir(path)
-    except Exception as e:
-        traceback.print_exc()
-
-
-ensure_gradio_temp_directory()
-
-
 class Script(scripts.Script):
     def title(self):
         return SCRIPT_NAME
diff --git a/src/common_ui.py b/src/common_ui.py
index bb4b3c0..f37c73c 100644
--- a/src/common_ui.py
+++ b/src/common_ui.py
@@ -10,6 +10,20 @@
 from src.misc import *
 
 
+# Ugly workaround to fix gradio tempfile issue
+def ensure_gradio_temp_directory():
+    try:
+        import tempfile
+        path = os.path.join(tempfile.gettempdir(), 'gradio')
+        if not (os.path.exists(path)):
+            os.mkdir(path)
+    except Exception as e:
+        traceback.print_exc()
+
+
+ensure_gradio_temp_directory()
+
+
 def main_ui_panel(is_depth_tab):
     inp = GradioComponentBundle()
     # TODO: Greater visual separation
diff --git a/src/core.py b/src/core.py
index 142ac37..32a81b1 100644
--- a/src/core.py
+++ b/src/core.py
@@ -294,7 +294,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
         else:
             raise e
     finally:
-        if backbone.get_opt('depthmap_script_keepmodels', False):
+        if backbone.get_opt('depthmap_script_keepmodels', True):
             model_holder.offload()  # Swap to CPU memory
         else:
             if 'model' in locals():
diff --git a/src/misc.py b/src/misc.py
index d3fed1d..f3d2bfd 100644
--- a/src/misc.py
+++ b/src/misc.py
@@ -16,7 +16,7 @@ def get_commit_hash():
 
 
 SCRIPT_NAME = "DepthMap"
-SCRIPT_VERSION = "v0.4.0"
+SCRIPT_VERSION = "v0.4.1"
 SCRIPT_FULL_NAME = f"{SCRIPT_NAME} {SCRIPT_VERSION} ({get_commit_hash()})"
 
 

From 7c44b702d0a4638c023d7a46667c42fb54c7b816 Mon Sep 17 00:00:00 2001
From: semjon00 <semjon.00@gmail.com>
Date: Wed, 19 Jul 2023 11:14:41 +0300
Subject: [PATCH 08/10] Add folder button, cmd_opts bugfix

---
 src/backbone.py  | 12 +++++++++++-
 src/common_ui.py | 26 +++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/src/backbone.py b/src/backbone.py
index 8be1265..202cf47 100644
--- a/src/backbone.py
+++ b/src/backbone.py
@@ -12,13 +12,19 @@
 
     def get_opt(name, default):
         from modules.shared import opts
-
         if hasattr(opts, name):
             return opts.__getattr__(name)
         return default
 
+    def get_cmd_opt(name, default):
+        """Get command line argument"""
+        from modules.shared import cmd_opts
+        if hasattr(cmd_opts, name):
+            return cmd_opts.__getattribute__(name)
+        return default
 
     def gather_ops():
+        """Parameters for depthmap generation"""
         from modules.shared import cmd_opts
         ops = {}
         if get_opt('depthmap_script_boost_rmax', None) is not None:
@@ -29,6 +35,7 @@ def gather_ops():
 
 
     def get_outpath():
+        """Get path where results are saved by default"""
         path = get_opt('outdir_samples', None)
         if path is None or len(path) == 0:
             path = get_opt('outdir_extras_samples', None)
@@ -87,6 +94,9 @@ def listfiles(dirname):
 
     def get_opt(name, default): return default  # Configuring is not supported
 
+
+    def get_cmd_opt(name, default): return default  # Configuring is not supported
+
     def gather_ops(): return {}  # Configuring is not supported
 
     def get_outpath(): return '.'
diff --git a/src/common_ui.py b/src/common_ui.py
index f37c73c..68583d3 100644
--- a/src/common_ui.py
+++ b/src/common_ui.py
@@ -217,6 +217,25 @@ def update_delault_net_size(model_type):
 
     return inp
 
+def open_folder_action():
+    """Open the default output folder in the OS file manager (adapted from stable-diffusion-webui)."""
+    f = backbone.get_outpath()
+    if backbone.get_cmd_opt('hide_ui_dir_config', False):
+        return
+    if not os.path.exists(f) or not os.path.isdir(f):
+        raise RuntimeError("Couldn't open output folder")  # .isdir is security-related, do not remove!
+    import platform
+    import subprocess as sp
+    path = os.path.normpath(f)
+    if platform.system() == "Windows":
+        os.startfile(path)
+    elif platform.system() == "Darwin":
+        sp.Popen(["open", path])
+    elif "microsoft-standard-WSL2" in platform.uname().release:
+        sp.Popen(["wsl-open", path])
+    else:
+        sp.Popen(["xdg-open", path])
+
 def on_ui_tabs():
     inp = GradioComponentBundle()
     with gr.Blocks(analytics_enabled=False, title="DepthMap") as depthmap_interface:
@@ -260,6 +279,10 @@ def on_ui_tabs():
                                                        elem_id=f"depthmap_gallery").style(grid=4)
                         with gr.Column():
                             html_info = gr.HTML()
+                        folder_symbol = '\U0001f4c2'  # 📂
+                        gr.Button(folder_symbol, visible=not backbone.get_cmd_opt('hide_ui_dir_config', False)).click(
+                            fn=lambda: open_folder_action(), inputs=[], outputs=[],
+                        )
 
                     with gr.TabItem('3D Mesh'):
                         with gr.Group():
@@ -301,6 +324,7 @@ def on_ui_tabs():
                                 submit_vid = gr.Button('Generate Video', elem_id="depthmap_generatevideo",
                                                        variant='primary')
 
+
         inp += inp.enkey_tail()
 
         depthmap_mode_0.select(lambda: '0', None, inp['depthmap_mode'])
@@ -401,7 +425,7 @@ def run_generate(*inputs):
             inputimages.append(image)
             inputnames.append(os.path.splitext(img.orig_name)[0])
     elif depthmap_mode == '2':  # Batch from Directory
-        assert not backbone.get_opt('hide_ui_dir_config', False), '--hide-ui-dir-config option must be disabled'
+        assert not backbone.get_cmd_opt('hide_ui_dir_config', False), '--hide-ui-dir-config option must be disabled'
         if depthmap_batch_input_dir == '':
             return [], None, None, "Please select an input directory."
         if depthmap_batch_input_dir == depthmap_batch_output_dir:

From 13023f197ee5c9df05a422e7ff4705c5c6e97d35 Mon Sep 17 00:00:00 2001
From: semjon00 <semjon.00@gmail.com>
Date: Wed, 19 Jul 2023 22:21:43 +0300
Subject: [PATCH 09/10] Bugfix: standalone mesh generation

---
 src/backbone.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/backbone.py b/src/backbone.py
index 202cf47..bc1606e 100644
--- a/src/backbone.py
+++ b/src/backbone.py
@@ -63,7 +63,7 @@ def get_hide_dirs():
     # Standalone backbone
     print("DepthMap did not detect stable-duiffusion-webui; launching with the standalone backbone.\n"
           "The standalone backbone is not on par with the stable-duiffusion-webui backbone.\n"
-          "Some features may be missing or work differently.\n")
+          "Some features may be missing or work differently. Please report bugs.\n")
 
     def save_image(image, path, basename, **kwargs):
         import os
@@ -79,11 +79,11 @@ def torch_gc():
                 torch.cuda.empty_cache()
                 torch.cuda.ipc_collect()
 
-    def get_next_sequence_number():
+    def get_next_sequence_number(outpath=None, basename=None):
         # Don't really care what the number will be... As long as it is unique.
         from datetime import datetime, timezone
         import random
-        return f"{int(datetime.now(timezone.utc).timestamp())}-{random.randint(1000,9999)}"
+        return int(f"{int(datetime.now(timezone.utc).timestamp())}{random.randint(1000,9999)}")
 
     def wrap_gradio_gpu_call(f): return f  # Displaying various stats is not supported
 

From 1d217ce55c8e4a659be4f0ec805a05b1db421f1a Mon Sep 17 00:00:00 2001
From: semjon00 <semjon.00@gmail.com>
Date: Thu, 20 Jul 2023 11:44:17 +0300
Subject: [PATCH 10/10] Standalone mode improvements

* Make standalone mode more self-aware if installed as webui extension
* Fix commit retrieval for standalone mode if not installed as webui extension
* Improved output saving
---
 main.py                    | 28 +++++++++++++++++++++++++++-
 src/backbone.py            | 30 ++++++++++++++++++++----------
 src/depthmap_generation.py |  1 +
 src/misc.py                | 23 ++++++++++++++++-------
 4 files changed, 64 insertions(+), 18 deletions(-)

diff --git a/main.py b/main.py
index 8421011..fc7cf68 100644
--- a/main.py
+++ b/main.py
@@ -3,11 +3,37 @@
 # you may want to change the working directory to the stable-diffusion-webui root.
 
 import argparse
-import src.common_ui
+import os
+import pathlib
+import builtins
+
+import src.misc
+
+def maybe_chdir():
+    """Detects if DepthMap was installed as a stable-diffusion-webui script, but run without current directory set to
+    the stable-diffusion-webui root. Changes current directory if needed, to avoid clutter."""
+    try:
+        file_path = pathlib.Path(__file__)
+        path = file_path.parts
+        while len(path) > 0 and path[-1] != src.misc.REPOSITORY_NAME:  # Walk up to the repository directory
+            path = path[:-1]
+        if len(path) >= 2 and path[-1] == src.misc.REPOSITORY_NAME and path[-2] == "extensions":
+            path = path[:-2]  # Drop "extensions/<repository>"; the remainder is presumably the webui root
+        listdir = os.listdir(str(pathlib.Path(*path)))
+        if 'launch.py' in listdir and 'webui.py' in listdir:  # Only chdir when both marker files are present
+            os.chdir(str(pathlib.Path(*path)))
+    except Exception:  # Best-effort: on any failure the current directory is simply left unchanged
+        pass
+
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument("--listen", help="Create public link")
+    parser.add_argument("--no_chdir", action="store_true", help="Do not try to use the root of stable-diffusion-webui")
     args = parser.parse_args()
 
+    print(f"{src.misc.SCRIPT_FULL_NAME} running in standalone mode!")
+    import src.common_ui
+    if not args.no_chdir:
+        maybe_chdir()
     src.common_ui.on_ui_tabs().launch(share=args.listen)
diff --git a/src/backbone.py b/src/backbone.py
index bc1606e..0829ce0 100644
--- a/src/backbone.py
+++ b/src/backbone.py
@@ -1,6 +1,8 @@
 # DepthMap can be run inside stable-duiffusion-webui, but also separately.
 # All the stable-duiffusion-webui stuff that the DepthMap relies on
 # must be resided in this file (or in the scripts folder).
+import pathlib
+from datetime import datetime
 
 try:
     # stable-duiffusion-webui backbone
@@ -61,15 +63,21 @@ def get_hide_dirs():
         return modules.shared.hide_dirs
 except:
     # Standalone backbone
-    print("DepthMap did not detect stable-duiffusion-webui; launching with the standalone backbone.\n"
-          "The standalone backbone is not on par with the stable-duiffusion-webui backbone.\n"
-          "Some features may be missing or work differently. Please report bugs.\n")
+    print(  # "  DepthMap did not detect stable-diffusion-webui; launching with the standalone backbone.\n"
+          "  The standalone mode is not on par with the stable-diffusion-webui mode.\n"
+          "  Some features may be missing or work differently. Please report bugs.\n")
 
     def save_image(image, path, basename, **kwargs):
         import os
         os.makedirs(path, exist_ok=True)
-        fullfn = os.path.join(path, f"{get_next_sequence_number()}-{basename}.{kwargs['extension']}")
-        image.save(fullfn, format=get_opt('samples_format', 'png'))
+        if 'suffix' not in kwargs or len(kwargs['suffix']) == 0:
+            kwargs['suffix'] = ''
+        else:
+            kwargs['suffix'] = f"-{kwargs['suffix']}"
+        format = get_opt('samples_format', kwargs['extension'])
+        fullfn = os.path.join(
+            path, f"{basename}-{get_next_sequence_number(path, basename)}{kwargs['suffix']}.{format}")
+        image.save(fullfn, format=format)
 
     def torch_gc():
         # TODO: is this really sufficient?
@@ -79,11 +87,13 @@ def torch_gc():
                 torch.cuda.empty_cache()
                 torch.cuda.ipc_collect()
 
+    launched_at = int(datetime.now().timestamp())
+    backbone_current_seq_number = 0
+
     def get_next_sequence_number(outpath=None, basename=None):
-        # Don't really care what the number will be... As long as it is unique.
-        from datetime import datetime, timezone
-        import random
-        return int(f"{int(datetime.now(timezone.utc).timestamp())}{random.randint(1000,9999)}")
+        global backbone_current_seq_number
+        backbone_current_seq_number += 1
+        return int(f"{launched_at}{backbone_current_seq_number:04}")
 
     def wrap_gradio_gpu_call(f): return f  # Displaying various stats is not supported
 
@@ -99,7 +109,7 @@ def get_cmd_opt(name, default): return default  # Configuring is not supported
 
     def gather_ops(): return {}  # Configuring is not supported
 
-    def get_outpath(): return '.'
+    def get_outpath(): return str(pathlib.Path('.', 'outputs'))
 
     def unload_sd_model(): pass  # Not needed
 
diff --git a/src/depthmap_generation.py b/src/depthmap_generation.py
index 5ecc02b..0ea4a37 100644
--- a/src/depthmap_generation.py
+++ b/src/depthmap_generation.py
@@ -6,6 +6,7 @@
 import numpy as np
 import skimage.measure
 from PIL import Image
+import torch
 from torchvision.transforms import Compose, transforms
 
 # midas imports
diff --git a/src/misc.py b/src/misc.py
index f3d2bfd..875211f 100644
--- a/src/misc.py
+++ b/src/misc.py
@@ -1,27 +1,36 @@
 import subprocess
 import os
 import pathlib
-import torch
+import builtins
 
 def get_commit_hash():
-    try:
+    def call_git(dir):
         return subprocess.check_output(
             [os.environ.get("GIT", "git"), "rev-parse", "HEAD"],
-            cwd=pathlib.Path.cwd().joinpath('extensions/stable-diffusion-webui-depthmap-script/'),
-            shell=False,
-            stderr=subprocess.DEVNULL,
-            encoding='utf8').strip()[0:8]
+            cwd=dir, shell=False, stderr=subprocess.DEVNULL, encoding='utf8').strip()[0:8]
+
+    try:
+        file_path = pathlib.Path(__file__)
+        path = file_path.parts
+        while len(path) > 0 and path[-1] != REPOSITORY_NAME:
+            path = path[:-1]
+        if len(path) >= 2 and path[-1] == REPOSITORY_NAME and path[-2] == "extensions":
+            return call_git(str(pathlib.Path(*path)))
+
+        return call_git(pathlib.Path.cwd().joinpath('extensions/stable-diffusion-webui-depthmap-script/'))
     except Exception:
         return "<none>"
 
 
+REPOSITORY_NAME = "stable-diffusion-webui-depthmap-script"
 SCRIPT_NAME = "DepthMap"
 SCRIPT_VERSION = "v0.4.1"
 SCRIPT_FULL_NAME = f"{SCRIPT_NAME} {SCRIPT_VERSION} ({get_commit_hash()})"
 
 
 def ensure_file_downloaded(filename, url, sha256_hash_prefix=None):
-    # Do not check the hash every time - it is somewhat time-consuming
+    import torch
+    # Do not check the hash every time - it is somewhat time-consuming
     if os.path.exists(filename):
         return