diff --git a/README.md b/README.md
index ccb66cc..1ed85b9 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,8 @@ video by [@graemeniedermayer](https://github.com/graemeniedermayer), more exampl
images generated by [@semjon00](https://github.com/semjon00) from CC0 photos, more examples [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/56#issuecomment-1367596463).
## Changelog
+* v0.4.1 standalone mode
+ * ability to run DepthMap without WebUI (Use main.py. Make sure all the dependencies are installed. The support is not feature-complete.)
* v0.4.0 large code refactor
* UI improvements
* improved Batch from Directory, Clip and renormalize DepthMap
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..fc7cf68
--- /dev/null
+++ b/main.py
@@ -0,0 +1,39 @@
+# This launches DepthMap without the AUTOMATIC1111/stable-diffusion-webui
+# If DepthMap is installed as an extension,
+# you may want to change the working directory to the stable-diffusion-webui root.
+
+import argparse
+import os
+import pathlib
+import builtins
+
+import src.misc
+
+def maybe_chdir():
+ """Detects if DepthMap was installed as a stable-diffusion-webui script, but run without current directory set to
+    the stable-diffusion-webui root. Changes current directory if needed, to avoid clutter."""
+ try:
+ file_path = pathlib.Path(__file__)
+ path = file_path.parts
+ while len(path) > 0 and path[-1] != src.misc.REPOSITORY_NAME:
+ path = path[:-1]
+ if len(path) >= 2 and path[-1] == src.misc.REPOSITORY_NAME and path[-2] == "extensions":
+ path = path[:-2]
+ listdir = os.listdir(str(pathlib.Path(*path)))
+        if 'launch.py' in listdir and 'webui.py' in listdir:
+            os.chdir(str(pathlib.Path(*path)))
+ except:
+ pass
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+    parser.add_argument("--listen", help="Create public link", action='store_true')
+    parser.add_argument("--no_chdir", help="Do not try to use the root of stable-diffusion-webui", action='store_true')
+ args = parser.parse_args()
+
+ print(f"{src.misc.SCRIPT_FULL_NAME} running in standalone mode!")
+ import src.common_ui
+ if not args.no_chdir:
+ maybe_chdir()
+ src.common_ui.on_ui_tabs().launch(share=args.listen)
diff --git a/scripts/depthmap.py b/scripts/depthmap.py
index a0e85a1..868add5 100644
--- a/scripts/depthmap.py
+++ b/scripts/depthmap.py
@@ -1,233 +1,13 @@
-import gradio as gr
import traceback
+import gradio as gr
+from modules import shared
import modules.scripts as scripts
-from modules import processing, images, shared
-from modules import script_callbacks
-from modules.call_queue import wrap_gradio_gpu_call
-from modules.processing import create_infotext
-from modules.shared import opts
-from modules.ui import plaintext_to_html
-from pathlib import Path
-from PIL import Image
+from src import backbone
+from src import common_ui
+from src.core import core_generation_funnel
from src.gradio_args_transport import GradioComponentBundle
-from src.main import *
-from src.core import core_generation_funnel, unload_models, run_makevideo
-from src.depthmap_generation import ModelHolder
-
-
-# Ugly workaround to fix gradio tempfile issue
-def ensure_gradio_temp_directory():
- try:
- import tempfile
- path = os.path.join(tempfile.gettempdir(), 'gradio')
- if not (os.path.exists(path)):
- os.mkdir(path)
- except Exception as e:
- traceback.print_exc()
-ensure_gradio_temp_directory()
-
-
-def main_ui_panel(is_depth_tab):
- inp = GradioComponentBundle()
- # TODO: Greater visual separation
- with gr.Blocks():
- with gr.Row():
- inp += 'compute_device', gr.Radio(label="Compute on", choices=['GPU', 'CPU'], value='GPU')
- # TODO: Should return value instead of index. Maybe Enum should be used?
- inp += 'model_type', gr.Dropdown(label="Model",
- choices=['res101', 'dpt_beit_large_512 (midas 3.1)',
- 'dpt_beit_large_384 (midas 3.1)', 'dpt_large_384 (midas 3.0)',
- 'dpt_hybrid_384 (midas 3.0)',
- 'midas_v21', 'midas_v21_small',
- 'zoedepth_n (indoor)', 'zoedepth_k (outdoor)', 'zoedepth_nk'],
- value='res101',
- type="index")
- with gr.Group():
- with gr.Row():
- inp += 'boost', gr.Checkbox(label="BOOST (multi-resolution merging)", value=True)
- with gr.Group(visible=False) as options_depend_on_boost:
- inp += 'match_size', gr.Checkbox(label="Match net size to input size", value=False)
- with gr.Row(visible=False) as options_depend_on_match_size:
- inp += 'net_width', gr.Slider(minimum=64, maximum=2048, step=64, label='Net width', value=448)
- inp += 'net_height', gr.Slider(minimum=64, maximum=2048, step=64, label='Net height', value=448)
-
- with gr.Group():
- with gr.Row():
- inp += "save_outputs", gr.Checkbox(label="Save Outputs", value=True) # 50% of width
- with gr.Group(): # 50% of width
- inp += "output_depth", gr.Checkbox(label="Output DepthMap", value=True)
- inp += "invert_depth", gr.Checkbox(label="Invert (black=near, white=far)", value=False)
- with gr.Row() as options_depend_on_output_depth_1:
- inp += "combine_output", gr.Checkbox(
- label="Combine input and depthmap into one image", value=False)
- inp += "combine_output_axis", gr.Radio(label="Combine axis", choices=['Vertical', 'Horizontal'],
- value='Horizontal', type="index", visible=False)
- with gr.Group():
- with gr.Row():
- inp += 'clipdepth', gr.Checkbox(label="Clip and renormalize DepthMap", value=False)
- with gr.Row(visible=False) as clip_options_row_1:
- inp += "clipthreshold_far", gr.Slider(minimum=0, maximum=1, step=0.001, label='Far clip', value=0)
- inp += "clipthreshold_near", gr.Slider(minimum=0, maximum=1, step=0.001, label='Near clip', value=1)
-
- with gr.Group():
- with gr.Row():
- inp += "show_heat", gr.Checkbox(label="Generate HeatMap", value=False)
- # gr.Checkbox(label="Generate NormalMap", value=False) # TODO: this is a fake door
-
- with gr.Group():
- with gr.Row():
- inp += "gen_stereo", gr.Checkbox(label="Generate stereoscopic image(s)", value=False)
- with gr.Group(visible=False) as stereo_options:
- with gr.Row():
- with gr.Row():
- inp += "stereo_modes", gr.CheckboxGroup(
- ["left-right", "right-left", "top-bottom", "bottom-top", "red-cyan-anaglyph"],
- label="Output", value=["left-right", "red-cyan-anaglyph"])
- with gr.Row():
- inp += "stereo_divergence", gr.Slider(minimum=0.05, maximum=10.005, step=0.01,
- label='Divergence (3D effect)',
- value=2.5)
- inp += "stereo_separation", gr.Slider(minimum=-5.0, maximum=5.0, step=0.01,
- label='Separation (moves images apart)',
- value=0.0)
- with gr.Row():
- inp += "stereo_fill", gr.Dropdown(label="Gap fill technique",
- choices=['none', 'naive', 'naive_interpolating', 'polylines_soft',
- 'polylines_sharp'], value='polylines_sharp',
- type="value")
- inp += "stereo_balance", gr.Slider(minimum=-1.0, maximum=1.0, step=0.05,
- label='Balance between eyes',
- value=0.0)
-
- with gr.Group():
- with gr.Row():
- inp += "gen_mesh", gr.Checkbox(
- label="Generate simple 3D mesh", value=False, visible=True)
- with gr.Group(visible=False) as mesh_options:
- with gr.Row():
- gr.HTML(value="Generates fast, accurate only with ZoeDepth models and no boost, no custom maps")
- with gr.Row():
- inp += "mesh_occlude", gr.Checkbox(label="Remove occluded edges", value=True, visible=True)
- inp += "mesh_spherical", gr.Checkbox(label="Equirectangular projection", value=False, visible=True)
-
- if is_depth_tab:
- with gr.Group():
- with gr.Row():
- inp += "inpaint", gr.Checkbox(
- label="Generate 3D inpainted mesh", value=False)
- with gr.Group(visible=False) as inpaint_options_row_0:
- gr.HTML("Generation is sloooow, required for generating videos")
- inp += "inpaint_vids", gr.Checkbox(
- label="Generate 4 demo videos with 3D inpainted mesh.", value=False)
- gr.HTML("More options for generating video can be found in the Generate video tab")
-
- with gr.Group():
- # TODO: it should be clear from the UI that the background removal does not use the model selected above
- with gr.Row():
- inp += "background_removal", gr.Checkbox(label="Remove background", value=False)
- with gr.Row(visible=False) as bgrem_options_row_1:
- inp += "save_background_removal_masks", gr.Checkbox(label="Save the foreground masks", value=False)
- inp += "pre_depth_background_removal", gr.Checkbox(label="Pre-depth background removal", value=False)
- with gr.Row(visible=False) as bgrem_options_row_2:
- inp += "background_removal_model", gr.Dropdown(label="Rembg Model",
- choices=['u2net', 'u2netp', 'u2net_human_seg',
- 'silueta'],
- value='u2net', type="value")
-
- with gr.Box():
- gr.HTML(f"{SCRIPT_FULL_NAME}
")
- gr.HTML("Information, comment and share @ "
- "https://github.com/thygate/stable-diffusion-webui-depthmap-script")
-
- inp += "gen_normal", gr.Checkbox(label="Generate Normalmap (hidden! api only)", value=False, visible=False)
-
- def update_delault_net_size(model_type):
- w, h = ModelHolder.get_default_net_size(model_type)
- return inp['net_width'].update(value=w), inp['net_height'].update(value=h)
- inp['model_type'].change(
- fn=update_delault_net_size,
- inputs=inp['model_type'],
- outputs=[inp['net_width'], inp['net_height']]
- )
-
- inp['boost'].change(
- fn=lambda a, b: (options_depend_on_boost.update(visible=not a),
- options_depend_on_match_size.update(visible=not a and not b)),
- inputs=[inp['boost'], inp['match_size']],
- outputs=[options_depend_on_boost, options_depend_on_match_size]
- )
- inp['match_size'].change(
- fn=lambda a, b: options_depend_on_match_size.update(visible=not a and not b),
- inputs=[inp['boost'], inp['match_size']],
- outputs=[options_depend_on_match_size]
- )
-
- inp['output_depth'].change(
- fn=lambda a: (inp['invert_depth'].update(visible=a), options_depend_on_output_depth_1.update(visible=a)),
- inputs=[inp['output_depth']],
- outputs=[inp['invert_depth'], options_depend_on_output_depth_1]
- )
-
- inp['combine_output'].change(
- fn=lambda v: inp['combine_output_axis'].update(visible=v),
- inputs=[inp['combine_output']],
- outputs=[inp['combine_output_axis']]
- )
-
- inp['clipdepth'].change(
- fn=lambda v: clip_options_row_1.update(visible=v),
- inputs=[inp['clipdepth']],
- outputs=[clip_options_row_1]
- )
- inp['clipthreshold_far'].change(
- fn=lambda a, b: a if b < a else b,
- inputs=[inp['clipthreshold_far'], inp['clipthreshold_near']],
- outputs=[inp['clipthreshold_near']]
- )
- inp['clipthreshold_near'].change(
- fn=lambda a, b: a if b > a else b,
- inputs=[inp['clipthreshold_near'], inp['clipthreshold_far']],
- outputs=[inp['clipthreshold_far']]
- )
-
- def stereo_options_visibility(v):
- return stereo_options.update(visible=v)
-
- inp['gen_stereo'].change(
- fn=stereo_options_visibility,
- inputs=[inp['gen_stereo']],
- outputs=[stereo_options]
- )
-
- inp['gen_mesh'].change(
- fn=lambda v: mesh_options.update(visible=v),
- inputs=[inp['gen_mesh']],
- outputs=[mesh_options]
- )
-
- def inpaint_options_visibility(v):
- return inpaint_options_row_0.update(visible=v)
-
- if is_depth_tab:
- inp['inpaint'].change(
- fn=inpaint_options_visibility,
- inputs=[inp['inpaint']],
- outputs=[inpaint_options_row_0]
- )
-
- def background_removal_options_visibility(v):
- return bgrem_options_row_1.update(visible=v), \
- bgrem_options_row_2.update(visible=v)
-
- inp['background_removal'].change(
- fn=background_removal_options_visibility,
- inputs=[inp['background_removal']],
- outputs=[bgrem_options_row_1, bgrem_options_row_2]
- )
-
- return inp
+from src.misc import *
class Script(scripts.Script):
@@ -241,12 +21,15 @@ def ui(self, is_img2img):
gr.HTML() # Work around a Gradio bug
with gr.Column(variant='panel'):
gr.HTML() # Work around a Gradio bug
- ret = main_ui_panel(False)
+ ret = common_ui.main_ui_panel(False)
ret += ret.enkey_tail()
return ret.enkey_body()
# run from script in txt2img or img2img
def run(self, p, *inputs):
+ from modules import processing
+ from modules.processing import create_infotext
+
inputs = GradioComponentBundle.enkey_to_dict(inputs)
# sd process
@@ -256,15 +39,15 @@ def run(self, p, *inputs):
inputimages = []
for count in range(0, len(processed.images)):
# skip first grid image
- if count == 0 and len(processed.images) > 1 and opts.return_grid:
+ if count == 0 and len(processed.images) > 1 and shared.opts.return_grid:
continue
inputimages.append(processed.images[count])
- outputs, mesh_fi, meshsimple_fi = core_generation_funnel(p.outpath_samples, inputimages, None, None, inputs)
+ outputs, mesh_fi, meshsimple_fi = core_generation_funnel(p.outpath_samples, inputimages, None, None, inputs, backbone.gather_ops())
for input_i, imgs in enumerate(outputs):
# get generation parameters
- if hasattr(processed, 'all_prompts') and opts.enable_pnginfo:
+ if hasattr(processed, 'all_prompts') and shared.opts.enable_pnginfo:
info = create_infotext(processed, processed.all_prompts, processed.all_seeds, processed.all_subseeds,
"", 0, input_i)
else:
@@ -273,12 +56,12 @@ def run(self, p, *inputs):
processed.images.append(image)
if inputs["save_outputs"]:
try:
- suffix = "" if image_type == "depth" else f"_{image_type}"
- images.save_image(image, path=p.outpath_samples, basename="", seed=processed.all_seeds[input_i],
- prompt=processed.all_prompts[input_i], extension=opts.samples_format,
- info=info,
- p=processed,
- suffix=suffix)
+ suffix = "" if image_type == "depth" else f"{image_type}"
+ backbone.save_image(image, path=p.outpath_samples, basename="", seed=processed.all_seeds[input_i],
+ prompt=processed.all_prompts[input_i], extension=shared.opts.samples_format,
+ info=info,
+ p=processed,
+ suffix=suffix)
except Exception as e:
if not ('image has wrong mode' in str(e) or 'I;16' in str(e)):
raise e
@@ -310,261 +93,6 @@ def on_ui_settings():
section=section))
-def on_ui_tabs():
- inp = GradioComponentBundle()
- with gr.Blocks(analytics_enabled=False) as depthmap_interface:
- with gr.Row().style(equal_height=False):
- with gr.Column(variant='panel'):
- inp += 'depthmap_mode', gr.HTML(visible=False, value='0')
- with gr.Tabs():
- with gr.TabItem('Single Image') as depthmap_mode_0:
- with gr.Row():
- inp += gr.Image(label="Source", source="upload", interactive=True, type="pil",
- elem_id="depthmap_input_image")
- with gr.Group(visible=False) as custom_depthmap_row_0:
- # TODO: depthmap generation settings should disappear when using this
- inp += gr.File(label="Custom DepthMap", file_count="single", interactive=True,
- type="file", elem_id='custom_depthmap_img')
- inp += gr.Checkbox(elem_id="custom_depthmap", label="Use custom DepthMap", value=False)
- with gr.TabItem('Batch Process') as depthmap_mode_1:
- inp += gr.File(elem_id='image_batch', label="Batch Process", file_count="multiple",
- interactive=True, type="file")
- with gr.TabItem('Batch from Directory') as depthmap_mode_2:
- inp += gr.Textbox(elem_id="depthmap_batch_input_dir", label="Input directory",
- **shared.hide_dirs,
- placeholder="A directory on the same machine where the server is running.")
- inp += gr.Textbox(elem_id="depthmap_batch_output_dir", label="Output directory",
- **shared.hide_dirs,
- placeholder="Leave blank to save images to the default path.")
- gr.HTML("Files in the output directory may be overwritten.")
- inp += gr.Checkbox(elem_id="depthmap_batch_reuse",
- label="Skip generation and use (edited/custom) depthmaps "
- "in output directory when a file already exists.",
- value=True)
- submit = gr.Button('Generate', elem_id="depthmap_generate", variant='primary')
- inp += main_ui_panel(True) # Main panel is inserted here
- unloadmodels = gr.Button('Unload models', elem_id="depthmap_unloadmodels")
-
- with gr.Column(variant='panel'):
- with gr.Tabs(elem_id="mode_depthmap_output"):
- with gr.TabItem('Depth Output'):
- with gr.Group():
- result_images = gr.Gallery(label='Output', show_label=False,
- elem_id=f"depthmap_gallery").style(grid=4)
- with gr.Column():
- html_info_x = gr.HTML()
- html_info = gr.HTML()
-
- with gr.TabItem('3D Mesh'):
- with gr.Group():
- result_depthmesh = gr.Model3D(label="3d Mesh", clear_color=[1.0, 1.0, 1.0, 1.0])
- with gr.Row():
- # loadmesh = gr.Button('Load')
- clearmesh = gr.Button('Clear')
-
- with gr.TabItem('Generate video'):
- # generate video
- with gr.Group():
- with gr.Row():
- gr.Markdown("Generate video from inpainted(!) mesh.")
- with gr.Row():
- depth_vid = gr.Video(interactive=False)
- with gr.Column():
- vid_html_info_x = gr.HTML()
- vid_html_info = gr.HTML()
- fn_mesh = gr.Textbox(label="Input Mesh (.ply | .obj)", **shared.hide_dirs,
- placeholder="A file on the same machine where "
- "the server is running.")
- with gr.Row():
- vid_numframes = gr.Textbox(label="Number of frames", value="300")
- vid_fps = gr.Textbox(label="Framerate", value="40")
- vid_format = gr.Dropdown(label="Format", choices=['mp4', 'webm'], value='mp4',
- type="value", elem_id="video_format")
- vid_ssaa = gr.Dropdown(label="SSAA", choices=['1', '2', '3', '4'], value='3',
- type="value", elem_id="video_ssaa")
- with gr.Row():
- vid_traj = gr.Dropdown(label="Trajectory",
- choices=['straight-line', 'double-straight-line', 'circle'],
- value='double-straight-line', type="index",
- elem_id="video_trajectory")
- vid_shift = gr.Textbox(label="Translate: x, y, z", value="-0.015, 0.0, -0.05")
- vid_border = gr.Textbox(label="Crop: top, left, bottom, right",
- value="0.03, 0.03, 0.05, 0.03")
- vid_dolly = gr.Checkbox(label="Dolly", value=False, elem_classes="smalltxt")
- with gr.Row():
- submit_vid = gr.Button('Generate Video', elem_id="depthmap_generatevideo",
- variant='primary')
-
- inp += inp.enkey_tail()
-
- depthmap_mode_0.select(lambda: '0', None, inp['depthmap_mode'])
- depthmap_mode_1.select(lambda: '1', None, inp['depthmap_mode'])
- depthmap_mode_2.select(lambda: '2', None, inp['depthmap_mode'])
-
- def custom_depthmap_visibility(v):
- return custom_depthmap_row_0.update(visible=v)
-
- inp['custom_depthmap'].change(
- fn=custom_depthmap_visibility,
- inputs=[inp['custom_depthmap']],
- outputs=[custom_depthmap_row_0]
- )
-
- unloadmodels.click(
- fn=unload_models,
- inputs=[],
- outputs=[]
- )
-
- clearmesh.click(
- fn=lambda: None,
- inputs=[],
- outputs=[result_depthmesh]
- )
-
- submit.click(
- fn=wrap_gradio_gpu_call(run_generate),
- inputs=inp.enkey_body(),
- outputs=[
- result_images,
- fn_mesh,
- result_depthmesh,
- html_info_x,
- html_info
- ]
- )
-
- submit_vid.click(
- fn=wrap_gradio_gpu_call(run_makevideo),
- inputs=[
- fn_mesh,
- vid_numframes,
- vid_fps,
- vid_traj,
- vid_shift,
- vid_border,
- vid_dolly,
- vid_format,
- vid_ssaa
- ],
- outputs=[
- depth_vid,
- vid_html_info_x,
- vid_html_info
- ]
- )
-
- return (depthmap_interface, "Depth", "depthmap_interface"),
-
-
-# called from depth tab
-def run_generate(*inputs):
- inputs = GradioComponentBundle.enkey_to_dict(inputs)
- depthmap_mode = inputs['depthmap_mode']
- depthmap_batch_input_dir = inputs['depthmap_batch_input_dir']
- image_batch = inputs['image_batch']
- depthmap_input_image = inputs['depthmap_input_image']
- depthmap_batch_output_dir = inputs['depthmap_batch_output_dir']
- depthmap_batch_reuse = inputs['depthmap_batch_reuse']
- custom_depthmap = inputs['custom_depthmap']
- custom_depthmap_img = inputs['custom_depthmap_img']
-
- inputimages = []
- # Allow supplying custom depthmaps
- inputdepthmaps = []
- # Also keep track of original file names
- inputnames = []
-
- if depthmap_mode == '2' and depthmap_batch_output_dir != '':
- outpath = depthmap_batch_output_dir
- else:
- outpath = opts.outdir_samples or opts.outdir_extras_samples
-
- if depthmap_mode == '0': # Single image
- if depthmap_input_image is None:
- return [], None, None, "Please select an input image!", ""
- inputimages.append(depthmap_input_image)
- inputnames.append(None)
- if custom_depthmap:
- if custom_depthmap_img is None:
- return [], None, None,\
- "Custom depthmap is not specified. Please either supply it or disable this option.", ""
- inputdepthmaps.append(Image.open(os.path.abspath(custom_depthmap_img.name)))
- else:
- inputdepthmaps.append(None)
- if depthmap_mode == '1': # Batch Process
- if image_batch is None:
- return [], None, None, "Please select input images!", ""
- for img in image_batch:
- image = Image.open(os.path.abspath(img.name))
- inputimages.append(image)
- inputnames.append(os.path.splitext(img.orig_name)[0])
- elif depthmap_mode == '2': # Batch from Directory
- assert not shared.cmd_opts.hide_ui_dir_config, '--hide-ui-dir-config option must be disabled'
- if depthmap_batch_input_dir == '':
- return [], None, None, "Please select an input directory.", ""
- if depthmap_batch_input_dir == depthmap_batch_output_dir:
- return [], None, None, "Please pick different directories for batch processing.", ""
- image_list = shared.listfiles(depthmap_batch_input_dir)
- for path in image_list:
- try:
- inputimages.append(Image.open(path))
- inputnames.append(path)
-
- custom_depthmap = None
- if depthmap_batch_reuse:
- basename = Path(path).stem
- # Custom names are not used in samples directory
- if outpath != opts.outdir_extras_samples:
- # Possible filenames that the custom depthmaps may have
- name_candidates = [f'{basename}-0000.{opts.samples_format}', # current format
- f'{basename}.png', # human-intuitive format
- f'{Path(path).name}'] # human-intuitive format (worse)
- for fn_cand in name_candidates:
- path_cand = os.path.join(outpath, fn_cand)
- if os.path.isfile(path_cand):
- custom_depthmap = Image.open(os.path.abspath(path_cand))
- break
- inputdepthmaps.append(custom_depthmap)
- except Exception as e:
- print(f'Failed to load {path}, ignoring. Exception: {str(e)}')
- inputdepthmaps_n = len([1 for x in inputdepthmaps if x is not None])
- print(f'{len(inputimages)} images will be processed, {inputdepthmaps_n} existing depthmaps will be reused')
-
- outputs, mesh_fi, meshsimple_fi = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs)
- show_images = []
-
- # Saving images
- for input_i, imgs in enumerate(outputs):
- basename = 'depthmap'
- if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != opts.outdir_extras_samples:
- basename = Path(inputnames[input_i]).stem
-
- for image_type, image in list(imgs.items()):
- show_images += [image]
- if inputs["save_outputs"]:
- try:
- suffix = "" if image_type == "depth" else f"_{image_type}"
- images.save_image(image, path=outpath, basename=basename, seed=None,
- prompt=None, extension=opts.samples_format, short_filename=True,
- no_prompt=True, grid=False, pnginfo_section_name="extras",
- suffix=suffix)
- except Exception as e:
- if not ('image has wrong mode' in str(e) or 'I;16' in str(e)):
- raise e
- print('Catched exception: image has wrong mode!')
- traceback.print_exc()
-
- # use inpainted 3d mesh to show in 3d model output when enabled in settings
- if hasattr(opts, 'depthmap_script_show_3d_inpaint') and opts.depthmap_script_show_3d_inpaint \
- and mesh_fi is not None and len(mesh_fi) > 0:
- meshsimple_fi = mesh_fi
- # however, don't show 3dmodel when disabled in settings
- if hasattr(opts, 'depthmap_script_show_3d') and not opts.depthmap_script_show_3d:
- meshsimple_fi = None
- # TODO: return more info
- return show_images, mesh_fi, meshsimple_fi, plaintext_to_html('info'), ''
-
-
+from modules import script_callbacks
script_callbacks.on_ui_settings(on_ui_settings)
-script_callbacks.on_ui_tabs(on_ui_tabs)
+script_callbacks.on_ui_tabs(lambda: [(common_ui.on_ui_tabs(), "Depth", "depthmap_interface")])
diff --git a/src/backbone.py b/src/backbone.py
new file mode 100644
index 0000000..0829ce0
--- /dev/null
+++ b/src/backbone.py
@@ -0,0 +1,118 @@
+# DepthMap can be run inside stable-diffusion-webui, but also separately.
+# All the stable-diffusion-webui stuff that the DepthMap relies on
+# must be resided in this file (or in the scripts folder).
+import pathlib
+from datetime import datetime
+
+try:
+    # stable-diffusion-webui backbone
+    from modules.images import save_image  # Should fail if not on stable-diffusion-webui
+ from modules.devices import torch_gc # TODO: is this really sufficient?
+ from modules.images import get_next_sequence_number
+ from modules.call_queue import wrap_gradio_gpu_call
+ from modules.shared import listfiles
+
+ def get_opt(name, default):
+ from modules.shared import opts
+ if hasattr(opts, name):
+ return opts.__getattr__(name)
+ return default
+
+ def get_cmd_opt(name, default):
+ """Get command line argument"""
+ from modules.shared import cmd_opts
+ if hasattr(cmd_opts, name):
+ return cmd_opts.__getattribute__(name)
+ return default
+
+ def gather_ops():
+ """Parameters for depthmap generation"""
+ from modules.shared import cmd_opts
+ ops = {}
+ if get_opt('depthmap_script_boost_rmax', None) is not None:
+ ops['boost_whole_size_threshold'] = get_opt('depthmap_script_boost_rmax', None)
+ ops['precision'] = cmd_opts.precision
+ ops['no_half'] = cmd_opts.no_half
+ return ops
+
+
+ def get_outpath():
+ """Get path where results are saved by default"""
+ path = get_opt('outdir_samples', None)
+ if path is None or len(path) == 0:
+ path = get_opt('outdir_extras_samples', None)
+ assert path is not None and len(path) > 0
+ return path
+
+
+ def unload_sd_model():
+ from modules import shared, devices
+ if shared.sd_model is not None:
+ shared.sd_model.cond_stage_model.to(devices.cpu)
+ shared.sd_model.first_stage_model.to(devices.cpu)
+
+
+ def reload_sd_model():
+ from modules import shared, devices
+ if shared.sd_model is not None:
+ shared.sd_model.cond_stage_model.to(devices.device)
+ shared.sd_model.first_stage_model.to(devices.device)
+
+ def get_hide_dirs():
+ import modules.shared
+ return modules.shared.hide_dirs
+except:
+ # Standalone backbone
+    print( # " DepthMap did not detect stable-diffusion-webui; launching with the standalone backbone.\n"
+          "  The standalone mode is not on par with the stable-diffusion-webui mode.\n"
+          "  Some features may be missing or work differently. Please report bugs.\n")
+
+ def save_image(image, path, basename, **kwargs):
+ import os
+ os.makedirs(path, exist_ok=True)
+ if 'suffix' not in kwargs or len(kwargs['suffix']) == 0:
+ kwargs['suffix'] = ''
+ else:
+ kwargs['suffix'] = f"-{kwargs['suffix']}"
+ format = get_opt('samples_format', kwargs['extension'])
+ fullfn = os.path.join(
+ path, f"{basename}-{get_next_sequence_number(path, basename)}{kwargs['suffix']}.{format}")
+ image.save(fullfn, format=format)
+
+ def torch_gc():
+ # TODO: is this really sufficient?
+ import torch
+ if torch.cuda.is_available():
+ with torch.cuda.device('cuda'):
+ torch.cuda.empty_cache()
+ torch.cuda.ipc_collect()
+
+ launched_at = int(datetime.now().timestamp())
+ backbone_current_seq_number = 0
+
+ def get_next_sequence_number(outpath=None, basename=None):
+ global backbone_current_seq_number
+ backbone_current_seq_number += 1
+ return int(f"{launched_at}{backbone_current_seq_number:04}")
+
+ def wrap_gradio_gpu_call(f): return f # Displaying various stats is not supported
+
+ def listfiles(dirname):
+ import os
+ filenames = [os.path.join(dirname, x) for x in sorted(os.listdir(dirname)) if not x.startswith(".")]
+ return [file for file in filenames if os.path.isfile(file)]
+
+ def get_opt(name, default): return default # Configuring is not supported
+
+
+ def get_cmd_opt(name, default): return default # Configuring is not supported
+
+ def gather_ops(): return {} # Configuring is not supported
+
+ def get_outpath(): return str(pathlib.Path('.', 'outputs'))
+
+ def unload_sd_model(): pass # Not needed
+
+ def reload_sd_model(): pass # Not needed
+
+ def get_hide_dirs(): return {} # Directories will not be hidden from traversal (except when starts with the dot)
diff --git a/src/common_ui.py b/src/common_ui.py
new file mode 100644
index 0000000..68583d3
--- /dev/null
+++ b/src/common_ui.py
@@ -0,0 +1,491 @@
+import traceback
+from pathlib import Path
+import gradio as gr
+from PIL import Image
+
+from src import backbone
+from src.core import core_generation_funnel, unload_models, run_makevideo
+from src.depthmap_generation import ModelHolder
+from src.gradio_args_transport import GradioComponentBundle
+from src.misc import *
+
+
+# Ugly workaround to fix gradio tempfile issue
+def ensure_gradio_temp_directory():
+ try:
+ import tempfile
+ path = os.path.join(tempfile.gettempdir(), 'gradio')
+ if not (os.path.exists(path)):
+ os.mkdir(path)
+ except Exception as e:
+ traceback.print_exc()
+
+
+ensure_gradio_temp_directory()
+
+
+def main_ui_panel(is_depth_tab):
+ inp = GradioComponentBundle()
+ # TODO: Greater visual separation
+ with gr.Blocks():
+ with gr.Row():
+ inp += 'compute_device', gr.Radio(label="Compute on", choices=['GPU', 'CPU'], value='GPU')
+ # TODO: Should return value instead of index. Maybe Enum should be used?
+ inp += 'model_type', gr.Dropdown(label="Model",
+ choices=['res101', 'dpt_beit_large_512 (midas 3.1)',
+ 'dpt_beit_large_384 (midas 3.1)', 'dpt_large_384 (midas 3.0)',
+ 'dpt_hybrid_384 (midas 3.0)',
+ 'midas_v21', 'midas_v21_small',
+ 'zoedepth_n (indoor)', 'zoedepth_k (outdoor)', 'zoedepth_nk'],
+ value='res101',
+ type="index")
+ with gr.Box():
+ with gr.Row():
+ inp += 'boost', gr.Checkbox(label="BOOST (multi-resolution merging)", value=True)
+ inp += 'match_size', gr.Checkbox(label="Match net size to input size", value=False)
+ with gr.Row(visible=False) as options_depend_on_match_size:
+ inp += 'net_width', gr.Slider(minimum=64, maximum=2048, step=64, label='Net width', value=448)
+ inp += 'net_height', gr.Slider(minimum=64, maximum=2048, step=64, label='Net height', value=448)
+
+ with gr.Box():
+ with gr.Row():
+ with gr.Group():
+ inp += "save_outputs", gr.Checkbox(label="Save Outputs", value=True) # 50% of width
+ with gr.Group(): # 50% of width
+ inp += "output_depth", gr.Checkbox(label="Output DepthMap", value=True)
+ inp += "invert_depth", gr.Checkbox(label="Invert (black=near, white=far)", value=False)
+ with gr.Row() as options_depend_on_output_depth_1:
+ inp += "combine_output", gr.Checkbox(
+ label="Combine input and depthmap into one image", value=False)
+ inp += "combine_output_axis", gr.Radio(label="Combine axis", choices=['Vertical', 'Horizontal'],
+ value='Horizontal', type="index", visible=False)
+ with gr.Box():
+ with gr.Row():
+ inp += 'clipdepth', gr.Checkbox(label="Clip and renormalize DepthMap", value=False)
+ with gr.Row(visible=False) as clip_options_row_1:
+ inp += "clipthreshold_far", gr.Slider(minimum=0, maximum=1, step=0.001, label='Far clip', value=0)
+ inp += "clipthreshold_near", gr.Slider(minimum=0, maximum=1, step=0.001, label='Near clip', value=1)
+
+ with gr.Box():
+ with gr.Row():
+ inp += "show_heat", gr.Checkbox(label="Generate HeatMap", value=False)
+ # gr.Checkbox(label="Generate NormalMap", value=False) # TODO: this is a fake door
+
+ with gr.Box():
+ with gr.Row():
+ inp += "gen_stereo", gr.Checkbox(label="Generate stereoscopic image(s)", value=False)
+ with gr.Column(visible=False) as stereo_options:
+ with gr.Row():
+ inp += "stereo_modes", gr.CheckboxGroup(
+ ["left-right", "right-left", "top-bottom", "bottom-top", "red-cyan-anaglyph"],
+ label="Output", value=["left-right", "red-cyan-anaglyph"])
+ with gr.Row():
+ inp += "stereo_divergence", gr.Slider(minimum=0.05, maximum=10.005, step=0.01,
+ label='Divergence (3D effect)',
+ value=2.5)
+ inp += "stereo_separation", gr.Slider(minimum=-5.0, maximum=5.0, step=0.01,
+ label='Separation (moves images apart)',
+ value=0.0)
+ with gr.Row():
+ inp += "stereo_fill", gr.Dropdown(label="Gap fill technique",
+ choices=['none', 'naive', 'naive_interpolating', 'polylines_soft',
+ 'polylines_sharp'], value='polylines_sharp',
+ type="value")
+ inp += "stereo_balance", gr.Slider(minimum=-1.0, maximum=1.0, step=0.05,
+ label='Balance between eyes',
+ value=0.0)
+
+ with gr.Box():
+ with gr.Column():
+ inp += "gen_mesh", gr.Checkbox(
+ label="Generate simple 3D mesh", value=False, visible=True)
+ with gr.Column(visible=False) as mesh_options:
+ with gr.Row():
+ gr.HTML(value="Generates fast, accurate only with ZoeDepth models and no boost, no custom maps")
+ with gr.Row():
+ inp += "mesh_occlude", gr.Checkbox(label="Remove occluded edges", value=True, visible=True)
+ inp += "mesh_spherical", gr.Checkbox(label="Equirectangular projection", value=False, visible=True)
+
+ if is_depth_tab:
+ with gr.Box():
+ with gr.Column():
+ inp += "inpaint", gr.Checkbox(
+ label="Generate 3D inpainted mesh", value=False)
+ with gr.Column(visible=False) as inpaint_options_row_0:
+ gr.HTML("Generation is sloooow, required for generating videos")
+ inp += "inpaint_vids", gr.Checkbox(
+ label="Generate 4 demo videos with 3D inpainted mesh.", value=False)
+ gr.HTML("More options for generating video can be found in the Generate video tab")
+
+ with gr.Box():
+ # TODO: it should be clear from the UI that there is an option of the background removal
+ # that does not use the model selected above
+ with gr.Row():
+ inp += "background_removal", gr.Checkbox(label="Remove background", value=False)
+ with gr.Column(visible=False) as bgrem_options:
+ with gr.Row():
+ inp += "save_background_removal_masks", gr.Checkbox(label="Save the foreground masks", value=False)
+ inp += "pre_depth_background_removal", gr.Checkbox(label="Pre-depth background removal", value=False)
+ with gr.Row():
+ inp += "background_removal_model", gr.Dropdown(label="Rembg Model",
+ choices=['u2net', 'u2netp', 'u2net_human_seg',
+ 'silueta'],
+ value='u2net', type="value")
+
+ with gr.Box():
+ gr.HTML(f"{SCRIPT_FULL_NAME}<br/>")
+ gr.HTML("Information, comment and share @ "
+ "https://github.com/thygate/stable-diffusion-webui-depthmap-script")
+
+ inp += "gen_normal", gr.Checkbox(label="Generate Normalmap (hidden! api only)", value=False, visible=False)
+
+ def update_default_net_size(model_type):
+ w, h = ModelHolder.get_default_net_size(model_type)
+ return inp['net_width'].update(value=w), inp['net_height'].update(value=h)
+
+ inp['model_type'].change(
+ fn=update_default_net_size,
+ inputs=inp['model_type'],
+ outputs=[inp['net_width'], inp['net_height']]
+ )
+
+ inp['boost'].change(
+ fn=lambda a, b: (inp['match_size'].update(visible=not a),
+ options_depend_on_match_size.update(visible=not a and not b)),
+ inputs=[inp['boost'], inp['match_size']],
+ outputs=[inp['match_size'], options_depend_on_match_size]
+ )
+ inp['match_size'].change(
+ fn=lambda a, b: options_depend_on_match_size.update(visible=not a and not b),
+ inputs=[inp['boost'], inp['match_size']],
+ outputs=[options_depend_on_match_size]
+ )
+
+ inp['output_depth'].change(
+ fn=lambda a: (inp['invert_depth'].update(visible=a), options_depend_on_output_depth_1.update(visible=a)),
+ inputs=[inp['output_depth']],
+ outputs=[inp['invert_depth'], options_depend_on_output_depth_1]
+ )
+
+ inp['combine_output'].change(
+ fn=lambda v: inp['combine_output_axis'].update(visible=v),
+ inputs=[inp['combine_output']],
+ outputs=[inp['combine_output_axis']]
+ )
+
+ inp['clipdepth'].change(
+ fn=lambda v: clip_options_row_1.update(visible=v),
+ inputs=[inp['clipdepth']],
+ outputs=[clip_options_row_1]
+ )
+ inp['clipthreshold_far'].change(
+ fn=lambda a, b: a if b < a else b,
+ inputs=[inp['clipthreshold_far'], inp['clipthreshold_near']],
+ outputs=[inp['clipthreshold_near']]
+ )
+ inp['clipthreshold_near'].change(
+ fn=lambda a, b: a if b > a else b,
+ inputs=[inp['clipthreshold_near'], inp['clipthreshold_far']],
+ outputs=[inp['clipthreshold_far']]
+ )
+
+ inp['gen_stereo'].change(
+ fn=lambda v: stereo_options.update(visible=v),
+ inputs=[inp['gen_stereo']],
+ outputs=[stereo_options]
+ )
+
+ inp['gen_mesh'].change(
+ fn=lambda v: mesh_options.update(visible=v),
+ inputs=[inp['gen_mesh']],
+ outputs=[mesh_options]
+ )
+
+ if is_depth_tab:
+ inp['inpaint'].change(
+ fn=lambda v: inpaint_options_row_0.update(visible=v),
+ inputs=[inp['inpaint']],
+ outputs=[inpaint_options_row_0]
+ )
+
+ inp['background_removal'].change(
+ fn=lambda v: bgrem_options.update(visible=v),
+ inputs=[inp['background_removal']],
+ outputs=[bgrem_options]
+ )
+
+ return inp
+
+def open_folder_action():
+ # Adapted from stable-diffusion-webui
+ f = backbone.get_outpath()
+ if backbone.get_cmd_opt('hide_ui_dir_config', False):
+ return
+ if not os.path.exists(f) or not os.path.isdir(f):
+ raise Exception("Couldn't open output folder") # .isdir is security-related, do not remove!
+ import platform
+ import subprocess as sp
+ path = os.path.normpath(f)
+ if platform.system() == "Windows":
+ os.startfile(path)
+ elif platform.system() == "Darwin":
+ sp.Popen(["open", path])
+ elif "microsoft-standard-WSL2" in platform.uname().release:
+ sp.Popen(["wsl-open", path])
+ else:
+ sp.Popen(["xdg-open", path])
+
+def on_ui_tabs():
+ inp = GradioComponentBundle()
+ with gr.Blocks(analytics_enabled=False, title="DepthMap") as depthmap_interface:
+ with gr.Row().style(equal_height=False):
+ with gr.Column(variant='panel'):
+ inp += 'depthmap_mode', gr.HTML(visible=False, value='0')
+ with gr.Tabs():
+ with gr.TabItem('Single Image') as depthmap_mode_0:
+ with gr.Group():
+ with gr.Row():
+ inp += gr.Image(label="Source", source="upload", interactive=True, type="pil",
+ elem_id="depthmap_input_image")
+ # TODO: depthmap generation settings should disappear when using this
+ inp += gr.File(label="Custom DepthMap", file_count="single", interactive=True,
+ type="file", elem_id='custom_depthmap_img', visible=False)
+ inp += gr.Checkbox(elem_id="custom_depthmap", label="Use custom DepthMap", value=False)
+ with gr.TabItem('Batch Process') as depthmap_mode_1:
+ inp += gr.File(elem_id='image_batch', label="Batch Process", file_count="multiple",
+ interactive=True, type="file")
+ with gr.TabItem('Batch from Directory') as depthmap_mode_2:
+ inp += gr.Textbox(elem_id="depthmap_batch_input_dir", label="Input directory",
+ **backbone.get_hide_dirs(),
+ placeholder="A directory on the same machine where the server is running.")
+ inp += gr.Textbox(elem_id="depthmap_batch_output_dir", label="Output directory",
+ **backbone.get_hide_dirs(),
+ placeholder="Leave blank to save images to the default path.")
+ gr.HTML("Files in the output directory may be overwritten.")
+ inp += gr.Checkbox(elem_id="depthmap_batch_reuse",
+ label="Skip generation and use (edited/custom) depthmaps "
+ "in output directory when a file already exists.",
+ value=True)
+ submit = gr.Button('Generate', elem_id="depthmap_generate", variant='primary')
+ inp += main_ui_panel(True) # Main panel is inserted here
+ unloadmodels = gr.Button('Unload models', elem_id="depthmap_unloadmodels")
+
+ with gr.Column(variant='panel'):
+ with gr.Tabs(elem_id="mode_depthmap_output"):
+ with gr.TabItem('Depth Output'):
+ with gr.Group():
+ result_images = gr.Gallery(label='Output', show_label=False,
+ elem_id=f"depthmap_gallery").style(grid=4)
+ with gr.Column():
+ html_info = gr.HTML()
+ folder_symbol = '\U0001f4c2' # 📂
+ gr.Button(folder_symbol, visible=not backbone.get_cmd_opt('hide_ui_dir_config', False)).click(
+ fn=lambda: open_folder_action(), inputs=[], outputs=[],
+ )
+
+ with gr.TabItem('3D Mesh'):
+ with gr.Group():
+ result_depthmesh = gr.Model3D(label="3d Mesh", clear_color=[1.0, 1.0, 1.0, 1.0])
+ with gr.Row():
+ # loadmesh = gr.Button('Load')
+ clearmesh = gr.Button('Clear')
+
+ with gr.TabItem('Generate video'):
+ # generate video
+ with gr.Group():
+ with gr.Row():
+ gr.Markdown("Generate video from inpainted(!) mesh.")
+ with gr.Row():
+ depth_vid = gr.Video(interactive=False)
+ with gr.Column():
+ vid_html_info_x = gr.HTML()
+ vid_html_info = gr.HTML()
+ fn_mesh = gr.Textbox(label="Input Mesh (.ply | .obj)", **backbone.get_hide_dirs(),
+ placeholder="A file on the same machine where "
+ "the server is running.")
+ with gr.Row():
+ vid_numframes = gr.Textbox(label="Number of frames", value="300")
+ vid_fps = gr.Textbox(label="Framerate", value="40")
+ vid_format = gr.Dropdown(label="Format", choices=['mp4', 'webm'], value='mp4',
+ type="value", elem_id="video_format")
+ vid_ssaa = gr.Dropdown(label="SSAA", choices=['1', '2', '3', '4'], value='3',
+ type="value", elem_id="video_ssaa")
+ with gr.Row():
+ vid_traj = gr.Dropdown(label="Trajectory",
+ choices=['straight-line', 'double-straight-line', 'circle'],
+ value='double-straight-line', type="index",
+ elem_id="video_trajectory")
+ vid_shift = gr.Textbox(label="Translate: x, y, z", value="-0.015, 0.0, -0.05")
+ vid_border = gr.Textbox(label="Crop: top, left, bottom, right",
+ value="0.03, 0.03, 0.05, 0.03")
+ vid_dolly = gr.Checkbox(label="Dolly", value=False, elem_classes="smalltxt")
+ with gr.Row():
+ submit_vid = gr.Button('Generate Video', elem_id="depthmap_generatevideo",
+ variant='primary')
+
+
+ inp += inp.enkey_tail()
+
+ depthmap_mode_0.select(lambda: '0', None, inp['depthmap_mode'])
+ depthmap_mode_1.select(lambda: '1', None, inp['depthmap_mode'])
+ depthmap_mode_2.select(lambda: '2', None, inp['depthmap_mode'])
+
+ inp['custom_depthmap'].change(
+ fn=lambda v: inp['custom_depthmap_img'].update(visible=v),
+ inputs=[inp['custom_depthmap']],
+ outputs=[inp['custom_depthmap_img']]
+ )
+
+ unloadmodels.click(
+ fn=unload_models,
+ inputs=[],
+ outputs=[]
+ )
+
+ clearmesh.click(
+ fn=lambda: None,
+ inputs=[],
+ outputs=[result_depthmesh]
+ )
+
+ submit.click(
+ fn=backbone.wrap_gradio_gpu_call(run_generate),
+ inputs=inp.enkey_body(),
+ outputs=[
+ result_images,
+ fn_mesh,
+ result_depthmesh,
+ html_info
+ ]
+ )
+
+ submit_vid.click(
+ fn=backbone.wrap_gradio_gpu_call(run_makevideo),
+ inputs=[
+ fn_mesh,
+ vid_numframes,
+ vid_fps,
+ vid_traj,
+ vid_shift,
+ vid_border,
+ vid_dolly,
+ vid_format,
+ vid_ssaa
+ ],
+ outputs=[
+ depth_vid,
+ vid_html_info_x,
+ vid_html_info
+ ]
+ )
+
+ return depthmap_interface
+
+
+def run_generate(*inputs):
+ inputs = GradioComponentBundle.enkey_to_dict(inputs)
+ depthmap_mode = inputs['depthmap_mode']
+ depthmap_batch_input_dir = inputs['depthmap_batch_input_dir']
+ image_batch = inputs['image_batch']
+ depthmap_input_image = inputs['depthmap_input_image']
+ depthmap_batch_output_dir = inputs['depthmap_batch_output_dir']
+ depthmap_batch_reuse = inputs['depthmap_batch_reuse']
+ custom_depthmap = inputs['custom_depthmap']
+ custom_depthmap_img = inputs['custom_depthmap_img']
+
+ inputimages = []
+ # Allow supplying custom depthmaps
+ inputdepthmaps = []
+ # Also keep track of original file names
+ inputnames = []
+
+ if depthmap_mode == '2' and depthmap_batch_output_dir != '':
+ outpath = depthmap_batch_output_dir
+ else:
+ outpath = backbone.get_outpath()
+
+ if depthmap_mode == '0': # Single image
+ if depthmap_input_image is None:
+ return [], None, None, "Please select an input image"
+ inputimages.append(depthmap_input_image)
+ inputnames.append(None)
+ if custom_depthmap:
+ if custom_depthmap_img is None:
+ return [], None, None, \
+ "Custom depthmap is not specified. Please either supply it or disable this option."
+ inputdepthmaps.append(Image.open(os.path.abspath(custom_depthmap_img.name)))
+ else:
+ inputdepthmaps.append(None)
+ if depthmap_mode == '1': # Batch Process
+ if image_batch is None:
+ return [], None, None, "Please select input images"
+ for img in image_batch:
+ image = Image.open(os.path.abspath(img.name))
+ inputimages.append(image)
+ inputnames.append(os.path.splitext(img.orig_name)[0])
+ elif depthmap_mode == '2': # Batch from Directory
+ assert not backbone.get_cmd_opt('hide_ui_dir_config', False), '--hide-ui-dir-config option must be disabled'
+ if depthmap_batch_input_dir == '':
+ return [], None, None, "Please select an input directory."
+ if depthmap_batch_input_dir == depthmap_batch_output_dir:
+ return [], None, None, "Please pick different directories for batch processing."
+ image_list = backbone.listfiles(depthmap_batch_input_dir)
+ for path in image_list:
+ try:
+ inputimages.append(Image.open(path))
+ inputnames.append(path)
+
+ custom_depthmap = None
+ if depthmap_batch_reuse:
+ basename = Path(path).stem
+ # Custom names are not used in samples directory
+ if outpath != backbone.get_opt('outdir_extras_samples', None):
+ # Possible filenames that the custom depthmaps may have
+ name_candidates = [f'{basename}-0000.{backbone.get_opt("samples_format", "png")}', # current format
+ f'{basename}.png', # human-intuitive format
+ f'{Path(path).name}'] # human-intuitive format (worse)
+ for fn_cand in name_candidates:
+ path_cand = os.path.join(outpath, fn_cand)
+ if os.path.isfile(path_cand):
+ custom_depthmap = Image.open(os.path.abspath(path_cand))
+ break
+ inputdepthmaps.append(custom_depthmap)
+ except Exception as e:
+ print(f'Failed to load {path}, ignoring. Exception: {str(e)}')
+ inputdepthmaps_n = len([1 for x in inputdepthmaps if x is not None])
+ print(f'{len(inputimages)} images will be processed, {inputdepthmaps_n} existing depthmaps will be reused')
+
+ outputs, fn_mesh, display_mesh = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs, backbone.gather_ops())
+
+ # Saving images
+ show_images = []
+ for input_i, imgs in enumerate(outputs):
+ basename = 'depthmap'
+ if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != backbone.get_opt('outdir_extras_samples', None):
+ basename = Path(inputnames[input_i]).stem
+
+ for image_type, image in list(imgs.items()):
+ show_images += [image]
+ if inputs["save_outputs"]:
+ try:
+ suffix = "" if image_type == "depth" else f"{image_type}"
+ backbone.save_image(image, path=outpath, basename=basename, seed=None,
+ prompt=None, extension=backbone.get_opt('samples_format', 'png'), short_filename=True,
+ no_prompt=True, grid=False, pnginfo_section_name="extras",
+ suffix=suffix)
+ except Exception as e:
+ if not ('image has wrong mode' in str(e) or 'I;16' in str(e)):
+ raise e
+ print('Caught exception: image has wrong mode!')
+ traceback.print_exc()
+
+ display_mesh = None
+ # use inpainted 3d mesh to show in 3d model output when enabled in settings
+ if backbone.get_opt('depthmap_script_show_3d_inpaint', True) and fn_mesh is not None and len(fn_mesh) > 0:
+ display_mesh = fn_mesh
+ # however, don't show 3dmodel when disabled in settings
+ if not backbone.get_opt('depthmap_script_show_3d', True):
+ display_mesh = None
+ # TODO: return more info
+ return show_images, fn_mesh, display_mesh, 'Generated!'
diff --git a/src/core.py b/src/core.py
index ccc3e6e..32a81b1 100644
--- a/src/core.py
+++ b/src/core.py
@@ -1,11 +1,6 @@
from pathlib import Path
-
from PIL import Image
-from modules import shared, devices
-from modules.images import get_next_sequence_number
-from modules.shared import opts, cmd_opts
-
try:
from tqdm import trange
except:
@@ -21,9 +16,10 @@
import traceback
# Our code
-from src.main import *
+from src.misc import *
from src.stereoimage_generation import create_stereoimages
from src.depthmap_generation import ModelHolder
+from src import backbone
# 3d-photo-inpainting imports
from inpaint.mesh import write_mesh, read_mesh, output_3d_photo
@@ -47,19 +43,7 @@ def convert_i16_to_rgb(image, like):
return output
-def unload_sd_model():
- if shared.sd_model is not None:
- shared.sd_model.cond_stage_model.to(devices.cpu)
- shared.sd_model.first_stage_model.to(devices.cpu)
-
-
-def reload_sd_model():
- if shared.sd_model is not None:
- shared.sd_model.cond_stage_model.to(devices.device)
- shared.sd_model.first_stage_model.to(devices.device)
-
-
-def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp):
+def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp, ops=None):
if len(inputimages) == 0 or inputimages[0] is None:
return [], '', ''
if inputdepthmaps is None or len(inputdepthmaps) == 0:
@@ -97,10 +81,14 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
stereo_modes = inp["stereo_modes"]
stereo_separation = inp["stereo_separation"]
+ if ops is None:
+ ops = {}
+ model_holder.update_settings(**ops)
+
# TODO: ideally, run_depthmap should not save meshes - that makes the function not pure
print(SCRIPT_FULL_NAME)
- unload_sd_model()
+ backbone.unload_sd_model()
# TODO: this still should not be here
background_removed_images = []
@@ -306,7 +294,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
else:
raise e
finally:
- if hasattr(opts, 'depthmap_script_keepmodels') and opts.depthmap_script_keepmodels:
+ if backbone.get_opt('depthmap_script_keepmodels', True):
model_holder.offload() # Swap to CPU memory
else:
if 'model' in locals():
@@ -316,7 +304,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
model_holder.unload_models()
gc.collect()
- devices.torch_gc()
+ backbone.torch_gc()
# TODO: This should not be here
mesh_fi = None
@@ -326,14 +314,14 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
except Exception as e:
print(f'{str(e)}, some issue with generating inpainted mesh')
- reload_sd_model()
+ backbone.reload_sd_model()
print("All done.\n")
return generated_images, mesh_fi, meshsimple_fi
def get_uniquefn(outpath, basename, ext):
# Inefficient and may fail, maybe use unbounded binary search?
- basecount = get_next_sequence_number(outpath, basename)
+ basecount = backbone.get_next_sequence_number(outpath, basename)
if basecount > 0: basecount = basecount - 1
fullfn = None
for i in range(500):
@@ -401,10 +389,7 @@ def run_3dphoto(device, img_rgb, img_depth, inputnames, outpath, inpaint_vids, v
config['repeat_inpaint_edge'] = True
config['ply_fmt'] = "bin"
- config['save_ply'] = False
- if hasattr(opts, 'depthmap_script_save_ply') and opts.depthmap_script_save_ply:
- config['save_ply'] = True
-
+ config['save_ply'] = backbone.get_opt('depthmap_script_save_ply', False)
config['save_obj'] = True
if device == torch.device("cpu"):
@@ -471,7 +456,7 @@ def run_3dphoto(device, img_rgb, img_depth, inputnames, outpath, inpaint_vids, v
[-0.05, -0.05, -0.05, -0.05],
['dolly-zoom-in', 'zoom-in', 'circle', 'swing'], False, vid_format, vid_ssaa)
- devices.torch_gc()
+ backbone.torch_gc()
finally:
del rgb_model
@@ -480,7 +465,7 @@ def run_3dphoto(device, img_rgb, img_depth, inputnames, outpath, inpaint_vids, v
depth_edge_model = None
del depth_feat_model
depth_feat_model = None
- devices.torch_gc()
+ backbone.torch_gc()
return mesh_fi
@@ -602,9 +587,9 @@ def run_makevideo(fn_mesh, vid_numframes, vid_fps, vid_traj, vid_shift, vid_bord
# output path and filename mess ..
basename = Path(fn_mesh).stem
- outpath = opts.outdir_samples or opts.outdir_extras_samples
+ outpath = backbone.get_outpath()
# unique filename
- basecount = get_next_sequence_number(outpath, basename)
+ basecount = backbone.get_next_sequence_number(outpath, basename)
if basecount > 0: basecount = basecount - 1
fullfn = None
for i in range(500):
@@ -697,9 +682,7 @@ def depth_edges_mask(depth):
def create_mesh(image, depth, keep_edges=False, spherical=False):
import trimesh
from dzoedepth.utils.geometry import depth_to_points, create_triangles
- maxsize = 1024
- if hasattr(opts, 'depthmap_script_mesh_maxsize'):
- maxsize = opts.depthmap_script_mesh_maxsize
+ maxsize = backbone.get_opt('depthmap_script_mesh_maxsize', 2048)
# limit the size of the input image
image.thumbnail((maxsize, maxsize))
diff --git a/src/depthmap_generation.py b/src/depthmap_generation.py
index 6812d81..0ea4a37 100644
--- a/src/depthmap_generation.py
+++ b/src/depthmap_generation.py
@@ -1,42 +1,36 @@
+import gc
+import os.path
from operator import getitem
-from PIL import Image
-from torchvision.transforms import Compose, transforms
-
-# TODO: depthmap_generation should not depend on WebUI
-from modules import shared, devices
-from modules.shared import opts, cmd_opts
-
-import torch, gc
import cv2
-import os.path
import numpy as np
import skimage.measure
-
-# Our code
-from src.main import *
+from PIL import Image
+import torch
+from torchvision.transforms import Compose, transforms
# midas imports
from dmidas.dpt_depth import DPTDepthModel
from dmidas.midas_net import MidasNet
from dmidas.midas_net_custom import MidasNet_small
from dmidas.transforms import Resize, NormalizeImage, PrepareForNet
-
+# zoedepth
+from dzoedepth.models.builder import build_model
+from dzoedepth.utils.config import get_config
# AdelaiDepth/LeReS imports
from lib.multi_depth_model_woauxi import RelDepthModel
from lib.net_tools import strip_prefix_if_present
-
+from pix2pix.models.pix2pix4depth_model import Pix2Pix4DepthModel
# pix2pix/merge net imports
from pix2pix.options.test_options import TestOptions
-from pix2pix.models.pix2pix4depth_model import Pix2Pix4DepthModel
-# zoedepth
-from dzoedepth.models.builder import build_model
-from dzoedepth.utils.config import get_config
+# Our code
+from src.misc import *
+from src import backbone
-global device
+global depthmap_device
-class ModelHolder():
+class ModelHolder:
def __init__(self):
self.depth_model = None
self.pix2pix_model = None
@@ -48,6 +42,20 @@ def __init__(self):
self.resize_mode = None
self.normalization = None
+ # Settings (initialized to sensible values, should be updated)
+ self.boost_whole_size_threshold = 1600 # R_max from the paper by default
+ self.no_half = False
+ self.precision = "autocast"
+
+ def update_settings(self, boost_whole_size_threshold=None, no_half=None, precision=None):
+ if boost_whole_size_threshold is not None:
+ self.boost_whole_size_threshold = boost_whole_size_threshold
+ if no_half is not None:
+ self.no_half = no_half
+ if precision is not None:
+ self.precision = precision
+
+
def ensure_models(self, model_type, device: torch.device, boost: bool):
# TODO: could make it more granular
if model_type == -1 or model_type is None:
@@ -75,7 +83,6 @@ def load_models(self, model_type, device: torch.device, boost: bool):
resize_mode = "minimal"
normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
- # TODO: net_w, net_h
model = None
if model_type == 0: # "res101"
model_path = f"{model_dir}/res101.pth"
@@ -93,7 +100,7 @@ def load_models(self, model_type, device: torch.device, boost: bool):
model = RelDepthModel(backbone='resnext101')
model.load_state_dict(strip_prefix_if_present(checkpoint['depth_model'], "module."), strict=True)
del checkpoint
- devices.torch_gc()
+ backbone.torch_gc()
if model_type == 1: # "dpt_beit_large_512" midas 3.1
model_path = f"{model_dir}/dpt_beit_large_512.pt"
@@ -190,8 +197,8 @@ def load_models(self, model_type, device: torch.device, boost: bool):
model.eval() # prepare for evaluation
# optimize
if device == torch.device("cuda") and model_type in [0, 1, 2, 3, 4, 5, 6]:
- model = model.to(memory_format=torch.channels_last)
- if not cmd_opts.no_half and model_type != 0 and not boost: # TODO: zoedepth, too?
+ model = model.to(memory_format=torch.channels_last) # TODO: weird
+ if not self.no_half and model_type != 0 and not boost: # TODO: zoedepth, too?
model = model.half()
model.to(device) # to correct device
@@ -217,11 +224,10 @@ def load_models(self, model_type, device: torch.device, boost: bool):
self.pix2pix_model.load_networks('latest')
self.pix2pix_model.eval()
- devices.torch_gc()
+ backbone.torch_gc()
@staticmethod
def get_default_net_size(model_type):
- # TODO: fill in, use in the GUI
sizes = {
0: [448, 448],
1: [512, 512],
@@ -264,7 +270,7 @@ def unload_models(self):
del self.pix2pix_model
self.pix2pix_model = None
gc.collect()
- devices.torch_gc()
+ backbone.torch_gc()
self.depth_model_type = None
self.device = None
@@ -272,9 +278,8 @@ def unload_models(self):
def get_raw_prediction(self, input, net_width, net_height):
"""Get prediction from the model currently loaded by the ModelHolder object.
If boost is enabled, net_width and net_height will be ignored."""
- # TODO: supply net size for zoedepth
- global device
- device = self.device
+ global depthmap_device
+ depthmap_device = self.device
# input image
img = cv2.cvtColor(np.asarray(input), cv2.COLOR_BGR2RGB) / 255.0
# compute depthmap
@@ -285,9 +290,11 @@ def get_raw_prediction(self, input, net_width, net_height):
raw_prediction = estimatezoedepth(input, self.depth_model, net_width, net_height)
else:
raw_prediction = estimatemidas(img, self.depth_model, net_width, net_height,
- self.resize_mode, self.normalization)
+ self.resize_mode, self.normalization, self.no_half,
+ self.precision == "autocast")
else:
- raw_prediction = estimateboost(img, self.depth_model, self.depth_model_type, self.pix2pix_model)
+ raw_prediction = estimateboost(img, self.depth_model, self.depth_model_type, self.pix2pix_model,
+ self.boost_whole_size_threshold)
raw_prediction_invert = self.depth_model_type in [0, 7, 8, 9]
return raw_prediction, raw_prediction_invert
@@ -300,7 +307,7 @@ def estimateleres(img, model, w, h):
# compute
with torch.no_grad():
- if device == torch.device("cuda"):
+ if depthmap_device == torch.device("cuda"):
img_torch = img_torch.cuda()
prediction = model.depth_model(img_torch)
@@ -332,7 +339,7 @@ def scale_torch(img):
def estimatezoedepth(img, model, w, h):
# x = transforms.ToTensor()(img).unsqueeze(0)
# x = x.type(torch.float32)
- # x.to(device)
+ # x.to(depthmap_device)
# prediction = model.infer(x)
model.core.prep.resizer._Resize__width = w
model.core.prep.resizer._Resize__height = h
@@ -341,7 +348,7 @@ def estimatezoedepth(img, model, w, h):
return prediction
-def estimatemidas(img, model, w, h, resize_mode, normalization):
+def estimatemidas(img, model, w, h, resize_mode, normalization, no_half, precision_is_autocast):
import contextlib
# init transform
transform = Compose(
@@ -364,13 +371,13 @@ def estimatemidas(img, model, w, h, resize_mode, normalization):
img_input = transform({"image": img})["image"]
# compute
- precision_scope = torch.autocast if shared.cmd_opts.precision == "autocast" and device == torch.device(
+ precision_scope = torch.autocast if precision_is_autocast and depthmap_device == torch.device(
"cuda") else contextlib.nullcontext
with torch.no_grad(), precision_scope("cuda"):
- sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
- if device == torch.device("cuda"):
+ sample = torch.from_numpy(img_input).to(depthmap_device).unsqueeze(0)
+ if depthmap_device == torch.device("cuda"):
sample = sample.to(memory_format=torch.channels_last)
- if not cmd_opts.no_half:
+ if not no_half:
sample = sample.half()
prediction = model.forward(sample)
prediction = (
@@ -600,12 +607,8 @@ def parse(self):
return self.opt
-def estimateboost(img, model, model_type, pix2pixmodel):
- pix2pixsize = 1024 # TODO: to setting?
- whole_size_threshold = 1600 # R_max from the paper # TODO: to setting?
- # get settings
- if hasattr(opts, 'depthmap_script_boost_rmax'):
- whole_size_threshold = opts.depthmap_script_boost_rmax
+def estimateboost(img, model, model_type, pix2pixmodel, whole_size_threshold):
+ pix2pixsize = 1024 # TODO: pix2pixsize and whole_size_threshold to setting?
if model_type == 0: # leres
net_receptive_field_size = 448
@@ -618,7 +621,7 @@ def estimateboost(img, model, model_type, pix2pixmodel):
patch_netsize = 2 * net_receptive_field_size
gc.collect()
- devices.torch_gc()
+ backbone.torch_gc()
# Generate mask used to smoothly blend the local pathc estimations to the base estimate.
# It is arbitrarily large to avoid artifacts during rescaling for each crop.
@@ -1024,8 +1027,8 @@ def estimatemidasBoost(img, model, w, h):
# compute
with torch.no_grad():
- sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
- if device == torch.device("cuda"):
+ sample = torch.from_numpy(img_input).to(depthmap_device).unsqueeze(0)
+ if depthmap_device == torch.device("cuda"):
sample = sample.to(memory_format=torch.channels_last)
prediction = model.forward(sample)
diff --git a/src/main.py b/src/misc.py
similarity index 60%
rename from src/main.py
rename to src/misc.py
index d3fed1d..875211f 100644
--- a/src/main.py
+++ b/src/misc.py
@@ -1,27 +1,36 @@
import subprocess
import os
import pathlib
-import torch
+import builtins
def get_commit_hash():
- try:
+ def call_git(dir):
return subprocess.check_output(
[os.environ.get("GIT", "git"), "rev-parse", "HEAD"],
- cwd=pathlib.Path.cwd().joinpath('extensions/stable-diffusion-webui-depthmap-script/'),
- shell=False,
- stderr=subprocess.DEVNULL,
- encoding='utf8').strip()[0:8]
+ cwd=dir, shell=False, stderr=subprocess.DEVNULL, encoding='utf8').strip()[0:8]
+
+ try:
+ file_path = pathlib.Path(__file__)
+ path = file_path.parts
+ while len(path) > 0 and path[-1] != REPOSITORY_NAME:
+ path = path[:-1]
+ if len(path) >= 2 and path[-1] == REPOSITORY_NAME and path[-2] == "extensions":
+ return call_git(str(pathlib.Path(*path)))
+
+ return call_git(pathlib.Path.cwd().joinpath('extensions/stable-diffusion-webui-depthmap-script/'))
except Exception:
return ""
+REPOSITORY_NAME = "stable-diffusion-webui-depthmap-script"
SCRIPT_NAME = "DepthMap"
-SCRIPT_VERSION = "v0.4.0"
+SCRIPT_VERSION = "v0.4.1"
SCRIPT_FULL_NAME = f"{SCRIPT_NAME} {SCRIPT_VERSION} ({get_commit_hash()})"
def ensure_file_downloaded(filename, url, sha256_hash_prefix=None):
- # Do not check the hash every time - it is somewhat time-consuming
+ import torch
+ # Do not check the hash every time - it is somewhat time-consuming
if os.path.exists(filename):
return