From 27f3ff64418e293a12a9173b29d0be8b0f9e8341 Mon Sep 17 00:00:00 2001 From: semjon00 Date: Tue, 18 Jul 2023 14:48:39 +0300 Subject: [PATCH 01/10] Remove ops from depthmap_generation --- scripts/depthmap.py | 82 ++++++++++++++++++++++---------------- src/backbone.py | 2 + src/core.py | 4 +- src/depthmap_generation.py | 40 ++++++++++++------- src/standalone.py | 9 +++++ 5 files changed, 86 insertions(+), 51 deletions(-) create mode 100644 src/backbone.py create mode 100644 src/standalone.py diff --git a/scripts/depthmap.py b/scripts/depthmap.py index a0e85a1..260e60f 100644 --- a/scripts/depthmap.py +++ b/scripts/depthmap.py @@ -1,12 +1,9 @@ import gradio as gr import traceback -import modules.scripts as scripts -from modules import processing, images, shared -from modules import script_callbacks + +from modules import shared +from modules.images import save_image from modules.call_queue import wrap_gradio_gpu_call -from modules.processing import create_infotext -from modules.shared import opts -from modules.ui import plaintext_to_html from pathlib import Path from PIL import Image @@ -25,9 +22,20 @@ def ensure_gradio_temp_directory(): os.mkdir(path) except Exception as e: traceback.print_exc() + + ensure_gradio_temp_directory() +def gather_ops(): + from modules.shared import opts, cmd_opts + ops = {} + if hasattr(opts, 'depthmap_script_boost_rmax'): + ops['boost_whole_size_threshold'] = opts.depthmap_script_boost_rmax + ops['precision'] = cmd_opts.precision + ops['no_half'] = cmd_opts.no_half + return ops + def main_ui_panel(is_depth_tab): inp = GradioComponentBundle() # TODO: Greater visual separation @@ -146,6 +154,7 @@ def main_ui_panel(is_depth_tab): def update_delault_net_size(model_type): w, h = ModelHolder.get_default_net_size(model_type) return inp['net_width'].update(value=w), inp['net_height'].update(value=h) + inp['model_type'].change( fn=update_delault_net_size, inputs=inp['model_type'], @@ -230,6 +239,7 @@ def background_removal_options_visibility(v): 
return inp +import modules.scripts as scripts class Script(scripts.Script): def title(self): return SCRIPT_NAME @@ -247,6 +257,9 @@ def ui(self, is_img2img): # run from script in txt2img or img2img def run(self, p, *inputs): + from modules import processing + from modules.processing import create_infotext + inputs = GradioComponentBundle.enkey_to_dict(inputs) # sd process @@ -256,15 +269,15 @@ def run(self, p, *inputs): inputimages = [] for count in range(0, len(processed.images)): # skip first grid image - if count == 0 and len(processed.images) > 1 and opts.return_grid: + if count == 0 and len(processed.images) > 1 and shared.opts.return_grid: continue inputimages.append(processed.images[count]) - outputs, mesh_fi, meshsimple_fi = core_generation_funnel(p.outpath_samples, inputimages, None, None, inputs) + outputs, mesh_fi, meshsimple_fi = core_generation_funnel(p.outpath_samples, inputimages, None, None, inputs, gather_ops()) for input_i, imgs in enumerate(outputs): # get generation parameters - if hasattr(processed, 'all_prompts') and opts.enable_pnginfo: + if hasattr(processed, 'all_prompts') and shared.opts.enable_pnginfo: info = create_infotext(processed, processed.all_prompts, processed.all_seeds, processed.all_subseeds, "", 0, input_i) else: @@ -274,11 +287,11 @@ def run(self, p, *inputs): if inputs["save_outputs"]: try: suffix = "" if image_type == "depth" else f"_{image_type}" - images.save_image(image, path=p.outpath_samples, basename="", seed=processed.all_seeds[input_i], - prompt=processed.all_prompts[input_i], extension=opts.samples_format, - info=info, - p=processed, - suffix=suffix) + save_image(image, path=p.outpath_samples, basename="", seed=processed.all_seeds[input_i], + prompt=processed.all_prompts[input_i], extension=shared.opts.samples_format, + info=info, + p=processed, + suffix=suffix) except Exception as e: if not ('image has wrong mode' in str(e) or 'I;16' in str(e)): raise e @@ -352,7 +365,6 @@ def on_ui_tabs(): result_images = 
gr.Gallery(label='Output', show_label=False, elem_id=f"depthmap_gallery").style(grid=4) with gr.Column(): - html_info_x = gr.HTML() html_info = gr.HTML() with gr.TabItem('3D Mesh'): @@ -429,7 +441,6 @@ def custom_depthmap_visibility(v): result_images, fn_mesh, result_depthmesh, - html_info_x, html_info ] ) @@ -454,7 +465,7 @@ def custom_depthmap_visibility(v): ] ) - return (depthmap_interface, "Depth", "depthmap_interface"), + return depthmap_interface # called from depth tab @@ -478,17 +489,17 @@ def run_generate(*inputs): if depthmap_mode == '2' and depthmap_batch_output_dir != '': outpath = depthmap_batch_output_dir else: - outpath = opts.outdir_samples or opts.outdir_extras_samples + outpath = shared.opts.outdir_samples or shared.opts.outdir_extras_samples if depthmap_mode == '0': # Single image if depthmap_input_image is None: - return [], None, None, "Please select an input image!", "" + return [], None, None, "Please select an input image!" inputimages.append(depthmap_input_image) inputnames.append(None) if custom_depthmap: if custom_depthmap_img is None: - return [], None, None,\ - "Custom depthmap is not specified. Please either supply it or disable this option.", "" + return [], None, None, \ + "Custom depthmap is not specified. Please either supply it or disable this option." inputdepthmaps.append(Image.open(os.path.abspath(custom_depthmap_img.name))) else: inputdepthmaps.append(None) @@ -502,9 +513,9 @@ def run_generate(*inputs): elif depthmap_mode == '2': # Batch from Directory assert not shared.cmd_opts.hide_ui_dir_config, '--hide-ui-dir-config option must be disabled' if depthmap_batch_input_dir == '': - return [], None, None, "Please select an input directory.", "" + return [], None, None, "Please select an input directory." if depthmap_batch_input_dir == depthmap_batch_output_dir: - return [], None, None, "Please pick different directories for batch processing.", "" + return [], None, None, "Please pick different directories for batch processing." 
image_list = shared.listfiles(depthmap_batch_input_dir) for path in image_list: try: @@ -515,9 +526,9 @@ def run_generate(*inputs): if depthmap_batch_reuse: basename = Path(path).stem # Custom names are not used in samples directory - if outpath != opts.outdir_extras_samples: + if outpath != shared.opts.outdir_extras_samples: # Possible filenames that the custom depthmaps may have - name_candidates = [f'{basename}-0000.{opts.samples_format}', # current format + name_candidates = [f'{basename}-0000.{shared.opts.samples_format}', # current format f'{basename}.png', # human-intuitive format f'{Path(path).name}'] # human-intuitive format (worse) for fn_cand in name_candidates: @@ -531,13 +542,13 @@ def run_generate(*inputs): inputdepthmaps_n = len([1 for x in inputdepthmaps if x is not None]) print(f'{len(inputimages)} images will be processed, {inputdepthmaps_n} existing depthmaps will be reused') - outputs, mesh_fi, meshsimple_fi = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs) + outputs, mesh_fi, meshsimple_fi = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs, gather_ops()) show_images = [] # Saving images for input_i, imgs in enumerate(outputs): basename = 'depthmap' - if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != opts.outdir_extras_samples: + if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != shared.opts.outdir_extras_samples: basename = Path(inputnames[input_i]).stem for image_type, image in list(imgs.items()): @@ -545,10 +556,10 @@ def run_generate(*inputs): if inputs["save_outputs"]: try: suffix = "" if image_type == "depth" else f"_{image_type}" - images.save_image(image, path=outpath, basename=basename, seed=None, - prompt=None, extension=opts.samples_format, short_filename=True, - no_prompt=True, grid=False, pnginfo_section_name="extras", - suffix=suffix) + save_image(image, path=outpath, basename=basename, seed=None, + prompt=None, 
extension=shared.opts.samples_format, short_filename=True, + no_prompt=True, grid=False, pnginfo_section_name="extras", + suffix=suffix) except Exception as e: if not ('image has wrong mode' in str(e) or 'I;16' in str(e)): raise e @@ -556,15 +567,16 @@ def run_generate(*inputs): traceback.print_exc() # use inpainted 3d mesh to show in 3d model output when enabled in settings - if hasattr(opts, 'depthmap_script_show_3d_inpaint') and opts.depthmap_script_show_3d_inpaint \ + if hasattr(shared.opts, 'depthmap_script_show_3d_inpaint') and shared.opts.depthmap_script_show_3d_inpaint \ and mesh_fi is not None and len(mesh_fi) > 0: meshsimple_fi = mesh_fi # however, don't show 3dmodel when disabled in settings - if hasattr(opts, 'depthmap_script_show_3d') and not opts.depthmap_script_show_3d: + if hasattr(shared.opts, 'depthmap_script_show_3d') and not shared.opts.depthmap_script_show_3d: meshsimple_fi = None # TODO: return more info - return show_images, mesh_fi, meshsimple_fi, plaintext_to_html('info'), '' + return show_images, mesh_fi, meshsimple_fi, 'Generated!' +from modules import script_callbacks script_callbacks.on_ui_settings(on_ui_settings) -script_callbacks.on_ui_tabs(on_ui_tabs) +script_callbacks.on_ui_tabs(lambda: [(on_ui_tabs(), "Depth", "depthmap_interface")]) diff --git a/src/backbone.py b/src/backbone.py new file mode 100644 index 0000000..6f41979 --- /dev/null +++ b/src/backbone.py @@ -0,0 +1,2 @@ +# This file contains stable-duiffusion-webui stuff that the plugin relies on. +# Eventually, when we have a standalone interface, this will load either standalone backbone or webui backbone. 
diff --git a/src/core.py b/src/core.py index ccc3e6e..831d789 100644 --- a/src/core.py +++ b/src/core.py @@ -59,7 +59,7 @@ def reload_sd_model(): shared.sd_model.first_stage_model.to(devices.device) -def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp): +def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp, ops=None): if len(inputimages) == 0 or inputimages[0] is None: return [], '', '' if inputdepthmaps is None or len(inputdepthmaps) == 0: @@ -97,6 +97,8 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp stereo_modes = inp["stereo_modes"] stereo_separation = inp["stereo_separation"] + model_holder.update_settings(**ops) + # TODO: ideally, run_depthmap should not save meshes - that makes the function not pure print(SCRIPT_FULL_NAME) diff --git a/src/depthmap_generation.py b/src/depthmap_generation.py index 6812d81..ec85770 100644 --- a/src/depthmap_generation.py +++ b/src/depthmap_generation.py @@ -4,8 +4,7 @@ from torchvision.transforms import Compose, transforms # TODO: depthmap_generation should not depend on WebUI -from modules import shared, devices -from modules.shared import opts, cmd_opts +from modules import devices import torch, gc import cv2 @@ -48,6 +47,20 @@ def __init__(self): self.resize_mode = None self.normalization = None + # Settings (initialized to sensible values, should be updated) + self.boost_whole_size_threshold = 1600 # R_max from the paper by default + self.no_half = False + self.precision = "autocast" + + def update_settings(self, boost_whole_size_threshold=None, no_half=None, precision=None): + if boost_whole_size_threshold is not None: + self.boost_whole_size_threshold = boost_whole_size_threshold + if no_half is not None: + self.no_half = no_half + if precision is not None: + self.precision = precision + + def ensure_models(self, model_type, device: torch.device, boost: bool): # TODO: could make it more granular if model_type == -1 or model_type 
is None: @@ -191,7 +204,7 @@ def load_models(self, model_type, device: torch.device, boost: bool): # optimize if device == torch.device("cuda") and model_type in [0, 1, 2, 3, 4, 5, 6]: model = model.to(memory_format=torch.channels_last) - if not cmd_opts.no_half and model_type != 0 and not boost: # TODO: zoedepth, too? + if not self.no_half and model_type != 0 and not boost: # TODO: zoedepth, too? model = model.half() model.to(device) # to correct device @@ -221,7 +234,6 @@ def load_models(self, model_type, device: torch.device, boost: bool): @staticmethod def get_default_net_size(model_type): - # TODO: fill in, use in the GUI sizes = { 0: [448, 448], 1: [512, 512], @@ -285,9 +297,11 @@ def get_raw_prediction(self, input, net_width, net_height): raw_prediction = estimatezoedepth(input, self.depth_model, net_width, net_height) else: raw_prediction = estimatemidas(img, self.depth_model, net_width, net_height, - self.resize_mode, self.normalization) + self.resize_mode, self.normalization, self.no_half, + self.precision == "autocast") else: - raw_prediction = estimateboost(img, self.depth_model, self.depth_model_type, self.pix2pix_model) + raw_prediction = estimateboost(img, self.depth_model, self.depth_model_type, self.pix2pix_model, + self.boost_whole_size_threshold) raw_prediction_invert = self.depth_model_type in [0, 7, 8, 9] return raw_prediction, raw_prediction_invert @@ -341,7 +355,7 @@ def estimatezoedepth(img, model, w, h): return prediction -def estimatemidas(img, model, w, h, resize_mode, normalization): +def estimatemidas(img, model, w, h, resize_mode, normalization, no_half, precision_is_autocast): import contextlib # init transform transform = Compose( @@ -364,13 +378,13 @@ def estimatemidas(img, model, w, h, resize_mode, normalization): img_input = transform({"image": img})["image"] # compute - precision_scope = torch.autocast if shared.cmd_opts.precision == "autocast" and device == torch.device( + precision_scope = torch.autocast if 
precision_is_autocast and device == torch.device( "cuda") else contextlib.nullcontext with torch.no_grad(), precision_scope("cuda"): sample = torch.from_numpy(img_input).to(device).unsqueeze(0) if device == torch.device("cuda"): sample = sample.to(memory_format=torch.channels_last) - if not cmd_opts.no_half: + if not no_half: sample = sample.half() prediction = model.forward(sample) prediction = ( @@ -600,12 +614,8 @@ def parse(self): return self.opt -def estimateboost(img, model, model_type, pix2pixmodel): - pix2pixsize = 1024 # TODO: to setting? - whole_size_threshold = 1600 # R_max from the paper # TODO: to setting? - # get settings - if hasattr(opts, 'depthmap_script_boost_rmax'): - whole_size_threshold = opts.depthmap_script_boost_rmax +def estimateboost(img, model, model_type, pix2pixmodel, whole_size_threshold): + pix2pixsize = 1024 # TODO: pix2pixsize and whole_size_threshold to setting? if model_type == 0: # leres net_receptive_field_size = 448 diff --git a/src/standalone.py b/src/standalone.py new file mode 100644 index 0000000..a20ee99 --- /dev/null +++ b/src/standalone.py @@ -0,0 +1,9 @@ +# This launches Depth tab without the AUTOMATIC1111/stable-diffusion-webui +# Does not work yet. 
+ +import gradio as gr +import scripts.depthmap + +demo = gr.Interface(fn=scripts.depthmap.on_ui_tabs, inputs="text", outputs="text") + +demo.launch() From 7ec7167218e02c1d2e652d7a95e63c249c0fa488 Mon Sep 17 00:00:00 2001 From: semjon00 Date: Tue, 18 Jul 2023 16:47:07 +0300 Subject: [PATCH 02/10] Moved code Broken --- src/common_ui.py | 465 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 465 insertions(+) create mode 100644 src/common_ui.py diff --git a/src/common_ui.py b/src/common_ui.py new file mode 100644 index 0000000..c5e76b7 --- /dev/null +++ b/src/common_ui.py @@ -0,0 +1,465 @@ +import traceback +from pathlib import Path +import gradio as gr +from PIL import Image + +from src import backbone +from src.core import core_generation_funnel, unload_models, run_makevideo +from src.depthmap_generation import ModelHolder +from src.gradio_args_transport import GradioComponentBundle +from src.main import * + + +def main_ui_panel(is_depth_tab): + inp = GradioComponentBundle() + # TODO: Greater visual separation + with gr.Blocks(): + with gr.Row(): + inp += 'compute_device', gr.Radio(label="Compute on", choices=['GPU', 'CPU'], value='GPU') + # TODO: Should return value instead of index. Maybe Enum should be used? 
+ inp += 'model_type', gr.Dropdown(label="Model", + choices=['res101', 'dpt_beit_large_512 (midas 3.1)', + 'dpt_beit_large_384 (midas 3.1)', 'dpt_large_384 (midas 3.0)', + 'dpt_hybrid_384 (midas 3.0)', + 'midas_v21', 'midas_v21_small', + 'zoedepth_n (indoor)', 'zoedepth_k (outdoor)', 'zoedepth_nk'], + value='res101', + type="index") + with gr.Group(): + with gr.Row(): + inp += 'boost', gr.Checkbox(label="BOOST (multi-resolution merging)", value=True) + with gr.Group(visible=False) as options_depend_on_boost: + inp += 'match_size', gr.Checkbox(label="Match net size to input size", value=False) + with gr.Row(visible=False) as options_depend_on_match_size: + inp += 'net_width', gr.Slider(minimum=64, maximum=2048, step=64, label='Net width', value=448) + inp += 'net_height', gr.Slider(minimum=64, maximum=2048, step=64, label='Net height', value=448) + + with gr.Group(): + with gr.Row(): + inp += "save_outputs", gr.Checkbox(label="Save Outputs", value=True) # 50% of width + with gr.Group(): # 50% of width + inp += "output_depth", gr.Checkbox(label="Output DepthMap", value=True) + inp += "invert_depth", gr.Checkbox(label="Invert (black=near, white=far)", value=False) + with gr.Row() as options_depend_on_output_depth_1: + inp += "combine_output", gr.Checkbox( + label="Combine input and depthmap into one image", value=False) + inp += "combine_output_axis", gr.Radio(label="Combine axis", choices=['Vertical', 'Horizontal'], + value='Horizontal', type="index", visible=False) + with gr.Group(): + with gr.Row(): + inp += 'clipdepth', gr.Checkbox(label="Clip and renormalize DepthMap", value=False) + with gr.Row(visible=False) as clip_options_row_1: + inp += "clipthreshold_far", gr.Slider(minimum=0, maximum=1, step=0.001, label='Far clip', value=0) + inp += "clipthreshold_near", gr.Slider(minimum=0, maximum=1, step=0.001, label='Near clip', value=1) + + with gr.Group(): + with gr.Row(): + inp += "show_heat", gr.Checkbox(label="Generate HeatMap", value=False) + # 
gr.Checkbox(label="Generate NormalMap", value=False) # TODO: this is a fake door + + with gr.Group(): + with gr.Row(): + inp += "gen_stereo", gr.Checkbox(label="Generate stereoscopic image(s)", value=False) + with gr.Group(visible=False) as stereo_options: + with gr.Row(): + with gr.Row(): + inp += "stereo_modes", gr.CheckboxGroup( + ["left-right", "right-left", "top-bottom", "bottom-top", "red-cyan-anaglyph"], + label="Output", value=["left-right", "red-cyan-anaglyph"]) + with gr.Row(): + inp += "stereo_divergence", gr.Slider(minimum=0.05, maximum=10.005, step=0.01, + label='Divergence (3D effect)', + value=2.5) + inp += "stereo_separation", gr.Slider(minimum=-5.0, maximum=5.0, step=0.01, + label='Separation (moves images apart)', + value=0.0) + with gr.Row(): + inp += "stereo_fill", gr.Dropdown(label="Gap fill technique", + choices=['none', 'naive', 'naive_interpolating', 'polylines_soft', + 'polylines_sharp'], value='polylines_sharp', + type="value") + inp += "stereo_balance", gr.Slider(minimum=-1.0, maximum=1.0, step=0.05, + label='Balance between eyes', + value=0.0) + + with gr.Group(): + with gr.Row(): + inp += "gen_mesh", gr.Checkbox( + label="Generate simple 3D mesh", value=False, visible=True) + with gr.Group(visible=False) as mesh_options: + with gr.Row(): + gr.HTML(value="Generates fast, accurate only with ZoeDepth models and no boost, no custom maps") + with gr.Row(): + inp += "mesh_occlude", gr.Checkbox(label="Remove occluded edges", value=True, visible=True) + inp += "mesh_spherical", gr.Checkbox(label="Equirectangular projection", value=False, visible=True) + + if is_depth_tab: + with gr.Group(): + with gr.Row(): + inp += "inpaint", gr.Checkbox( + label="Generate 3D inpainted mesh", value=False) + with gr.Group(visible=False) as inpaint_options_row_0: + gr.HTML("Generation is sloooow, required for generating videos") + inp += "inpaint_vids", gr.Checkbox( + label="Generate 4 demo videos with 3D inpainted mesh.", value=False) + gr.HTML("More options 
for generating video can be found in the Generate video tab") + + with gr.Group(): + # TODO: it should be clear from the UI that the background removal does not use the model selected above + with gr.Row(): + inp += "background_removal", gr.Checkbox(label="Remove background", value=False) + with gr.Row(visible=False) as bgrem_options_row_1: + inp += "save_background_removal_masks", gr.Checkbox(label="Save the foreground masks", value=False) + inp += "pre_depth_background_removal", gr.Checkbox(label="Pre-depth background removal", value=False) + with gr.Row(visible=False) as bgrem_options_row_2: + inp += "background_removal_model", gr.Dropdown(label="Rembg Model", + choices=['u2net', 'u2netp', 'u2net_human_seg', + 'silueta'], + value='u2net', type="value") + + with gr.Box(): + gr.HTML(f"{SCRIPT_FULL_NAME}
") + gr.HTML("Information, comment and share @ " + "https://github.com/thygate/stable-diffusion-webui-depthmap-script") + + inp += "gen_normal", gr.Checkbox(label="Generate Normalmap (hidden! api only)", value=False, visible=False) + + def update_delault_net_size(model_type): + w, h = ModelHolder.get_default_net_size(model_type) + return inp['net_width'].update(value=w), inp['net_height'].update(value=h) + + inp['model_type'].change( + fn=update_delault_net_size, + inputs=inp['model_type'], + outputs=[inp['net_width'], inp['net_height']] + ) + + inp['boost'].change( + fn=lambda a, b: (options_depend_on_boost.update(visible=not a), + options_depend_on_match_size.update(visible=not a and not b)), + inputs=[inp['boost'], inp['match_size']], + outputs=[options_depend_on_boost, options_depend_on_match_size] + ) + inp['match_size'].change( + fn=lambda a, b: options_depend_on_match_size.update(visible=not a and not b), + inputs=[inp['boost'], inp['match_size']], + outputs=[options_depend_on_match_size] + ) + + inp['output_depth'].change( + fn=lambda a: (inp['invert_depth'].update(visible=a), options_depend_on_output_depth_1.update(visible=a)), + inputs=[inp['output_depth']], + outputs=[inp['invert_depth'], options_depend_on_output_depth_1] + ) + + inp['combine_output'].change( + fn=lambda v: inp['combine_output_axis'].update(visible=v), + inputs=[inp['combine_output']], + outputs=[inp['combine_output_axis']] + ) + + inp['clipdepth'].change( + fn=lambda v: clip_options_row_1.update(visible=v), + inputs=[inp['clipdepth']], + outputs=[clip_options_row_1] + ) + inp['clipthreshold_far'].change( + fn=lambda a, b: a if b < a else b, + inputs=[inp['clipthreshold_far'], inp['clipthreshold_near']], + outputs=[inp['clipthreshold_near']] + ) + inp['clipthreshold_near'].change( + fn=lambda a, b: a if b > a else b, + inputs=[inp['clipthreshold_near'], inp['clipthreshold_far']], + outputs=[inp['clipthreshold_far']] + ) + + def stereo_options_visibility(v): + return 
stereo_options.update(visible=v) + + inp['gen_stereo'].change( + fn=stereo_options_visibility, + inputs=[inp['gen_stereo']], + outputs=[stereo_options] + ) + + inp['gen_mesh'].change( + fn=lambda v: mesh_options.update(visible=v), + inputs=[inp['gen_mesh']], + outputs=[mesh_options] + ) + + def inpaint_options_visibility(v): + return inpaint_options_row_0.update(visible=v) + + if is_depth_tab: + inp['inpaint'].change( + fn=inpaint_options_visibility, + inputs=[inp['inpaint']], + outputs=[inpaint_options_row_0] + ) + + def background_removal_options_visibility(v): + return bgrem_options_row_1.update(visible=v), \ + bgrem_options_row_2.update(visible=v) + + inp['background_removal'].change( + fn=background_removal_options_visibility, + inputs=[inp['background_removal']], + outputs=[bgrem_options_row_1, bgrem_options_row_2] + ) + + return inp + +def on_ui_tabs(): + inp = GradioComponentBundle() + with gr.Blocks(analytics_enabled=False) as depthmap_interface: + with gr.Row().style(equal_height=False): + with gr.Column(variant='panel'): + inp += 'depthmap_mode', gr.HTML(visible=False, value='0') + with gr.Tabs(): + with gr.TabItem('Single Image') as depthmap_mode_0: + with gr.Row(): + inp += gr.Image(label="Source", source="upload", interactive=True, type="pil", + elem_id="depthmap_input_image") + with gr.Group(visible=False) as custom_depthmap_row_0: + # TODO: depthmap generation settings should disappear when using this + inp += gr.File(label="Custom DepthMap", file_count="single", interactive=True, + type="file", elem_id='custom_depthmap_img') + inp += gr.Checkbox(elem_id="custom_depthmap", label="Use custom DepthMap", value=False) + with gr.TabItem('Batch Process') as depthmap_mode_1: + inp += gr.File(elem_id='image_batch', label="Batch Process", file_count="multiple", + interactive=True, type="file") + with gr.TabItem('Batch from Directory') as depthmap_mode_2: + inp += gr.Textbox(elem_id="depthmap_batch_input_dir", label="Input directory", + 
**backbone.get_hide_dirs(), + placeholder="A directory on the same machine where the server is running.") + inp += gr.Textbox(elem_id="depthmap_batch_output_dir", label="Output directory", + **backbone.get_hide_dirs, + placeholder="Leave blank to save images to the default path.") + gr.HTML("Files in the output directory may be overwritten.") + inp += gr.Checkbox(elem_id="depthmap_batch_reuse", + label="Skip generation and use (edited/custom) depthmaps " + "in output directory when a file already exists.", + value=True) + submit = gr.Button('Generate', elem_id="depthmap_generate", variant='primary') + inp += main_ui_panel(True) # Main panel is inserted here + unloadmodels = gr.Button('Unload models', elem_id="depthmap_unloadmodels") + + with gr.Column(variant='panel'): + with gr.Tabs(elem_id="mode_depthmap_output"): + with gr.TabItem('Depth Output'): + with gr.Group(): + result_images = gr.Gallery(label='Output', show_label=False, + elem_id=f"depthmap_gallery").style(grid=4) + with gr.Column(): + html_info = gr.HTML() + + with gr.TabItem('3D Mesh'): + with gr.Group(): + result_depthmesh = gr.Model3D(label="3d Mesh", clear_color=[1.0, 1.0, 1.0, 1.0]) + with gr.Row(): + # loadmesh = gr.Button('Load') + clearmesh = gr.Button('Clear') + + with gr.TabItem('Generate video'): + # generate video + with gr.Group(): + with gr.Row(): + gr.Markdown("Generate video from inpainted(!) 
mesh.") + with gr.Row(): + depth_vid = gr.Video(interactive=False) + with gr.Column(): + vid_html_info_x = gr.HTML() + vid_html_info = gr.HTML() + fn_mesh = gr.Textbox(label="Input Mesh (.ply | .obj)", **shared.hide_dirs, + placeholder="A file on the same machine where " + "the server is running.") + with gr.Row(): + vid_numframes = gr.Textbox(label="Number of frames", value="300") + vid_fps = gr.Textbox(label="Framerate", value="40") + vid_format = gr.Dropdown(label="Format", choices=['mp4', 'webm'], value='mp4', + type="value", elem_id="video_format") + vid_ssaa = gr.Dropdown(label="SSAA", choices=['1', '2', '3', '4'], value='3', + type="value", elem_id="video_ssaa") + with gr.Row(): + vid_traj = gr.Dropdown(label="Trajectory", + choices=['straight-line', 'double-straight-line', 'circle'], + value='double-straight-line', type="index", + elem_id="video_trajectory") + vid_shift = gr.Textbox(label="Translate: x, y, z", value="-0.015, 0.0, -0.05") + vid_border = gr.Textbox(label="Crop: top, left, bottom, right", + value="0.03, 0.03, 0.05, 0.03") + vid_dolly = gr.Checkbox(label="Dolly", value=False, elem_classes="smalltxt") + with gr.Row(): + submit_vid = gr.Button('Generate Video', elem_id="depthmap_generatevideo", + variant='primary') + + inp += inp.enkey_tail() + + depthmap_mode_0.select(lambda: '0', None, inp['depthmap_mode']) + depthmap_mode_1.select(lambda: '1', None, inp['depthmap_mode']) + depthmap_mode_2.select(lambda: '2', None, inp['depthmap_mode']) + + def custom_depthmap_visibility(v): + return custom_depthmap_row_0.update(visible=v) + + inp['custom_depthmap'].change( + fn=custom_depthmap_visibility, + inputs=[inp['custom_depthmap']], + outputs=[custom_depthmap_row_0] + ) + + unloadmodels.click( + fn=unload_models, + inputs=[], + outputs=[] + ) + + clearmesh.click( + fn=lambda: None, + inputs=[], + outputs=[result_depthmesh] + ) + + submit.click( + fn=backbone.wrap_gradio_gpu_call(run_generate), + inputs=inp.enkey_body(), + outputs=[ + result_images, + 
fn_mesh, + result_depthmesh, + html_info + ] + ) + + submit_vid.click( + fn=backbone.wrap_gradio_gpu_call(run_makevideo), + inputs=[ + fn_mesh, + vid_numframes, + vid_fps, + vid_traj, + vid_shift, + vid_border, + vid_dolly, + vid_format, + vid_ssaa + ], + outputs=[ + depth_vid, + vid_html_info_x, + vid_html_info + ] + ) + + return depthmap_interface + + +def run_generate(*inputs): + inputs = GradioComponentBundle.enkey_to_dict(inputs) + depthmap_mode = inputs['depthmap_mode'] + depthmap_batch_input_dir = inputs['depthmap_batch_input_dir'] + image_batch = inputs['image_batch'] + depthmap_input_image = inputs['depthmap_input_image'] + depthmap_batch_output_dir = inputs['depthmap_batch_output_dir'] + depthmap_batch_reuse = inputs['depthmap_batch_reuse'] + custom_depthmap = inputs['custom_depthmap'] + custom_depthmap_img = inputs['custom_depthmap_img'] + + inputimages = [] + # Allow supplying custom depthmaps + inputdepthmaps = [] + # Also keep track of original file names + inputnames = [] + + if depthmap_mode == '2' and depthmap_batch_output_dir != '': + outpath = depthmap_batch_output_dir + else: + outpath = backbone.opts.outdir_samples or backbone.opts.outdir_extras_samples + + if depthmap_mode == '0': # Single image + if depthmap_input_image is None: + return [], None, None, "Please select an input image!" + inputimages.append(depthmap_input_image) + inputnames.append(None) + if custom_depthmap: + if custom_depthmap_img is None: + return [], None, None, \ + "Custom depthmap is not specified. Please either supply it or disable this option." 
+ inputdepthmaps.append(Image.open(os.path.abspath(custom_depthmap_img.name))) + else: + inputdepthmaps.append(None) + if depthmap_mode == '1': # Batch Process + if image_batch is None: + return [], None, None, "Please select input images!", "" + for img in image_batch: + image = Image.open(os.path.abspath(img.name)) + inputimages.append(image) + inputnames.append(os.path.splitext(img.orig_name)[0]) + elif depthmap_mode == '2': # Batch from Directory + assert not backbone.cmd_opts.hide_ui_dir_config, '--hide-ui-dir-config option must be disabled' + if depthmap_batch_input_dir == '': + return [], None, None, "Please select an input directory." + if depthmap_batch_input_dir == depthmap_batch_output_dir: + return [], None, None, "Please pick different directories for batch processing." + image_list = backbone.listfiles(depthmap_batch_input_dir) + for path in image_list: + try: + inputimages.append(Image.open(path)) + inputnames.append(path) + + custom_depthmap = None + if depthmap_batch_reuse: + basename = Path(path).stem + # Custom names are not used in samples directory + if outpath != backbone.opts.outdir_extras_samples: + # Possible filenames that the custom depthmaps may have + name_candidates = [f'{basename}-0000.{backbone.opts.samples_format}', # current format + f'{basename}.png', # human-intuitive format + f'{Path(path).name}'] # human-intuitive format (worse) + for fn_cand in name_candidates: + path_cand = os.path.join(outpath, fn_cand) + if os.path.isfile(path_cand): + custom_depthmap = Image.open(os.path.abspath(path_cand)) + break + inputdepthmaps.append(custom_depthmap) + except Exception as e: + print(f'Failed to load {path}, ignoring. 
Exception: {str(e)}') + inputdepthmaps_n = len([1 for x in inputdepthmaps if x is not None]) + print(f'{len(inputimages)} images will be processed, {inputdepthmaps_n} existing depthmaps will be reused') + + outputs, mesh_fi, meshsimple_fi = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs, backbone.gather_ops()) + + # Saving images + show_images = [] + for input_i, imgs in enumerate(outputs): + basename = 'depthmap' + if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != backbone.opts.outdir_extras_samples: + basename = Path(inputnames[input_i]).stem + + for image_type, image in list(imgs.items()): + show_images += [image] + if inputs["save_outputs"]: + try: + suffix = "" if image_type == "depth" else f"_{image_type}" + backbone.save_image(image, path=outpath, basename=basename, seed=None, + prompt=None, extension=backbone.opts.samples_format, short_filename=True, + no_prompt=True, grid=False, pnginfo_section_name="extras", + suffix=suffix) + except Exception as e: + if not ('image has wrong mode' in str(e) or 'I;16' in str(e)): + raise e + print('Catched exception: image has wrong mode!') + traceback.print_exc() + + # use inpainted 3d mesh to show in 3d model output when enabled in settings + if hasattr(backbone.opts, 'depthmap_script_show_3d_inpaint') and backbone.opts.depthmap_script_show_3d_inpaint \ + and mesh_fi is not None and len(mesh_fi) > 0: + meshsimple_fi = mesh_fi + # however, don't show 3dmodel when disabled in settings + if hasattr(backbone.opts, 'depthmap_script_show_3d') and not backbone.opts.depthmap_script_show_3d: + meshsimple_fi = None + # TODO: return more info + return show_images, mesh_fi, meshsimple_fi, 'Generated!' 
From b345e78dae0eee89ba88d5124854c8f7ed5bffbc Mon Sep 17 00:00:00 2001 From: semjon00 Date: Tue, 18 Jul 2023 19:29:12 +0300 Subject: [PATCH 03/10] Standalone interface (barely works) --- main.py | 10 + scripts/depthmap.py | 490 +------------------------------------ src/backbone.py | 94 +++++++ src/common_ui.py | 35 +-- src/core.py | 47 ++-- src/depthmap_generation.py | 62 ++--- src/standalone.py | 9 - 7 files changed, 173 insertions(+), 574 deletions(-) create mode 100644 main.py delete mode 100644 src/standalone.py diff --git a/main.py b/main.py new file mode 100644 index 0000000..8e72043 --- /dev/null +++ b/main.py @@ -0,0 +1,10 @@ +# This launches DepthMap without the AUTOMATIC1111/stable-diffusion-webui +import argparse +import src.common_ui + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--listen", help="Create public link") + args = parser.parse_args() + + src.common_ui.on_ui_tabs().launch(share=args.listen) diff --git a/scripts/depthmap.py b/scripts/depthmap.py index 260e60f..2a81fd7 100644 --- a/scripts/depthmap.py +++ b/scripts/depthmap.py @@ -1,16 +1,13 @@ -import gradio as gr import traceback - +import gradio as gr from modules import shared -from modules.images import save_image -from modules.call_queue import wrap_gradio_gpu_call -from pathlib import Path -from PIL import Image +import modules.scripts as scripts +from src import backbone +from src import common_ui +from src.core import core_generation_funnel from src.gradio_args_transport import GradioComponentBundle from src.main import * -from src.core import core_generation_funnel, unload_models, run_makevideo -from src.depthmap_generation import ModelHolder # Ugly workaround to fix gradio tempfile issue @@ -27,219 +24,6 @@ def ensure_gradio_temp_directory(): ensure_gradio_temp_directory() -def gather_ops(): - from modules.shared import opts, cmd_opts - ops = {} - if hasattr(opts, 'depthmap_script_boost_rmax'): - ops['boost_whole_size_threshold'] = 
opts.depthmap_script_boost_rmax - ops['precision'] = cmd_opts.precision - ops['no_half'] = cmd_opts.no_half - return ops - -def main_ui_panel(is_depth_tab): - inp = GradioComponentBundle() - # TODO: Greater visual separation - with gr.Blocks(): - with gr.Row(): - inp += 'compute_device', gr.Radio(label="Compute on", choices=['GPU', 'CPU'], value='GPU') - # TODO: Should return value instead of index. Maybe Enum should be used? - inp += 'model_type', gr.Dropdown(label="Model", - choices=['res101', 'dpt_beit_large_512 (midas 3.1)', - 'dpt_beit_large_384 (midas 3.1)', 'dpt_large_384 (midas 3.0)', - 'dpt_hybrid_384 (midas 3.0)', - 'midas_v21', 'midas_v21_small', - 'zoedepth_n (indoor)', 'zoedepth_k (outdoor)', 'zoedepth_nk'], - value='res101', - type="index") - with gr.Group(): - with gr.Row(): - inp += 'boost', gr.Checkbox(label="BOOST (multi-resolution merging)", value=True) - with gr.Group(visible=False) as options_depend_on_boost: - inp += 'match_size', gr.Checkbox(label="Match net size to input size", value=False) - with gr.Row(visible=False) as options_depend_on_match_size: - inp += 'net_width', gr.Slider(minimum=64, maximum=2048, step=64, label='Net width', value=448) - inp += 'net_height', gr.Slider(minimum=64, maximum=2048, step=64, label='Net height', value=448) - - with gr.Group(): - with gr.Row(): - inp += "save_outputs", gr.Checkbox(label="Save Outputs", value=True) # 50% of width - with gr.Group(): # 50% of width - inp += "output_depth", gr.Checkbox(label="Output DepthMap", value=True) - inp += "invert_depth", gr.Checkbox(label="Invert (black=near, white=far)", value=False) - with gr.Row() as options_depend_on_output_depth_1: - inp += "combine_output", gr.Checkbox( - label="Combine input and depthmap into one image", value=False) - inp += "combine_output_axis", gr.Radio(label="Combine axis", choices=['Vertical', 'Horizontal'], - value='Horizontal', type="index", visible=False) - with gr.Group(): - with gr.Row(): - inp += 'clipdepth', 
gr.Checkbox(label="Clip and renormalize DepthMap", value=False) - with gr.Row(visible=False) as clip_options_row_1: - inp += "clipthreshold_far", gr.Slider(minimum=0, maximum=1, step=0.001, label='Far clip', value=0) - inp += "clipthreshold_near", gr.Slider(minimum=0, maximum=1, step=0.001, label='Near clip', value=1) - - with gr.Group(): - with gr.Row(): - inp += "show_heat", gr.Checkbox(label="Generate HeatMap", value=False) - # gr.Checkbox(label="Generate NormalMap", value=False) # TODO: this is a fake door - - with gr.Group(): - with gr.Row(): - inp += "gen_stereo", gr.Checkbox(label="Generate stereoscopic image(s)", value=False) - with gr.Group(visible=False) as stereo_options: - with gr.Row(): - with gr.Row(): - inp += "stereo_modes", gr.CheckboxGroup( - ["left-right", "right-left", "top-bottom", "bottom-top", "red-cyan-anaglyph"], - label="Output", value=["left-right", "red-cyan-anaglyph"]) - with gr.Row(): - inp += "stereo_divergence", gr.Slider(minimum=0.05, maximum=10.005, step=0.01, - label='Divergence (3D effect)', - value=2.5) - inp += "stereo_separation", gr.Slider(minimum=-5.0, maximum=5.0, step=0.01, - label='Separation (moves images apart)', - value=0.0) - with gr.Row(): - inp += "stereo_fill", gr.Dropdown(label="Gap fill technique", - choices=['none', 'naive', 'naive_interpolating', 'polylines_soft', - 'polylines_sharp'], value='polylines_sharp', - type="value") - inp += "stereo_balance", gr.Slider(minimum=-1.0, maximum=1.0, step=0.05, - label='Balance between eyes', - value=0.0) - - with gr.Group(): - with gr.Row(): - inp += "gen_mesh", gr.Checkbox( - label="Generate simple 3D mesh", value=False, visible=True) - with gr.Group(visible=False) as mesh_options: - with gr.Row(): - gr.HTML(value="Generates fast, accurate only with ZoeDepth models and no boost, no custom maps") - with gr.Row(): - inp += "mesh_occlude", gr.Checkbox(label="Remove occluded edges", value=True, visible=True) - inp += "mesh_spherical", gr.Checkbox(label="Equirectangular 
projection", value=False, visible=True) - - if is_depth_tab: - with gr.Group(): - with gr.Row(): - inp += "inpaint", gr.Checkbox( - label="Generate 3D inpainted mesh", value=False) - with gr.Group(visible=False) as inpaint_options_row_0: - gr.HTML("Generation is sloooow, required for generating videos") - inp += "inpaint_vids", gr.Checkbox( - label="Generate 4 demo videos with 3D inpainted mesh.", value=False) - gr.HTML("More options for generating video can be found in the Generate video tab") - - with gr.Group(): - # TODO: it should be clear from the UI that the background removal does not use the model selected above - with gr.Row(): - inp += "background_removal", gr.Checkbox(label="Remove background", value=False) - with gr.Row(visible=False) as bgrem_options_row_1: - inp += "save_background_removal_masks", gr.Checkbox(label="Save the foreground masks", value=False) - inp += "pre_depth_background_removal", gr.Checkbox(label="Pre-depth background removal", value=False) - with gr.Row(visible=False) as bgrem_options_row_2: - inp += "background_removal_model", gr.Dropdown(label="Rembg Model", - choices=['u2net', 'u2netp', 'u2net_human_seg', - 'silueta'], - value='u2net', type="value") - - with gr.Box(): - gr.HTML(f"{SCRIPT_FULL_NAME}
") - gr.HTML("Information, comment and share @ " - "https://github.com/thygate/stable-diffusion-webui-depthmap-script") - - inp += "gen_normal", gr.Checkbox(label="Generate Normalmap (hidden! api only)", value=False, visible=False) - - def update_delault_net_size(model_type): - w, h = ModelHolder.get_default_net_size(model_type) - return inp['net_width'].update(value=w), inp['net_height'].update(value=h) - - inp['model_type'].change( - fn=update_delault_net_size, - inputs=inp['model_type'], - outputs=[inp['net_width'], inp['net_height']] - ) - - inp['boost'].change( - fn=lambda a, b: (options_depend_on_boost.update(visible=not a), - options_depend_on_match_size.update(visible=not a and not b)), - inputs=[inp['boost'], inp['match_size']], - outputs=[options_depend_on_boost, options_depend_on_match_size] - ) - inp['match_size'].change( - fn=lambda a, b: options_depend_on_match_size.update(visible=not a and not b), - inputs=[inp['boost'], inp['match_size']], - outputs=[options_depend_on_match_size] - ) - - inp['output_depth'].change( - fn=lambda a: (inp['invert_depth'].update(visible=a), options_depend_on_output_depth_1.update(visible=a)), - inputs=[inp['output_depth']], - outputs=[inp['invert_depth'], options_depend_on_output_depth_1] - ) - - inp['combine_output'].change( - fn=lambda v: inp['combine_output_axis'].update(visible=v), - inputs=[inp['combine_output']], - outputs=[inp['combine_output_axis']] - ) - - inp['clipdepth'].change( - fn=lambda v: clip_options_row_1.update(visible=v), - inputs=[inp['clipdepth']], - outputs=[clip_options_row_1] - ) - inp['clipthreshold_far'].change( - fn=lambda a, b: a if b < a else b, - inputs=[inp['clipthreshold_far'], inp['clipthreshold_near']], - outputs=[inp['clipthreshold_near']] - ) - inp['clipthreshold_near'].change( - fn=lambda a, b: a if b > a else b, - inputs=[inp['clipthreshold_near'], inp['clipthreshold_far']], - outputs=[inp['clipthreshold_far']] - ) - - def stereo_options_visibility(v): - return 
stereo_options.update(visible=v) - - inp['gen_stereo'].change( - fn=stereo_options_visibility, - inputs=[inp['gen_stereo']], - outputs=[stereo_options] - ) - - inp['gen_mesh'].change( - fn=lambda v: mesh_options.update(visible=v), - inputs=[inp['gen_mesh']], - outputs=[mesh_options] - ) - - def inpaint_options_visibility(v): - return inpaint_options_row_0.update(visible=v) - - if is_depth_tab: - inp['inpaint'].change( - fn=inpaint_options_visibility, - inputs=[inp['inpaint']], - outputs=[inpaint_options_row_0] - ) - - def background_removal_options_visibility(v): - return bgrem_options_row_1.update(visible=v), \ - bgrem_options_row_2.update(visible=v) - - inp['background_removal'].change( - fn=background_removal_options_visibility, - inputs=[inp['background_removal']], - outputs=[bgrem_options_row_1, bgrem_options_row_2] - ) - - return inp - - -import modules.scripts as scripts class Script(scripts.Script): def title(self): return SCRIPT_NAME @@ -251,7 +35,7 @@ def ui(self, is_img2img): gr.HTML() # Work around a Gradio bug with gr.Column(variant='panel'): gr.HTML() # Work around a Gradio bug - ret = main_ui_panel(False) + ret = common_ui.main_ui_panel(False) ret += ret.enkey_tail() return ret.enkey_body() @@ -273,7 +57,7 @@ def run(self, p, *inputs): continue inputimages.append(processed.images[count]) - outputs, mesh_fi, meshsimple_fi = core_generation_funnel(p.outpath_samples, inputimages, None, None, inputs, gather_ops()) + outputs, mesh_fi, meshsimple_fi = core_generation_funnel(p.outpath_samples, inputimages, None, None, inputs, backbone.gather_ops()) for input_i, imgs in enumerate(outputs): # get generation parameters @@ -286,8 +70,8 @@ def run(self, p, *inputs): processed.images.append(image) if inputs["save_outputs"]: try: - suffix = "" if image_type == "depth" else f"_{image_type}" - save_image(image, path=p.outpath_samples, basename="", seed=processed.all_seeds[input_i], + suffix = "" if image_type == "depth" else f"{image_type}" + 
backbone.save_image(image, path=p.outpath_samples, basename="", seed=processed.all_seeds[input_i], prompt=processed.all_prompts[input_i], extension=shared.opts.samples_format, info=info, p=processed, @@ -323,260 +107,6 @@ def on_ui_settings(): section=section)) -def on_ui_tabs(): - inp = GradioComponentBundle() - with gr.Blocks(analytics_enabled=False) as depthmap_interface: - with gr.Row().style(equal_height=False): - with gr.Column(variant='panel'): - inp += 'depthmap_mode', gr.HTML(visible=False, value='0') - with gr.Tabs(): - with gr.TabItem('Single Image') as depthmap_mode_0: - with gr.Row(): - inp += gr.Image(label="Source", source="upload", interactive=True, type="pil", - elem_id="depthmap_input_image") - with gr.Group(visible=False) as custom_depthmap_row_0: - # TODO: depthmap generation settings should disappear when using this - inp += gr.File(label="Custom DepthMap", file_count="single", interactive=True, - type="file", elem_id='custom_depthmap_img') - inp += gr.Checkbox(elem_id="custom_depthmap", label="Use custom DepthMap", value=False) - with gr.TabItem('Batch Process') as depthmap_mode_1: - inp += gr.File(elem_id='image_batch', label="Batch Process", file_count="multiple", - interactive=True, type="file") - with gr.TabItem('Batch from Directory') as depthmap_mode_2: - inp += gr.Textbox(elem_id="depthmap_batch_input_dir", label="Input directory", - **shared.hide_dirs, - placeholder="A directory on the same machine where the server is running.") - inp += gr.Textbox(elem_id="depthmap_batch_output_dir", label="Output directory", - **shared.hide_dirs, - placeholder="Leave blank to save images to the default path.") - gr.HTML("Files in the output directory may be overwritten.") - inp += gr.Checkbox(elem_id="depthmap_batch_reuse", - label="Skip generation and use (edited/custom) depthmaps " - "in output directory when a file already exists.", - value=True) - submit = gr.Button('Generate', elem_id="depthmap_generate", variant='primary') - inp += 
main_ui_panel(True) # Main panel is inserted here - unloadmodels = gr.Button('Unload models', elem_id="depthmap_unloadmodels") - - with gr.Column(variant='panel'): - with gr.Tabs(elem_id="mode_depthmap_output"): - with gr.TabItem('Depth Output'): - with gr.Group(): - result_images = gr.Gallery(label='Output', show_label=False, - elem_id=f"depthmap_gallery").style(grid=4) - with gr.Column(): - html_info = gr.HTML() - - with gr.TabItem('3D Mesh'): - with gr.Group(): - result_depthmesh = gr.Model3D(label="3d Mesh", clear_color=[1.0, 1.0, 1.0, 1.0]) - with gr.Row(): - # loadmesh = gr.Button('Load') - clearmesh = gr.Button('Clear') - - with gr.TabItem('Generate video'): - # generate video - with gr.Group(): - with gr.Row(): - gr.Markdown("Generate video from inpainted(!) mesh.") - with gr.Row(): - depth_vid = gr.Video(interactive=False) - with gr.Column(): - vid_html_info_x = gr.HTML() - vid_html_info = gr.HTML() - fn_mesh = gr.Textbox(label="Input Mesh (.ply | .obj)", **shared.hide_dirs, - placeholder="A file on the same machine where " - "the server is running.") - with gr.Row(): - vid_numframes = gr.Textbox(label="Number of frames", value="300") - vid_fps = gr.Textbox(label="Framerate", value="40") - vid_format = gr.Dropdown(label="Format", choices=['mp4', 'webm'], value='mp4', - type="value", elem_id="video_format") - vid_ssaa = gr.Dropdown(label="SSAA", choices=['1', '2', '3', '4'], value='3', - type="value", elem_id="video_ssaa") - with gr.Row(): - vid_traj = gr.Dropdown(label="Trajectory", - choices=['straight-line', 'double-straight-line', 'circle'], - value='double-straight-line', type="index", - elem_id="video_trajectory") - vid_shift = gr.Textbox(label="Translate: x, y, z", value="-0.015, 0.0, -0.05") - vid_border = gr.Textbox(label="Crop: top, left, bottom, right", - value="0.03, 0.03, 0.05, 0.03") - vid_dolly = gr.Checkbox(label="Dolly", value=False, elem_classes="smalltxt") - with gr.Row(): - submit_vid = gr.Button('Generate Video', 
elem_id="depthmap_generatevideo", - variant='primary') - - inp += inp.enkey_tail() - - depthmap_mode_0.select(lambda: '0', None, inp['depthmap_mode']) - depthmap_mode_1.select(lambda: '1', None, inp['depthmap_mode']) - depthmap_mode_2.select(lambda: '2', None, inp['depthmap_mode']) - - def custom_depthmap_visibility(v): - return custom_depthmap_row_0.update(visible=v) - - inp['custom_depthmap'].change( - fn=custom_depthmap_visibility, - inputs=[inp['custom_depthmap']], - outputs=[custom_depthmap_row_0] - ) - - unloadmodels.click( - fn=unload_models, - inputs=[], - outputs=[] - ) - - clearmesh.click( - fn=lambda: None, - inputs=[], - outputs=[result_depthmesh] - ) - - submit.click( - fn=wrap_gradio_gpu_call(run_generate), - inputs=inp.enkey_body(), - outputs=[ - result_images, - fn_mesh, - result_depthmesh, - html_info - ] - ) - - submit_vid.click( - fn=wrap_gradio_gpu_call(run_makevideo), - inputs=[ - fn_mesh, - vid_numframes, - vid_fps, - vid_traj, - vid_shift, - vid_border, - vid_dolly, - vid_format, - vid_ssaa - ], - outputs=[ - depth_vid, - vid_html_info_x, - vid_html_info - ] - ) - - return depthmap_interface - - -# called from depth tab -def run_generate(*inputs): - inputs = GradioComponentBundle.enkey_to_dict(inputs) - depthmap_mode = inputs['depthmap_mode'] - depthmap_batch_input_dir = inputs['depthmap_batch_input_dir'] - image_batch = inputs['image_batch'] - depthmap_input_image = inputs['depthmap_input_image'] - depthmap_batch_output_dir = inputs['depthmap_batch_output_dir'] - depthmap_batch_reuse = inputs['depthmap_batch_reuse'] - custom_depthmap = inputs['custom_depthmap'] - custom_depthmap_img = inputs['custom_depthmap_img'] - - inputimages = [] - # Allow supplying custom depthmaps - inputdepthmaps = [] - # Also keep track of original file names - inputnames = [] - - if depthmap_mode == '2' and depthmap_batch_output_dir != '': - outpath = depthmap_batch_output_dir - else: - outpath = shared.opts.outdir_samples or shared.opts.outdir_extras_samples - - 
if depthmap_mode == '0': # Single image - if depthmap_input_image is None: - return [], None, None, "Please select an input image!" - inputimages.append(depthmap_input_image) - inputnames.append(None) - if custom_depthmap: - if custom_depthmap_img is None: - return [], None, None, \ - "Custom depthmap is not specified. Please either supply it or disable this option." - inputdepthmaps.append(Image.open(os.path.abspath(custom_depthmap_img.name))) - else: - inputdepthmaps.append(None) - if depthmap_mode == '1': # Batch Process - if image_batch is None: - return [], None, None, "Please select input images!", "" - for img in image_batch: - image = Image.open(os.path.abspath(img.name)) - inputimages.append(image) - inputnames.append(os.path.splitext(img.orig_name)[0]) - elif depthmap_mode == '2': # Batch from Directory - assert not shared.cmd_opts.hide_ui_dir_config, '--hide-ui-dir-config option must be disabled' - if depthmap_batch_input_dir == '': - return [], None, None, "Please select an input directory." - if depthmap_batch_input_dir == depthmap_batch_output_dir: - return [], None, None, "Please pick different directories for batch processing." 
- image_list = shared.listfiles(depthmap_batch_input_dir) - for path in image_list: - try: - inputimages.append(Image.open(path)) - inputnames.append(path) - - custom_depthmap = None - if depthmap_batch_reuse: - basename = Path(path).stem - # Custom names are not used in samples directory - if outpath != shared.opts.outdir_extras_samples: - # Possible filenames that the custom depthmaps may have - name_candidates = [f'{basename}-0000.{shared.opts.samples_format}', # current format - f'{basename}.png', # human-intuitive format - f'{Path(path).name}'] # human-intuitive format (worse) - for fn_cand in name_candidates: - path_cand = os.path.join(outpath, fn_cand) - if os.path.isfile(path_cand): - custom_depthmap = Image.open(os.path.abspath(path_cand)) - break - inputdepthmaps.append(custom_depthmap) - except Exception as e: - print(f'Failed to load {path}, ignoring. Exception: {str(e)}') - inputdepthmaps_n = len([1 for x in inputdepthmaps if x is not None]) - print(f'{len(inputimages)} images will be processed, {inputdepthmaps_n} existing depthmaps will be reused') - - outputs, mesh_fi, meshsimple_fi = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs, gather_ops()) - show_images = [] - - # Saving images - for input_i, imgs in enumerate(outputs): - basename = 'depthmap' - if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != shared.opts.outdir_extras_samples: - basename = Path(inputnames[input_i]).stem - - for image_type, image in list(imgs.items()): - show_images += [image] - if inputs["save_outputs"]: - try: - suffix = "" if image_type == "depth" else f"_{image_type}" - save_image(image, path=outpath, basename=basename, seed=None, - prompt=None, extension=shared.opts.samples_format, short_filename=True, - no_prompt=True, grid=False, pnginfo_section_name="extras", - suffix=suffix) - except Exception as e: - if not ('image has wrong mode' in str(e) or 'I;16' in str(e)): - raise e - print('Catched exception: image has 
wrong mode!') - traceback.print_exc() - - # use inpainted 3d mesh to show in 3d model output when enabled in settings - if hasattr(shared.opts, 'depthmap_script_show_3d_inpaint') and shared.opts.depthmap_script_show_3d_inpaint \ - and mesh_fi is not None and len(mesh_fi) > 0: - meshsimple_fi = mesh_fi - # however, don't show 3dmodel when disabled in settings - if hasattr(shared.opts, 'depthmap_script_show_3d') and not shared.opts.depthmap_script_show_3d: - meshsimple_fi = None - # TODO: return more info - return show_images, mesh_fi, meshsimple_fi, 'Generated!' - - from modules import script_callbacks script_callbacks.on_ui_settings(on_ui_settings) -script_callbacks.on_ui_tabs(lambda: [(on_ui_tabs(), "Depth", "depthmap_interface")]) +script_callbacks.on_ui_tabs(lambda: [(common_ui.on_ui_tabs(), "Depth", "depthmap_interface")]) diff --git a/src/backbone.py b/src/backbone.py index 6f41979..44fc3fd 100644 --- a/src/backbone.py +++ b/src/backbone.py @@ -1,2 +1,96 @@ # This file contains stable-duiffusion-webui stuff that the plugin relies on. # Eventually, when we have a standalone interface, this will load either standalone backbone or webui backbone. +try: + # stable-duiffusion-webui backbone + from modules.images import save_image # Should fail if not on stable-duiffusion-webui + from modules.devices import torch_gc # TODO: is this really sufficient? 
+ from modules.images import get_next_sequence_number + from modules.call_queue import wrap_gradio_gpu_call + from modules.shared import listfiles + + def get_opt(name, default): + from modules.shared import opts + + if hasattr(opts, name): + return opts.__getattr__(name) + return default + + + def gather_ops(): + from modules.shared import cmd_opts + ops = {} + if get_opt('depthmap_script_boost_rmax', None) is not None: + ops['boost_whole_size_threshold'] = get_opt('depthmap_script_boost_rmax', None) + ops['precision'] = cmd_opts.precision + ops['no_half'] = cmd_opts.no_half + return ops + + + def get_outpath(): + path = get_opt('outdir_samples', None) + if path is None or len(path) == 0: + path = get_opt('outdir_extras_samples', None) + assert path is not None and len(path) > 0 + return path + + + def unload_sd_model(): + from modules import shared, devices + if shared.sd_model is not None: + shared.sd_model.cond_stage_model.to(devices.cpu) + shared.sd_model.first_stage_model.to(devices.cpu) + + + def reload_sd_model(): + from modules import shared, devices + if shared.sd_model is not None: + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) + + def get_hide_dirs(): + import modules.shared + return modules.shared.hide_dirs +except: + # Standalone backbone + print("DepthMap did not detect stable-duiffusion-webui; launching with the standalone backbone.\n" + "The standalone backbone is not on par with the stable-duiffusion-webui backbone.\n" + "Some features may be missing or work differently.\n") + + def save_image(image, path, basename, **kwargs): + import os + os.makedirs(path, exist_ok=True) + fullfn = os.path.join(path, f"{get_next_sequence_number()}-{basename}.{kwargs['extension']}") + image.save(fullfn, format=get_opt('samples_format', 'png')) + + def torch_gc(): + # TODO: is this really sufficient? 
+ import torch + if torch.cuda.is_available(): + with torch.cuda.device('cuda'): + torch.cuda.empty_cache() + torch.cuda.ipc_collect() + + def get_next_sequence_number(): + # Don't really care what the number will be... As long as it is unique. + from datetime import datetime, timezone + import random + return f"{int(datetime.now(timezone.utc).timestamp())}-{random.randint(1000,9999)}" + + def wrap_gradio_gpu_call(f): return f # Displaying various stats is not supported + + def listfiles(dirname): + import os + filenames = [os.path.join(dirname, x) for x in sorted(os.listdir(dirname)) if not x.startswith(".")] + return [file for file in filenames if os.path.isfile(file)] + + def get_opt(name, default): return default # Configuring is not supported + + def gather_ops(): return {} # Configuring is not supported + + def get_outpath(): return '.' + + def unload_sd_model(): pass # Not needed + + def reload_sd_model(): pass # Not needed + + def get_hide_dirs(): return {} # Directories will not be hidden from traversal diff --git a/src/common_ui.py b/src/common_ui.py index c5e76b7..3210c74 100644 --- a/src/common_ui.py +++ b/src/common_ui.py @@ -105,7 +105,8 @@ def main_ui_panel(is_depth_tab): gr.HTML("More options for generating video can be found in the Generate video tab") with gr.Group(): - # TODO: it should be clear from the UI that the background removal does not use the model selected above + # TODO: it should be clear from the UI that there is an option of the background removal + # that does not use the model selected above with gr.Row(): inp += "background_removal", gr.Checkbox(label="Remove background", value=False) with gr.Row(visible=False) as bgrem_options_row_1: @@ -236,7 +237,7 @@ def on_ui_tabs(): **backbone.get_hide_dirs(), placeholder="A directory on the same machine where the server is running.") inp += gr.Textbox(elem_id="depthmap_batch_output_dir", label="Output directory", - **backbone.get_hide_dirs, + **backbone.get_hide_dirs(), placeholder="Leave 
blank to save images to the default path.") gr.HTML("Files in the output directory may be overwritten.") inp += gr.Checkbox(elem_id="depthmap_batch_reuse", @@ -273,7 +274,7 @@ def on_ui_tabs(): with gr.Column(): vid_html_info_x = gr.HTML() vid_html_info = gr.HTML() - fn_mesh = gr.Textbox(label="Input Mesh (.ply | .obj)", **shared.hide_dirs, + fn_mesh = gr.Textbox(label="Input Mesh (.ply | .obj)", **backbone.get_hide_dirs(), placeholder="A file on the same machine where " "the server is running.") with gr.Row(): @@ -377,7 +378,7 @@ def run_generate(*inputs): if depthmap_mode == '2' and depthmap_batch_output_dir != '': outpath = depthmap_batch_output_dir else: - outpath = backbone.opts.outdir_samples or backbone.opts.outdir_extras_samples + outpath = backbone.get_outpath() if depthmap_mode == '0': # Single image if depthmap_input_image is None: @@ -399,7 +400,7 @@ def run_generate(*inputs): inputimages.append(image) inputnames.append(os.path.splitext(img.orig_name)[0]) elif depthmap_mode == '2': # Batch from Directory - assert not backbone.cmd_opts.hide_ui_dir_config, '--hide-ui-dir-config option must be disabled' + assert not backbone.get_opt('hide_ui_dir_config', False), '--hide-ui-dir-config option must be disabled' if depthmap_batch_input_dir == '': return [], None, None, "Please select an input directory." 
if depthmap_batch_input_dir == depthmap_batch_output_dir: @@ -414,9 +415,9 @@ def run_generate(*inputs): if depthmap_batch_reuse: basename = Path(path).stem # Custom names are not used in samples directory - if outpath != backbone.opts.outdir_extras_samples: + if outpath != backbone.get_opt('outdir_extras_samples', None): # Possible filenames that the custom depthmaps may have - name_candidates = [f'{basename}-0000.{backbone.opts.samples_format}', # current format + name_candidates = [f'{basename}-0000.{backbone.get_opt("samples_format", "png")}', # current format f'{basename}.png', # human-intuitive format f'{Path(path).name}'] # human-intuitive format (worse) for fn_cand in name_candidates: @@ -430,22 +431,22 @@ def run_generate(*inputs): inputdepthmaps_n = len([1 for x in inputdepthmaps if x is not None]) print(f'{len(inputimages)} images will be processed, {inputdepthmaps_n} existing depthmaps will be reused') - outputs, mesh_fi, meshsimple_fi = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs, backbone.gather_ops()) + outputs, fn_mesh, display_mesh = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs, backbone.gather_ops()) # Saving images show_images = [] for input_i, imgs in enumerate(outputs): basename = 'depthmap' - if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != backbone.opts.outdir_extras_samples: + if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != backbone.get_opt('outdir_extras_samples', None): basename = Path(inputnames[input_i]).stem for image_type, image in list(imgs.items()): show_images += [image] if inputs["save_outputs"]: try: - suffix = "" if image_type == "depth" else f"_{image_type}" + suffix = "" if image_type == "depth" else f"{image_type}" backbone.save_image(image, path=outpath, basename=basename, seed=None, - prompt=None, extension=backbone.opts.samples_format, short_filename=True, + prompt=None, 
extension=backbone.get_opt('samples_format', 'png'), short_filename=True, no_prompt=True, grid=False, pnginfo_section_name="extras", suffix=suffix) except Exception as e: @@ -454,12 +455,12 @@ def run_generate(*inputs): print('Catched exception: image has wrong mode!') traceback.print_exc() + display_mesh = None # use inpainted 3d mesh to show in 3d model output when enabled in settings - if hasattr(backbone.opts, 'depthmap_script_show_3d_inpaint') and backbone.opts.depthmap_script_show_3d_inpaint \ - and mesh_fi is not None and len(mesh_fi) > 0: - meshsimple_fi = mesh_fi + if backbone.get_opt('depthmap_script_show_3d_inpaint', True) and fn_mesh is not None and len(fn_mesh) > 0: + display_mesh = fn_mesh # however, don't show 3dmodel when disabled in settings - if hasattr(backbone.opts, 'depthmap_script_show_3d') and not backbone.opts.depthmap_script_show_3d: - meshsimple_fi = None + if not backbone.get_opt('depthmap_script_show_3d', True): + display_mesh = None # TODO: return more info - return show_images, mesh_fi, meshsimple_fi, 'Generated!' + return show_images, fn_mesh, display_mesh, 'Generated!' 
diff --git a/src/core.py b/src/core.py index 831d789..f9d65db 100644 --- a/src/core.py +++ b/src/core.py @@ -1,11 +1,6 @@ from pathlib import Path - from PIL import Image -from modules import shared, devices -from modules.images import get_next_sequence_number -from modules.shared import opts, cmd_opts - try: from tqdm import trange except: @@ -24,6 +19,7 @@ from src.main import * from src.stereoimage_generation import create_stereoimages from src.depthmap_generation import ModelHolder +from src import backbone # 3d-photo-inpainting imports from inpaint.mesh import write_mesh, read_mesh, output_3d_photo @@ -47,18 +43,6 @@ def convert_i16_to_rgb(image, like): return output -def unload_sd_model(): - if shared.sd_model is not None: - shared.sd_model.cond_stage_model.to(devices.cpu) - shared.sd_model.first_stage_model.to(devices.cpu) - - -def reload_sd_model(): - if shared.sd_model is not None: - shared.sd_model.cond_stage_model.to(devices.device) - shared.sd_model.first_stage_model.to(devices.device) - - def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp, ops=None): if len(inputimages) == 0 or inputimages[0] is None: return [], '', '' @@ -97,12 +81,14 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp stereo_modes = inp["stereo_modes"] stereo_separation = inp["stereo_separation"] + if ops is None: + ops = {} model_holder.update_settings(**ops) # TODO: ideally, run_depthmap should not save meshes - that makes the function not pure print(SCRIPT_FULL_NAME) - unload_sd_model() + backbone.unload_sd_model() # TODO: this still should not be here background_removed_images = [] @@ -308,7 +294,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp else: raise e finally: - if hasattr(opts, 'depthmap_script_keepmodels') and opts.depthmap_script_keepmodels: + if backbone.get_opt('depthmap_script_keepmodels', False): model_holder.offload() # Swap to CPU memory else: if 'model' in locals(): 
@@ -318,7 +304,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp model_holder.unload_models() gc.collect() - devices.torch_gc() + backbone.torch_gc() # TODO: This should not be here mesh_fi = None @@ -328,14 +314,14 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp except Exception as e: print(f'{str(e)}, some issue with generating inpainted mesh') - reload_sd_model() + backbone.reload_sd_model() print("All done.\n") return generated_images, mesh_fi, meshsimple_fi def get_uniquefn(outpath, basename, ext): # Inefficient and may fail, maybe use unbounded binary search? - basecount = get_next_sequence_number(outpath, basename) + basecount = backbone.get_next_sequence_number(outpath, basename) if basecount > 0: basecount = basecount - 1 fullfn = None for i in range(500): @@ -403,10 +389,7 @@ def run_3dphoto(device, img_rgb, img_depth, inputnames, outpath, inpaint_vids, v config['repeat_inpaint_edge'] = True config['ply_fmt'] = "bin" - config['save_ply'] = False - if hasattr(opts, 'depthmap_script_save_ply') and opts.depthmap_script_save_ply: - config['save_ply'] = True - + config['save_ply'] = backbone.get_opt('depthmap_script_save_ply', False) config['save_obj'] = True if device == torch.device("cpu"): @@ -473,7 +456,7 @@ def run_3dphoto(device, img_rgb, img_depth, inputnames, outpath, inpaint_vids, v [-0.05, -0.05, -0.05, -0.05], ['dolly-zoom-in', 'zoom-in', 'circle', 'swing'], False, vid_format, vid_ssaa) - devices.torch_gc() + backbone.torch_gc() finally: del rgb_model @@ -482,7 +465,7 @@ def run_3dphoto(device, img_rgb, img_depth, inputnames, outpath, inpaint_vids, v depth_edge_model = None del depth_feat_model depth_feat_model = None - devices.torch_gc() + backbone.torch_gc() return mesh_fi @@ -604,9 +587,9 @@ def run_makevideo(fn_mesh, vid_numframes, vid_fps, vid_traj, vid_shift, vid_bord # output path and filename mess .. 
basename = Path(fn_mesh).stem - outpath = opts.outdir_samples or opts.outdir_extras_samples + outpath = backbone.get_outpath() # unique filename - basecount = get_next_sequence_number(outpath, basename) + basecount = backbone.get_next_sequence_number(outpath, basename) if basecount > 0: basecount = basecount - 1 fullfn = None for i in range(500): @@ -699,9 +682,7 @@ def depth_edges_mask(depth): def create_mesh(image, depth, keep_edges=False, spherical=False): import trimesh from dzoedepth.utils.geometry import depth_to_points, create_triangles - maxsize = 1024 - if hasattr(opts, 'depthmap_script_mesh_maxsize'): - maxsize = opts.depthmap_script_mesh_maxsize + maxsize = backbone.get_opt('depthmap_script_mesh_maxsize', 2048) # limit the size of the input image image.thumbnail((maxsize, maxsize)) diff --git a/src/depthmap_generation.py b/src/depthmap_generation.py index ec85770..ada3cf4 100644 --- a/src/depthmap_generation.py +++ b/src/depthmap_generation.py @@ -1,41 +1,35 @@ +import gc +import os.path from operator import getitem -from PIL import Image -from torchvision.transforms import Compose, transforms - -# TODO: depthmap_generation should not depend on WebUI -from modules import devices - -import torch, gc import cv2 -import os.path import numpy as np import skimage.measure - -# Our code -from src.main import * +from PIL import Image +from torchvision.transforms import Compose, transforms # midas imports from dmidas.dpt_depth import DPTDepthModel from dmidas.midas_net import MidasNet from dmidas.midas_net_custom import MidasNet_small from dmidas.transforms import Resize, NormalizeImage, PrepareForNet - +# zoedepth +from dzoedepth.models.builder import build_model +from dzoedepth.utils.config import get_config # AdelaiDepth/LeReS imports from lib.multi_depth_model_woauxi import RelDepthModel from lib.net_tools import strip_prefix_if_present - +from pix2pix.models.pix2pix4depth_model import Pix2Pix4DepthModel # pix2pix/merge net imports from 
pix2pix.options.test_options import TestOptions -from pix2pix.models.pix2pix4depth_model import Pix2Pix4DepthModel -# zoedepth -from dzoedepth.models.builder import build_model -from dzoedepth.utils.config import get_config +# Our code +from src.main import * +from src import backbone -global device +global depthmap_device -class ModelHolder(): +class ModelHolder: def __init__(self): self.depth_model = None self.pix2pix_model = None @@ -88,7 +82,6 @@ def load_models(self, model_type, device: torch.device, boost: bool): resize_mode = "minimal" normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) - # TODO: net_w, net_h model = None if model_type == 0: # "res101" model_path = f"{model_dir}/res101.pth" @@ -106,7 +99,7 @@ def load_models(self, model_type, device: torch.device, boost: bool): model = RelDepthModel(backbone='resnext101') model.load_state_dict(strip_prefix_if_present(checkpoint['depth_model'], "module."), strict=True) del checkpoint - devices.torch_gc() + backbone.torch_gc() if model_type == 1: # "dpt_beit_large_512" midas 3.1 model_path = f"{model_dir}/dpt_beit_large_512.pt" @@ -203,7 +196,7 @@ def load_models(self, model_type, device: torch.device, boost: bool): model.eval() # prepare for evaluation # optimize if device == torch.device("cuda") and model_type in [0, 1, 2, 3, 4, 5, 6]: - model = model.to(memory_format=torch.channels_last) + model = model.to(memory_format=torch.channels_last) # TODO: weird if not self.no_half and model_type != 0 and not boost: # TODO: zoedepth, too? 
model = model.half() model.to(device) # to correct device @@ -230,7 +223,7 @@ def load_models(self, model_type, device: torch.device, boost: bool): self.pix2pix_model.load_networks('latest') self.pix2pix_model.eval() - devices.torch_gc() + backbone.torch_gc() @staticmethod def get_default_net_size(model_type): @@ -276,7 +269,7 @@ def unload_models(self): del self.pix2pix_model self.pix2pix_model = None gc.collect() - devices.torch_gc() + backbone.torch_gc() self.depth_model_type = None self.device = None @@ -284,9 +277,8 @@ def unload_models(self): def get_raw_prediction(self, input, net_width, net_height): """Get prediction from the model currently loaded by the ModelHolder object. If boost is enabled, net_width and net_height will be ignored.""" - # TODO: supply net size for zoedepth - global device - device = self.device + global depthmap_device + depthmap_device = self.device # input image img = cv2.cvtColor(np.asarray(input), cv2.COLOR_BGR2RGB) / 255.0 # compute depthmap @@ -314,7 +306,7 @@ def estimateleres(img, model, w, h): # compute with torch.no_grad(): - if device == torch.device("cuda"): + if depthmap_device == torch.device("cuda"): img_torch = img_torch.cuda() prediction = model.depth_model(img_torch) @@ -346,7 +338,7 @@ def scale_torch(img): def estimatezoedepth(img, model, w, h): # x = transforms.ToTensor()(img).unsqueeze(0) # x = x.type(torch.float32) - # x.to(device) + # x.to(depthmap_device) # prediction = model.infer(x) model.core.prep.resizer._Resize__width = w model.core.prep.resizer._Resize__height = h @@ -378,11 +370,11 @@ def estimatemidas(img, model, w, h, resize_mode, normalization, no_half, precisi img_input = transform({"image": img})["image"] # compute - precision_scope = torch.autocast if precision_is_autocast and device == torch.device( + precision_scope = torch.autocast if precision_is_autocast and depthmap_device == torch.device( "cuda") else contextlib.nullcontext with torch.no_grad(), precision_scope("cuda"): - sample = 
torch.from_numpy(img_input).to(device).unsqueeze(0) - if device == torch.device("cuda"): + sample = torch.from_numpy(img_input).to(depthmap_device).unsqueeze(0) + if depthmap_device == torch.device("cuda"): sample = sample.to(memory_format=torch.channels_last) if not no_half: sample = sample.half() @@ -628,7 +620,7 @@ def estimateboost(img, model, model_type, pix2pixmodel, whole_size_threshold): patch_netsize = 2 * net_receptive_field_size gc.collect() - devices.torch_gc() + backbone.torch_gc() # Generate mask used to smoothly blend the local pathc estimations to the base estimate. # It is arbitrarily large to avoid artifacts during rescaling for each crop. @@ -1034,8 +1026,8 @@ def estimatemidasBoost(img, model, w, h): # compute with torch.no_grad(): - sample = torch.from_numpy(img_input).to(device).unsqueeze(0) - if device == torch.device("cuda"): + sample = torch.from_numpy(img_input).to(depthmap_device).unsqueeze(0) + if depthmap_device == torch.device("cuda"): sample = sample.to(memory_format=torch.channels_last) prediction = model.forward(sample) diff --git a/src/standalone.py b/src/standalone.py deleted file mode 100644 index a20ee99..0000000 --- a/src/standalone.py +++ /dev/null @@ -1,9 +0,0 @@ -# This launches Depth tab without the AUTOMATIC1111/stable-diffusion-webui -# Does not work yet. 
- -import gradio as gr -import scripts.depthmap - -demo = gr.Interface(fn=scripts.depthmap.on_ui_tabs, inputs="text", outputs="text") - -demo.launch() From 5372a13d7d475ff81a8343d2c07a1a747cce4b73 Mon Sep 17 00:00:00 2001 From: semjon00 Date: Tue, 18 Jul 2023 19:58:43 +0300 Subject: [PATCH 04/10] Improvements for standalone interface --- src/common_ui.py | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/src/common_ui.py b/src/common_ui.py index 3210c74..6c01534 100644 --- a/src/common_ui.py +++ b/src/common_ui.py @@ -28,8 +28,7 @@ def main_ui_panel(is_depth_tab): with gr.Group(): with gr.Row(): inp += 'boost', gr.Checkbox(label="BOOST (multi-resolution merging)", value=True) - with gr.Group(visible=False) as options_depend_on_boost: - inp += 'match_size', gr.Checkbox(label="Match net size to input size", value=False) + inp += 'match_size', gr.Checkbox(label="Match net size to input size", value=False) with gr.Row(visible=False) as options_depend_on_match_size: inp += 'net_width', gr.Slider(minimum=64, maximum=2048, step=64, label='Net width', value=448) inp += 'net_height', gr.Slider(minimum=64, maximum=2048, step=64, label='Net height', value=448) @@ -62,10 +61,9 @@ def main_ui_panel(is_depth_tab): inp += "gen_stereo", gr.Checkbox(label="Generate stereoscopic image(s)", value=False) with gr.Group(visible=False) as stereo_options: with gr.Row(): - with gr.Row(): - inp += "stereo_modes", gr.CheckboxGroup( - ["left-right", "right-left", "top-bottom", "bottom-top", "red-cyan-anaglyph"], - label="Output", value=["left-right", "red-cyan-anaglyph"]) + inp += "stereo_modes", gr.CheckboxGroup( + ["left-right", "right-left", "top-bottom", "bottom-top", "red-cyan-anaglyph"], + label="Output", value=["left-right", "red-cyan-anaglyph"]) with gr.Row(): inp += "stereo_divergence", gr.Slider(minimum=0.05, maximum=10.005, step=0.01, label='Divergence (3D effect)', @@ -137,10 +135,10 @@ def update_delault_net_size(model_type): 
) inp['boost'].change( - fn=lambda a, b: (options_depend_on_boost.update(visible=not a), + fn=lambda a, b: (inp['match_size'].update(visible=not a), options_depend_on_match_size.update(visible=not a and not b)), inputs=[inp['boost'], inp['match_size']], - outputs=[options_depend_on_boost, options_depend_on_match_size] + outputs=[inp['match_size'], options_depend_on_match_size] ) inp['match_size'].change( fn=lambda a, b: options_depend_on_match_size.update(visible=not a and not b), @@ -176,11 +174,8 @@ def update_delault_net_size(model_type): outputs=[inp['clipthreshold_far']] ) - def stereo_options_visibility(v): - return stereo_options.update(visible=v) - inp['gen_stereo'].change( - fn=stereo_options_visibility, + fn=lambda v: stereo_options.update(visible=v), inputs=[inp['gen_stereo']], outputs=[stereo_options] ) @@ -191,22 +186,15 @@ def stereo_options_visibility(v): outputs=[mesh_options] ) - def inpaint_options_visibility(v): - return inpaint_options_row_0.update(visible=v) - if is_depth_tab: inp['inpaint'].change( - fn=inpaint_options_visibility, + fn=lambda v: inpaint_options_row_0.update(visible=v), inputs=[inp['inpaint']], outputs=[inpaint_options_row_0] ) - def background_removal_options_visibility(v): - return bgrem_options_row_1.update(visible=v), \ - bgrem_options_row_2.update(visible=v) - inp['background_removal'].change( - fn=background_removal_options_visibility, + fn=lambda v: (bgrem_options_row_1.update(visible=v), bgrem_options_row_2.update(visible=v)), inputs=[inp['background_removal']], outputs=[bgrem_options_row_1, bgrem_options_row_2] ) @@ -215,7 +203,7 @@ def background_removal_options_visibility(v): def on_ui_tabs(): inp = GradioComponentBundle() - with gr.Blocks(analytics_enabled=False) as depthmap_interface: + with gr.Blocks(analytics_enabled=False, title="DepthMap") as depthmap_interface: with gr.Row().style(equal_height=False): with gr.Column(variant='panel'): inp += 'depthmap_mode', gr.HTML(visible=False, value='0') @@ -382,7 +370,7 @@ 
def run_generate(*inputs): if depthmap_mode == '0': # Single image if depthmap_input_image is None: - return [], None, None, "Please select an input image!" + return [], None, None, "Please select an input image" inputimages.append(depthmap_input_image) inputnames.append(None) if custom_depthmap: @@ -394,7 +382,7 @@ def run_generate(*inputs): inputdepthmaps.append(None) if depthmap_mode == '1': # Batch Process if image_batch is None: - return [], None, None, "Please select input images!", "" + return [], None, None, "Please select input images", "" for img in image_batch: image = Image.open(os.path.abspath(img.name)) inputimages.append(image) From 970d365c41778fec70ab98c0657c0cdf218f7eaf Mon Sep 17 00:00:00 2001 From: semjon00 Date: Tue, 18 Jul 2023 20:56:58 +0300 Subject: [PATCH 05/10] Questionable design decisions Element hiding-unhiding how works in the standalone mode. --- src/common_ui.py | 67 ++++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/src/common_ui.py b/src/common_ui.py index 6c01534..6b45a3b 100644 --- a/src/common_ui.py +++ b/src/common_ui.py @@ -25,7 +25,7 @@ def main_ui_panel(is_depth_tab): 'zoedepth_n (indoor)', 'zoedepth_k (outdoor)', 'zoedepth_nk'], value='res101', type="index") - with gr.Group(): + with gr.Box(): with gr.Row(): inp += 'boost', gr.Checkbox(label="BOOST (multi-resolution merging)", value=True) inp += 'match_size', gr.Checkbox(label="Match net size to input size", value=False) @@ -33,9 +33,10 @@ def main_ui_panel(is_depth_tab): inp += 'net_width', gr.Slider(minimum=64, maximum=2048, step=64, label='Net width', value=448) inp += 'net_height', gr.Slider(minimum=64, maximum=2048, step=64, label='Net height', value=448) - with gr.Group(): + with gr.Box(): with gr.Row(): - inp += "save_outputs", gr.Checkbox(label="Save Outputs", value=True) # 50% of width + with gr.Group(): + inp += "save_outputs", gr.Checkbox(label="Save Outputs", value=True) # 50% of width with gr.Group(): 
# 50% of width inp += "output_depth", gr.Checkbox(label="Output DepthMap", value=True) inp += "invert_depth", gr.Checkbox(label="Invert (black=near, white=far)", value=False) @@ -44,22 +45,22 @@ def main_ui_panel(is_depth_tab): label="Combine input and depthmap into one image", value=False) inp += "combine_output_axis", gr.Radio(label="Combine axis", choices=['Vertical', 'Horizontal'], value='Horizontal', type="index", visible=False) - with gr.Group(): + with gr.Box(): with gr.Row(): inp += 'clipdepth', gr.Checkbox(label="Clip and renormalize DepthMap", value=False) with gr.Row(visible=False) as clip_options_row_1: inp += "clipthreshold_far", gr.Slider(minimum=0, maximum=1, step=0.001, label='Far clip', value=0) inp += "clipthreshold_near", gr.Slider(minimum=0, maximum=1, step=0.001, label='Near clip', value=1) - with gr.Group(): + with gr.Box(): with gr.Row(): inp += "show_heat", gr.Checkbox(label="Generate HeatMap", value=False) # gr.Checkbox(label="Generate NormalMap", value=False) # TODO: this is a fake door - with gr.Group(): + with gr.Box(): with gr.Row(): inp += "gen_stereo", gr.Checkbox(label="Generate stereoscopic image(s)", value=False) - with gr.Group(visible=False) as stereo_options: + with gr.Column(visible=False) as stereo_options: with gr.Row(): inp += "stereo_modes", gr.CheckboxGroup( ["left-right", "right-left", "top-bottom", "bottom-top", "red-cyan-anaglyph"], @@ -80,11 +81,11 @@ def main_ui_panel(is_depth_tab): label='Balance between eyes', value=0.0) - with gr.Group(): - with gr.Row(): + with gr.Box(): + with gr.Column(): inp += "gen_mesh", gr.Checkbox( label="Generate simple 3D mesh", value=False, visible=True) - with gr.Group(visible=False) as mesh_options: + with gr.Column(visible=False) as mesh_options: with gr.Row(): gr.HTML(value="Generates fast, accurate only with ZoeDepth models and no boost, no custom maps") with gr.Row(): @@ -92,29 +93,30 @@ def main_ui_panel(is_depth_tab): inp += "mesh_spherical", gr.Checkbox(label="Equirectangular 
projection", value=False, visible=True) if is_depth_tab: - with gr.Group(): - with gr.Row(): + with gr.Box(): + with gr.Column(): inp += "inpaint", gr.Checkbox( label="Generate 3D inpainted mesh", value=False) - with gr.Group(visible=False) as inpaint_options_row_0: + with gr.Column(visible=False) as inpaint_options_row_0: gr.HTML("Generation is sloooow, required for generating videos") inp += "inpaint_vids", gr.Checkbox( label="Generate 4 demo videos with 3D inpainted mesh.", value=False) gr.HTML("More options for generating video can be found in the Generate video tab") - with gr.Group(): + with gr.Box(): # TODO: it should be clear from the UI that there is an option of the background removal # that does not use the model selected above with gr.Row(): inp += "background_removal", gr.Checkbox(label="Remove background", value=False) - with gr.Row(visible=False) as bgrem_options_row_1: - inp += "save_background_removal_masks", gr.Checkbox(label="Save the foreground masks", value=False) - inp += "pre_depth_background_removal", gr.Checkbox(label="Pre-depth background removal", value=False) - with gr.Row(visible=False) as bgrem_options_row_2: - inp += "background_removal_model", gr.Dropdown(label="Rembg Model", - choices=['u2net', 'u2netp', 'u2net_human_seg', - 'silueta'], - value='u2net', type="value") + with gr.Column(visible=False) as bgrem_options: + with gr.Row(): + inp += "save_background_removal_masks", gr.Checkbox(label="Save the foreground masks", value=False) + inp += "pre_depth_background_removal", gr.Checkbox(label="Pre-depth background removal", value=False) + with gr.Row(): + inp += "background_removal_model", gr.Dropdown(label="Rembg Model", + choices=['u2net', 'u2netp', 'u2net_human_seg', + 'silueta'], + value='u2net', type="value") with gr.Box(): gr.HTML(f"{SCRIPT_FULL_NAME}
") @@ -194,9 +196,9 @@ def update_delault_net_size(model_type): ) inp['background_removal'].change( - fn=lambda v: (bgrem_options_row_1.update(visible=v), bgrem_options_row_2.update(visible=v)), + fn=lambda v: bgrem_options.update(visible=v), inputs=[inp['background_removal']], - outputs=[bgrem_options_row_1, bgrem_options_row_2] + outputs=[bgrem_options] ) return inp @@ -209,13 +211,13 @@ def on_ui_tabs(): inp += 'depthmap_mode', gr.HTML(visible=False, value='0') with gr.Tabs(): with gr.TabItem('Single Image') as depthmap_mode_0: - with gr.Row(): - inp += gr.Image(label="Source", source="upload", interactive=True, type="pil", - elem_id="depthmap_input_image") - with gr.Group(visible=False) as custom_depthmap_row_0: + with gr.Group(): + with gr.Row(): + inp += gr.Image(label="Source", source="upload", interactive=True, type="pil", + elem_id="depthmap_input_image") # TODO: depthmap generation settings should disappear when using this inp += gr.File(label="Custom DepthMap", file_count="single", interactive=True, - type="file", elem_id='custom_depthmap_img') + type="file", elem_id='custom_depthmap_img', visible=False) inp += gr.Checkbox(elem_id="custom_depthmap", label="Use custom DepthMap", value=False) with gr.TabItem('Batch Process') as depthmap_mode_1: inp += gr.File(elem_id='image_batch', label="Batch Process", file_count="multiple", @@ -291,13 +293,10 @@ def on_ui_tabs(): depthmap_mode_1.select(lambda: '1', None, inp['depthmap_mode']) depthmap_mode_2.select(lambda: '2', None, inp['depthmap_mode']) - def custom_depthmap_visibility(v): - return custom_depthmap_row_0.update(visible=v) - inp['custom_depthmap'].change( - fn=custom_depthmap_visibility, + fn=lambda v: inp['custom_depthmap_img'].update(visible=v), inputs=[inp['custom_depthmap']], - outputs=[custom_depthmap_row_0] + outputs=[inp['custom_depthmap_img']] ) unloadmodels.click( From 35f77f6a981ff055b4945582d48ef838a89c4ec8 Mon Sep 17 00:00:00 2001 From: semjon00 Date: Tue, 18 Jul 2023 21:00:25 +0300 Subject: 
[PATCH 06/10] Rename main to misc to avoid confusion --- main.py | 3 +++ scripts/depthmap.py | 2 +- src/backbone.py | 8 +++++--- src/common_ui.py | 2 +- src/core.py | 2 +- src/depthmap_generation.py | 2 +- src/{main.py => misc.py} | 0 7 files changed, 12 insertions(+), 7 deletions(-) rename src/{main.py => misc.py} (100%) diff --git a/main.py b/main.py index 8e72043..8421011 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,7 @@ # This launches DepthMap without the AUTOMATIC1111/stable-diffusion-webui +# If DepthMap is installed as an extension, +# you may want to change the working directory to the stable-diffusion-webui root. + import argparse import src.common_ui diff --git a/scripts/depthmap.py b/scripts/depthmap.py index 2a81fd7..82284b5 100644 --- a/scripts/depthmap.py +++ b/scripts/depthmap.py @@ -7,7 +7,7 @@ from src import common_ui from src.core import core_generation_funnel from src.gradio_args_transport import GradioComponentBundle -from src.main import * +from src.misc import * # Ugly workaround to fix gradio tempfile issue diff --git a/src/backbone.py b/src/backbone.py index 44fc3fd..8be1265 100644 --- a/src/backbone.py +++ b/src/backbone.py @@ -1,5 +1,7 @@ -# This file contains stable-duiffusion-webui stuff that the plugin relies on. -# Eventually, when we have a standalone interface, this will load either standalone backbone or webui backbone. +# DepthMap can be run inside stable-duiffusion-webui, but also separately. +# All the stable-duiffusion-webui stuff that the DepthMap relies on +# must be resided in this file (or in the scripts folder). 
+ try: # stable-duiffusion-webui backbone from modules.images import save_image # Should fail if not on stable-duiffusion-webui @@ -93,4 +95,4 @@ def unload_sd_model(): pass # Not needed def reload_sd_model(): pass # Not needed - def get_hide_dirs(): return {} # Directories will not be hidden from traversal + def get_hide_dirs(): return {} # Directories will not be hidden from traversal (except when starts with the dot) diff --git a/src/common_ui.py b/src/common_ui.py index 6b45a3b..bb4b3c0 100644 --- a/src/common_ui.py +++ b/src/common_ui.py @@ -7,7 +7,7 @@ from src.core import core_generation_funnel, unload_models, run_makevideo from src.depthmap_generation import ModelHolder from src.gradio_args_transport import GradioComponentBundle -from src.main import * +from src.misc import * def main_ui_panel(is_depth_tab): diff --git a/src/core.py b/src/core.py index f9d65db..142ac37 100644 --- a/src/core.py +++ b/src/core.py @@ -16,7 +16,7 @@ import traceback # Our code -from src.main import * +from src.misc import * from src.stereoimage_generation import create_stereoimages from src.depthmap_generation import ModelHolder from src import backbone diff --git a/src/depthmap_generation.py b/src/depthmap_generation.py index ada3cf4..5ecc02b 100644 --- a/src/depthmap_generation.py +++ b/src/depthmap_generation.py @@ -24,7 +24,7 @@ from pix2pix.options.test_options import TestOptions # Our code -from src.main import * +from src.misc import * from src import backbone global depthmap_device diff --git a/src/main.py b/src/misc.py similarity index 100% rename from src/main.py rename to src/misc.py From f5cff471974c26c63a6076573a214ac7f64c440b Mon Sep 17 00:00:00 2001 From: semjon00 Date: Tue, 18 Jul 2023 21:14:45 +0300 Subject: [PATCH 07/10] Bump version Also do not needlessly unload models in standalone mode. 
--- README.md | 2 ++ scripts/depthmap.py | 14 -------------- src/common_ui.py | 14 ++++++++++++++ src/core.py | 2 +- src/misc.py | 2 +- 5 files changed, 18 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index ccb66cc..1ed85b9 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,8 @@ video by [@graemeniedermayer](https://github.com/graemeniedermayer), more exampl images generated by [@semjon00](https://github.com/semjon00) from CC0 photos, more examples [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/56#issuecomment-1367596463). ## Changelog +* v0.4.1 standalone mode + * ability to run DepthMap without WebUI (Use main.py. Make sure all the dependencies are installed. The support is not feature-complete.) * v0.4.0 large code refactor * UI improvements * improved Batch from Directory, Clip and renormalize DepthMap diff --git a/scripts/depthmap.py b/scripts/depthmap.py index 82284b5..868add5 100644 --- a/scripts/depthmap.py +++ b/scripts/depthmap.py @@ -10,20 +10,6 @@ from src.misc import * -# Ugly workaround to fix gradio tempfile issue -def ensure_gradio_temp_directory(): - try: - import tempfile - path = os.path.join(tempfile.gettempdir(), 'gradio') - if not (os.path.exists(path)): - os.mkdir(path) - except Exception as e: - traceback.print_exc() - - -ensure_gradio_temp_directory() - - class Script(scripts.Script): def title(self): return SCRIPT_NAME diff --git a/src/common_ui.py b/src/common_ui.py index bb4b3c0..f37c73c 100644 --- a/src/common_ui.py +++ b/src/common_ui.py @@ -10,6 +10,20 @@ from src.misc import * +# Ugly workaround to fix gradio tempfile issue +def ensure_gradio_temp_directory(): + try: + import tempfile + path = os.path.join(tempfile.gettempdir(), 'gradio') + if not (os.path.exists(path)): + os.mkdir(path) + except Exception as e: + traceback.print_exc() + + +ensure_gradio_temp_directory() + + def main_ui_panel(is_depth_tab): inp = GradioComponentBundle() # TODO: Greater visual separation diff --git 
a/src/core.py b/src/core.py index 142ac37..32a81b1 100644 --- a/src/core.py +++ b/src/core.py @@ -294,7 +294,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp else: raise e finally: - if backbone.get_opt('depthmap_script_keepmodels', False): + if backbone.get_opt('depthmap_script_keepmodels', True): model_holder.offload() # Swap to CPU memory else: if 'model' in locals(): diff --git a/src/misc.py b/src/misc.py index d3fed1d..f3d2bfd 100644 --- a/src/misc.py +++ b/src/misc.py @@ -16,7 +16,7 @@ def get_commit_hash(): SCRIPT_NAME = "DepthMap" -SCRIPT_VERSION = "v0.4.0" +SCRIPT_VERSION = "v0.4.1" SCRIPT_FULL_NAME = f"{SCRIPT_NAME} {SCRIPT_VERSION} ({get_commit_hash()})" From 7c44b702d0a4638c023d7a46667c42fb54c7b816 Mon Sep 17 00:00:00 2001 From: semjon00 Date: Wed, 19 Jul 2023 11:14:41 +0300 Subject: [PATCH 08/10] Add folder button, cmd_opts bugfix --- src/backbone.py | 12 +++++++++++- src/common_ui.py | 26 +++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/backbone.py b/src/backbone.py index 8be1265..202cf47 100644 --- a/src/backbone.py +++ b/src/backbone.py @@ -12,13 +12,19 @@ def get_opt(name, default): from modules.shared import opts - if hasattr(opts, name): return opts.__getattr__(name) return default + def get_cmd_opt(name, default): + """Get command line argument""" + from modules.shared import cmd_opts + if hasattr(cmd_opts, name): + return cmd_opts.__getattribute__(name) + return default def gather_ops(): + """Parameters for depthmap generation""" from modules.shared import cmd_opts ops = {} if get_opt('depthmap_script_boost_rmax', None) is not None: @@ -29,6 +35,7 @@ def gather_ops(): def get_outpath(): + """Get path where results are saved by default""" path = get_opt('outdir_samples', None) if path is None or len(path) == 0: path = get_opt('outdir_extras_samples', None) @@ -87,6 +94,9 @@ def listfiles(dirname): def get_opt(name, default): return default # Configuring is not 
supported + + def get_cmd_opt(name, default): return default # Configuring is not supported + def gather_ops(): return {} # Configuring is not supported def get_outpath(): return '.' diff --git a/src/common_ui.py b/src/common_ui.py index f37c73c..68583d3 100644 --- a/src/common_ui.py +++ b/src/common_ui.py @@ -217,6 +217,25 @@ def update_delault_net_size(model_type): return inp +def open_folder_action(): + # Adapted from stable-diffusion-webui + f = backbone.get_outpath() + if backbone.get_cmd_opt('hide_ui_dir_config', False): + return + if not os.path.exists(f) or not os.path.isdir(f): + raise "Couldn't open output folder" # .isdir is security-related, do not remove! + import platform + import subprocess as sp + path = os.path.normpath(f) + if platform.system() == "Windows": + os.startfile(path) + elif platform.system() == "Darwin": + sp.Popen(["open", path]) + elif "microsoft-standard-WSL2" in platform.uname().release: + sp.Popen(["wsl-open", path]) + else: + sp.Popen(["xdg-open", path]) + def on_ui_tabs(): inp = GradioComponentBundle() with gr.Blocks(analytics_enabled=False, title="DepthMap") as depthmap_interface: @@ -260,6 +279,10 @@ def on_ui_tabs(): elem_id=f"depthmap_gallery").style(grid=4) with gr.Column(): html_info = gr.HTML() + folder_symbol = '\U0001f4c2' # 📂 + gr.Button(folder_symbol, visible=not backbone.get_cmd_opt('hide_ui_dir_config', False)).click( + fn=lambda: open_folder_action(), inputs=[], outputs=[], + ) with gr.TabItem('3D Mesh'): with gr.Group(): @@ -301,6 +324,7 @@ def on_ui_tabs(): submit_vid = gr.Button('Generate Video', elem_id="depthmap_generatevideo", variant='primary') + inp += inp.enkey_tail() depthmap_mode_0.select(lambda: '0', None, inp['depthmap_mode']) @@ -401,7 +425,7 @@ def run_generate(*inputs): inputimages.append(image) inputnames.append(os.path.splitext(img.orig_name)[0]) elif depthmap_mode == '2': # Batch from Directory - assert not backbone.get_opt('hide_ui_dir_config', False), '--hide-ui-dir-config option must be 
disabled' + assert not backbone.get_cmd_opt('hide_ui_dir_config', False), '--hide-ui-dir-config option must be disabled' if depthmap_batch_input_dir == '': return [], None, None, "Please select an input directory." if depthmap_batch_input_dir == depthmap_batch_output_dir: From 13023f197ee5c9df05a422e7ff4705c5c6e97d35 Mon Sep 17 00:00:00 2001 From: semjon00 Date: Wed, 19 Jul 2023 22:21:43 +0300 Subject: [PATCH 09/10] Bugfix: standalone mesh generation --- src/backbone.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/backbone.py b/src/backbone.py index 202cf47..bc1606e 100644 --- a/src/backbone.py +++ b/src/backbone.py @@ -63,7 +63,7 @@ def get_hide_dirs(): # Standalone backbone print("DepthMap did not detect stable-duiffusion-webui; launching with the standalone backbone.\n" "The standalone backbone is not on par with the stable-duiffusion-webui backbone.\n" - "Some features may be missing or work differently.\n") + "Some features may be missing or work differently. Please report bugs.\n") def save_image(image, path, basename, **kwargs): import os @@ -79,11 +79,11 @@ def torch_gc(): torch.cuda.empty_cache() torch.cuda.ipc_collect() - def get_next_sequence_number(): + def get_next_sequence_number(outpath=None, basename=None): # Don't really care what the number will be... As long as it is unique. 
from datetime import datetime, timezone import random - return f"{int(datetime.now(timezone.utc).timestamp())}-{random.randint(1000,9999)}" + return int(f"{int(datetime.now(timezone.utc).timestamp())}{random.randint(1000,9999)}") def wrap_gradio_gpu_call(f): return f # Displaying various stats is not supported From 1d217ce55c8e4a659be4f0ec805a05b1db421f1a Mon Sep 17 00:00:00 2001 From: semjon00 Date: Thu, 20 Jul 2023 11:44:17 +0300 Subject: [PATCH 10/10] Standalone mode improvements * Make standalone mode more self-aware if installed as webui extension * Fix commit retrieval for standalone mode if not installed as webui extension * Improved output saving --- main.py | 28 +++++++++++++++++++++++++++- src/backbone.py | 30 ++++++++++++++++++++---------- src/depthmap_generation.py | 1 + src/misc.py | 23 ++++++++++++++++------- 4 files changed, 64 insertions(+), 18 deletions(-) diff --git a/main.py b/main.py index 8421011..fc7cf68 100644 --- a/main.py +++ b/main.py @@ -3,11 +3,37 @@ # you may want to change the working directory to the stable-diffusion-webui root. import argparse -import src.common_ui +import os +import pathlib +import builtins + +import src.misc + +def maybe_chdir(): + """Detects if DepthMap was installed as a stable-diffusion-webui script, but run without current directory set to + the stable-diffusion-webui root. 
Changes current directory if needed, to avoid clutter.""" + try: + file_path = pathlib.Path(__file__) + path = file_path.parts + while len(path) > 0 and path[-1] != src.misc.REPOSITORY_NAME: + path = path[:-1] + if len(path) >= 2 and path[-1] == src.misc.REPOSITORY_NAME and path[-2] == "extensions": + path = path[:-2] + listdir = os.listdir(str(pathlib.Path(*path))) + if 'launch.py' in listdir and 'webui.py': + os.chdir(str(pathlib.Path(**path))) + except: + pass + if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("--listen", help="Create public link") + parser.add_argument("--no_chdir", help="Do not try to use the root of stable-diffusion-webui") args = parser.parse_args() + print(f"{src.misc.SCRIPT_FULL_NAME} running in standalone mode!") + import src.common_ui + if not args.no_chdir: + maybe_chdir() src.common_ui.on_ui_tabs().launch(share=args.listen) diff --git a/src/backbone.py b/src/backbone.py index bc1606e..0829ce0 100644 --- a/src/backbone.py +++ b/src/backbone.py @@ -1,6 +1,8 @@ # DepthMap can be run inside stable-duiffusion-webui, but also separately. # All the stable-duiffusion-webui stuff that the DepthMap relies on # must be resided in this file (or in the scripts folder). +import pathlib +from datetime import datetime try: # stable-duiffusion-webui backbone @@ -61,15 +63,21 @@ def get_hide_dirs(): return modules.shared.hide_dirs except: # Standalone backbone - print("DepthMap did not detect stable-duiffusion-webui; launching with the standalone backbone.\n" - "The standalone backbone is not on par with the stable-duiffusion-webui backbone.\n" - "Some features may be missing or work differently. Please report bugs.\n") + print( # " DepthMap did not detect stable-duiffusion-webui; launching with the standalone backbone.\n" + " The standalone mode is not on par with the stable-duiffusion-webui mode.\n" + " Some features may be missing or work differently. 
Please report bugs.\n") def save_image(image, path, basename, **kwargs): import os os.makedirs(path, exist_ok=True) - fullfn = os.path.join(path, f"{get_next_sequence_number()}-{basename}.{kwargs['extension']}") - image.save(fullfn, format=get_opt('samples_format', 'png')) + if 'suffix' not in kwargs or len(kwargs['suffix']) == 0: + kwargs['suffix'] = '' + else: + kwargs['suffix'] = f"-{kwargs['suffix']}" + format = get_opt('samples_format', kwargs['extension']) + fullfn = os.path.join( + path, f"{basename}-{get_next_sequence_number(path, basename)}{kwargs['suffix']}.{format}") + image.save(fullfn, format=format) def torch_gc(): # TODO: is this really sufficient? @@ -79,11 +87,13 @@ def torch_gc(): torch.cuda.empty_cache() torch.cuda.ipc_collect() + launched_at = int(datetime.now().timestamp()) + backbone_current_seq_number = 0 + def get_next_sequence_number(outpath=None, basename=None): - # Don't really care what the number will be... As long as it is unique. - from datetime import datetime, timezone - import random - return int(f"{int(datetime.now(timezone.utc).timestamp())}{random.randint(1000,9999)}") + global backbone_current_seq_number + backbone_current_seq_number += 1 + return int(f"{launched_at}{backbone_current_seq_number:04}") def wrap_gradio_gpu_call(f): return f # Displaying various stats is not supported @@ -99,7 +109,7 @@ def get_cmd_opt(name, default): return default # Configuring is not supported def gather_ops(): return {} # Configuring is not supported - def get_outpath(): return '.' 
+ def get_outpath(): return str(pathlib.Path('.', 'outputs')) def unload_sd_model(): pass # Not needed diff --git a/src/depthmap_generation.py b/src/depthmap_generation.py index 5ecc02b..0ea4a37 100644 --- a/src/depthmap_generation.py +++ b/src/depthmap_generation.py @@ -6,6 +6,7 @@ import numpy as np import skimage.measure from PIL import Image +import torch from torchvision.transforms import Compose, transforms # midas imports diff --git a/src/misc.py b/src/misc.py index f3d2bfd..875211f 100644 --- a/src/misc.py +++ b/src/misc.py @@ -1,27 +1,36 @@ import subprocess import os import pathlib -import torch +import builtins def get_commit_hash(): - try: + def call_git(dir): return subprocess.check_output( [os.environ.get("GIT", "git"), "rev-parse", "HEAD"], - cwd=pathlib.Path.cwd().joinpath('extensions/stable-diffusion-webui-depthmap-script/'), - shell=False, - stderr=subprocess.DEVNULL, - encoding='utf8').strip()[0:8] + cwd=dir, shell=False, stderr=subprocess.DEVNULL, encoding='utf8').strip()[0:8] + + try: + file_path = pathlib.Path(__file__) + path = file_path.parts + while len(path) > 0 and path[-1] != REPOSITORY_NAME: + path = path[:-1] + if len(path) >= 2 and path[-1] == REPOSITORY_NAME and path[-2] == "extensions": + return call_git(str(pathlib.Path(*path))) + + return call_git(pathlib.Path.cwd().joinpath('extensions/stable-diffusion-webui-depthmap-script/')) except Exception: return "" +REPOSITORY_NAME = "stable-diffusion-webui-depthmap-script" SCRIPT_NAME = "DepthMap" SCRIPT_VERSION = "v0.4.1" SCRIPT_FULL_NAME = f"{SCRIPT_NAME} {SCRIPT_VERSION} ({get_commit_hash()})" def ensure_file_downloaded(filename, url, sha256_hash_prefix=None): - # Do not check the hash every time - it is somewhat time-consuming + import torch + # Do not check the hash every time - it is somewhat time-consuming if os.path.exists(filename): return