diff --git a/README.md b/README.md index ccb66cc..1ed85b9 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,8 @@ video by [@graemeniedermayer](https://github.com/graemeniedermayer), more exampl images generated by [@semjon00](https://github.com/semjon00) from CC0 photos, more examples [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/56#issuecomment-1367596463). ## Changelog +* v0.4.1 standalone mode + * ability to run DepthMap without WebUI (Use main.py. Make sure all the dependencies are installed. The support is not feature-complete.) * v0.4.0 large code refactor * UI improvements * improved Batch from Directory, Clip and renormalize DepthMap diff --git a/main.py b/main.py new file mode 100644 index 0000000..fc7cf68 --- /dev/null +++ b/main.py @@ -0,0 +1,39 @@ +# This launches DepthMap without the AUTOMATIC1111/stable-diffusion-webui +# If DepthMap is installed as an extension, +# you may want to change the working directory to the stable-diffusion-webui root. + +import argparse +import os +import pathlib +import builtins + +import src.misc + +def maybe_chdir(): + """Detects if DepthMap was installed as a stable-diffusion-webui script, but run without current directory set to + the stable-diffusion-webui root. 
Changes current directory if needed, to avoid clutter.""" + try: + file_path = pathlib.Path(__file__) + path = file_path.parts + while len(path) > 0 and path[-1] != src.misc.REPOSITORY_NAME: + path = path[:-1] + if len(path) >= 2 and path[-1] == src.misc.REPOSITORY_NAME and path[-2] == "extensions": + path = path[:-2] + listdir = os.listdir(str(pathlib.Path(*path))) + if 'launch.py' in listdir and 'webui.py' in listdir: + os.chdir(str(pathlib.Path(*path))) + except: + pass + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--listen", action="store_true", help="Create public link") + parser.add_argument("--no_chdir", action="store_true", help="Do not try to use the root of stable-diffusion-webui") + args = parser.parse_args() + + print(f"{src.misc.SCRIPT_FULL_NAME} running in standalone mode!") + import src.common_ui + if not args.no_chdir: + maybe_chdir() + src.common_ui.on_ui_tabs().launch(share=args.listen) diff --git a/scripts/depthmap.py b/scripts/depthmap.py index a0e85a1..868add5 100644 --- a/scripts/depthmap.py +++ b/scripts/depthmap.py @@ -1,233 +1,13 @@ -import gradio as gr import traceback +import gradio as gr +from modules import shared import modules.scripts as scripts -from modules import processing, images, shared -from modules import script_callbacks -from modules.call_queue import wrap_gradio_gpu_call -from modules.processing import create_infotext -from modules.shared import opts -from modules.ui import plaintext_to_html -from pathlib import Path -from PIL import Image +from src import backbone +from src import common_ui +from src.core import core_generation_funnel +from src.gradio_args_transport import GradioComponentBundle -from src.main import * -from src.core import core_generation_funnel, unload_models, run_makevideo -from src.depthmap_generation import ModelHolder - - -# Ugly workaround to fix gradio tempfile issue -def ensure_gradio_temp_directory(): - try: - import tempfile - path = os.path.join(tempfile.gettempdir(), 'gradio') - if not
(os.path.exists(path)): - os.mkdir(path) - except Exception as e: - traceback.print_exc() -ensure_gradio_temp_directory() - - -def main_ui_panel(is_depth_tab): - inp = GradioComponentBundle() - # TODO: Greater visual separation - with gr.Blocks(): - with gr.Row(): - inp += 'compute_device', gr.Radio(label="Compute on", choices=['GPU', 'CPU'], value='GPU') - # TODO: Should return value instead of index. Maybe Enum should be used? - inp += 'model_type', gr.Dropdown(label="Model", - choices=['res101', 'dpt_beit_large_512 (midas 3.1)', - 'dpt_beit_large_384 (midas 3.1)', 'dpt_large_384 (midas 3.0)', - 'dpt_hybrid_384 (midas 3.0)', - 'midas_v21', 'midas_v21_small', - 'zoedepth_n (indoor)', 'zoedepth_k (outdoor)', 'zoedepth_nk'], - value='res101', - type="index") - with gr.Group(): - with gr.Row(): - inp += 'boost', gr.Checkbox(label="BOOST (multi-resolution merging)", value=True) - with gr.Group(visible=False) as options_depend_on_boost: - inp += 'match_size', gr.Checkbox(label="Match net size to input size", value=False) - with gr.Row(visible=False) as options_depend_on_match_size: - inp += 'net_width', gr.Slider(minimum=64, maximum=2048, step=64, label='Net width', value=448) - inp += 'net_height', gr.Slider(minimum=64, maximum=2048, step=64, label='Net height', value=448) - - with gr.Group(): - with gr.Row(): - inp += "save_outputs", gr.Checkbox(label="Save Outputs", value=True) # 50% of width - with gr.Group(): # 50% of width - inp += "output_depth", gr.Checkbox(label="Output DepthMap", value=True) - inp += "invert_depth", gr.Checkbox(label="Invert (black=near, white=far)", value=False) - with gr.Row() as options_depend_on_output_depth_1: - inp += "combine_output", gr.Checkbox( - label="Combine input and depthmap into one image", value=False) - inp += "combine_output_axis", gr.Radio(label="Combine axis", choices=['Vertical', 'Horizontal'], - value='Horizontal', type="index", visible=False) - with gr.Group(): - with gr.Row(): - inp += 'clipdepth', 
gr.Checkbox(label="Clip and renormalize DepthMap", value=False) - with gr.Row(visible=False) as clip_options_row_1: - inp += "clipthreshold_far", gr.Slider(minimum=0, maximum=1, step=0.001, label='Far clip', value=0) - inp += "clipthreshold_near", gr.Slider(minimum=0, maximum=1, step=0.001, label='Near clip', value=1) - - with gr.Group(): - with gr.Row(): - inp += "show_heat", gr.Checkbox(label="Generate HeatMap", value=False) - # gr.Checkbox(label="Generate NormalMap", value=False) # TODO: this is a fake door - - with gr.Group(): - with gr.Row(): - inp += "gen_stereo", gr.Checkbox(label="Generate stereoscopic image(s)", value=False) - with gr.Group(visible=False) as stereo_options: - with gr.Row(): - with gr.Row(): - inp += "stereo_modes", gr.CheckboxGroup( - ["left-right", "right-left", "top-bottom", "bottom-top", "red-cyan-anaglyph"], - label="Output", value=["left-right", "red-cyan-anaglyph"]) - with gr.Row(): - inp += "stereo_divergence", gr.Slider(minimum=0.05, maximum=10.005, step=0.01, - label='Divergence (3D effect)', - value=2.5) - inp += "stereo_separation", gr.Slider(minimum=-5.0, maximum=5.0, step=0.01, - label='Separation (moves images apart)', - value=0.0) - with gr.Row(): - inp += "stereo_fill", gr.Dropdown(label="Gap fill technique", - choices=['none', 'naive', 'naive_interpolating', 'polylines_soft', - 'polylines_sharp'], value='polylines_sharp', - type="value") - inp += "stereo_balance", gr.Slider(minimum=-1.0, maximum=1.0, step=0.05, - label='Balance between eyes', - value=0.0) - - with gr.Group(): - with gr.Row(): - inp += "gen_mesh", gr.Checkbox( - label="Generate simple 3D mesh", value=False, visible=True) - with gr.Group(visible=False) as mesh_options: - with gr.Row(): - gr.HTML(value="Generates fast, accurate only with ZoeDepth models and no boost, no custom maps") - with gr.Row(): - inp += "mesh_occlude", gr.Checkbox(label="Remove occluded edges", value=True, visible=True) - inp += "mesh_spherical", gr.Checkbox(label="Equirectangular 
projection", value=False, visible=True) - - if is_depth_tab: - with gr.Group(): - with gr.Row(): - inp += "inpaint", gr.Checkbox( - label="Generate 3D inpainted mesh", value=False) - with gr.Group(visible=False) as inpaint_options_row_0: - gr.HTML("Generation is sloooow, required for generating videos") - inp += "inpaint_vids", gr.Checkbox( - label="Generate 4 demo videos with 3D inpainted mesh.", value=False) - gr.HTML("More options for generating video can be found in the Generate video tab") - - with gr.Group(): - # TODO: it should be clear from the UI that the background removal does not use the model selected above - with gr.Row(): - inp += "background_removal", gr.Checkbox(label="Remove background", value=False) - with gr.Row(visible=False) as bgrem_options_row_1: - inp += "save_background_removal_masks", gr.Checkbox(label="Save the foreground masks", value=False) - inp += "pre_depth_background_removal", gr.Checkbox(label="Pre-depth background removal", value=False) - with gr.Row(visible=False) as bgrem_options_row_2: - inp += "background_removal_model", gr.Dropdown(label="Rembg Model", - choices=['u2net', 'u2netp', 'u2net_human_seg', - 'silueta'], - value='u2net', type="value") - - with gr.Box(): - gr.HTML(f"{SCRIPT_FULL_NAME}
") - gr.HTML("Information, comment and share @ " - "https://github.com/thygate/stable-diffusion-webui-depthmap-script") - - inp += "gen_normal", gr.Checkbox(label="Generate Normalmap (hidden! api only)", value=False, visible=False) - - def update_delault_net_size(model_type): - w, h = ModelHolder.get_default_net_size(model_type) - return inp['net_width'].update(value=w), inp['net_height'].update(value=h) - inp['model_type'].change( - fn=update_delault_net_size, - inputs=inp['model_type'], - outputs=[inp['net_width'], inp['net_height']] - ) - - inp['boost'].change( - fn=lambda a, b: (options_depend_on_boost.update(visible=not a), - options_depend_on_match_size.update(visible=not a and not b)), - inputs=[inp['boost'], inp['match_size']], - outputs=[options_depend_on_boost, options_depend_on_match_size] - ) - inp['match_size'].change( - fn=lambda a, b: options_depend_on_match_size.update(visible=not a and not b), - inputs=[inp['boost'], inp['match_size']], - outputs=[options_depend_on_match_size] - ) - - inp['output_depth'].change( - fn=lambda a: (inp['invert_depth'].update(visible=a), options_depend_on_output_depth_1.update(visible=a)), - inputs=[inp['output_depth']], - outputs=[inp['invert_depth'], options_depend_on_output_depth_1] - ) - - inp['combine_output'].change( - fn=lambda v: inp['combine_output_axis'].update(visible=v), - inputs=[inp['combine_output']], - outputs=[inp['combine_output_axis']] - ) - - inp['clipdepth'].change( - fn=lambda v: clip_options_row_1.update(visible=v), - inputs=[inp['clipdepth']], - outputs=[clip_options_row_1] - ) - inp['clipthreshold_far'].change( - fn=lambda a, b: a if b < a else b, - inputs=[inp['clipthreshold_far'], inp['clipthreshold_near']], - outputs=[inp['clipthreshold_near']] - ) - inp['clipthreshold_near'].change( - fn=lambda a, b: a if b > a else b, - inputs=[inp['clipthreshold_near'], inp['clipthreshold_far']], - outputs=[inp['clipthreshold_far']] - ) - - def stereo_options_visibility(v): - return 
stereo_options.update(visible=v) - - inp['gen_stereo'].change( - fn=stereo_options_visibility, - inputs=[inp['gen_stereo']], - outputs=[stereo_options] - ) - - inp['gen_mesh'].change( - fn=lambda v: mesh_options.update(visible=v), - inputs=[inp['gen_mesh']], - outputs=[mesh_options] - ) - - def inpaint_options_visibility(v): - return inpaint_options_row_0.update(visible=v) - - if is_depth_tab: - inp['inpaint'].change( - fn=inpaint_options_visibility, - inputs=[inp['inpaint']], - outputs=[inpaint_options_row_0] - ) - - def background_removal_options_visibility(v): - return bgrem_options_row_1.update(visible=v), \ - bgrem_options_row_2.update(visible=v) - - inp['background_removal'].change( - fn=background_removal_options_visibility, - inputs=[inp['background_removal']], - outputs=[bgrem_options_row_1, bgrem_options_row_2] - ) - - return inp +from src.misc import * class Script(scripts.Script): @@ -241,12 +21,15 @@ def ui(self, is_img2img): gr.HTML() # Work around a Gradio bug with gr.Column(variant='panel'): gr.HTML() # Work around a Gradio bug - ret = main_ui_panel(False) + ret = common_ui.main_ui_panel(False) ret += ret.enkey_tail() return ret.enkey_body() # run from script in txt2img or img2img def run(self, p, *inputs): + from modules import processing + from modules.processing import create_infotext + inputs = GradioComponentBundle.enkey_to_dict(inputs) # sd process @@ -256,15 +39,15 @@ def run(self, p, *inputs): inputimages = [] for count in range(0, len(processed.images)): # skip first grid image - if count == 0 and len(processed.images) > 1 and opts.return_grid: + if count == 0 and len(processed.images) > 1 and shared.opts.return_grid: continue inputimages.append(processed.images[count]) - outputs, mesh_fi, meshsimple_fi = core_generation_funnel(p.outpath_samples, inputimages, None, None, inputs) + outputs, mesh_fi, meshsimple_fi = core_generation_funnel(p.outpath_samples, inputimages, None, None, inputs, backbone.gather_ops()) for input_i, imgs in 
enumerate(outputs): # get generation parameters - if hasattr(processed, 'all_prompts') and opts.enable_pnginfo: + if hasattr(processed, 'all_prompts') and shared.opts.enable_pnginfo: info = create_infotext(processed, processed.all_prompts, processed.all_seeds, processed.all_subseeds, "", 0, input_i) else: @@ -273,12 +56,12 @@ def run(self, p, *inputs): processed.images.append(image) if inputs["save_outputs"]: try: - suffix = "" if image_type == "depth" else f"_{image_type}" - images.save_image(image, path=p.outpath_samples, basename="", seed=processed.all_seeds[input_i], - prompt=processed.all_prompts[input_i], extension=opts.samples_format, - info=info, - p=processed, - suffix=suffix) + suffix = "" if image_type == "depth" else f"_{image_type}" + backbone.save_image(image, path=p.outpath_samples, basename="", seed=processed.all_seeds[input_i], + prompt=processed.all_prompts[input_i], extension=shared.opts.samples_format, + info=info, + p=processed, + suffix=suffix) except Exception as e: if not ('image has wrong mode' in str(e) or 'I;16' in str(e)): raise e @@ -310,261 +93,6 @@ def on_ui_settings(): section=section)) -def on_ui_tabs(): - inp = GradioComponentBundle() - with gr.Blocks(analytics_enabled=False) as depthmap_interface: - with gr.Row().style(equal_height=False): - with gr.Column(variant='panel'): - inp += 'depthmap_mode', gr.HTML(visible=False, value='0') - with gr.Tabs(): - with gr.TabItem('Single Image') as depthmap_mode_0: - with gr.Row(): - inp += gr.Image(label="Source", source="upload", interactive=True, type="pil", - elem_id="depthmap_input_image") - with gr.Group(visible=False) as custom_depthmap_row_0: - # TODO: depthmap generation settings should disappear when using this - inp += gr.File(label="Custom DepthMap", file_count="single", interactive=True, - type="file", elem_id='custom_depthmap_img') - inp += gr.Checkbox(elem_id="custom_depthmap", label="Use custom DepthMap", value=False) - with gr.TabItem('Batch Process') as depthmap_mode_1: - inp
+= gr.File(elem_id='image_batch', label="Batch Process", file_count="multiple", - interactive=True, type="file") - with gr.TabItem('Batch from Directory') as depthmap_mode_2: - inp += gr.Textbox(elem_id="depthmap_batch_input_dir", label="Input directory", - **shared.hide_dirs, - placeholder="A directory on the same machine where the server is running.") - inp += gr.Textbox(elem_id="depthmap_batch_output_dir", label="Output directory", - **shared.hide_dirs, - placeholder="Leave blank to save images to the default path.") - gr.HTML("Files in the output directory may be overwritten.") - inp += gr.Checkbox(elem_id="depthmap_batch_reuse", - label="Skip generation and use (edited/custom) depthmaps " - "in output directory when a file already exists.", - value=True) - submit = gr.Button('Generate', elem_id="depthmap_generate", variant='primary') - inp += main_ui_panel(True) # Main panel is inserted here - unloadmodels = gr.Button('Unload models', elem_id="depthmap_unloadmodels") - - with gr.Column(variant='panel'): - with gr.Tabs(elem_id="mode_depthmap_output"): - with gr.TabItem('Depth Output'): - with gr.Group(): - result_images = gr.Gallery(label='Output', show_label=False, - elem_id=f"depthmap_gallery").style(grid=4) - with gr.Column(): - html_info_x = gr.HTML() - html_info = gr.HTML() - - with gr.TabItem('3D Mesh'): - with gr.Group(): - result_depthmesh = gr.Model3D(label="3d Mesh", clear_color=[1.0, 1.0, 1.0, 1.0]) - with gr.Row(): - # loadmesh = gr.Button('Load') - clearmesh = gr.Button('Clear') - - with gr.TabItem('Generate video'): - # generate video - with gr.Group(): - with gr.Row(): - gr.Markdown("Generate video from inpainted(!) 
mesh.") - with gr.Row(): - depth_vid = gr.Video(interactive=False) - with gr.Column(): - vid_html_info_x = gr.HTML() - vid_html_info = gr.HTML() - fn_mesh = gr.Textbox(label="Input Mesh (.ply | .obj)", **shared.hide_dirs, - placeholder="A file on the same machine where " - "the server is running.") - with gr.Row(): - vid_numframes = gr.Textbox(label="Number of frames", value="300") - vid_fps = gr.Textbox(label="Framerate", value="40") - vid_format = gr.Dropdown(label="Format", choices=['mp4', 'webm'], value='mp4', - type="value", elem_id="video_format") - vid_ssaa = gr.Dropdown(label="SSAA", choices=['1', '2', '3', '4'], value='3', - type="value", elem_id="video_ssaa") - with gr.Row(): - vid_traj = gr.Dropdown(label="Trajectory", - choices=['straight-line', 'double-straight-line', 'circle'], - value='double-straight-line', type="index", - elem_id="video_trajectory") - vid_shift = gr.Textbox(label="Translate: x, y, z", value="-0.015, 0.0, -0.05") - vid_border = gr.Textbox(label="Crop: top, left, bottom, right", - value="0.03, 0.03, 0.05, 0.03") - vid_dolly = gr.Checkbox(label="Dolly", value=False, elem_classes="smalltxt") - with gr.Row(): - submit_vid = gr.Button('Generate Video', elem_id="depthmap_generatevideo", - variant='primary') - - inp += inp.enkey_tail() - - depthmap_mode_0.select(lambda: '0', None, inp['depthmap_mode']) - depthmap_mode_1.select(lambda: '1', None, inp['depthmap_mode']) - depthmap_mode_2.select(lambda: '2', None, inp['depthmap_mode']) - - def custom_depthmap_visibility(v): - return custom_depthmap_row_0.update(visible=v) - - inp['custom_depthmap'].change( - fn=custom_depthmap_visibility, - inputs=[inp['custom_depthmap']], - outputs=[custom_depthmap_row_0] - ) - - unloadmodels.click( - fn=unload_models, - inputs=[], - outputs=[] - ) - - clearmesh.click( - fn=lambda: None, - inputs=[], - outputs=[result_depthmesh] - ) - - submit.click( - fn=wrap_gradio_gpu_call(run_generate), - inputs=inp.enkey_body(), - outputs=[ - result_images, - fn_mesh, - 
result_depthmesh, - html_info_x, - html_info - ] - ) - - submit_vid.click( - fn=wrap_gradio_gpu_call(run_makevideo), - inputs=[ - fn_mesh, - vid_numframes, - vid_fps, - vid_traj, - vid_shift, - vid_border, - vid_dolly, - vid_format, - vid_ssaa - ], - outputs=[ - depth_vid, - vid_html_info_x, - vid_html_info - ] - ) - - return (depthmap_interface, "Depth", "depthmap_interface"), - - -# called from depth tab -def run_generate(*inputs): - inputs = GradioComponentBundle.enkey_to_dict(inputs) - depthmap_mode = inputs['depthmap_mode'] - depthmap_batch_input_dir = inputs['depthmap_batch_input_dir'] - image_batch = inputs['image_batch'] - depthmap_input_image = inputs['depthmap_input_image'] - depthmap_batch_output_dir = inputs['depthmap_batch_output_dir'] - depthmap_batch_reuse = inputs['depthmap_batch_reuse'] - custom_depthmap = inputs['custom_depthmap'] - custom_depthmap_img = inputs['custom_depthmap_img'] - - inputimages = [] - # Allow supplying custom depthmaps - inputdepthmaps = [] - # Also keep track of original file names - inputnames = [] - - if depthmap_mode == '2' and depthmap_batch_output_dir != '': - outpath = depthmap_batch_output_dir - else: - outpath = opts.outdir_samples or opts.outdir_extras_samples - - if depthmap_mode == '0': # Single image - if depthmap_input_image is None: - return [], None, None, "Please select an input image!", "" - inputimages.append(depthmap_input_image) - inputnames.append(None) - if custom_depthmap: - if custom_depthmap_img is None: - return [], None, None,\ - "Custom depthmap is not specified. 
Please either supply it or disable this option.", "" - inputdepthmaps.append(Image.open(os.path.abspath(custom_depthmap_img.name))) - else: - inputdepthmaps.append(None) - if depthmap_mode == '1': # Batch Process - if image_batch is None: - return [], None, None, "Please select input images!", "" - for img in image_batch: - image = Image.open(os.path.abspath(img.name)) - inputimages.append(image) - inputnames.append(os.path.splitext(img.orig_name)[0]) - elif depthmap_mode == '2': # Batch from Directory - assert not shared.cmd_opts.hide_ui_dir_config, '--hide-ui-dir-config option must be disabled' - if depthmap_batch_input_dir == '': - return [], None, None, "Please select an input directory.", "" - if depthmap_batch_input_dir == depthmap_batch_output_dir: - return [], None, None, "Please pick different directories for batch processing.", "" - image_list = shared.listfiles(depthmap_batch_input_dir) - for path in image_list: - try: - inputimages.append(Image.open(path)) - inputnames.append(path) - - custom_depthmap = None - if depthmap_batch_reuse: - basename = Path(path).stem - # Custom names are not used in samples directory - if outpath != opts.outdir_extras_samples: - # Possible filenames that the custom depthmaps may have - name_candidates = [f'{basename}-0000.{opts.samples_format}', # current format - f'{basename}.png', # human-intuitive format - f'{Path(path).name}'] # human-intuitive format (worse) - for fn_cand in name_candidates: - path_cand = os.path.join(outpath, fn_cand) - if os.path.isfile(path_cand): - custom_depthmap = Image.open(os.path.abspath(path_cand)) - break - inputdepthmaps.append(custom_depthmap) - except Exception as e: - print(f'Failed to load {path}, ignoring. 
Exception: {str(e)}') - inputdepthmaps_n = len([1 for x in inputdepthmaps if x is not None]) - print(f'{len(inputimages)} images will be processed, {inputdepthmaps_n} existing depthmaps will be reused') - - outputs, mesh_fi, meshsimple_fi = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs) - show_images = [] - - # Saving images - for input_i, imgs in enumerate(outputs): - basename = 'depthmap' - if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != opts.outdir_extras_samples: - basename = Path(inputnames[input_i]).stem - - for image_type, image in list(imgs.items()): - show_images += [image] - if inputs["save_outputs"]: - try: - suffix = "" if image_type == "depth" else f"_{image_type}" - images.save_image(image, path=outpath, basename=basename, seed=None, - prompt=None, extension=opts.samples_format, short_filename=True, - no_prompt=True, grid=False, pnginfo_section_name="extras", - suffix=suffix) - except Exception as e: - if not ('image has wrong mode' in str(e) or 'I;16' in str(e)): - raise e - print('Catched exception: image has wrong mode!') - traceback.print_exc() - - # use inpainted 3d mesh to show in 3d model output when enabled in settings - if hasattr(opts, 'depthmap_script_show_3d_inpaint') and opts.depthmap_script_show_3d_inpaint \ - and mesh_fi is not None and len(mesh_fi) > 0: - meshsimple_fi = mesh_fi - # however, don't show 3dmodel when disabled in settings - if hasattr(opts, 'depthmap_script_show_3d') and not opts.depthmap_script_show_3d: - meshsimple_fi = None - # TODO: return more info - return show_images, mesh_fi, meshsimple_fi, plaintext_to_html('info'), '' - - +from modules import script_callbacks script_callbacks.on_ui_settings(on_ui_settings) -script_callbacks.on_ui_tabs(on_ui_tabs) +script_callbacks.on_ui_tabs(lambda: [(common_ui.on_ui_tabs(), "Depth", "depthmap_interface")]) diff --git a/src/backbone.py b/src/backbone.py new file mode 100644 index 0000000..0829ce0 --- /dev/null +++ 
b/src/backbone.py @@ -0,0 +1,118 @@ +# DepthMap can be run inside stable-duiffusion-webui, but also separately. +# All the stable-duiffusion-webui stuff that the DepthMap relies on +# must be resided in this file (or in the scripts folder). +import pathlib +from datetime import datetime + +try: + # stable-duiffusion-webui backbone + from modules.images import save_image # Should fail if not on stable-duiffusion-webui + from modules.devices import torch_gc # TODO: is this really sufficient? + from modules.images import get_next_sequence_number + from modules.call_queue import wrap_gradio_gpu_call + from modules.shared import listfiles + + def get_opt(name, default): + from modules.shared import opts + if hasattr(opts, name): + return opts.__getattr__(name) + return default + + def get_cmd_opt(name, default): + """Get command line argument""" + from modules.shared import cmd_opts + if hasattr(cmd_opts, name): + return cmd_opts.__getattribute__(name) + return default + + def gather_ops(): + """Parameters for depthmap generation""" + from modules.shared import cmd_opts + ops = {} + if get_opt('depthmap_script_boost_rmax', None) is not None: + ops['boost_whole_size_threshold'] = get_opt('depthmap_script_boost_rmax', None) + ops['precision'] = cmd_opts.precision + ops['no_half'] = cmd_opts.no_half + return ops + + + def get_outpath(): + """Get path where results are saved by default""" + path = get_opt('outdir_samples', None) + if path is None or len(path) == 0: + path = get_opt('outdir_extras_samples', None) + assert path is not None and len(path) > 0 + return path + + + def unload_sd_model(): + from modules import shared, devices + if shared.sd_model is not None: + shared.sd_model.cond_stage_model.to(devices.cpu) + shared.sd_model.first_stage_model.to(devices.cpu) + + + def reload_sd_model(): + from modules import shared, devices + if shared.sd_model is not None: + shared.sd_model.cond_stage_model.to(devices.device) + 
shared.sd_model.first_stage_model.to(devices.device) + + def get_hide_dirs(): + import modules.shared + return modules.shared.hide_dirs +except: + # Standalone backbone + print( # " DepthMap did not detect stable-duiffusion-webui; launching with the standalone backbone.\n" + " The standalone mode is not on par with the stable-duiffusion-webui mode.\n" + " Some features may be missing or work differently. Please report bugs.\n") + + def save_image(image, path, basename, **kwargs): + import os + os.makedirs(path, exist_ok=True) + if 'suffix' not in kwargs or len(kwargs['suffix']) == 0: + kwargs['suffix'] = '' + else: + kwargs['suffix'] = f"-{kwargs['suffix']}" + format = get_opt('samples_format', kwargs['extension']) + fullfn = os.path.join( + path, f"{basename}-{get_next_sequence_number(path, basename)}{kwargs['suffix']}.{format}") + image.save(fullfn, format=format) + + def torch_gc(): + # TODO: is this really sufficient? + import torch + if torch.cuda.is_available(): + with torch.cuda.device('cuda'): + torch.cuda.empty_cache() + torch.cuda.ipc_collect() + + launched_at = int(datetime.now().timestamp()) + backbone_current_seq_number = 0 + + def get_next_sequence_number(outpath=None, basename=None): + global backbone_current_seq_number + backbone_current_seq_number += 1 + return int(f"{launched_at}{backbone_current_seq_number:04}") + + def wrap_gradio_gpu_call(f): return f # Displaying various stats is not supported + + def listfiles(dirname): + import os + filenames = [os.path.join(dirname, x) for x in sorted(os.listdir(dirname)) if not x.startswith(".")] + return [file for file in filenames if os.path.isfile(file)] + + def get_opt(name, default): return default # Configuring is not supported + + + def get_cmd_opt(name, default): return default # Configuring is not supported + + def gather_ops(): return {} # Configuring is not supported + + def get_outpath(): return str(pathlib.Path('.', 'outputs')) + + def unload_sd_model(): pass # Not needed + + def 
reload_sd_model(): pass # Not needed + + def get_hide_dirs(): return {} # Directories will not be hidden from traversal (except when starts with the dot) diff --git a/src/common_ui.py b/src/common_ui.py new file mode 100644 index 0000000..68583d3 --- /dev/null +++ b/src/common_ui.py @@ -0,0 +1,491 @@ +import traceback +from pathlib import Path +import gradio as gr +from PIL import Image + +from src import backbone +from src.core import core_generation_funnel, unload_models, run_makevideo +from src.depthmap_generation import ModelHolder +from src.gradio_args_transport import GradioComponentBundle +from src.misc import * + + +# Ugly workaround to fix gradio tempfile issue +def ensure_gradio_temp_directory(): + try: + import tempfile + path = os.path.join(tempfile.gettempdir(), 'gradio') + if not (os.path.exists(path)): + os.mkdir(path) + except Exception as e: + traceback.print_exc() + + +ensure_gradio_temp_directory() + + +def main_ui_panel(is_depth_tab): + inp = GradioComponentBundle() + # TODO: Greater visual separation + with gr.Blocks(): + with gr.Row(): + inp += 'compute_device', gr.Radio(label="Compute on", choices=['GPU', 'CPU'], value='GPU') + # TODO: Should return value instead of index. Maybe Enum should be used? 
+ inp += 'model_type', gr.Dropdown(label="Model", + choices=['res101', 'dpt_beit_large_512 (midas 3.1)', + 'dpt_beit_large_384 (midas 3.1)', 'dpt_large_384 (midas 3.0)', + 'dpt_hybrid_384 (midas 3.0)', + 'midas_v21', 'midas_v21_small', + 'zoedepth_n (indoor)', 'zoedepth_k (outdoor)', 'zoedepth_nk'], + value='res101', + type="index") + with gr.Box(): + with gr.Row(): + inp += 'boost', gr.Checkbox(label="BOOST (multi-resolution merging)", value=True) + inp += 'match_size', gr.Checkbox(label="Match net size to input size", value=False) + with gr.Row(visible=False) as options_depend_on_match_size: + inp += 'net_width', gr.Slider(minimum=64, maximum=2048, step=64, label='Net width', value=448) + inp += 'net_height', gr.Slider(minimum=64, maximum=2048, step=64, label='Net height', value=448) + + with gr.Box(): + with gr.Row(): + with gr.Group(): + inp += "save_outputs", gr.Checkbox(label="Save Outputs", value=True) # 50% of width + with gr.Group(): # 50% of width + inp += "output_depth", gr.Checkbox(label="Output DepthMap", value=True) + inp += "invert_depth", gr.Checkbox(label="Invert (black=near, white=far)", value=False) + with gr.Row() as options_depend_on_output_depth_1: + inp += "combine_output", gr.Checkbox( + label="Combine input and depthmap into one image", value=False) + inp += "combine_output_axis", gr.Radio(label="Combine axis", choices=['Vertical', 'Horizontal'], + value='Horizontal', type="index", visible=False) + with gr.Box(): + with gr.Row(): + inp += 'clipdepth', gr.Checkbox(label="Clip and renormalize DepthMap", value=False) + with gr.Row(visible=False) as clip_options_row_1: + inp += "clipthreshold_far", gr.Slider(minimum=0, maximum=1, step=0.001, label='Far clip', value=0) + inp += "clipthreshold_near", gr.Slider(minimum=0, maximum=1, step=0.001, label='Near clip', value=1) + + with gr.Box(): + with gr.Row(): + inp += "show_heat", gr.Checkbox(label="Generate HeatMap", value=False) + # gr.Checkbox(label="Generate NormalMap", value=False) # TODO: this 
is a fake door + + with gr.Box(): + with gr.Row(): + inp += "gen_stereo", gr.Checkbox(label="Generate stereoscopic image(s)", value=False) + with gr.Column(visible=False) as stereo_options: + with gr.Row(): + inp += "stereo_modes", gr.CheckboxGroup( + ["left-right", "right-left", "top-bottom", "bottom-top", "red-cyan-anaglyph"], + label="Output", value=["left-right", "red-cyan-anaglyph"]) + with gr.Row(): + inp += "stereo_divergence", gr.Slider(minimum=0.05, maximum=10.005, step=0.01, + label='Divergence (3D effect)', + value=2.5) + inp += "stereo_separation", gr.Slider(minimum=-5.0, maximum=5.0, step=0.01, + label='Separation (moves images apart)', + value=0.0) + with gr.Row(): + inp += "stereo_fill", gr.Dropdown(label="Gap fill technique", + choices=['none', 'naive', 'naive_interpolating', 'polylines_soft', + 'polylines_sharp'], value='polylines_sharp', + type="value") + inp += "stereo_balance", gr.Slider(minimum=-1.0, maximum=1.0, step=0.05, + label='Balance between eyes', + value=0.0) + + with gr.Box(): + with gr.Column(): + inp += "gen_mesh", gr.Checkbox( + label="Generate simple 3D mesh", value=False, visible=True) + with gr.Column(visible=False) as mesh_options: + with gr.Row(): + gr.HTML(value="Generates fast, accurate only with ZoeDepth models and no boost, no custom maps") + with gr.Row(): + inp += "mesh_occlude", gr.Checkbox(label="Remove occluded edges", value=True, visible=True) + inp += "mesh_spherical", gr.Checkbox(label="Equirectangular projection", value=False, visible=True) + + if is_depth_tab: + with gr.Box(): + with gr.Column(): + inp += "inpaint", gr.Checkbox( + label="Generate 3D inpainted mesh", value=False) + with gr.Column(visible=False) as inpaint_options_row_0: + gr.HTML("Generation is sloooow, required for generating videos") + inp += "inpaint_vids", gr.Checkbox( + label="Generate 4 demo videos with 3D inpainted mesh.", value=False) + gr.HTML("More options for generating video can be found in the Generate video tab") + + with gr.Box(): + 
# TODO: it should be clear from the UI that there is an option of the background removal + # that does not use the model selected above + with gr.Row(): + inp += "background_removal", gr.Checkbox(label="Remove background", value=False) + with gr.Column(visible=False) as bgrem_options: + with gr.Row(): + inp += "save_background_removal_masks", gr.Checkbox(label="Save the foreground masks", value=False) + inp += "pre_depth_background_removal", gr.Checkbox(label="Pre-depth background removal", value=False) + with gr.Row(): + inp += "background_removal_model", gr.Dropdown(label="Rembg Model", + choices=['u2net', 'u2netp', 'u2net_human_seg', + 'silueta'], + value='u2net', type="value") + + with gr.Box(): + gr.HTML(f"{SCRIPT_FULL_NAME}
") + gr.HTML("Information, comment and share @ " + "https://github.com/thygate/stable-diffusion-webui-depthmap-script") + + inp += "gen_normal", gr.Checkbox(label="Generate Normalmap (hidden! api only)", value=False, visible=False) + + def update_delault_net_size(model_type): + w, h = ModelHolder.get_default_net_size(model_type) + return inp['net_width'].update(value=w), inp['net_height'].update(value=h) + + inp['model_type'].change( + fn=update_delault_net_size, + inputs=inp['model_type'], + outputs=[inp['net_width'], inp['net_height']] + ) + + inp['boost'].change( + fn=lambda a, b: (inp['match_size'].update(visible=not a), + options_depend_on_match_size.update(visible=not a and not b)), + inputs=[inp['boost'], inp['match_size']], + outputs=[inp['match_size'], options_depend_on_match_size] + ) + inp['match_size'].change( + fn=lambda a, b: options_depend_on_match_size.update(visible=not a and not b), + inputs=[inp['boost'], inp['match_size']], + outputs=[options_depend_on_match_size] + ) + + inp['output_depth'].change( + fn=lambda a: (inp['invert_depth'].update(visible=a), options_depend_on_output_depth_1.update(visible=a)), + inputs=[inp['output_depth']], + outputs=[inp['invert_depth'], options_depend_on_output_depth_1] + ) + + inp['combine_output'].change( + fn=lambda v: inp['combine_output_axis'].update(visible=v), + inputs=[inp['combine_output']], + outputs=[inp['combine_output_axis']] + ) + + inp['clipdepth'].change( + fn=lambda v: clip_options_row_1.update(visible=v), + inputs=[inp['clipdepth']], + outputs=[clip_options_row_1] + ) + inp['clipthreshold_far'].change( + fn=lambda a, b: a if b < a else b, + inputs=[inp['clipthreshold_far'], inp['clipthreshold_near']], + outputs=[inp['clipthreshold_near']] + ) + inp['clipthreshold_near'].change( + fn=lambda a, b: a if b > a else b, + inputs=[inp['clipthreshold_near'], inp['clipthreshold_far']], + outputs=[inp['clipthreshold_far']] + ) + + inp['gen_stereo'].change( + fn=lambda v: stereo_options.update(visible=v), + 
inputs=[inp['gen_stereo']], + outputs=[stereo_options] + ) + + inp['gen_mesh'].change( + fn=lambda v: mesh_options.update(visible=v), + inputs=[inp['gen_mesh']], + outputs=[mesh_options] + ) + + if is_depth_tab: + inp['inpaint'].change( + fn=lambda v: inpaint_options_row_0.update(visible=v), + inputs=[inp['inpaint']], + outputs=[inpaint_options_row_0] + ) + + inp['background_removal'].change( + fn=lambda v: bgrem_options.update(visible=v), + inputs=[inp['background_removal']], + outputs=[bgrem_options] + ) + + return inp + +def open_folder_action(): + # Adapted from stable-diffusion-webui + f = backbone.get_outpath() + if backbone.get_cmd_opt('hide_ui_dir_config', False): + return + if not os.path.exists(f) or not os.path.isdir(f): + raise "Couldn't open output folder" # .isdir is security-related, do not remove! + import platform + import subprocess as sp + path = os.path.normpath(f) + if platform.system() == "Windows": + os.startfile(path) + elif platform.system() == "Darwin": + sp.Popen(["open", path]) + elif "microsoft-standard-WSL2" in platform.uname().release: + sp.Popen(["wsl-open", path]) + else: + sp.Popen(["xdg-open", path]) + +def on_ui_tabs(): + inp = GradioComponentBundle() + with gr.Blocks(analytics_enabled=False, title="DepthMap") as depthmap_interface: + with gr.Row().style(equal_height=False): + with gr.Column(variant='panel'): + inp += 'depthmap_mode', gr.HTML(visible=False, value='0') + with gr.Tabs(): + with gr.TabItem('Single Image') as depthmap_mode_0: + with gr.Group(): + with gr.Row(): + inp += gr.Image(label="Source", source="upload", interactive=True, type="pil", + elem_id="depthmap_input_image") + # TODO: depthmap generation settings should disappear when using this + inp += gr.File(label="Custom DepthMap", file_count="single", interactive=True, + type="file", elem_id='custom_depthmap_img', visible=False) + inp += gr.Checkbox(elem_id="custom_depthmap", label="Use custom DepthMap", value=False) + with gr.TabItem('Batch Process') as 
depthmap_mode_1: + inp += gr.File(elem_id='image_batch', label="Batch Process", file_count="multiple", + interactive=True, type="file") + with gr.TabItem('Batch from Directory') as depthmap_mode_2: + inp += gr.Textbox(elem_id="depthmap_batch_input_dir", label="Input directory", + **backbone.get_hide_dirs(), + placeholder="A directory on the same machine where the server is running.") + inp += gr.Textbox(elem_id="depthmap_batch_output_dir", label="Output directory", + **backbone.get_hide_dirs(), + placeholder="Leave blank to save images to the default path.") + gr.HTML("Files in the output directory may be overwritten.") + inp += gr.Checkbox(elem_id="depthmap_batch_reuse", + label="Skip generation and use (edited/custom) depthmaps " + "in output directory when a file already exists.", + value=True) + submit = gr.Button('Generate', elem_id="depthmap_generate", variant='primary') + inp += main_ui_panel(True) # Main panel is inserted here + unloadmodels = gr.Button('Unload models', elem_id="depthmap_unloadmodels") + + with gr.Column(variant='panel'): + with gr.Tabs(elem_id="mode_depthmap_output"): + with gr.TabItem('Depth Output'): + with gr.Group(): + result_images = gr.Gallery(label='Output', show_label=False, + elem_id=f"depthmap_gallery").style(grid=4) + with gr.Column(): + html_info = gr.HTML() + folder_symbol = '\U0001f4c2' # 📂 + gr.Button(folder_symbol, visible=not backbone.get_cmd_opt('hide_ui_dir_config', False)).click( + fn=lambda: open_folder_action(), inputs=[], outputs=[], + ) + + with gr.TabItem('3D Mesh'): + with gr.Group(): + result_depthmesh = gr.Model3D(label="3d Mesh", clear_color=[1.0, 1.0, 1.0, 1.0]) + with gr.Row(): + # loadmesh = gr.Button('Load') + clearmesh = gr.Button('Clear') + + with gr.TabItem('Generate video'): + # generate video + with gr.Group(): + with gr.Row(): + gr.Markdown("Generate video from inpainted(!) 
mesh.") + with gr.Row(): + depth_vid = gr.Video(interactive=False) + with gr.Column(): + vid_html_info_x = gr.HTML() + vid_html_info = gr.HTML() + fn_mesh = gr.Textbox(label="Input Mesh (.ply | .obj)", **backbone.get_hide_dirs(), + placeholder="A file on the same machine where " + "the server is running.") + with gr.Row(): + vid_numframes = gr.Textbox(label="Number of frames", value="300") + vid_fps = gr.Textbox(label="Framerate", value="40") + vid_format = gr.Dropdown(label="Format", choices=['mp4', 'webm'], value='mp4', + type="value", elem_id="video_format") + vid_ssaa = gr.Dropdown(label="SSAA", choices=['1', '2', '3', '4'], value='3', + type="value", elem_id="video_ssaa") + with gr.Row(): + vid_traj = gr.Dropdown(label="Trajectory", + choices=['straight-line', 'double-straight-line', 'circle'], + value='double-straight-line', type="index", + elem_id="video_trajectory") + vid_shift = gr.Textbox(label="Translate: x, y, z", value="-0.015, 0.0, -0.05") + vid_border = gr.Textbox(label="Crop: top, left, bottom, right", + value="0.03, 0.03, 0.05, 0.03") + vid_dolly = gr.Checkbox(label="Dolly", value=False, elem_classes="smalltxt") + with gr.Row(): + submit_vid = gr.Button('Generate Video', elem_id="depthmap_generatevideo", + variant='primary') + + + inp += inp.enkey_tail() + + depthmap_mode_0.select(lambda: '0', None, inp['depthmap_mode']) + depthmap_mode_1.select(lambda: '1', None, inp['depthmap_mode']) + depthmap_mode_2.select(lambda: '2', None, inp['depthmap_mode']) + + inp['custom_depthmap'].change( + fn=lambda v: inp['custom_depthmap_img'].update(visible=v), + inputs=[inp['custom_depthmap']], + outputs=[inp['custom_depthmap_img']] + ) + + unloadmodels.click( + fn=unload_models, + inputs=[], + outputs=[] + ) + + clearmesh.click( + fn=lambda: None, + inputs=[], + outputs=[result_depthmesh] + ) + + submit.click( + fn=backbone.wrap_gradio_gpu_call(run_generate), + inputs=inp.enkey_body(), + outputs=[ + result_images, + fn_mesh, + result_depthmesh, + html_info + ] + 
) + + submit_vid.click( + fn=backbone.wrap_gradio_gpu_call(run_makevideo), + inputs=[ + fn_mesh, + vid_numframes, + vid_fps, + vid_traj, + vid_shift, + vid_border, + vid_dolly, + vid_format, + vid_ssaa + ], + outputs=[ + depth_vid, + vid_html_info_x, + vid_html_info + ] + ) + + return depthmap_interface + + +def run_generate(*inputs): + inputs = GradioComponentBundle.enkey_to_dict(inputs) + depthmap_mode = inputs['depthmap_mode'] + depthmap_batch_input_dir = inputs['depthmap_batch_input_dir'] + image_batch = inputs['image_batch'] + depthmap_input_image = inputs['depthmap_input_image'] + depthmap_batch_output_dir = inputs['depthmap_batch_output_dir'] + depthmap_batch_reuse = inputs['depthmap_batch_reuse'] + custom_depthmap = inputs['custom_depthmap'] + custom_depthmap_img = inputs['custom_depthmap_img'] + + inputimages = [] + # Allow supplying custom depthmaps + inputdepthmaps = [] + # Also keep track of original file names + inputnames = [] + + if depthmap_mode == '2' and depthmap_batch_output_dir != '': + outpath = depthmap_batch_output_dir + else: + outpath = backbone.get_outpath() + + if depthmap_mode == '0': # Single image + if depthmap_input_image is None: + return [], None, None, "Please select an input image" + inputimages.append(depthmap_input_image) + inputnames.append(None) + if custom_depthmap: + if custom_depthmap_img is None: + return [], None, None, \ + "Custom depthmap is not specified. Please either supply it or disable this option." 
+ inputdepthmaps.append(Image.open(os.path.abspath(custom_depthmap_img.name))) + else: + inputdepthmaps.append(None) + if depthmap_mode == '1': # Batch Process + if image_batch is None: + return [], None, None, "Please select input images", "" + for img in image_batch: + image = Image.open(os.path.abspath(img.name)) + inputimages.append(image) + inputnames.append(os.path.splitext(img.orig_name)[0]) + elif depthmap_mode == '2': # Batch from Directory + assert not backbone.get_cmd_opt('hide_ui_dir_config', False), '--hide-ui-dir-config option must be disabled' + if depthmap_batch_input_dir == '': + return [], None, None, "Please select an input directory." + if depthmap_batch_input_dir == depthmap_batch_output_dir: + return [], None, None, "Please pick different directories for batch processing." + image_list = backbone.listfiles(depthmap_batch_input_dir) + for path in image_list: + try: + inputimages.append(Image.open(path)) + inputnames.append(path) + + custom_depthmap = None + if depthmap_batch_reuse: + basename = Path(path).stem + # Custom names are not used in samples directory + if outpath != backbone.get_opt('outdir_extras_samples', None): + # Possible filenames that the custom depthmaps may have + name_candidates = [f'{basename}-0000.{backbone.get_opt("samples_format", "png")}', # current format + f'{basename}.png', # human-intuitive format + f'{Path(path).name}'] # human-intuitive format (worse) + for fn_cand in name_candidates: + path_cand = os.path.join(outpath, fn_cand) + if os.path.isfile(path_cand): + custom_depthmap = Image.open(os.path.abspath(path_cand)) + break + inputdepthmaps.append(custom_depthmap) + except Exception as e: + print(f'Failed to load {path}, ignoring. 
Exception: {str(e)}') + inputdepthmaps_n = len([1 for x in inputdepthmaps if x is not None]) + print(f'{len(inputimages)} images will be processed, {inputdepthmaps_n} existing depthmaps will be reused') + + outputs, fn_mesh, display_mesh = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs, backbone.gather_ops()) + + # Saving images + show_images = [] + for input_i, imgs in enumerate(outputs): + basename = 'depthmap' + if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != backbone.get_opt('outdir_extras_samples', None): + basename = Path(inputnames[input_i]).stem + + for image_type, image in list(imgs.items()): + show_images += [image] + if inputs["save_outputs"]: + try: + suffix = "" if image_type == "depth" else f"{image_type}" + backbone.save_image(image, path=outpath, basename=basename, seed=None, + prompt=None, extension=backbone.get_opt('samples_format', 'png'), short_filename=True, + no_prompt=True, grid=False, pnginfo_section_name="extras", + suffix=suffix) + except Exception as e: + if not ('image has wrong mode' in str(e) or 'I;16' in str(e)): + raise e + print('Catched exception: image has wrong mode!') + traceback.print_exc() + + display_mesh = None + # use inpainted 3d mesh to show in 3d model output when enabled in settings + if backbone.get_opt('depthmap_script_show_3d_inpaint', True) and fn_mesh is not None and len(fn_mesh) > 0: + display_mesh = fn_mesh + # however, don't show 3dmodel when disabled in settings + if not backbone.get_opt('depthmap_script_show_3d', True): + display_mesh = None + # TODO: return more info + return show_images, fn_mesh, display_mesh, 'Generated!' 
diff --git a/src/core.py b/src/core.py index ccc3e6e..32a81b1 100644 --- a/src/core.py +++ b/src/core.py @@ -1,11 +1,6 @@ from pathlib import Path - from PIL import Image -from modules import shared, devices -from modules.images import get_next_sequence_number -from modules.shared import opts, cmd_opts - try: from tqdm import trange except: @@ -21,9 +16,10 @@ import traceback # Our code -from src.main import * +from src.misc import * from src.stereoimage_generation import create_stereoimages from src.depthmap_generation import ModelHolder +from src import backbone # 3d-photo-inpainting imports from inpaint.mesh import write_mesh, read_mesh, output_3d_photo @@ -47,19 +43,7 @@ def convert_i16_to_rgb(image, like): return output -def unload_sd_model(): - if shared.sd_model is not None: - shared.sd_model.cond_stage_model.to(devices.cpu) - shared.sd_model.first_stage_model.to(devices.cpu) - - -def reload_sd_model(): - if shared.sd_model is not None: - shared.sd_model.cond_stage_model.to(devices.device) - shared.sd_model.first_stage_model.to(devices.device) - - -def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp): +def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp, ops=None): if len(inputimages) == 0 or inputimages[0] is None: return [], '', '' if inputdepthmaps is None or len(inputdepthmaps) == 0: @@ -97,10 +81,14 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp stereo_modes = inp["stereo_modes"] stereo_separation = inp["stereo_separation"] + if ops is None: + ops = {} + model_holder.update_settings(**ops) + # TODO: ideally, run_depthmap should not save meshes - that makes the function not pure print(SCRIPT_FULL_NAME) - unload_sd_model() + backbone.unload_sd_model() # TODO: this still should not be here background_removed_images = [] @@ -306,7 +294,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp else: raise e finally: - if hasattr(opts, 
'depthmap_script_keepmodels') and opts.depthmap_script_keepmodels: + if backbone.get_opt('depthmap_script_keepmodels', True): model_holder.offload() # Swap to CPU memory else: if 'model' in locals(): @@ -316,7 +304,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp model_holder.unload_models() gc.collect() - devices.torch_gc() + backbone.torch_gc() # TODO: This should not be here mesh_fi = None @@ -326,14 +314,14 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp except Exception as e: print(f'{str(e)}, some issue with generating inpainted mesh') - reload_sd_model() + backbone.reload_sd_model() print("All done.\n") return generated_images, mesh_fi, meshsimple_fi def get_uniquefn(outpath, basename, ext): # Inefficient and may fail, maybe use unbounded binary search? - basecount = get_next_sequence_number(outpath, basename) + basecount = backbone.get_next_sequence_number(outpath, basename) if basecount > 0: basecount = basecount - 1 fullfn = None for i in range(500): @@ -401,10 +389,7 @@ def run_3dphoto(device, img_rgb, img_depth, inputnames, outpath, inpaint_vids, v config['repeat_inpaint_edge'] = True config['ply_fmt'] = "bin" - config['save_ply'] = False - if hasattr(opts, 'depthmap_script_save_ply') and opts.depthmap_script_save_ply: - config['save_ply'] = True - + config['save_ply'] = backbone.get_opt('depthmap_script_save_ply', False) config['save_obj'] = True if device == torch.device("cpu"): @@ -471,7 +456,7 @@ def run_3dphoto(device, img_rgb, img_depth, inputnames, outpath, inpaint_vids, v [-0.05, -0.05, -0.05, -0.05], ['dolly-zoom-in', 'zoom-in', 'circle', 'swing'], False, vid_format, vid_ssaa) - devices.torch_gc() + backbone.torch_gc() finally: del rgb_model @@ -480,7 +465,7 @@ def run_3dphoto(device, img_rgb, img_depth, inputnames, outpath, inpaint_vids, v depth_edge_model = None del depth_feat_model depth_feat_model = None - devices.torch_gc() + backbone.torch_gc() return mesh_fi @@ 
-602,9 +587,9 @@ def run_makevideo(fn_mesh, vid_numframes, vid_fps, vid_traj, vid_shift, vid_bord # output path and filename mess .. basename = Path(fn_mesh).stem - outpath = opts.outdir_samples or opts.outdir_extras_samples + outpath = backbone.get_outpath() # unique filename - basecount = get_next_sequence_number(outpath, basename) + basecount = backbone.get_next_sequence_number(outpath, basename) if basecount > 0: basecount = basecount - 1 fullfn = None for i in range(500): @@ -697,9 +682,7 @@ def depth_edges_mask(depth): def create_mesh(image, depth, keep_edges=False, spherical=False): import trimesh from dzoedepth.utils.geometry import depth_to_points, create_triangles - maxsize = 1024 - if hasattr(opts, 'depthmap_script_mesh_maxsize'): - maxsize = opts.depthmap_script_mesh_maxsize + maxsize = backbone.get_opt('depthmap_script_mesh_maxsize', 2048) # limit the size of the input image image.thumbnail((maxsize, maxsize)) diff --git a/src/depthmap_generation.py b/src/depthmap_generation.py index 6812d81..0ea4a37 100644 --- a/src/depthmap_generation.py +++ b/src/depthmap_generation.py @@ -1,42 +1,36 @@ +import gc +import os.path from operator import getitem -from PIL import Image -from torchvision.transforms import Compose, transforms - -# TODO: depthmap_generation should not depend on WebUI -from modules import shared, devices -from modules.shared import opts, cmd_opts - -import torch, gc import cv2 -import os.path import numpy as np import skimage.measure - -# Our code -from src.main import * +from PIL import Image +import torch +from torchvision.transforms import Compose, transforms # midas imports from dmidas.dpt_depth import DPTDepthModel from dmidas.midas_net import MidasNet from dmidas.midas_net_custom import MidasNet_small from dmidas.transforms import Resize, NormalizeImage, PrepareForNet - +# zoedepth +from dzoedepth.models.builder import build_model +from dzoedepth.utils.config import get_config # AdelaiDepth/LeReS imports from 
lib.multi_depth_model_woauxi import RelDepthModel from lib.net_tools import strip_prefix_if_present - +from pix2pix.models.pix2pix4depth_model import Pix2Pix4DepthModel # pix2pix/merge net imports from pix2pix.options.test_options import TestOptions -from pix2pix.models.pix2pix4depth_model import Pix2Pix4DepthModel -# zoedepth -from dzoedepth.models.builder import build_model -from dzoedepth.utils.config import get_config +# Our code +from src.misc import * +from src import backbone -global device +global depthmap_device -class ModelHolder(): +class ModelHolder: def __init__(self): self.depth_model = None self.pix2pix_model = None @@ -48,6 +42,20 @@ def __init__(self): self.resize_mode = None self.normalization = None + # Settings (initialized to sensible values, should be updated) + self.boost_whole_size_threshold = 1600 # R_max from the paper by default + self.no_half = False + self.precision = "autocast" + + def update_settings(self, boost_whole_size_threshold=None, no_half=None, precision=None): + if boost_whole_size_threshold is not None: + self.boost_whole_size_threshold = boost_whole_size_threshold + if no_half is not None: + self.no_half = no_half + if precision is not None: + self.precision = precision + + def ensure_models(self, model_type, device: torch.device, boost: bool): # TODO: could make it more granular if model_type == -1 or model_type is None: @@ -75,7 +83,6 @@ def load_models(self, model_type, device: torch.device, boost: bool): resize_mode = "minimal" normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) - # TODO: net_w, net_h model = None if model_type == 0: # "res101" model_path = f"{model_dir}/res101.pth" @@ -93,7 +100,7 @@ def load_models(self, model_type, device: torch.device, boost: bool): model = RelDepthModel(backbone='resnext101') model.load_state_dict(strip_prefix_if_present(checkpoint['depth_model'], "module."), strict=True) del checkpoint - devices.torch_gc() + backbone.torch_gc() if model_type == 1: # 
"dpt_beit_large_512" midas 3.1 model_path = f"{model_dir}/dpt_beit_large_512.pt" @@ -190,8 +197,8 @@ def load_models(self, model_type, device: torch.device, boost: bool): model.eval() # prepare for evaluation # optimize if device == torch.device("cuda") and model_type in [0, 1, 2, 3, 4, 5, 6]: - model = model.to(memory_format=torch.channels_last) - if not cmd_opts.no_half and model_type != 0 and not boost: # TODO: zoedepth, too? + model = model.to(memory_format=torch.channels_last) # TODO: weird + if not self.no_half and model_type != 0 and not boost: # TODO: zoedepth, too? model = model.half() model.to(device) # to correct device @@ -217,11 +224,10 @@ def load_models(self, model_type, device: torch.device, boost: bool): self.pix2pix_model.load_networks('latest') self.pix2pix_model.eval() - devices.torch_gc() + backbone.torch_gc() @staticmethod def get_default_net_size(model_type): - # TODO: fill in, use in the GUI sizes = { 0: [448, 448], 1: [512, 512], @@ -264,7 +270,7 @@ def unload_models(self): del self.pix2pix_model self.pix2pix_model = None gc.collect() - devices.torch_gc() + backbone.torch_gc() self.depth_model_type = None self.device = None @@ -272,9 +278,8 @@ def unload_models(self): def get_raw_prediction(self, input, net_width, net_height): """Get prediction from the model currently loaded by the ModelHolder object. 
If boost is enabled, net_width and net_height will be ignored.""" - # TODO: supply net size for zoedepth - global device - device = self.device + global depthmap_device + depthmap_device = self.device # input image img = cv2.cvtColor(np.asarray(input), cv2.COLOR_BGR2RGB) / 255.0 # compute depthmap @@ -285,9 +290,11 @@ def get_raw_prediction(self, input, net_width, net_height): raw_prediction = estimatezoedepth(input, self.depth_model, net_width, net_height) else: raw_prediction = estimatemidas(img, self.depth_model, net_width, net_height, - self.resize_mode, self.normalization) + self.resize_mode, self.normalization, self.no_half, + self.precision == "autocast") else: - raw_prediction = estimateboost(img, self.depth_model, self.depth_model_type, self.pix2pix_model) + raw_prediction = estimateboost(img, self.depth_model, self.depth_model_type, self.pix2pix_model, + self.boost_whole_size_threshold) raw_prediction_invert = self.depth_model_type in [0, 7, 8, 9] return raw_prediction, raw_prediction_invert @@ -300,7 +307,7 @@ def estimateleres(img, model, w, h): # compute with torch.no_grad(): - if device == torch.device("cuda"): + if depthmap_device == torch.device("cuda"): img_torch = img_torch.cuda() prediction = model.depth_model(img_torch) @@ -332,7 +339,7 @@ def scale_torch(img): def estimatezoedepth(img, model, w, h): # x = transforms.ToTensor()(img).unsqueeze(0) # x = x.type(torch.float32) - # x.to(device) + # x.to(depthmap_device) # prediction = model.infer(x) model.core.prep.resizer._Resize__width = w model.core.prep.resizer._Resize__height = h @@ -341,7 +348,7 @@ def estimatezoedepth(img, model, w, h): return prediction -def estimatemidas(img, model, w, h, resize_mode, normalization): +def estimatemidas(img, model, w, h, resize_mode, normalization, no_half, precision_is_autocast): import contextlib # init transform transform = Compose( @@ -364,13 +371,13 @@ def estimatemidas(img, model, w, h, resize_mode, normalization): img_input = transform({"image": 
img})["image"] # compute - precision_scope = torch.autocast if shared.cmd_opts.precision == "autocast" and device == torch.device( + precision_scope = torch.autocast if precision_is_autocast and depthmap_device == torch.device( "cuda") else contextlib.nullcontext with torch.no_grad(), precision_scope("cuda"): - sample = torch.from_numpy(img_input).to(device).unsqueeze(0) - if device == torch.device("cuda"): + sample = torch.from_numpy(img_input).to(depthmap_device).unsqueeze(0) + if depthmap_device == torch.device("cuda"): sample = sample.to(memory_format=torch.channels_last) - if not cmd_opts.no_half: + if not no_half: sample = sample.half() prediction = model.forward(sample) prediction = ( @@ -600,12 +607,8 @@ def parse(self): return self.opt -def estimateboost(img, model, model_type, pix2pixmodel): - pix2pixsize = 1024 # TODO: to setting? - whole_size_threshold = 1600 # R_max from the paper # TODO: to setting? - # get settings - if hasattr(opts, 'depthmap_script_boost_rmax'): - whole_size_threshold = opts.depthmap_script_boost_rmax +def estimateboost(img, model, model_type, pix2pixmodel, whole_size_threshold): + pix2pixsize = 1024 # TODO: pix2pixsize and whole_size_threshold to setting? if model_type == 0: # leres net_receptive_field_size = 448 @@ -618,7 +621,7 @@ def estimateboost(img, model, model_type, pix2pixmodel): patch_netsize = 2 * net_receptive_field_size gc.collect() - devices.torch_gc() + backbone.torch_gc() # Generate mask used to smoothly blend the local pathc estimations to the base estimate. # It is arbitrarily large to avoid artifacts during rescaling for each crop. 
@@ -1024,8 +1027,8 @@ def estimatemidasBoost(img, model, w, h): # compute with torch.no_grad(): - sample = torch.from_numpy(img_input).to(device).unsqueeze(0) - if device == torch.device("cuda"): + sample = torch.from_numpy(img_input).to(depthmap_device).unsqueeze(0) + if depthmap_device == torch.device("cuda"): sample = sample.to(memory_format=torch.channels_last) prediction = model.forward(sample) diff --git a/src/main.py b/src/misc.py similarity index 60% rename from src/main.py rename to src/misc.py index d3fed1d..875211f 100644 --- a/src/main.py +++ b/src/misc.py @@ -1,27 +1,36 @@ import subprocess import os import pathlib -import torch +import builtins def get_commit_hash(): - try: + def call_git(dir): return subprocess.check_output( [os.environ.get("GIT", "git"), "rev-parse", "HEAD"], - cwd=pathlib.Path.cwd().joinpath('extensions/stable-diffusion-webui-depthmap-script/'), - shell=False, - stderr=subprocess.DEVNULL, - encoding='utf8').strip()[0:8] + cwd=dir, shell=False, stderr=subprocess.DEVNULL, encoding='utf8').strip()[0:8] + + try: + file_path = pathlib.Path(__file__) + path = file_path.parts + while len(path) > 0 and path[-1] != REPOSITORY_NAME: + path = path[:-1] + if len(path) >= 2 and path[-1] == REPOSITORY_NAME and path[-2] == "extensions": + return call_git(str(pathlib.Path(*path))) + + return call_git(pathlib.Path.cwd().joinpath('extensions/stable-diffusion-webui-depthmap-script/')) except Exception: return "" +REPOSITORY_NAME = "stable-diffusion-webui-depthmap-script" SCRIPT_NAME = "DepthMap" -SCRIPT_VERSION = "v0.4.0" +SCRIPT_VERSION = "v0.4.1" SCRIPT_FULL_NAME = f"{SCRIPT_NAME} {SCRIPT_VERSION} ({get_commit_hash()})" def ensure_file_downloaded(filename, url, sha256_hash_prefix=None): - # Do not check the hash every time - it is somewhat time-consuming + import torch + # Do not check the hash every time - it is somewhat time-consumin if os.path.exists(filename): return