From 3d148294ad51b52915be84d24c5ef537333a2f93 Mon Sep 17 00:00:00 2001 From: Semjon Kravtsenko Date: Mon, 24 Jul 2023 13:27:28 +0300 Subject: [PATCH 01/16] Remove code duplication, allow more settings --- scripts/depthmap_api.py | 103 +++++++--------------------------------- 1 file changed, 16 insertions(+), 87 deletions(-) diff --git a/scripts/depthmap_api.py b/scripts/depthmap_api.py index a70a859..c66a42d 100644 --- a/scripts/depthmap_api.py +++ b/scripts/depthmap_api.py @@ -9,14 +9,14 @@ import gradio as gr -from modules.api.models import * +from modules.api.models import List, Dict from modules.api import api -from modules.shared import opts from src.core import core_generation_funnel -from src.common_ui import main_ui_panel from src.misc import SCRIPT_VERSION from src import backbone +from src.common_constants import GenerationOptions as go + def encode_to_base64(image): if type(image) is str: @@ -28,20 +28,15 @@ def encode_to_base64(image): else: return "" + def encode_np_to_base64(image): pil = Image.fromarray(image) return api.encode_pil_to_base64(pil) + def to_base64_PIL(encoding: str): return Image.fromarray(np.array(api.decode_base64_to_image(encoding)).astype('uint8')) -#TODO: is this slow? 
-def get_defaults(): - default_gradio = main_ui_panel(True).internal - defaults = {} - for key, value in default_gradio.items(): - defaults[key]= value.value - return defaults def depth_api(_: gr.Blocks, app: FastAPI): @app.get("/depth/version") @@ -50,97 +45,31 @@ async def version(): @app.get("/depth/get_options") async def get_options(): - default_input = get_defaults() - return {"settings": sorted(list(default_input.internal.keys()))} - - #This will be the stable basic api - @app.post("/depth/process") + return {"options": sorted([x.name.lower() for x in go])} + + # TODO: some potential inputs not supported (like custom depthmaps) + @app.post("/depth/generate") async def process( depth_input_images: List[str] = Body([], title='Input Images'), - compute_device:str = Body("GPU", title='CPU or GPU', options="'GPU', 'CPU'"), - model_type:str = Body('zoedepth_n (indoor)', title='depth model', options="'res101', 'dpt_beit_large_512 (midas 3.1)', 'dpt_beit_large_384 (midas 3.1)', 'dpt_large_384 (midas 3.0)', 'dpt_hybrid_384 (midas 3.0)', 'midas_v21', 'midas_v21_small', 'zoedepth_n (indoor)', 'zoedepth_k (outdoor)', 'zoedepth_nk'"), - net_width:int = Body(512, title="net width"), - net_height:int = Body(512, title="net height"), - net_size_match:bool = Body(True, title="match original image size"), - boost:bool = Body(False, title="use boost algorithm"), - output_depth_invert:bool = Body(False, title="invert depthmap") + options: Dict[str, object] = Body("options", title='Generation options'), ): - default_inputs = get_defaults() - override = { - # TODO: These indexing aren't soo nice - 'compute_device': compute_device, - 'model_type': ['res101', 'dpt_beit_large_512 (midas 3.1)', - 'dpt_beit_large_384 (midas 3.1)', 'dpt_large_384 (midas 3.0)', - 'dpt_hybrid_384 (midas 3.0)', - 'midas_v21', 'midas_v21_small', - 'zoedepth_n (indoor)', 'zoedepth_k (outdoor)', 'zoedepth_nk'].index(model_type), - 'net_width': net_width, - 'net_height': net_height, - 'net_size_match': 
net_size_match, - 'boost': boost, - 'output_depth_invert': output_depth_invert, - } - - for key, value in override.items(): - default_inputs[key] = value - if len(depth_input_images) == 0: - raise HTTPException( - status_code=422, detail="No image selected") + raise HTTPException(status_code=422, detail="No image supplied") - print(f"Processing {str(len(depth_input_images))} images with the depth module.") + print(f"Processing {str(len(depth_input_images))} images trough the API.") PIL_images = [] for input_image in depth_input_images: PIL_images.append(to_base64_PIL(input_image)) - outpath = opts.outdir_samples or opts.outdir_extras_samples - img_gen = core_generation_funnel(outpath, PIL_images, None, None, default_inputs)[0] + outpath = backbone.get_outpath() + results, _, _ = core_generation_funnel(outpath, PIL_images, None, None, options) - # This just keeps depth image throws everything else away - results = [img['depth'] for img in img_gen] + # TODO: Fix: this just keeps depth image throws everything else away + results = [img['depth'] for img in results] results64 = list(map(encode_to_base64, results)) return {"images": results64, "info": "Success"} - - #This will be direct process for overriding the default settings - @app.post("/depth/raw_process") - async def raw_process( - depth_input_images: List[str] = Body([], title='Input Images'), - override: dict = Body({}, title="a dictionary containing exact internal keys to depthmap") - ): - - default_inputs = get_defaults() - for key, value in override.items(): - default_inputs[key] = value - - if len(depth_input_images) == 0: - raise HTTPException( - status_code=422, detail="No image selected") - - print(f"Processing {str(len(depth_input_images))} images with the depth module.") - - PIL_images = [] - for input_image in depth_input_images: - PIL_images.append(to_base64_PIL(input_image)) - - outpath = opts.outdir_samples or opts.outdir_extras_samples - img_gen = core_generation_funnel(outpath, PIL_images, None, 
None, default_inputs)[0] - - # This just keeps depth image throws everything else away - results = [img['depth'] for img in img_gen] - results64 = list(map(encode_to_base64, results)) - return {"images": results64, "info": "Success"} - - # TODO: add functionality - # most different output formats (.obj, etc) should have different apis because otherwise network bloat might become a thing - - @app.post("/depth/extras_process") - async def extras_process( - depth_input_images: List[str] = Body([], title='Input Images') - ): - - return {"images": depth_input_images, "info": "Success"} try: import modules.script_callbacks as script_callbacks From 9bd3d909838692e27cc4bc43037781b39baa6167 Mon Sep 17 00:00:00 2001 From: Semjon Kravtsenko Date: Mon, 24 Jul 2023 15:32:37 +0300 Subject: [PATCH 02/16] Rework core_generation_funnel to be generator Closes #302 --- scripts/depthmap.py | 42 +++++++++++++++++-------------- scripts/depthmap_api.py | 23 +++++++++-------- src/backbone.py | 12 +++++++++ src/common_ui.py | 56 +++++++++++++++++++++++------------------ src/core.py | 44 +++++++++++++++++--------------- 5 files changed, 102 insertions(+), 75 deletions(-) diff --git a/scripts/depthmap.py b/scripts/depthmap.py index 9378066..3a39d24 100644 --- a/scripts/depthmap.py +++ b/scripts/depthmap.py @@ -43,30 +43,34 @@ def run(self, p, *inputs): continue inputimages.append(processed.images[count]) - outputs, mesh_fi, meshsimple_fi = core_generation_funnel(p.outpath_samples, inputimages, None, None, inputs, backbone.gather_ops()) + gen_obj = core_generation_funnel(p.outpath_samples, inputimages, None, None, inputs, backbone.gather_ops()) + + for input_i, type, result in gen_obj: + if type in ['simple_mesh', 'inpainted_mesh']: + continue # We are in script mode: do nothing with the filenames - for input_i, imgs in enumerate(outputs): # get generation parameters + # TODO: could reuse if hasattr(processed, 'all_prompts') and shared.opts.enable_pnginfo: - info = create_infotext(processed, 
processed.all_prompts, processed.all_seeds, processed.all_subseeds, - "", 0, input_i) + info = create_infotext( + processed, processed.all_prompts, processed.all_seeds, processed.all_subseeds, "", 0, input_i) else: info = None - for image_type, image in list(imgs.items()): - processed.images.append(image) - if inputs["save_outputs"]: - try: - suffix = "" if image_type == "depth" else f"{image_type}" - backbone.save_image(image, path=p.outpath_samples, basename="", seed=processed.all_seeds[input_i], - prompt=processed.all_prompts[input_i], extension=shared.opts.samples_format, - info=info, - p=processed, - suffix=suffix) - except Exception as e: - if not ('image has wrong mode' in str(e) or 'I;16' in str(e)): - raise e - print('Catched exception: image has wrong mode!') - traceback.print_exc() + + processed.images.append(result) + if inputs["save_outputs"]: + try: + suffix = "" if type == "depth" else f"{type}" + backbone.save_image(result, path=p.outpath_samples, basename="", seed=processed.all_seeds[input_i], + prompt=processed.all_prompts[input_i], extension=shared.opts.samples_format, + info=info, + p=processed, + suffix=suffix) + except Exception as e: + if not ('image has wrong mode' in str(e) or 'I;16' in str(e)): + raise e + print('Catched exception: image has wrong mode!') + traceback.print_exc() return processed diff --git a/scripts/depthmap_api.py b/scripts/depthmap_api.py index c66a42d..a23d792 100644 --- a/scripts/depthmap_api.py +++ b/scripts/depthmap_api.py @@ -53,23 +53,24 @@ async def process( depth_input_images: List[str] = Body([], title='Input Images'), options: Dict[str, object] = Body("options", title='Generation options'), ): - if len(depth_input_images) == 0: - raise HTTPException(status_code=422, detail="No image supplied") + # TODO: restrict mesh options - print(f"Processing {str(len(depth_input_images))} images trough the API.") + if len(depth_input_images) == 0: + raise HTTPException(status_code=422, detail="No images supplied") + 
print(f"Processing {str(len(depth_input_images))} images trough the API") - PIL_images = [] + pil_images = [] for input_image in depth_input_images: - PIL_images.append(to_base64_PIL(input_image)) - + pil_images.append(to_base64_PIL(input_image)) outpath = backbone.get_outpath() - results, _, _ = core_generation_funnel(outpath, PIL_images, None, None, options) + gen_obj = core_generation_funnel(outpath, pil_images, None, None, options) - # TODO: Fix: this just keeps depth image throws everything else away - results = [img['depth'] for img in results] - results64 = list(map(encode_to_base64, results)) + results_based = [] + for count, type, result in gen_obj: + if type not in ['simple_mesh', 'inpainted_mesh']: + results_based += [encode_to_base64(result)] + return {"images": results_based, "info": "Success"} - return {"images": results64, "info": "Success"} try: import modules.script_callbacks as script_callbacks diff --git a/src/backbone.py b/src/backbone.py index f564129..f82c0d5 100644 --- a/src/backbone.py +++ b/src/backbone.py @@ -3,6 +3,13 @@ # must be resided in this file (or in the scripts folder). 
import pathlib from datetime import datetime +import enum + + +class BackboneType(enum.Enum): + WEBUI = 1 + STANDALONE = 2 + try: # stable-diffusion-webui backbone @@ -61,6 +68,8 @@ def reload_sd_model(): def get_hide_dirs(): import modules.shared return modules.shared.hide_dirs + + USED_BACKBONE = BackboneType.WEBUI except: # Standalone backbone print( # " DepthMap did not detect stable-diffusion-webui; launching with the standalone backbone.\n" @@ -116,3 +125,6 @@ def unload_sd_model(): pass # Not needed def reload_sd_model(): pass # Not needed def get_hide_dirs(): return {} # Directories will not be hidden from traversal (except when starts with the dot) + + + USED_BACKBONE = BackboneType.STANDALONE diff --git a/src/common_ui.py b/src/common_ui.py index cdde4f5..d2d6930 100644 --- a/src/common_ui.py +++ b/src/common_ui.py @@ -470,36 +470,42 @@ def run_generate(*inputs): inputdepthmaps_n = len([1 for x in inputdepthmaps if x is not None]) print(f'{len(inputimages)} images will be processed, {inputdepthmaps_n} existing depthmaps will be reused') - outputs, fn_mesh, display_mesh = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs, backbone.gather_ops()) + gen_obj = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs, backbone.gather_ops()) - # Saving images show_images = [] - for input_i, imgs in enumerate(outputs): + inpainted_mesh_fi = mesh_simple_fi = None + for input_i, type, result in gen_obj: + if type == 'simple_mesh': + mesh_simple_fi = result + continue + if type == 'inpainted_mesh': + inpainted_mesh_fi = result + continue + basename = 'depthmap' if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != backbone.get_opt('outdir_extras_samples', None): basename = Path(inputnames[input_i]).stem - for image_type, image in list(imgs.items()): - show_images += [image] - if inputs["save_outputs"]: - try: - suffix = "" if image_type == "depth" else f"{image_type}" - 
backbone.save_image(image, path=outpath, basename=basename, seed=None, - prompt=None, extension=backbone.get_opt('samples_format', 'png'), short_filename=True, - no_prompt=True, grid=False, pnginfo_section_name="extras", - suffix=suffix) - except Exception as e: - if not ('image has wrong mode' in str(e) or 'I;16' in str(e)): - raise e - print('Catched exception: image has wrong mode!') - traceback.print_exc() - - display_mesh = None - # use inpainted 3d mesh to show in 3d model output when enabled in settings - if backbone.get_opt('depthmap_script_show_3d_inpaint', True) and fn_mesh is not None and len(fn_mesh) > 0: - display_mesh = fn_mesh - # however, don't show 3dmodel when disabled in settings + show_images += [result] + if inputs["save_outputs"]: + try: + suffix = "" if type == "depth" else f"{type}" + backbone.save_image(result, path=outpath, basename=basename, seed=None, + prompt=None, extension=backbone.get_opt('samples_format', 'png'), short_filename=True, + no_prompt=True, grid=False, pnginfo_section_name="extras", + suffix=suffix) + except Exception as e: + if not ('image has wrong mode' in str(e) or 'I;16' in str(e)): + raise e + print('Catched exception: image has wrong mode!') + traceback.print_exc() + + # Deciding what mesh to display (and if) + display_mesh_fi = None if not backbone.get_opt('depthmap_script_show_3d', True): - display_mesh = None + display_mesh_fi = mesh_simple_fi + if backbone.get_opt('depthmap_script_show_3d_inpaint', True): + if inpainted_mesh_fi is not None and len(inpainted_mesh_fi) > 0: + display_mesh_fi = inpainted_mesh_fi # TODO: return more info - return show_images, fn_mesh, display_mesh, 'Generated!' + return show_images, inpainted_mesh_fi, display_mesh_fi, 'Generated!' 
diff --git a/src/core.py b/src/core.py index c18acd4..6f16efb 100644 --- a/src/core.py +++ b/src/core.py @@ -66,7 +66,7 @@ def __getattr__(self, item): def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp, ops=None): if len(inputimages) == 0 or inputimages[0] is None: - return [], '', '' + return if inputdepthmaps is None or len(inputdepthmaps) == 0: inputdepthmaps: list[Image] = [None for _ in range(len(inputimages))] inputdepthmaps_complete = all([x is not None for x in inputdepthmaps]) @@ -103,12 +103,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp device = torch.device("cpu") print("device: %s" % device) - generated_images = [{} for _ in range(len(inputimages))] - """Images that will be returned. - Every array element corresponds to particular input image. - Dictionary keys are types of images that were derived from the input image.""" - # TODO: ??? - meshsimple_fi = None + # TODO: This should not be here inpaint_imgs = [] inpaint_depths = [] @@ -192,14 +187,14 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp background_removed_array[:, :, 2] == 0) & (background_removed_array[:, :, 3] <= 0.2) img_output[bg_mask] = 0 # far value - generated_images[count]['background_removed'] = background_removed_image + yield count, 'background_removed', background_removed_image if inp[go.SAVE_BACKGROUND_REMOVAL_MASKS]: bg_array = (1 - bg_mask.astype('int8')) * 255 mask_array = np.stack((bg_array, bg_array, bg_array, bg_array), axis=2) mask_image = Image.fromarray(mask_array.astype(np.uint8)) - generated_images[count]['foreground_mask'] = mask_image + yield count, 'foreground_mask', mask_image # A weird quirk: if user tries to save depthmap, whereas input depthmap is used, # depthmap will be outputed, even if output_depth_combine is used. 
@@ -211,9 +206,9 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp img_concat = Image.fromarray(np.concatenate( (inputimages[count], convert_i16_to_rgb(img_depth, inputimages[count])), axis=axis)) - generated_images[count]['concat_depth'] = img_concat + yield count, 'concat_depth', img_concat else: - generated_images[count]['depth'] = Image.fromarray(img_depth) + yield count, 'depth', Image.fromarray(img_depth) if inp[go.GEN_STEREO]: print("Generating stereoscopic images..") @@ -222,21 +217,22 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp inp[go.STEREO_DIVERGENCE], inp[go.STEREO_SEPARATION], inp[go.STEREO_MODES], inp[go.STEREO_BALANCE], inp[go.STEREO_FILL_ALGO]) for c in range(0, len(stereoimages)): - generated_images[count][inp[go.STEREO_MODES][c]] = stereoimages[c] + yield count, inp[go.STEREO_MODES][c], stereoimages[c] if inp[go.GEN_NORMALMAP]: - generated_images[count]['normalmap'] = create_normalmap( + normalmap = create_normalmap( img_output, inp[go.NORMALMAP_PRE_BLUR_KERNEL] if inp[go.NORMALMAP_PRE_BLUR] else None, inp[go.NORMALMAP_SOBEL_KERNEL] if inp[go.NORMALMAP_SOBEL] else None, inp[go.NORMALMAP_POST_BLUR_KERNEL] if inp[go.NORMALMAP_POST_BLUR] else None, inp[go.NORMALMAP_INVERT] ) + yield count, 'normalmap', normalmap if inp[go.GEN_HEATMAP]: from dzoedepth.utils.misc import colorize heatmap = Image.fromarray(colorize(img_output, cmap='inferno')) - generated_images[count]['heatmap'] = heatmap + yield count, 'heatmap', heatmap # gen mesh if inp[go.GEN_SIMPLE_MESH]: @@ -268,17 +264,25 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp mesh = create_mesh(inputimages[count], depthi, keep_edges=not inp[go.SIMPLE_MESH_OCCLUDE], spherical=(inp[go.SIMPLE_MESH_SPHERICAL])) mesh.export(meshsimple_fi) + yield count, 'simple_mesh', meshsimple_fi print("Computing output(s) done.") except RuntimeError as e: # TODO: display in UI if 'out of memory' in str(e): - 
suggestion = "ERROR: out of memory, could not generate depthmap!\nPlease try a different model" - if device != torch.device("cpu"): - suggestion += ", or try using the CPU" + suggestion = "ERROR: out of GPU memory, could not generate depthmap! " \ + "Here are some suggestions to work around this issue:\n" if inp[go.BOOST]: - suggestion += ", or disable BOOST" - print(f"{suggestion}.") + suggestion += " * Disable BOOST (generation will be faster, but the depthmap will be less detailed)\n" + if backbone.USED_BACKBONE == backbone.BackboneType.WEBUI: + suggestion += " * Run DepthMap in the standalone mode - without launching the SD WebUI\n" + if device != torch.device("cpu"): + suggestion += " * Select CPU as the processing device (this will be slower)\n" + if inp[go.MODEL_TYPE] != 6: + suggestion += " * Use a different model (this could reduce quality)\n" + if not inp[go.BOOST]: + suggestion += " * Reduce net size (this could reduce quality)\n" + print(f"{suggestion}") else: raise e finally: @@ -301,12 +305,12 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp mesh_fi = run_3dphoto(device, inpaint_imgs, inpaint_depths, inputnames, outpath, inp[go.GEN_INPAINTED_MESH_DEMOS], 1, "mp4") + yield 0, 'inpainted_mesh', mesh_fi except Exception as e: print(f'{str(e)}, some issue with generating inpainted mesh') backbone.reload_sd_model() print("All done.\n") - return generated_images, mesh_fi, meshsimple_fi def get_uniquefn(outpath, basename, ext): From a232eb9a5a711247d4c2c6312fc9df336bf8f81c Mon Sep 17 00:00:00 2001 From: Semjon Kravtsenko Date: Mon, 24 Jul 2023 22:30:21 +0300 Subject: [PATCH 03/16] Better error handling in tab --- src/common_ui.py | 22 +++++++++++++++++++--- src/core.py | 18 +++++++++++------- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/src/common_ui.py b/src/common_ui.py index d2d6930..caf46a2 100644 --- a/src/common_ui.py +++ b/src/common_ui.py @@ -474,7 +474,24 @@ def run_generate(*inputs): show_images 
= [] inpainted_mesh_fi = mesh_simple_fi = None - for input_i, type, result in gen_obj: + msg = "" # Empty string is never returned + while True: + try: + input_i, type, result = next(gen_obj) + except StopIteration: + # TODO: return more info + msg = '

Successfully generated.

' + break + except Exception as e: + traceback.print_exc() + msg = '

' + 'ERROR: ' + str(e) + '

' + '\n' + if 'out of GPU memory' not in msg: + msg +=\ + 'Please report this issue ' \ + f'here. ' \ + 'Make sure to provide the full stacktrace: \n' + msg += '' + traceback.format_exc() + '' + break if type == 'simple_mesh': mesh_simple_fi = result continue @@ -507,5 +524,4 @@ def run_generate(*inputs): if backbone.get_opt('depthmap_script_show_3d_inpaint', True): if inpainted_mesh_fi is not None and len(inpainted_mesh_fi) > 0: display_mesh_fi = inpainted_mesh_fi - # TODO: return more info - return show_images, inpainted_mesh_fi, display_mesh_fi, 'Generated!' + return show_images, inpainted_mesh_fi, display_mesh_fi, msg.replace('\n', '
') diff --git a/src/core.py b/src/core.py index 6f16efb..9294ffe 100644 --- a/src/core.py +++ b/src/core.py @@ -267,23 +267,27 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp yield count, 'simple_mesh', meshsimple_fi print("Computing output(s) done.") - except RuntimeError as e: - # TODO: display in UI - if 'out of memory' in str(e): - suggestion = "ERROR: out of GPU memory, could not generate depthmap! " \ + except Exception as e: + import traceback + if 'out of memory' in str(e).lower(): + print(str(e)) + suggestion = "out of GPU memory, could not generate depthmap! " \ "Here are some suggestions to work around this issue:\n" if inp[go.BOOST]: suggestion += " * Disable BOOST (generation will be faster, but the depthmap will be less detailed)\n" - if backbone.USED_BACKBONE == backbone.BackboneType.WEBUI: + if backbone.USED_BACKBONE != backbone.BackboneType.STANDALONE: suggestion += " * Run DepthMap in the standalone mode - without launching the SD WebUI\n" if device != torch.device("cpu"): suggestion += " * Select CPU as the processing device (this will be slower)\n" if inp[go.MODEL_TYPE] != 6: - suggestion += " * Use a different model (this could reduce quality)\n" + suggestion +=\ + " * Use a different model (generally, more memory-consuming models produce better depthmaps)\n" if not inp[go.BOOST]: suggestion += " * Reduce net size (this could reduce quality)\n" - print(f"{suggestion}") + print('Fail.\n') + raise Exception(suggestion) else: + print('Fail.\n') raise e finally: if backbone.get_opt('depthmap_script_keepmodels', True): From d7a8fd1d3a494df2025cf299ceec90db00bd9b6d Mon Sep 17 00:00:00 2001 From: semjon00 Date: Thu, 27 Jul 2023 00:09:22 +0300 Subject: [PATCH 04/16] Multiple changes Refactored GradioComponentBundle Hide/unselect depthmap generation/output options if using custom depthmap New stereoimage outputs --- scripts/depthmap.py | 1 + src/common_ui.py | 95 ++++++++++++----------------------- src/core.py | 4 +- 
src/gradio_args_transport.py | 70 +++++++++++++++++++------- src/stereoimage_generation.py | 15 ++++-- 5 files changed, 99 insertions(+), 86 deletions(-) diff --git a/scripts/depthmap.py b/scripts/depthmap.py index 3a39d24..918eaa6 100644 --- a/scripts/depthmap.py +++ b/scripts/depthmap.py @@ -89,6 +89,7 @@ def add_option(name, default_value, description, name_prefix='depthmap_script'): add_option('show_3d_inpaint', True, "Also show 3D Inpainted Mesh in 3D Mesh output tab. (Experimental)") add_option('mesh_maxsize', 2048, "Max size for generating simple mesh.") add_option('gen_heatmap_from_ui', False, "Show an option to generate HeatMap in the UI") + add_option('extra_stereomodes', False, "Enable more possible outputs for stereoimage generation") from modules import script_callbacks diff --git a/src/common_ui.py b/src/common_ui.py index caf46a2..88606fa 100644 --- a/src/common_ui.py +++ b/src/common_ui.py @@ -28,7 +28,8 @@ def main_ui_panel(is_depth_tab): inp = GradioComponentBundle() # TODO: Greater visual separation with gr.Blocks(): - with gr.Row(): + with gr.Row() as cur_option_root: + inp -= 'depthmap_gen_row_0', cur_option_root inp += go.COMPUTE_DEVICE, gr.Radio(label="Compute on", choices=['GPU', 'CPU'], value='GPU') # TODO: Should return value instead of index. Maybe Enum should be used? 
inp += go.MODEL_TYPE, gr.Dropdown(label="Model", @@ -38,15 +39,17 @@ def main_ui_panel(is_depth_tab): 'midas_v21', 'midas_v21_small', 'zoedepth_n (indoor)', 'zoedepth_k (outdoor)', 'zoedepth_nk'], type="index") - with gr.Box(): + with gr.Box() as cur_option_root: + inp -= 'depthmap_gen_row_1', cur_option_root with gr.Row(): inp += go.BOOST, gr.Checkbox(label="BOOST (multi-resolution merging)") - inp += go.NET_SIZE_MATCH, gr.Checkbox(label="Match net size to input size") + inp += go.NET_SIZE_MATCH, gr.Checkbox(label="Match net size to input size", visible=False) with gr.Row(visible=False) as options_depend_on_match_size: inp += go.NET_WIDTH, gr.Slider(minimum=64, maximum=2048, step=64, label='Net width') inp += go.NET_HEIGHT, gr.Slider(minimum=64, maximum=2048, step=64, label='Net height') - with gr.Box(): + with gr.Box() as cur_option_root: + inp -= 'depthmap_gen_row_2', cur_option_root with gr.Row(): with gr.Group(): # 50% of width inp += "save_outputs", gr.Checkbox(label="Save Outputs", value=True) @@ -58,7 +61,9 @@ def main_ui_panel(is_depth_tab): label="Combine input and depthmap into one image") inp += go.OUTPUT_DEPTH_COMBINE_AXIS, gr.Radio( label="Combine axis", choices=['Vertical', 'Horizontal'], type="value", visible=False) - with gr.Box(): + + with gr.Box() as cur_option_root: + inp -= 'depthmap_gen_row_3', cur_option_root with gr.Row(): inp += go.CLIPDEPTH, gr.Checkbox(label="Clip and renormalize DepthMap") with gr.Row(visible=False) as clip_options_row_1: @@ -71,8 +76,9 @@ def main_ui_panel(is_depth_tab): with gr.Column(visible=False) as stereo_options: with gr.Row(): inp += go.STEREO_MODES, gr.CheckboxGroup( - ["left-right", "right-left", "top-bottom", "bottom-top", "red-cyan-anaglyph"], - label="Output") + ["left-right", "right-left", "top-bottom", "bottom-top", "red-cyan-anaglyph", + "left-only", "only-right", "cyan-red-reverseanaglyph" + ][0:8 if backbone.get_opt('depthmap_script_extra_stereomodes', False) else 5], label="Output") with gr.Row(): inp 
+= go.STEREO_DIVERGENCE, gr.Slider(minimum=0.05, maximum=10.005, step=0.01, label='Divergence (3D effect)') @@ -164,29 +170,13 @@ def update_default_net_size(model_type): inputs=[inp[go.BOOST], inp[go.NET_SIZE_MATCH]], outputs=[inp[go.NET_SIZE_MATCH], options_depend_on_match_size] ) - inp[go.NET_SIZE_MATCH].change( - fn=lambda a, b: options_depend_on_match_size.update(visible=not a and not b), - inputs=[inp[go.BOOST], inp[go.NET_SIZE_MATCH]], - outputs=[options_depend_on_match_size] - ) + inp.add_rule(options_depend_on_match_size, 'visible-if-not', go.NET_SIZE_MATCH) - inp[go.DO_OUTPUT_DEPTH].change( - fn=lambda a: (inp[go.OUTPUT_DEPTH_INVERT].update(visible=a), options_depend_on_output_depth_1.update(visible=a)), - inputs=[inp[go.DO_OUTPUT_DEPTH]], - outputs=[inp[go.OUTPUT_DEPTH_INVERT], options_depend_on_output_depth_1] - ) + inp.add_rule(options_depend_on_output_depth_1, 'visible-if', go.DO_OUTPUT_DEPTH) + inp.add_rule(go.OUTPUT_DEPTH_INVERT, 'visible-if', go.DO_OUTPUT_DEPTH) + inp.add_rule(go.OUTPUT_DEPTH_COMBINE_AXIS, 'visible-if', go.OUTPUT_DEPTH_COMBINE) + inp.add_rule(clip_options_row_1, 'visible-if', go.CLIPDEPTH) - inp[go.OUTPUT_DEPTH_COMBINE].change( - fn=lambda v: inp[go.OUTPUT_DEPTH_COMBINE_AXIS].update(visible=v), - inputs=[inp[go.OUTPUT_DEPTH_COMBINE]], - outputs=[inp[go.OUTPUT_DEPTH_COMBINE_AXIS]] - ) - - inp[go.CLIPDEPTH].change( - fn=lambda v: clip_options_row_1.update(visible=v), - inputs=[inp[go.CLIPDEPTH]], - outputs=[clip_options_row_1] - ) inp[go.CLIPDEPTH_FAR].change( fn=lambda a, b: a if b < a else b, inputs=[inp[go.CLIPDEPTH_FAR], inp[go.CLIPDEPTH_NEAR]], @@ -198,36 +188,12 @@ def update_default_net_size(model_type): outputs=[inp[go.CLIPDEPTH_FAR]] ) - inp[go.GEN_STEREO].change( - fn=lambda v: stereo_options.update(visible=v), - inputs=[inp[go.GEN_STEREO]], - outputs=[stereo_options] - ) - - inp[go.GEN_NORMALMAP].change( - fn=lambda v: normalmap_options.update(visible=v), - inputs=[inp[go.GEN_NORMALMAP]], - outputs=[normalmap_options] - ) 
- - inp[go.GEN_SIMPLE_MESH].change( - fn=lambda v: mesh_options.update(visible=v), - inputs=[inp[go.GEN_SIMPLE_MESH]], - outputs=[mesh_options] - ) - + inp.add_rule(stereo_options, 'visible-if', go.GEN_STEREO) + inp.add_rule(normalmap_options, 'visible-if', go.GEN_NORMALMAP) + inp.add_rule(mesh_options, 'visible-if', go.GEN_SIMPLE_MESH) if is_depth_tab: - inp[go.GEN_INPAINTED_MESH].change( - fn=lambda v: inpaint_options_row_0.update(visible=v), - inputs=[inp[go.GEN_INPAINTED_MESH]], - outputs=[inpaint_options_row_0] - ) - - inp[go.GEN_REMBG].change( - fn=lambda v: bgrem_options.update(visible=v), - inputs=[inp[go.GEN_REMBG]], - outputs=[bgrem_options] - ) + inp.add_rule(inpaint_options_row_0, 'visible-if', go.GEN_INPAINTED_MESH) + inp.add_rule(bgrem_options, 'visible-if', go.GEN_REMBG) return inp @@ -282,7 +248,7 @@ def on_ui_tabs(): "in output directory when a file already exists.", value=True) submit = gr.Button('Generate', elem_id="depthmap_generate", variant='primary') - inp += main_ui_panel(True) # Main panel is inserted here + inp |= main_ui_panel(True) # Main panel is inserted here unloadmodels = gr.Button('Unload models', elem_id="depthmap_unloadmodels") with gr.Column(variant='panel'): @@ -338,18 +304,23 @@ def on_ui_tabs(): submit_vid = gr.Button('Generate Video', elem_id="depthmap_generatevideo", variant='primary') - inp += inp.enkey_tail() depthmap_mode_0.select(lambda: '0', None, inp['depthmap_mode']) depthmap_mode_1.select(lambda: '1', None, inp['depthmap_mode']) depthmap_mode_2.select(lambda: '2', None, inp['depthmap_mode']) + def custom_depthmap_change_fn(turned_on): + return inp['custom_depthmap_img'].update(visible=turned_on), \ + inp['depthmap_gen_row_0'].update(visible=not turned_on), \ + inp['depthmap_gen_row_1'].update(visible=not turned_on), \ + inp['depthmap_gen_row_3'].update(visible=not turned_on), not turned_on inp['custom_depthmap'].change( - fn=lambda v: inp['custom_depthmap_img'].update(visible=v), + fn=custom_depthmap_change_fn, 
inputs=[inp['custom_depthmap']], - outputs=[inp['custom_depthmap_img']] - ) + outputs=[inp[st] for st in + ['custom_depthmap_img', 'depthmap_gen_row_0', 'depthmap_gen_row_1', 'depthmap_gen_row_3', + go.DO_OUTPUT_DEPTH]]) unloadmodels.click( fn=unload_models, diff --git a/src/core.py b/src/core.py index 9294ffe..a997383 100644 --- a/src/core.py +++ b/src/core.py @@ -196,8 +196,8 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp yield count, 'foreground_mask', mask_image - # A weird quirk: if user tries to save depthmap, whereas input depthmap is used, - # depthmap will be outputed, even if output_depth_combine is used. + # A weird quirk: if user tries to save depthmap, whereas custom depthmap is used, + # depthmap will not be outputed, even if output_depth_combine is used. if inp[go.DO_OUTPUT_DEPTH] and inputdepthmaps[count] is None: if inp[go.DO_OUTPUT_DEPTH]: img_depth = cv2.bitwise_not(img_output) if inp[go.OUTPUT_DEPTH_INVERT] else img_output diff --git a/src/gradio_args_transport.py b/src/gradio_args_transport.py index c5cea82..101c3a4 100644 --- a/src/gradio_args_transport.py +++ b/src/gradio_args_transport.py @@ -1,53 +1,87 @@ import gradio as gr -import enum class GradioComponentBundle: - """Allows easier transportation of massive ammount of named gradio inputs""" + """Allows easier transportation of massive ammount of named gradio inputs. + Allows adding visibility rules quicker.""" def __init__(self): self.internal = {} + self.internal_ignored = {} - def append(self, thing): # thing: (str | enum.Enum, gr.components.Component) - if isinstance(thing, GradioComponentBundle): - keys = list(thing.internal.keys()) - for key in keys: - assert key not in self.internal, f"Already bundled component with name {key}." 
- self.internal[key] = thing[key] - elif isinstance(thing, tuple) and len(thing) == 2 and isinstance(thing[1], gr.components.Component): + def _raw_assignment(self, key, value, ignored=False): + assert key not in self.internal, f"Already bundled component with name {key}." + assert key not in self.internal_ignored, f"Already bundled component with name {key}." + if not ignored: + self.internal[key] = value + else: + self.internal_ignored[key] = value + + def _append_el(self, thing, ignored=False): + if isinstance(thing, tuple) and len(thing) == 2 and isinstance(thing[1], gr.blocks.Block): name = thing[0] if isinstance(thing[0], str) else thing[0].name.lower() # .name is for Enums if hasattr(thing[0], 'df') and thing[0].df is not None: thing[1].value = thing[0].df - assert name not in self.internal, f"Already bundled component with name {thing[0]}." - self.internal[name] = thing[1] + self._raw_assignment(name, thing[1], ignored) elif isinstance(thing, gr.components.Component) and thing.elem_id is not None: - assert thing.elem_id not in self.internal, f"Already bundled component with name {thing.elem_id}." 
- self.internal[thing.elem_id] = thing + self._raw_assignment(thing.elem_id, thing, ignored) else: - assert False, f"This object can not be bundled, {str(thing)}" + raise Exception(f"This object can not be bundled, {str(thing)}") def __iadd__(self, els): - self.append(els) + """Add an input element that will be packed into a bundle.""" + self._append_el(els, ignored=False) + return self + + def __isub__(self, els): + """Add an element that will not be packed into a bundle, but will be accessible.""" + self._append_el(els, ignored=True) + return self + + def __ior__(self, thing): + """Add an extra bundle into your bundle, so you could have more bundeled items in your bundle.""" + assert isinstance(thing, GradioComponentBundle), "Use += or -= for bundling elements" + for key in list(thing.internal.keys()): + self._raw_assignment(key, thing[key], False) + for key in list(thing.internal_ignored.keys()): + self._raw_assignment(key, thing[key], True) return self def __getitem__(self, key): """Return the gradio component elem_id""" if hasattr(key, 'name'): key = key.name.lower() # for enum elements + if key in self.internal_ignored: + return self.internal_ignored[key] return self.internal[key] - # def send_format(self): - # return set(self.internal.values()) + def __contains__(self, key): + if hasattr(key, 'name'): + key = key.name.lower() # for enum elements + return key in self.internal_ignored or key in self.internal def enkey_tail(self): + """Must be the last element of the bundle for unbundling to work""" keys = sorted(list(self.internal.keys())) head = gr.HTML(elem_id="zzz_depthmap_enkey", value="\u222F" + "\u222F".join(keys), visible=False) return head def enkey_body(self): + """This is what should be passed into the function that is called by gradio""" return [self.internal[x] for x in sorted(list(self.internal.keys()))] + def add_rule(self, first, rule, second): + first = self[first] if first in self else first + second = self[second] if second in self else 
second + if rule == 'visible-if-not': + second.change(fn=lambda v: first.update(visible=not v), inputs=[second], outputs=[first]) + elif rule == 'visible-if': + second.change(fn=lambda v: first.update(visible=v), inputs=[second], outputs=[first]) + else: + raise Exception(f'Unknown rule type {rule}') + @staticmethod def enkey_to_dict(inp): - """Enkey format: bunch of Gradio components, + """Unbundle: get a dictionary with stuff after it is sent bby the gradio to the function. + Enkey format: bunch of Gradio components, then a Gradio component, which value is concatination of names of the previous Gradio objects""" assert inp[-1].startswith("\u222F") ret = {} diff --git a/src/stereoimage_generation.py b/src/stereoimage_generation.py index aa20128..6ac48e0 100644 --- a/src/stereoimage_generation.py +++ b/src/stereoimage_generation.py @@ -25,7 +25,7 @@ def create_stereoimages(original_image, depthmap, divergence, separation=0.0, mo Affects which parts of the image will be visible in left and/or right half. :param list modes: how the result will look like. By default only 'left-right' is generated - a picture for the left eye will be on the left and the picture from the right eye - on the right. - The supported modes are: 'left-right', 'right-left', 'top-bottom', 'bottom-top', 'red-cyan-anaglyph'. + Some of the supported modes are: 'left-right', 'right-left', 'top-bottom', 'bottom-top', 'red-cyan-anaglyph'. :param float stereo_balance: has to do with how the divergence will be split among the two parts of the image, must be in the [-1.0; 1.0] interval. :param str fill_technique: applying divergence inevitably creates some gaps in the image. @@ -48,16 +48,23 @@ def create_stereoimages(original_image, depthmap, divergence, separation=0.0, mo results = [] for mode in modes: - if mode == 'left-right': + if mode == 'left-right': # Most popular format. Common use case: displaying in HMD. 
results.append(np.hstack([left_eye, right_eye])) - elif mode == 'right-left': + elif mode == 'right-left': # Cross-viewing results.append(np.hstack([right_eye, left_eye])) elif mode == 'top-bottom': results.append(np.vstack([left_eye, right_eye])) elif mode == 'bottom-top': results.append(np.vstack([right_eye, left_eye])) - elif mode == 'red-cyan-anaglyph': + elif mode == 'red-cyan-anaglyph': # Anaglyth glasses results.append(overlap_red_cyan(left_eye, right_eye)) + elif mode == 'left-only': + results.append(left_eye) + elif mode == 'only-right': + results.append(right_eye) + elif mode == 'cyan-red-reverseanaglyph': # Anaglyth glasses worn upside down + # Better for people whose main eye is left + results.append(overlap_red_cyan(right_eye, left_eye)) else: raise Exception('Unknown mode') return [Image.fromarray(r) for r in results] From 32d07ebb92230011d43cc8dc84405b22bd5ecaef Mon Sep 17 00:00:00 2001 From: semjon00 Date: Thu, 27 Jul 2023 10:39:15 +0300 Subject: [PATCH 05/16] Offset exponent parameter for stereo image generation Preparing to change the algo again... 
--- src/common_constants.py | 1 + src/common_ui.py | 1 + src/core.py | 3 ++- src/stereoimage_generation.py | 28 +++++++++++++++++----------- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/common_constants.py b/src/common_constants.py index d360e20..a070436 100644 --- a/src/common_constants.py +++ b/src/common_constants.py @@ -36,6 +36,7 @@ def __init__(self, default_value=None, *args): STEREO_DIVERGENCE = 2.5 STEREO_SEPARATION = 0.0 STEREO_FILL_ALGO = "polylines_sharp" + STEREO_OFFSET_EXPONENT = 2.0 STEREO_BALANCE = 0.0 GEN_NORMALMAP = False diff --git a/src/common_ui.py b/src/common_ui.py index 88606fa..8514b0e 100644 --- a/src/common_ui.py +++ b/src/common_ui.py @@ -89,6 +89,7 @@ def main_ui_panel(is_depth_tab): choices=['none', 'naive', 'naive_interpolating', 'polylines_soft', 'polylines_sharp'], type="value") + inp += go.STEREO_OFFSET_EXPONENT, gr.Slider(label="Magic exponent", minimum=1, maximum=2, step=1) inp += go.STEREO_BALANCE, gr.Slider(minimum=-1.0, maximum=1.0, step=0.05, label='Balance between eyes') diff --git a/src/core.py b/src/core.py index a997383..f936601 100644 --- a/src/core.py +++ b/src/core.py @@ -215,7 +215,8 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp stereoimages = create_stereoimages( inputimages[count], img_output, inp[go.STEREO_DIVERGENCE], inp[go.STEREO_SEPARATION], - inp[go.STEREO_MODES], inp[go.STEREO_BALANCE], inp[go.STEREO_FILL_ALGO]) + inp[go.STEREO_MODES], + inp[go.STEREO_BALANCE], inp[go.STEREO_OFFSET_EXPONENT], inp[go.STEREO_FILL_ALGO]) for c in range(0, len(stereoimages)): yield count, inp[go.STEREO_MODES][c], stereoimages[c] diff --git a/src/stereoimage_generation.py b/src/stereoimage_generation.py index 6ac48e0..e751214 100644 --- a/src/stereoimage_generation.py +++ b/src/stereoimage_generation.py @@ -10,8 +10,8 @@ def Inner(func): return lambda *args, **kwargs: func(*args, **kwargs) from PIL import Image -def create_stereoimages(original_image, depthmap, 
divergence, separation=0.0, modes=None, stereo_balance=0.0, - fill_technique='polylines_sharp'): +def create_stereoimages(original_image, depthmap, divergence, separation=0.0, modes=None, + stereo_balance=0.0, stereo_offset_exponent=1.0, fill_technique='polylines_sharp'): """Creates stereoscopic images. An effort is made to make them look nice, but beware that the resulting image will have some distortion. The correctness was not rigorously tested. @@ -28,6 +28,8 @@ def create_stereoimages(original_image, depthmap, divergence, separation=0.0, mo Some of the supported modes are: 'left-right', 'right-left', 'top-bottom', 'bottom-top', 'red-cyan-anaglyph'. :param float stereo_balance: has to do with how the divergence will be split among the two parts of the image, must be in the [-1.0; 1.0] interval. + :param float stereo_offset_exponent: Higher values move objects residing + between close and far plane more to the far plane :param str fill_technique: applying divergence inevitably creates some gaps in the image. This parameter specifies the technique that will be used to fill in the blanks in the two resulting images. Must be one of the following: 'none', 'naive', 'naive_interpolating', 'polylines_soft', 'polylines_sharp'. 
@@ -42,9 +44,11 @@ def create_stereoimages(original_image, depthmap, divergence, separation=0.0, mo original_image = np.asarray(original_image) balance = (stereo_balance + 1) / 2 left_eye = original_image if balance < 0.001 else \ - apply_stereo_divergence(original_image, depthmap, +1 * divergence * balance, -1 * separation, fill_technique) + apply_stereo_divergence(original_image, depthmap, +1 * divergence * balance, -1 * separation, + stereo_offset_exponent, fill_technique) right_eye = original_image if balance > 0.999 else \ - apply_stereo_divergence(original_image, depthmap, -1 * divergence * (1 - balance), separation, fill_technique) + apply_stereo_divergence(original_image, depthmap, -1 * divergence * (1 - balance), separation, + stereo_offset_exponent, fill_technique) results = [] for mode in modes: @@ -70,7 +74,7 @@ def create_stereoimages(original_image, depthmap, divergence, separation=0.0, mo return [Image.fromarray(r) for r in results] -def apply_stereo_divergence(original_image, depth, divergence, separation, fill_technique): +def apply_stereo_divergence(original_image, depth, divergence, separation, stereo_offset_exponent, fill_technique): depth_min = depth.min() depth_max = depth.max() normalized_depth = (depth - depth_min) / (depth_max - depth_min) @@ -79,17 +83,18 @@ def apply_stereo_divergence(original_image, depth, divergence, separation, fill_ if fill_technique in ['none', 'naive', 'naive_interpolating']: return apply_stereo_divergence_naive( - original_image, normalized_depth, divergence_px, separation_px, fill_technique + original_image, normalized_depth, divergence_px, separation_px, stereo_offset_exponent, fill_technique ) if fill_technique in ['polylines_soft', 'polylines_sharp']: return apply_stereo_divergence_polylines( - original_image, normalized_depth, divergence_px, separation_px, fill_technique + original_image, normalized_depth, divergence_px, separation_px, stereo_offset_exponent, fill_technique ) @njit(parallel=False) def 
apply_stereo_divergence_naive( - original_image, normalized_depth, divergence_px: float, separation_px: float, fill_technique): + original_image, normalized_depth, divergence_px: float, separation_px: float, stereo_offset_exponent: float, + fill_technique: str): h, w, c = original_image.shape derived_image = np.zeros_like(original_image) @@ -99,7 +104,7 @@ def apply_stereo_divergence_naive( # Swipe order should ensure that pixels that are closer overwrite # (at their destination) pixels that are less close for col in range(w) if divergence_px < 0 else range(w - 1, -1, -1): - col_d = col + int((normalized_depth[row][col] ** 2) * divergence_px + separation_px) + col_d = col + int((normalized_depth[row][col] ** stereo_offset_exponent) * divergence_px + separation_px) if 0 <= col_d < w: derived_image[row][col_d] = original_image[row][col] filled[row * w + col_d] = 1 @@ -155,7 +160,8 @@ def apply_stereo_divergence_naive( @njit(parallel=True) # fastmath=True does not reasonably improve performance def apply_stereo_divergence_polylines( - original_image, normalized_depth, divergence_px: float, separation_px: float, fill_technique): + original_image, normalized_depth, divergence_px: float, separation_px: float, stereo_offset_exponent: float, + fill_technique: str): # This code treats rows of the image as polylines # It generates polylines, morphs them (applies divergence) to them, and then rasterizes them EPSILON = 1e-7 @@ -172,7 +178,7 @@ def apply_stereo_divergence_polylines( pt[pt_end] = [-1.0 * w, 0.0, 0.0] pt_end += 1 for col in range(0, w): - coord_d = (normalized_depth[row][col] ** 2) * divergence_px + coord_d = (normalized_depth[row][col] ** stereo_offset_exponent) * divergence_px coord_x = col + 0.5 + coord_d + separation_px if PIXEL_HALF_WIDTH < EPSILON: pt[pt_end] = [coord_x, abs(coord_d), col] From 1dc48c21de3867281496c0fee63d0940cf91648d Mon Sep 17 00:00:00 2001 From: semjon00 Date: Wed, 19 Jul 2023 23:17:33 +0300 Subject: [PATCH 06/16] Single Video mode 
(Experimental) --- scripts/depthmap.py | 5 +- scripts/depthmap_api.py | 5 +- src/common_constants.py | 3 +- src/common_ui.py | 97 +++++++++++++++++++++------ src/core.py | 80 ++++++++++++++++------ src/video_mode.py | 144 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 288 insertions(+), 46 deletions(-) create mode 100644 src/video_mode.py diff --git a/scripts/depthmap.py b/scripts/depthmap.py index 918eaa6..7dbdbb2 100644 --- a/scripts/depthmap.py +++ b/scripts/depthmap.py @@ -2,6 +2,7 @@ import gradio as gr from modules import shared import modules.scripts as scripts +from PIL import Image from src import backbone from src import common_ui @@ -46,8 +47,8 @@ def run(self, p, *inputs): gen_obj = core_generation_funnel(p.outpath_samples, inputimages, None, None, inputs, backbone.gather_ops()) for input_i, type, result in gen_obj: - if type in ['simple_mesh', 'inpainted_mesh']: - continue # We are in script mode: do nothing with the filenames + if not isinstance(result, Image.Image): + continue # get generation parameters # TODO: could reuse diff --git a/scripts/depthmap_api.py b/scripts/depthmap_api.py index a23d792..f731dea 100644 --- a/scripts/depthmap_api.py +++ b/scripts/depthmap_api.py @@ -67,8 +67,9 @@ async def process( results_based = [] for count, type, result in gen_obj: - if type not in ['simple_mesh', 'inpainted_mesh']: - results_based += [encode_to_base64(result)] + if not isinstance(result, Image.Image): + continue + results_based += [encode_to_base64(result)] return {"images": results_based, "info": "Success"} diff --git a/src/common_constants.py b/src/common_constants.py index a070436..36752b4 100644 --- a/src/common_constants.py +++ b/src/common_constants.py @@ -25,7 +25,8 @@ def __init__(self, default_value=None, *args): DO_OUTPUT_DEPTH = True OUTPUT_DEPTH_INVERT = False OUTPUT_DEPTH_COMBINE = False - OUTPUT_DEPTH_COMBINE_AXIS = "Horizontal" + OUTPUT_DEPTH_COMBINE_AXIS = "Horizontal" # Format (str) is subject to change + 
DO_OUTPUT_DEPTH_PREDICTION = False # Hidden, do not use, subject to change CLIPDEPTH = False CLIPDEPTH_FAR = 0.0 diff --git a/src/common_ui.py b/src/common_ui.py index 8514b0e..1555791 100644 --- a/src/common_ui.py +++ b/src/common_ui.py @@ -3,7 +3,7 @@ import gradio as gr from PIL import Image -from src import backbone +from src import backbone, video_mode from src.core import core_generation_funnel, unload_models, run_makevideo from src.depthmap_generation import ModelHolder from src.gradio_args_transport import GradioComponentBundle @@ -217,6 +217,33 @@ def open_folder_action(): else: sp.Popen(["xdg-open", path]) + +def depthmap_mode_video(inp): + inp += gr.File(elem_id='depthmap_input_video', label="Video or animated file", + file_count="single", interactive=True, type="file") + inp += gr.Checkbox(elem_id="depthmap_vm_custom_checkbox", + label="Use custom/pregenerated DepthMap video", value=False) + inp += gr.File(elem_id='depthmap_vm_custom', file_count="single", + interactive=True, type="file", visible=False) + with gr.Row(): + inp += gr.Checkbox(elem_id='depthmap_vm_compress_checkbox', label="Compress colorvideos?", value=False) + inp += gr.Slider(elem_id='depthmap_vm_compress_bitrate', label="Bitrate (kbit)", visible=False, + minimum=1000, value=15000, maximum=50000, step=250) + + inp['depthmap_vm_custom_checkbox'].change( + fn=lambda v: inp['depthmap_vm_custom'].update(visible=v), + inputs=[inp['depthmap_vm_custom_checkbox']], + outputs=[inp['depthmap_vm_custom']] + ) + + inp['depthmap_vm_compress_checkbox'].change( + fn=lambda v: inp['depthmap_vm_compress_bitrate'].update(visible=v), + inputs=[inp['depthmap_vm_compress_checkbox']], + outputs=[inp['depthmap_vm_compress_bitrate']] + ) + + return inp + def on_ui_tabs(): inp = GradioComponentBundle() with gr.Blocks(analytics_enabled=False, title="DepthMap") as depthmap_interface: @@ -248,6 +275,8 @@ def on_ui_tabs(): label="Skip generation and use (edited/custom) depthmaps " "in output directory when a file 
already exists.", value=True) + with gr.TabItem('Single Video') as depthmap_mode_3: + inp = depthmap_mode_video(inp) submit = gr.Button('Generate', elem_id="depthmap_generate", variant='primary') inp |= main_ui_panel(True) # Main panel is inserted here unloadmodels = gr.Button('Unload models', elem_id="depthmap_unloadmodels") @@ -310,6 +339,7 @@ def on_ui_tabs(): depthmap_mode_0.select(lambda: '0', None, inp['depthmap_mode']) depthmap_mode_1.select(lambda: '1', None, inp['depthmap_mode']) depthmap_mode_2.select(lambda: '2', None, inp['depthmap_mode']) + depthmap_mode_3.select(lambda: '3', None, inp['depthmap_mode']) def custom_depthmap_change_fn(turned_on): return inp['custom_depthmap_img'].update(visible=turned_on), \ @@ -369,6 +399,18 @@ def custom_depthmap_change_fn(turned_on): return depthmap_interface +def format_exception(e: Exception): + traceback.print_exc() + msg = '

' + 'ERROR: ' + str(e) + '

' + '\n' + if 'out of GPU memory' not in msg: + msg += \ + 'Please report this issue ' \ + f'here. ' \ + 'Make sure to provide the full stacktrace: \n' + msg += '' + traceback.format_exc() + '' + return msg + + def run_generate(*inputs): inputs = GradioComponentBundle.enkey_to_dict(inputs) depthmap_mode = inputs['depthmap_mode'] @@ -381,10 +423,21 @@ def run_generate(*inputs): custom_depthmap_img = inputs['custom_depthmap_img'] inputimages = [] - # Allow supplying custom depthmaps - inputdepthmaps = [] - # Also keep track of original file names - inputnames = [] + inputdepthmaps = [] # Allow supplying custom depthmaps + inputnames = [] # Also keep track of original file names + + if depthmap_mode == '3': + try: + custom_depthmap = inputs['depthmap_vm_custom'] \ + if inputs['depthmap_vm_custom_checkbox'] else None + colorvids_bitrate = inputs['depthmap_vm_compress_bitrate'] \ + if inputs['depthmap_vm_compress_checkbox'] else None + ret = video_mode.gen_video( + inputs['depthmap_input_video'], backbone.get_outpath(), inputs, custom_depthmap, colorvids_bitrate) + return [], None, None, ret + except Exception as e: + ret = format_exception(e) + return [], None, None, ret if depthmap_mode == '2' and depthmap_batch_output_dir != '': outpath = depthmap_batch_output_dir @@ -410,7 +463,9 @@ def run_generate(*inputs): image = Image.open(os.path.abspath(img.name)) inputimages.append(image) inputnames.append(os.path.splitext(img.orig_name)[0]) + print(f'{len(inputimages)} images will be processed') elif depthmap_mode == '2': # Batch from Directory + # TODO: There is a RAM leak when we process batches, I can smell it! Or maybe it is gone. assert not backbone.get_cmd_opt('hide_ui_dir_config', False), '--hide-ui-dir-config option must be disabled' if depthmap_batch_input_dir == '': return [], None, None, "Please select an input directory." 
@@ -444,25 +499,22 @@ def run_generate(*inputs): gen_obj = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs, backbone.gather_ops()) - show_images = [] + # Saving images + img_results = [] + results_total = 0 inpainted_mesh_fi = mesh_simple_fi = None msg = "" # Empty string is never returned while True: try: input_i, type, result = next(gen_obj) + results_total += 1 except StopIteration: # TODO: return more info - msg = '

Successfully generated.

' + msg = '

Successfully generated

' if results_total > 0 else \ + '

Successfully generated nothing - please check the settings and try again

' break except Exception as e: - traceback.print_exc() - msg = '

' + 'ERROR: ' + str(e) + '

' + '\n' - if 'out of GPU memory' not in msg: - msg +=\ - 'Please report this issue ' \ - f'here. ' \ - 'Make sure to provide the full stacktrace: \n' - msg += '' + traceback.format_exc() + '' + msg = format_exception(e) break if type == 'simple_mesh': mesh_simple_fi = result @@ -470,14 +522,17 @@ def run_generate(*inputs): if type == 'inpainted_mesh': inpainted_mesh_fi = result continue + if not isinstance(result, Image.Image): + print(f'This is not supposed to happen! Somehow output type {type} is not supported! Input_i: {input_i}.') + continue + img_results += [(input_i, type, result)] - basename = 'depthmap' - if depthmap_mode == '2' and inputnames[input_i] is not None and outpath != backbone.get_opt('outdir_extras_samples', None): - basename = Path(inputnames[input_i]).stem - - show_images += [result] if inputs["save_outputs"]: try: + basename = 'depthmap' + if depthmap_mode == '2' and inputnames[input_i] is not None: + if outpath != backbone.get_opt('outdir_extras_samples', None): + basename = Path(inputnames[input_i]).stem suffix = "" if type == "depth" else f"{type}" backbone.save_image(result, path=outpath, basename=basename, seed=None, prompt=None, extension=backbone.get_opt('samples_format', 'png'), short_filename=True, @@ -496,4 +551,4 @@ def run_generate(*inputs): if backbone.get_opt('depthmap_script_show_3d_inpaint', True): if inpainted_mesh_fi is not None and len(inpainted_mesh_fi) > 0: display_mesh_fi = inpainted_mesh_fi - return show_images, inpainted_mesh_fi, display_mesh_fi, msg.replace('\n', '
') + return map(lambda x: x[2], img_results), inpainted_mesh_fi, display_mesh_fi, msg.replace('\n', '
') diff --git a/src/core.py b/src/core.py index f936601..d07e273 100644 --- a/src/core.py +++ b/src/core.py @@ -1,4 +1,6 @@ from pathlib import Path + +import PIL.Image from PIL import Image try: @@ -37,6 +39,14 @@ model_holder = ModelHolder() +def convert_to_i16(arr): + # Single channel, 16 bit image. This loses some precision! + # uint16 conversion uses round-down, therefore values should be [0; 2**16) + numbytes = 2 + max_val = (2 ** (8 * numbytes)) + out = np.clip(arr * max_val, 0, max_val - 0.1) # -0.1 from above is needed to avoid overflowing + return out.astype("uint16") + def convert_i16_to_rgb(image, like): # three channel, 8 bits per channel image output = np.zeros_like(like) @@ -50,6 +60,10 @@ class CoreGenerationFunnelInp: """This class takes a dictionary and creates a core_generation_funnel inp. Non-applicable parameters are silently discarded (no error)""" def __init__(self, values): + if isinstance(values, CoreGenerationFunnelInp): + values = values.values + values = {(k.name if isinstance(k, GenerationOptions) else k).lower(): v for k, v in values.items()} + self.values = {} for setting in GenerationOptions: name = setting.name.lower() @@ -74,7 +88,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp inp = CoreGenerationFunnelInp(inp) if ops is None: - ops = {} + ops = backbone.gather_ops() model_holder.update_settings(**ops) # TODO: ideally, run_depthmap should not save meshes - that makes the function not pure @@ -127,17 +141,37 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp raw_prediction_invert = False """True if near=dark on raw_prediction""" out = None + if inputdepthmaps is not None and inputdepthmaps[count] is not None: # use custom depthmap - dimg = inputdepthmaps[count] - # resize if not same size as input - if dimg.width != inputimages[count].width or dimg.height != inputimages[count].height: - dimg = dimg.resize((inputimages[count].width, inputimages[count].height), 
Image.Resampling.LANCZOS) - - if dimg.mode == 'I' or dimg.mode == 'P' or dimg.mode == 'L': - out = np.asarray(dimg, dtype="float") + dp = inputdepthmaps[count] + if isinstance(dp, Image.Image): + if dp.width != inputimages[count].width or dp.height != inputimages[count].height: + try: # LANCZOS may fail on some formats + dp = dp.resize((inputimages[count].width, inputimages[count].height), Image.Resampling.LANCZOS) + except: + dp = dp.resize((inputimages[count].width, inputimages[count].height)) + # Trying desperately to rescale image to [0;1) without actually normalizing it + # Normalizing is avoided, because we want to preserve the scale of the original depthmaps + # (batch mode, video mode). + if len(dp.getbands()) == 1: + out = np.asarray(dp, dtype="float") + out_max = out.max() + if out_max < 256: + bit_depth = 8 + elif out_max < 65536: + bit_depth = 16 + else: + bit_depth = 32 + out /= 2.0 ** bit_depth + else: + out = np.asarray(dp, dtype="float")[:, :, 0] + out /= 256.0 else: - out = np.asarray(dimg, dtype="float")[:, :, 0] + # Should be in interval [0; 1], values outside of this range will be clipped. + out = np.asarray(dp, dtype="float") + assert inputimages[count].height == out.shape[0], "Custom depthmap height mismatch" + assert inputimages[count].width == out.shape[1], "Custom depthmap width mismatch" else: # override net size (size may be different for different images) if inp[go.NET_SIZE_MATCH]: @@ -156,20 +190,20 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp # TODO: some models may output negative values, maybe these should be clamped to zero. 
if raw_prediction_invert: out *= -1 + if inp[go.DO_OUTPUT_DEPTH_PREDICTION]: + yield count, 'depth_prediction', np.copy(out) if inp[go.CLIPDEPTH]: out = (out - out.min()) / (out.max() - out.min()) # normalize to [0; 1] out = np.clip(out, inp[go.CLIPDEPTH_FAR], inp[go.CLIPDEPTH_NEAR]) + out = (out - out.min()) / (out.max() - out.min()) # normalize to [0; 1] else: # Regretfully, the depthmap is broken and will be replaced with a black image out = np.zeros(raw_prediction.shape) - out = (out - out.min()) / (out.max() - out.min()) # normalize to [0; 1] - - # Single channel, 16 bit image. This loses some precision! - # uint16 conversion uses round-down, therefore values should be [0; 2**16) - numbytes = 2 - max_val = (2 ** (8 * numbytes)) - out = np.clip(out * max_val, 0, max_val - 0.1) # Clipping form above is needed to avoid overflowing - img_output = out.astype("uint16") + + # Maybe we should not use img_output for everything, since we get better accuracy from + # the raw_prediction. However, it is not always supported. We maybe would like to achieve + # reproducibility, so depthmap of the image should be the same as generating the depthmap one more time. + img_output = convert_to_i16(out) """Depthmap (near=bright), as uint16""" # if 3dinpainting, store maps for processing in second pass @@ -198,8 +232,8 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp # A weird quirk: if user tries to save depthmap, whereas custom depthmap is used, # depthmap will not be outputed, even if output_depth_combine is used. 
- if inp[go.DO_OUTPUT_DEPTH] and inputdepthmaps[count] is None: - if inp[go.DO_OUTPUT_DEPTH]: + if inp[go.DO_OUTPUT_DEPTH]: + if inputdepthmaps[count] is None: img_depth = cv2.bitwise_not(img_output) if inp[go.OUTPUT_DEPTH_INVERT] else img_output if inp[go.OUTPUT_DEPTH_COMBINE]: axis = 1 if inp[go.OUTPUT_DEPTH_COMBINE_AXIS] == 'Horizontal' else 0 @@ -209,6 +243,13 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp yield count, 'concat_depth', img_concat else: yield count, 'depth', Image.fromarray(img_depth) + else: + # TODO: make it better + # Yes, this seems stupid, but this is, logically, what should happen - + # and this improves clarity of some other code. + # But we won't return it if there is only one image. + if len(inputimages) > 1: + yield count, 'depth', Image.fromarray(img_output) if inp[go.GEN_STEREO]: print("Generating stereoscopic images..") @@ -319,7 +360,6 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp def get_uniquefn(outpath, basename, ext): - # Inefficient and may fail, maybe use unbounded binary search? basecount = backbone.get_next_sequence_number(outpath, basename) if basecount > 0: basecount = basecount - 1 fullfn = None diff --git a/src/video_mode.py b/src/video_mode.py new file mode 100644 index 0000000..5b1eef0 --- /dev/null +++ b/src/video_mode.py @@ -0,0 +1,144 @@ +import pathlib +import traceback + +from PIL import Image +import numpy as np +import os + +from src import core +from src import backbone +from src.common_constants import GenerationOptions as go + + +def open_path_as_images(path, maybe_depthvideo=False): + """Takes the filepath, returns (fps, frames). 
Every frame is a Pillow Image object""" + suffix = pathlib.Path(path).suffix + if suffix == '.gif': + frames = [] + img = Image.open(path) + for i in range(img.n_frames): + img.seek(i) + frames.append(img.convert('RGB')) + return 1000 / img.info['duration'], frames + if suffix in ['.avi'] and maybe_depthvideo: + import imageio_ffmpeg + gen = imageio_ffmpeg.read_frames(path) + try: + video_info = next(gen) + if video_info['pix_fmt'] == 'gray16le': + width, height = video_info['size'] + frames = [] + for frame in gen: + # Not sure if this is implemented somewhere else + result = np.frombuffer(frame, dtype='uint16') + result.shape = (height, width * 3 // 2) # Why does it work? I don't remotely have any idea. + frames += [Image.fromarray(result)] + # TODO: Wrapping frames into Pillow objects is wasteful + return video_info['fps'], frames + finally: + gen.close() + if suffix in ['.webm', '.mp4', '.avi']: + from moviepy.video.io.VideoFileClip import VideoFileClip + clip = VideoFileClip(path) + frames = [Image.fromarray(x) for x in list(clip.iter_frames())] + # TODO: Wrapping frames into Pillow objects is wasteful + return clip.fps, frames + else: + try: + return 1000, [Image.open(path)] + except Exception as e: + raise Exception(f"Probably an unsupported file format: {suffix}") from e + + +def frames_to_video(fps, frames, path, name, colorvids_bitrate=None): + if frames[0].mode == 'I;16': # depthmap video + import imageio_ffmpeg + writer = imageio_ffmpeg.write_frames( + os.path.join(path, f"{name}.avi"), frames[0].size, 'gray16le', 'gray16le', fps, codec='ffv1', + macro_block_size=1) + try: + writer.send(None) + for frame in frames: + writer.send(np.array(frame)) + finally: + writer.close() + else: + arrs = [np.asarray(frame) for frame in frames] + from moviepy.video.io.ImageSequenceClip import ImageSequenceClip + clip = ImageSequenceClip(arrs, fps=fps) + done = False + priority = [('avi', 'png'), ('avi', 'rawvideo'), ('mp4', 'libx264'), ('webm', 'libvpx')] + if 
colorvids_bitrate: + priority = reversed(priority) + for format, codec in priority: + try: + br = f'{colorvids_bitrate}k' if codec not in ['png', 'rawvideo'] else None + clip.write_videofile(os.path.join(path, f"{name}.{format}"), codec=codec, bitrate=br) + done = True + break + except: + traceback.print_exc() + if not done: + raise Exception('Saving the video failed!') + + +def process_predicitons(predictions): + print('Processing generated depthmaps') + # TODO: Smart normalizing (drop 0.001% of top and bottom values from the video/every cut) + preds_min_value = min([pred.min() for pred in predictions]) + preds_max_value = max([pred.max() for pred in predictions]) + + input_depths = [] + for pred in predictions: + norm = (pred - preds_min_value) / (preds_max_value - preds_min_value) # normalize to [0; 1] + input_depths += [norm] + # TODO: Smoothening between frames (use splines) + # TODO: Detect cuts and process segments separately + + return input_depths + + +def gen_video(video, outpath, inp, custom_depthmap=None, colorvids_bitrate=None): + if inp[go.GEN_SIMPLE_MESH.name.lower()] or inp[go.GEN_INPAINTED_MESH.name.lower()]: + return 'Creating mesh-videos is not supported. Please split video into frames and use batch processing.' + + fps, input_images = open_path_as_images(os.path.abspath(video.name)) + os.makedirs(backbone.get_outpath(), exist_ok=True) + + if custom_depthmap is None: + print('Generating depthmaps for the video frames') + needed_keys = [go.COMPUTE_DEVICE, go.MODEL_TYPE, go.BOOST, go.NET_SIZE_MATCH, go.NET_WIDTH, go.NET_HEIGHT] + needed_keys = [x.name.lower() for x in needed_keys] + first_pass_inp = {k: v for (k, v) in inp.items() if k in needed_keys} + # We need predictions where frames are not normalized separately. + first_pass_inp[go.DO_OUTPUT_DEPTH_PREDICTION] = True + # No need in normalized frames. 
Properly processed depth video will be created in the second pass + first_pass_inp[go.DO_OUTPUT_DEPTH.name] = False + + gen_obj = core.core_generation_funnel(None, input_images, None, None, first_pass_inp) + predictions = [x[2] for x in list(gen_obj)] + input_depths = process_predicitons(predictions) + else: + print('Using custom depthmap video') + cdm_fps, input_depths = open_path_as_images(os.path.abspath(custom_depthmap.name), maybe_depthvideo=True) + assert len(input_depths) == len(input_images), 'Custom depthmap video length does not match input video length' + if input_depths[0].size != input_images[0].size: + print('Warning! Input video size and depthmap video size are not the same!') + + print('Generating output frames') + img_results = list(core.core_generation_funnel(None, input_images, input_depths, None, inp)) + gens = list(set(map(lambda x: x[1], img_results))) + + print('Saving generated frames as video outputs') + for gen in gens: + if gen == 'depth' and custom_depthmap is not None: + # Well, that would be extra stupid, even if user has picked this option for some reason + # (forgot to change the default?) + continue + + imgs = [x[2] for x in img_results if x[1] == gen] + basename = f'{gen}_video' + frames_to_video(fps, imgs, outpath, f"depthmap-{backbone.get_next_sequence_number()}-{basename}", + colorvids_bitrate) + print('All done. Video(s) saved!') + return 'Video generated!' if len(gens) == 1 else 'Videos generated!' 
From 639f5508edd44d913a50e7a7231d9f7dc305863f Mon Sep 17 00:00:00 2001 From: semjon00 Date: Thu, 27 Jul 2023 16:51:11 +0300 Subject: [PATCH 07/16] UI improvements --- src/common_ui.py | 63 +++++++++++++++++++++++++++-------------------- src/video_mode.py | 22 ++++++++--------- 2 files changed, 47 insertions(+), 38 deletions(-) diff --git a/src/common_ui.py b/src/common_ui.py index 1555791..93b500c 100644 --- a/src/common_ui.py +++ b/src/common_ui.py @@ -80,7 +80,7 @@ def main_ui_panel(is_depth_tab): "left-only", "only-right", "cyan-red-reverseanaglyph" ][0:8 if backbone.get_opt('depthmap_script_extra_stereomodes', False) else 5], label="Output") with gr.Row(): - inp += go.STEREO_DIVERGENCE, gr.Slider(minimum=0.05, maximum=10.005, step=0.01, + inp += go.STEREO_DIVERGENCE, gr.Slider(minimum=0.05, maximum=15.005, step=0.01, label='Divergence (3D effect)') inp += go.STEREO_SEPARATION, gr.Slider(minimum=-5.0, maximum=5.0, step=0.01, label='Separation (moves images apart)') @@ -99,13 +99,16 @@ def main_ui_panel(is_depth_tab): with gr.Column(visible=False) as normalmap_options: with gr.Row(): inp += go.NORMALMAP_PRE_BLUR, gr.Checkbox(label="Smooth before calculating normals") - inp += go.NORMALMAP_PRE_BLUR_KERNEL, gr.Slider(minimum=1, maximum=31, step=2, label='Pre-smooth kernel size') + inp += go.NORMALMAP_PRE_BLUR_KERNEL, gr.Slider(minimum=1, maximum=31, step=2, label='Pre-smooth kernel size', visible=False) + inp.add_rule(go.NORMALMAP_PRE_BLUR_KERNEL, 'visible-if', go.NORMALMAP_PRE_BLUR) with gr.Row(): inp += go.NORMALMAP_SOBEL, gr.Checkbox(label="Sobel gradient") inp += go.NORMALMAP_SOBEL_KERNEL, gr.Slider(minimum=1, maximum=31, step=2, label='Sobel kernel size') + inp.add_rule(go.NORMALMAP_SOBEL_KERNEL, 'visible-if', go.NORMALMAP_SOBEL) with gr.Row(): inp += go.NORMALMAP_POST_BLUR, gr.Checkbox(label="Smooth after calculating normals") - inp += go.NORMALMAP_POST_BLUR_KERNEL, gr.Slider(minimum=1, maximum=31, step=2, label='Post-smooth kernel size') + inp += 
go.NORMALMAP_POST_BLUR_KERNEL, gr.Slider(minimum=1, maximum=31, step=2, label='Post-smooth kernel size', visible=False) + inp.add_rule(go.NORMALMAP_POST_BLUR_KERNEL, 'visible-if', go.NORMALMAP_POST_BLUR) with gr.Row(): inp += go.NORMALMAP_INVERT, gr.Checkbox(label="Invert") @@ -219,8 +222,12 @@ def open_folder_action(): def depthmap_mode_video(inp): - inp += gr.File(elem_id='depthmap_input_video', label="Video or animated file", + gr.HTML(value="Single video mode allows generating videos from videos. Every frame of the video is processed, " + "please adjust generation settings, so that generation is not too slow. For the best results, " + "Use a zoedepth model, since they provide the highest level of temporal coherency.") + inp += gr.File(elem_id='depthmap_vm_input', label="Video or animated file", file_count="single", interactive=True, type="file") + inp += gr.Dropdown(elem_id="depthmap_vm_smoothening_mode", label="Smoothening", type="value", choices=['none']) inp += gr.Checkbox(elem_id="depthmap_vm_custom_checkbox", label="Use custom/pregenerated DepthMap video", value=False) inp += gr.File(elem_id='depthmap_vm_custom', file_count="single", @@ -230,23 +237,21 @@ def depthmap_mode_video(inp): inp += gr.Slider(elem_id='depthmap_vm_compress_bitrate', label="Bitrate (kbit)", visible=False, minimum=1000, value=15000, maximum=50000, step=250) - inp['depthmap_vm_custom_checkbox'].change( - fn=lambda v: inp['depthmap_vm_custom'].update(visible=v), - inputs=[inp['depthmap_vm_custom_checkbox']], - outputs=[inp['depthmap_vm_custom']] - ) - - inp['depthmap_vm_compress_checkbox'].change( - fn=lambda v: inp['depthmap_vm_compress_bitrate'].update(visible=v), - inputs=[inp['depthmap_vm_compress_checkbox']], - outputs=[inp['depthmap_vm_compress_bitrate']] - ) + inp.add_rule('depthmap_vm_custom', 'visible-if', 'depthmap_vm_custom_checkbox') + inp.add_rule('depthmap_vm_compress_bitrate', 'visible-if', 'depthmap_vm_compress_checkbox') return inp + +custom_css = """ 
+#depthmap_vm_input {height: 75px} +#depthmap_vm_custom {height: 75px} +""" + + def on_ui_tabs(): inp = GradioComponentBundle() - with gr.Blocks(analytics_enabled=False, title="DepthMap") as depthmap_interface: + with gr.Blocks(analytics_enabled=False, title="DepthMap", css=custom_css) as depthmap_interface: with gr.Row().style(equal_height=False): with gr.Column(variant='panel'): inp += 'depthmap_mode', gr.HTML(visible=False, value='0') @@ -341,17 +346,20 @@ def on_ui_tabs(): depthmap_mode_2.select(lambda: '2', None, inp['depthmap_mode']) depthmap_mode_3.select(lambda: '3', None, inp['depthmap_mode']) - def custom_depthmap_change_fn(turned_on): - return inp['custom_depthmap_img'].update(visible=turned_on), \ - inp['depthmap_gen_row_0'].update(visible=not turned_on), \ - inp['depthmap_gen_row_1'].update(visible=not turned_on), \ - inp['depthmap_gen_row_3'].update(visible=not turned_on), not turned_on - inp['custom_depthmap'].change( + def custom_depthmap_change_fn(mode, zero_on, three_on): + hide = mode == '0' and zero_on or mode == '3' and three_on + return inp['custom_depthmap_img'].update(visible=hide), \ + inp['depthmap_gen_row_0'].update(visible=not hide), \ + inp['depthmap_gen_row_1'].update(visible=not hide), \ + inp['depthmap_gen_row_3'].update(visible=not hide), not hide + custom_depthmap_change_els = ['depthmap_mode', 'custom_depthmap', 'depthmap_vm_custom_checkbox'] + for el in custom_depthmap_change_els: + inp[el].change( fn=custom_depthmap_change_fn, - inputs=[inp['custom_depthmap']], - outputs=[inp[st] for st in - ['custom_depthmap_img', 'depthmap_gen_row_0', 'depthmap_gen_row_1', 'depthmap_gen_row_3', - go.DO_OUTPUT_DEPTH]]) + inputs=[inp[el] for el in custom_depthmap_change_els], + outputs=[inp[st] for st in [ + 'custom_depthmap_img', 'depthmap_gen_row_0', 'depthmap_gen_row_1', 'depthmap_gen_row_3', + go.DO_OUTPUT_DEPTH]]) unloadmodels.click( fn=unload_models, @@ -433,7 +441,8 @@ def run_generate(*inputs): colorvids_bitrate = 
inputs['depthmap_vm_compress_bitrate'] \ if inputs['depthmap_vm_compress_checkbox'] else None ret = video_mode.gen_video( - inputs['depthmap_input_video'], backbone.get_outpath(), inputs, custom_depthmap, colorvids_bitrate) + inputs['depthmap_vm_input'], backbone.get_outpath(), inputs, custom_depthmap, colorvids_bitrate, + inputs['depthmap_vm_smoothening_mode']) return [], None, None, ret except Exception as e: ret = format_exception(e) diff --git a/src/video_mode.py b/src/video_mode.py index 5b1eef0..72b2dd6 100644 --- a/src/video_mode.py +++ b/src/video_mode.py @@ -82,23 +82,23 @@ def frames_to_video(fps, frames, path, name, colorvids_bitrate=None): raise Exception('Saving the video failed!') -def process_predicitons(predictions): +def process_predicitons(predictions, smoothening='none'): print('Processing generated depthmaps') # TODO: Smart normalizing (drop 0.001% of top and bottom values from the video/every cut) - preds_min_value = min([pred.min() for pred in predictions]) - preds_max_value = max([pred.max() for pred in predictions]) - - input_depths = [] - for pred in predictions: - norm = (pred - preds_min_value) / (preds_max_value - preds_min_value) # normalize to [0; 1] - input_depths += [norm] # TODO: Smoothening between frames (use splines) # TODO: Detect cuts and process segments separately - return input_depths + if smoothening == 'none': + input_depths = [] + preds_min_value = min([pred.min() for pred in predictions]) + preds_max_value = max([pred.max() for pred in predictions]) + for pred in predictions: + norm = (pred - preds_min_value) / (preds_max_value - preds_min_value) # normalize to [0; 1] + input_depths += [norm] + return input_depths -def gen_video(video, outpath, inp, custom_depthmap=None, colorvids_bitrate=None): +def gen_video(video, outpath, inp, custom_depthmap=None, colorvids_bitrate=None, smoothening='none'): if inp[go.GEN_SIMPLE_MESH.name.lower()] or inp[go.GEN_INPAINTED_MESH.name.lower()]: return 'Creating mesh-videos is not 
supported. Please split video into frames and use batch processing.' @@ -117,7 +117,7 @@ def gen_video(video, outpath, inp, custom_depthmap=None, colorvids_bitrate=None) gen_obj = core.core_generation_funnel(None, input_images, None, None, first_pass_inp) predictions = [x[2] for x in list(gen_obj)] - input_depths = process_predicitons(predictions) + input_depths = process_predicitons(predictions, smoothening) else: print('Using custom depthmap video') cdm_fps, input_depths = open_path_as_images(os.path.abspath(custom_depthmap.name), maybe_depthvideo=True) From e02d04c3ea42e2dd596412d38ace51a27b64a3e8 Mon Sep 17 00:00:00 2001 From: semjon00 Date: Thu, 20 Jul 2023 21:11:49 +0300 Subject: [PATCH 08/16] Documentation changes * "Viewing" deleted, as it's info was moved to a separate page in the wiki * "Forks and Related"->"Generate normal maps" deleted, now a feature is this repo * Changelog moved to a separate file * Added 0.4.3 changelog --- CHANGELOG.md | 114 ++++++++++++++++++++++++++++++++++++++++ README.md | 146 +-------------------------------------------------- 2 files changed, 115 insertions(+), 145 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..b1d68f7 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,114 @@ +## Changelog +### 0.4.2 + * Added UI options for 2 additional rembg models. + * Heatmap generation UI option is hidden - if you want to use it, please activate it in the extension settings. + * Bugfixes. +### 0.4.1 standalone mode + * Added ability to run DepthMap without WebUI. (Use main.py. Make sure all the dependencies are installed. The support is not feature-complete.) 
+ * NormalMap generation +### 0.4.0 large code refactor + * UI improvements + * Improved Batch from Directory, Clip and renormalize DepthMap + * Slightly changed the behaviour of various options + * Extension may partially work even if some of the dependencies are unmet + +### 0.3.12 + * Fixed stereo image generation + * Other bugfixes +### 0.3.11 + * 3D model viewer (Experimental!) + * simple and fast (occluded) 3D mesh generation, support for equirectangular projection + (accurate results with ZoeDepth models only, no boost, no custom maps) + * default output format is now obj for inpainted mesh and simple mesh +### 0.3.10 + * ZoeDepth support (with boost), 3 new models, best results so far + * better heatmap +### 0.3.9 + * use existing/custom depthmaps in output dir for batch mode + * custom depthmap support for single file + * wavefront obj output support for inpainted mesh (enabled in settings) + * option to generate all stereo formats at once + * bugfix: convert single channel input image to rgb + * renamed midas imports to fix conflict with deforum + * ui cleanup +### 0.3.8 bugfix + * bugfix in remove background path +### 0.3.7 new features + * [rembg](https://github.com/danielgatis/rembg) Remove Background [PR](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/78) by [@graemeniedermayer](https://github.com/graemeniedermayer) merged + * setting to flip Left/Right SBS images + * added missing parameter for 3d inpainting (repeat_inpaint_edge) + * option to generate demo videos with mesh +### 0.3.6 new feature + * implemented binary ply file format for the inpainted 3D mesh, big reduction in filesize and save/load times. 
+ * added progress indicators to the inpainting process +### 0.3.5 bugfix + * create path to 3dphoto models before download (see [issue](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/76)) +### 0.3.4 new featues + * depth clipping option (original idea by [@Extraltodeus](https://github.com/Extraltodeus)) + * by popular demand, 3D-Photo-Inpainting is now implemented + * generate inpainted 3D mesh (PLY) and videos of said mesh +### 0.3.3 bugfix and new midas models + * updated to midas 3.1, bringing 2 new depth models (the 512 one eats VRAM for breakfast!) + * fix Next-ViT dependency issue for new installs + * extension no longer clones repositories, all dependencies are now contained in the extension +### 0.3.2 new feature and bugfixes + * several bug fixes for apple silicon and other machines without cuda + * NEW Stereo Image Generation techniques for gap filling by [@semjon00](https://github.com/semjon00) using polylines. (See [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/56)) Significant improvement in quality. +### 0.3.1 bugfix + * small speed increase for anaglyph creation + * clone midas repo before midas 3.1 to fix issue (see [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/55#issue-1510266008)) +### 0.3.0 improved stereo image generation + * New improved technique for generating stereo images and balancing distortion between eyes by [@semjon00](https://github.com/semjon00) (See [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/51)) + * Substantial speedup of stereo image generation code using numba JIT +### 0.2.9 new feature + * 3D Stereo (side-by-side) and red/cyan anaglyph image generation. + (Thanks to [@sina-masoud-ansari](https://github.com/sina-masoud-ansari) for the tip! 
Discussion [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/discussions/45)) +### 0.2.8 bugfix + * boost (pix2pix) now also able to compute on cpu + * res101 able to compute on cpu +### 0.2.7 separate tab + * Depth Tab now available for easier stand-alone (batch) processing +### 0.2.6 ui layout and settings + * added link to repo so more people find their way to the instructions. + * boost rmax setting +### 0.2.5 bugfix + * error checking on model download (now with progressbar) +### 0.2.4 high resolution depthmaps + * multi-resolution merging is now implemented, significantly improving results! + * res101 can now also compute on CPU +### 0.2.3 bugfix + * path error on linux fixed +### 0.2.2 new features + * added (experimental) support for AdelaiDepth/LeReS (GPU Only!) + * new option to view depthmap as heatmap + * optimised ui layout +### 0.2.1 bugfix + * Correct seed is now used in filename and pnginfo when running batches. (see [issue](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/35)) +### 0.2.0 upgrade + * the script is now an extension, enabling auto installation. +### 0.1.9 bugfixes + * sd model moved to system memory while computing depthmap + * memory leak/fragmentation issue fixed + * recover from out of memory error +### 0.1.8 new options + * net size can now be set as width and height, option to match input size, sliders now have the same range as generation parameters. (see usage below) + * better error handling +### 0.1.7 bugfixes + * batch img2img now works (see [issue](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/21#issuecomment-1306445056)) + * generation parameters now only saved when enabled in settings + * model memory freed explicitly at end of script +### 0.1.6 new option + * option to invert depthmap (black=near, white=far), as required by some viewers. +### 0.1.5 bugfix + * saving as any format other than PNG now always produces an 8 bit, 3 channel RGB image. 
A single channel 16 bit image is only supported when saving as PNG. (see [issue](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/15#issuecomment-1304909019)) +### 0.1.4 update + * added support for `--no-half`. Now also works with cards that don't support half precision like GTX 16xx. ([verified](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/12#issuecomment-1304656398)) +### 0.1.3 bugfix + * bugfix where some controls where not visible (see [issue](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/11#issuecomment-1304400537)) +### 0.1.2 new option + * network size slider. higher resolution depth maps (see usage below) +### 0.1.1 bugfixes + * overflow issue (see [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/10) for details and examples of artifacts) + * when not combining, depthmap is now saved as single channel 16 bit +### 0.1.0 + * initial version: script mode, supports generating depthmaps with 4 different midas models \ No newline at end of file diff --git a/README.md b/README.md index 4de5f5e..a59e9ed 100644 --- a/README.md +++ b/README.md @@ -20,117 +20,6 @@ video by [@graemeniedermayer](https://github.com/graemeniedermayer), more exampl ![](https://user-images.githubusercontent.com/54073010/210012661-ef07986c-2320-4700-bc54-fad3899f0186.png) images generated by [@semjon00](https://github.com/semjon00) from CC0 photos, more examples [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/56#issuecomment-1367596463). -## Changelog -* v0.4.2 - * added UI options for 2 additional rembg models - * heatmap generation UI option is hidden - if you want to use it, please activate it in the extension settings - * other bugfixes -* v0.4.1 standalone mode - * ability to run DepthMap without WebUI (Use main.py. Make sure all the dependencies are installed. The support is not feature-complete.) 
- * NormalMap generation -* v0.4.0 large code refactor - * UI improvements - * improved Batch from Directory, Clip and renormalize DepthMap - * slightly changed the behaviour of various options - * extension may partially work even if some of the dependencies are unmet -* v0.3.12 - * Fixed stereo image generation - * Other bugfixes -* v0.3.11 - * 3D model viewer (Experimental!) - * simple and fast (occluded) 3D mesh generation, support for equirectangular projection - (accurate results with ZoeDepth models only, no boost, no custom maps) - * default output format is now obj for inpainted mesh and simple mesh -* v0.3.10 - * ZoeDepth support (with boost), 3 new models, best results so far - * better heatmap -* v0.3.9 - * use existing/custom depthmaps in output dir for batch mode - * custom depthmap support for single file - * wavefront obj output support for inpainted mesh (enabled in settings) - * option to generate all stereo formats at once - * bugfix: convert single channel input image to rgb - * renamed midas imports to fix conflict with deforum - * ui cleanup -* v0.3.8 bugfix - * bugfix in remove background path -* v0.3.7 new features - * [rembg](https://github.com/danielgatis/rembg) Remove Background [PR](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/78) by [@graemeniedermayer](https://github.com/graemeniedermayer) merged - * setting to flip Left/Right SBS images - * added missing parameter for 3d inpainting (repeat_inpaint_edge) - * option to generate demo videos with mesh -* v0.3.6 new feature - * implemented binary ply file format for the inpainted 3D mesh, big reduction in filesize and save/load times. 
- * added progress indicators to the inpainting process -* v0.3.5 bugfix - * create path to 3dphoto models before download (see [issue](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/76)) -* v0.3.4 new featues - * depth clipping option (original idea by [@Extraltodeus](https://github.com/Extraltodeus)) - * by popular demand, 3D-Photo-Inpainting is now implemented - * generate inpainted 3D mesh (PLY) and videos of said mesh -* v0.3.3 bugfix and new midas models - * updated to midas 3.1, bringing 2 new depth models (the 512 one eats VRAM for breakfast!) - * fix Next-ViT dependency issue for new installs - * extension no longer clones repositories, all dependencies are now contained in the extension -* v0.3.2 new feature and bugfixes - * several bug fixes for apple silicon and other machines without cuda - * NEW Stereo Image Generation techniques for gap filling by [@semjon00](https://github.com/semjon00) using polylines. (See [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/56)) Significant improvement in quality. -* v0.3.1 bugfix - * small speed increase for anaglyph creation - * clone midas repo before midas 3.1 to fix issue (see [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/55#issue-1510266008)) -* v0.3.0 improved stereo image generation - * New improved technique for generating stereo images and balancing distortion between eyes by [@semjon00](https://github.com/semjon00) (See [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/51)) - * Substantial speedup of stereo image generation code using numba JIT -* v0.2.9 new feature - * 3D Stereo (side-by-side) and red/cyan anaglyph image generation. - (Thanks to [@sina-masoud-ansari](https://github.com/sina-masoud-ansari) for the tip! 
Discussion [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/discussions/45)) -* v0.2.8 bugfix - * boost (pix2pix) now also able to compute on cpu - * res101 able to compute on cpu -* v0.2.7 separate tab - * Depth Tab now available for easier stand-alone (batch) processing -* v0.2.6 ui layout and settings - * added link to repo so more people find their way to the instructions. - * boost rmax setting -* v0.2.5 bugfix - * error checking on model download (now with progressbar) -* v0.2.4 high resolution depthmaps - * multi-resolution merging is now implemented, significantly improving results! - * res101 can now also compute on CPU -* v0.2.3 bugfix - * path error on linux fixed -* v0.2.2 new features - * added (experimental) support for AdelaiDepth/LeReS (GPU Only!) - * new option to view depthmap as heatmap - * optimised ui layout -* v0.2.1 bugfix - * Correct seed is now used in filename and pnginfo when running batches. (see [issue](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/35)) -* v0.2.0 upgrade - * the script is now an extension, enabling auto installation. -* v0.1.9 bugfixes - * sd model moved to system memory while computing depthmap - * memory leak/fragmentation issue fixed - * recover from out of memory error -* v0.1.8 new options - * net size can now be set as width and height, option to match input size, sliders now have the same range as generation parameters. (see usage below) - * better error handling -* v0.1.7 bugfixes - * batch img2img now works (see [issue](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/21#issuecomment-1306445056)) - * generation parameters now only saved when enabled in settings - * model memory freed explicitly at end of script -* v0.1.6 new option - * option to invert depthmap (black=near, white=far), as required by some viewers. -* v0.1.5 bugfix - * saving as any format other than PNG now always produces an 8 bit, 3 channel RGB image. 
A single channel 16 bit image is only supported when saving as PNG. (see [issue](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/15#issuecomment-1304909019)) -* v0.1.4 update - * added support for `--no-half`. Now also works with cards that don't support half precision like GTX 16xx. ([verified](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/12#issuecomment-1304656398)) -* v0.1.3 bugfix - * bugfix where some controls where not visible (see [issue](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/11#issuecomment-1304400537)) -* v0.1.2 new option - * network size slider. higher resolution depth maps (see usage below) -* v0.1.1 bugfixes - * overflow issue (see [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/10) for details and examples of artifacts) - * when not combining, depthmap is now saved as single channel 16 bit ## Install instructions The script is now also available to install from the `Available` subtab under the `Extensions` tab in the WebUI. @@ -195,40 +84,7 @@ If you often get out of memory errors when computing a depthmap on GPU while usi - You can run the MiDaS network on their colab linked here https://pytorch.org/hub/intelisl_midas_v2/ - You can run BoostingMonocularDepth on their colab linked here : https://colab.research.google.com/github/compphoto/BoostingMonocularDepth/blob/main/Boostmonoculardepth.ipynb -## Viewing - -### For viewing on 2D displays - -* There is the excellent [depthy](https://github.com/panrafal/depthy) by Rafał Lindemann. LIVE link : [https://depthy.stamina.pl/](https://depthy.stamina.pl/) -(Instructions: Drag the rgb image into the window, then select Load depthmap, and drag the depthmap into the dialog inside the window.) Generates GIF and video. 
- -* The [depth-player](https://github.com/spite/depth-player) by [@spite](https://github.com/spite) can load rgb and depthmap images and export a Wavefront OBJ file of a displaced plane mesh with the rgb image applied as texture. LIVE link : [https://depthplayer.ugocapeto.com/](https://depthplayer.ugocapeto.com/) Thanks to [@AugmentedRealityCat](https://github.com/AugmentedRealityCat) for the tip. - -* Simple interactive depthmap viewer using three ([source](https://github.com/thygate/depthmap-viewer-three)). LIVE link : [https://thygate.github.io/depthmap-viewer-three](https://thygate.github.io/depthmap-viewer-three) (Instructions: Drag a combined-rgb-and-depth-horizontally image into the window to view it) - -### For viewing on 3D devices - -* SBS Stereo images can easily be viewed in 3D on VR devices, even cheap ones that use a smartphone like [Google Cardboard](https://arvr.google.com/cardboard/). To view an SBS image, you may simply display it on the phone screen and then insert the phone into the headset. A more convenient option may be to stream the picture from the computer screen to the phone using Sunshine. You may want to change resolution to match phone's aspect ratio. If you decide to buy a headset, pay attention to the lens' size - usually headsets with larger lenses work the best. - -* Simple interactive depthmap viewer for Looking Glass using three. LIVE link : [https://thygate.github.io/depthmap-viewer-three-lookingglass](https://thygate.github.io/depthmap-viewer-three-lookingglass) (Instructions: Drag a combined-rgb-and-depth-horizontally image into the window to view it) - -* Unity3D project to view the depthmaps on Looking Glass in realtime as images are generated. Leave a message in the discussion section if you want me to publish it too. - -### Blender -* Blender depthmap import addon by [@Ladypoly](https://github.com/LadyPoly) ([comment](https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/4252#discussioncomment-4333661)). 
-Download the addon here : [importdepthmap_1.0.3.zip](https://github.com/AUTOMATIC1111/stable-diffusion-webui/files/10194086/importdepthmap_1.0.3.zip) (Blender 3.3.0 or newer) -Demonstration videos : (1) https://www.youtube.com/watch?v=vfu5yzs_2EU , (2) https://www.youtube.com/watch?v=AeDngG9kQNI - - [![video](https://img.youtube.com/vi/vfu5yzs_2EU/1.jpg)](https://www.youtube.com/watch?v=vfu5yzs_2EU) [![video](https://img.youtube.com/vi/AeDngG9kQNI/2.jpg)](https://www.youtube.com/watch?v=AeDngG9kQNI) - -* To view the 3D-inpainted mesh in blender: - * import the PLY file - * Set camera to origin (0, 0, 0) and pointing up to align it with the mesh - * adjust camera FOV to match the mesh - * Add a 'Color Attribute' Node and connect it to the shader color input - ## Forks and Related -* Generate normal maps from depth maps : [stable-diffusion-webui-normalmap-script](https://github.com/graemeniedermayer/stable-diffusion-webui-normalmap-script) by [@graemeniedermayer](https://github.com/graemeniedermayer), also check his [clothseg](https://github.com/graemeniedermayer/clothseg) extension. * Several scripts by [@Extraltodeus](https://github.com/Extraltodeus) using depth maps : https://github.com/Extraltodeus?tab=repositories @@ -236,7 +92,7 @@ Demonstration videos : (1) https://www.youtube.com/watch?v=vfu5yzs_2EU , (2) htt ## Acknowledgements -This project uses code and information from following papers : +This project relies on code and information from following papers : MiDaS : From 7ab083e065b2c74e038c1f92eea19774889696e2 Mon Sep 17 00:00:00 2001 From: Semjon Kravtsenko Date: Fri, 28 Jul 2023 16:31:42 +0300 Subject: [PATCH 09/16] Bump version, add changelog Also updated some parts of the README. Other parts still need updating. 
--- CHANGELOG.md | 5 +++++ README.md | 52 +++++++++++++++++++++++------------------------- main.py | 5 ++--- src/common_ui.py | 7 ++++--- src/misc.py | 2 +- 5 files changed, 37 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b1d68f7..7e2c8f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,9 @@ ## Changelog +### 0.4.3 video processing tab + * Added an option to process videos directly from a video file. This leads to better results than batch-processing individual frames of a video. Allows generating depthmap videos, that can be used in further generations as custom depthmap videos. + * UI improvements. + * Extra stereoimage generation modes - enable in extension settings if you want to use them. + * New stereoimage generation parameter - offset exponent. Setting it to 1 may produce more realistic outputs. ### 0.4.2 * Added UI options for 2 additional rembg models. * Heatmap generation UI option is hidden - if you want to use it, please activate it in the extension settings. diff --git a/README.md b/README.md index a59e9ed..1137ae3 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ # High Resolution Depth Maps for Stable Diffusion WebUI -This script is an addon for [AUTOMATIC1111's Stable Diffusion WebUI](https://github.com/AUTOMATIC1111/stable-diffusion-webui) that creates `depth maps`, and now also `3D stereo image pairs` as side-by-side or anaglyph from a single image. The result can be viewed on 3D or holographic devices like VR headsets or [Looking Glass](https://lookingglassfactory.com/) displays, used in Render- or Game- Engines on a plane with a displacement modifier, and maybe even 3D printed. +This program is an addon for [AUTOMATIC1111's Stable Diffusion WebUI](https://github.com/AUTOMATIC1111/stable-diffusion-webui) that creates depth maps. Using either generated or custom depth maps, it can also create 3D stereo image pairs (as side-by-side or anaglyph), normalmaps and 3D meshes. 
The outputs of the script can be viewed directly or used as an asset for a 3D engine. Please see [wiki](https://github.com/thygate/stable-diffusion-webui-depthmap-script/wiki/Viewing-Results) to learn more. The program has integration with [Rembg](https://github.com/danielgatis/rembg). It also supports batch processing, processing of videos, and can also be run in standalone mode, without Stable Diffusion WebUI. -To generate realistic depth maps `from a single image`, this script uses code and models from the [MiDaS](https://github.com/isl-org/MiDaS) and [ZoeDepth](https://github.com/isl-org/ZoeDepth) repositories by Intel ISL, or LeReS from the [AdelaiDepth](https://github.com/aim-uofa/AdelaiDepth) repository by Advanced Intelligent Machines. Multi-resolution merging as implemented by [BoostingMonocularDepth](https://github.com/compphoto/BoostingMonocularDepth) is used to generate high resolution depth maps. +To generate realistic depth maps from individual images, this script uses code and models from the [MiDaS](https://github.com/isl-org/MiDaS) and [ZoeDepth](https://github.com/isl-org/ZoeDepth) repositories by Intel ISL, or LeReS from the [AdelaiDepth](https://github.com/aim-uofa/AdelaiDepth) repository by Advanced Intelligent Machines. Multi-resolution merging as implemented by [BoostingMonocularDepth](https://github.com/compphoto/BoostingMonocularDepth) is used to generate high resolution depth maps. -3D stereo, and red/cyan anaglyph images are generated using code from the [stereo-image-generation](https://github.com/m5823779/stereo-image-generation) repository. Thanks to [@sina-masoud-ansari](https://github.com/sina-masoud-ansari) for the tip! Discussion [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/discussions/45). 
Improved techniques for generating stereo images and balancing distortion between eyes by [@semjon00](https://github.com/semjon00), see [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/51) and [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/56). +Stereoscopic images are created using a custom-written algorithm. -3D Photography using Context-aware Layered Depth Inpainting by Virginia Tech Vision and Learning Lab , or [3D-Photo-Inpainting](https://github.com/vt-vl-lab/3d-photo-inpainting) is used to generate a `3D inpainted mesh` and render `videos` from said mesh. +3D Photography using Context-aware Layered Depth Inpainting by Virginia Tech Vision and Learning Lab, or [3D-Photo-Inpainting](https://github.com/vt-vl-lab/3d-photo-inpainting) is used to generate a `3D inpainted mesh` and render `videos` from said mesh. -[Rembg](https://github.com/danielgatis/rembg) by [@DanielGatis](https://github.com/danielgatis) support added by [@graemeniedermayer](https://github.com/graemeniedermayer), using [U-2-Net](https://github.com/xuebinqin/U-2-Net) by [@xuebinqin](https://github.com/xuebinqin) to remove backgrounds. +Rembg uses [U-2-Net](https://github.com/xuebinqin/U-2-Net) and [IS-Net](https://github.com/xuebinqin/DIS). ## Depthmap Examples [![screenshot](examples.png)](https://raw.githubusercontent.com/thygate/stable-diffusion-webui-depthmap-script/main/examples.png) @@ -20,32 +20,30 @@ video by [@graemeniedermayer](https://github.com/graemeniedermayer), more exampl ![](https://user-images.githubusercontent.com/54073010/210012661-ef07986c-2320-4700-bc54-fad3899f0186.png) images generated by [@semjon00](https://github.com/semjon00) from CC0 photos, more examples [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/56#issuecomment-1367596463). - ## Install instructions -The script is now also available to install from the `Available` subtab under the `Extensions` tab in the WebUI. 
+### As extension +The script can be installed directly from WebUI. Please navigate to `Extensions` tab, then click `Available`, `Load from` and then install the `Depth Maps` extension. Alternatively, the extension can be installed from URL: `https://github.com/thygate/stable-diffusion-webui-depthmap-script`. ### Updating In the WebUI, in the `Extensions` tab, in the `Installed` subtab, click `Check for Updates` and then `Apply and restart UI`. -### Automatic installation -In the WebUI, in the `Extensions` tab, in the `Install from URL` subtab, enter this repository -`https://github.com/thygate/stable-diffusion-webui-depthmap-script` - and click install and restart. +### Standalone +Clone the repository, install the requirements from `requirements.txt`, launch using `main.py`. ->Model `weights` will be downloaded automatically on first use and saved to /models/midas, /models/leres and /models/pix2pix +>Model weights will be downloaded automatically on their first use and saved to /models/midas, /models/leres and /models/pix2pix. Zoedepth models are stored in torch cache folder. ## Usage -Select the "DepthMap vX.X.X" script from the script selection box in either txt2img or img2img, or go to the Depth tab when using existing images. +Select the "DepthMap" script from the script selection box in either txt2img or img2img, or go to the Depth tab when using existing images. ![screenshot](options.png) -The models can `Compute on` GPU and CPU, use CPU if low on VRAM. +The models can `Compute on` GPU and CPU, use CPU if low on VRAM. -There are seven models available from the `Model` dropdown. For the first model, res101, see [AdelaiDepth/LeReS](https://github.com/aim-uofa/AdelaiDepth/tree/main/LeReS) for more info. The others are the midas models: dpt_beit_large_512, dpt_beit_large_384, dpt_large_384, dpt_hybrid_384, midas_v21, and midas_v21_small. See the [MiDaS](https://github.com/isl-org/MiDaS) repository for more info. 
The newest dpt_beit_large_512 model was trained on a 512x512 dataset but is VERY VRAM hungry. +There are ten models available from the `Model` dropdown. For the first model, res101, see [AdelaiDepth/LeReS](https://github.com/aim-uofa/AdelaiDepth/tree/main/LeReS) for more info. The others are the midas models: dpt_beit_large_512, dpt_beit_large_384, dpt_large_384, dpt_hybrid_384, midas_v21, and midas_v21_small. See the [MiDaS](https://github.com/isl-org/MiDaS) repository for more info. The newest dpt_beit_large_512 model was trained on a 512x512 dataset but is VERY VRAM hungry. The last three models are [ZoeDepth](https://github.com/isl-org/ZoeDepth) models. Net size can be set with `net width` and `net height`, or will be the same as the input image when `Match input size` is enabled. There is a trade-off between structural consistency and high-frequency details with respect to net size (see [observations](https://github.com/compphoto/BoostingMonocularDepth#observations)). -`Boost` will enable multi-resolution merging as implemented by [BoostingMonocularDepth](https://github.com/compphoto/BoostingMonocularDepth) and will significantly improve the results. Mitigating the observations mentioned above. Net size is ignored when enabled. Best results with res101. +`Boost` will enable multi-resolution merging as implemented by [BoostingMonocularDepth](https://github.com/compphoto/BoostingMonocularDepth) and will significantly improve the results, mitigating the observations mentioned above, at the cost of much larger compute time. Best results with res101. `Clip and renormalize` allows for clipping the depthmap on the `near` and `far` side, the values in between will be renormalized to fit the available range. Set both values equal to get a b&w mask of a single depth plane at that value. This option works on the 16-bit depthmap and allows for 1000 steps to select the clip values.
@@ -55,8 +53,6 @@ Regardless of global settings, `Save DepthMap` will always save the depthmap in To see the generated output in the webui `Show DepthMap` should be enabled. When using Batch img2img this option should also be enabled. -To make the depthmap easier to analyze for human eyes, `Show HeatMap` shows an extra image in the WebUI that has a color gradient applied. It is not saved. - When `Combine into one image` is enabled, the depthmap will be combined with the original image, the orientation can be selected with `Combine axis`. When disabled, the depthmap will be saved as a 16 bit single channel PNG as opposed to a three channel (RGB), 8 bit per channel image when the option is enabled. When either `Generate Stereo` or `Generate anaglyph` is enabled, a stereo image pair will be generated. `Divergence` sets the amount of 3D effect that is desired. `Balance between eyes` determines where the (inevitable) distortion from filling up gaps will end up, -1 Left, +1 Right, and 0 balanced. @@ -78,17 +74,19 @@ If you often get out of memory errors when computing a depthmap on GPU while usi ## FAQ * `Can I use this on existing images ?` - - Yes, you can now use the Depth tab to easily process existing images. - - Yes, in img2img, set denoising strength to 0. This will effectively skip stable diffusion and use the input image. You will still have to set the correct size, and need to select `Crop and resize` instead of `Just resize` when the input image resolution does not match the set size perfectly. - * `Can I run this on google colab ?` + - Yes, you can use the Depth tab to easily process existing images. + - Another way of doing this would be to use img2img with denoising strength to 0. This will effectively skip stable diffusion and use the input image. You will still have to set the correct size, and need to select `Crop and resize` instead of `Just resize` when the input image resolution does not match the set size perfectly. 
+ * `Can I run this on Google Colab ?` - You can run the MiDaS network on their colab linked here https://pytorch.org/hub/intelisl_midas_v2/ - You can run BoostingMonocularDepth on their colab linked here : https://colab.research.google.com/github/compphoto/BoostingMonocularDepth/blob/main/Boostmonoculardepth.ipynb - -## Forks and Related - -* Several scripts by [@Extraltodeus](https://github.com/Extraltodeus) using depth maps : https://github.com/Extraltodeus?tab=repositories - -### More updates soon .. Feel free to comment and share in the discussions. + - Running this program on Colab is not officially supported, but it may work. Please look for more suitable ways of running this. If you still decide to try, standalone installation may be easier to manage. + * `What other depth-related projects could I check out?` + - Several [scripts](https://github.com/Extraltodeus?tab=repositories) by [@Extraltodeus](https://github.com/Extraltodeus) using depth maps. + - Geo11 and [Depth3D](https://github.com/BlueSkyDefender/Depth3D) for playing existing games in 3D. + * `How can I know what changed in the new version of the script?` + - You can see the git history log or refer to the `CHANGELOG.md` file. + +### Feel free to comment and share in the discussions! ## Acknowledgements diff --git a/main.py b/main.py index 8bac4fd..a05cb2d 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,4 @@ # This launches DepthMap without the AUTOMATIC1111/stable-diffusion-webui -# If DepthMap is installed as an extension, -# you may want to change the working directory to the stable-diffusion-webui root. import argparse import os @@ -11,7 +9,8 @@ def maybe_chdir(): """Detects if DepthMap was installed as a stable-diffusion-webui script, but run without current directory set to - the stable-diffusion-webui root. Changes current directory if needed, to aviod clutter.""" + the stable-diffusion-webui root. Changes current directory if needed. 
+ This is to avoid re-downloading models and putting results into a wrong folder.""" try: file_path = pathlib.Path(__file__) path = file_path.parts diff --git a/src/common_ui.py b/src/common_ui.py index 93b500c..504ceed 100644 --- a/src/common_ui.py +++ b/src/common_ui.py @@ -222,9 +222,10 @@ def open_folder_action(): def depthmap_mode_video(inp): - gr.HTML(value="Single video mode allows generating videos from videos. Every frame of the video is processed, " - "please adjust generation settings, so that generation is not too slow. For the best results, " - "Use a zoedepth model, since they provide the highest level of temporal coherency.") + gr.HTML(value="Single video mode allows generating videos from videos. Please " + "keep in mind that all the frames of the video need to be processed - therefore it is important to " + "pick settings so that the generation is not too slow. For the best results, " + "use a zoedepth model, since they provide the highest level of coherency between frames.") inp += gr.File(elem_id='depthmap_vm_input', label="Video or animated file", file_count="single", interactive=True, type="file") inp += gr.Dropdown(elem_id="depthmap_vm_smoothening_mode", label="Smoothening", type="value", choices=['none']) diff --git a/src/misc.py b/src/misc.py index 43a9695..d7ecbce 100644 --- a/src/misc.py +++ b/src/misc.py @@ -24,7 +24,7 @@ def call_git(dir): REPOSITORY_NAME = "stable-diffusion-webui-depthmap-script" SCRIPT_NAME = "DepthMap" -SCRIPT_VERSION = "v0.4.2" +SCRIPT_VERSION = "v0.4.3" SCRIPT_FULL_NAME = f"{SCRIPT_NAME} {SCRIPT_VERSION} ({get_commit_hash()})" From 02e38d3c4216af666abd02dcd9309c58af8d4790 Mon Sep 17 00:00:00 2001 From: Semjon Kravtsenko Date: Fri, 28 Jul 2023 16:31:50 +0300 Subject: [PATCH 10/16] Add requirements.txt --- .gitignore | 3 ++- install.py | 7 ++++++- requirements.txt | 20 ++++++++++++++++++++ src/misc.py | 15 +++------------ 4 files changed, 31 insertions(+), 14 deletions(-) create mode 100644 requirements.txt diff --git 
a/.gitignore b/.gitignore index ba0430d..92afa22 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -__pycache__/ \ No newline at end of file +__pycache__/ +venv/ diff --git a/install.py b/install.py index 7f73937..f546224 100644 --- a/install.py +++ b/install.py @@ -39,7 +39,12 @@ def ensure(module_name, min_version=None): ensure('transforms3d', '0.4.1') ensure('imageio') # 2.4.1 -ensure('imageio-ffmpeg') +try: # Dirty hack to not reinstall every time + importlib_metadata.version('imageio-ffmpeg') +except: + ensure('imageio-ffmpeg') + + if not launch.is_installed("networkx"): launch.run_pip('install install "networkx==2.5"', "networkx requirement for depthmap script") if platform.system() == 'Windows': diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..29a2209 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,20 @@ +# Requirements for running in standalone mode +# First, install the correct version of PyTorch! +# PyTorch Compute Platform must match the configuration of the hardware.
+ +# pip install -r requirements.txt +torch +gradio>=3.32.0,<4.0 # User UI +timm~=0.6.12 # For midas +matplotlib +trimesh # For creating simple meshes +numba>=0.57.0 # Speeding up CPU stereoimage generation +vispy>=0.13.0 +rembg>=2.0.50 # Remove background +moviepy>=1.0.2,<2.0 +transforms3d>=0.4.1 +imageio>=2.4.1,<3.0 +imageio-ffmpeg +networkx>=2.5 +pyqt5; sys_platform == 'win32' +pyqt6; sys_platform != 'win32' diff --git a/src/misc.py b/src/misc.py index d7ecbce..60134ba 100644 --- a/src/misc.py +++ b/src/misc.py @@ -4,20 +4,11 @@ import builtins def get_commit_hash(): - def call_git(dir): + try: + file_path = pathlib.Path(__file__).parent return subprocess.check_output( [os.environ.get("GIT", "git"), "rev-parse", "HEAD"], - cwd=dir, shell=False, stderr=subprocess.DEVNULL, encoding='utf8').strip()[0:8] - - try: - file_path = pathlib.Path(__file__) - path = file_path.parts - while len(path) > 0 and path[-1] != REPOSITORY_NAME: - path = path[:-1] - if len(path) >= 2 and path[-1] == REPOSITORY_NAME and path[-2] == "extensions": - return call_git(str(pathlib.Path(*path))) - - return call_git(pathlib.Path.cwd().joinpath('extensions/stable-diffusion-webui-depthmap-script/')) + cwd=file_path, shell=False, stderr=subprocess.DEVNULL, encoding='utf8').strip()[0:8] except Exception: return "" From 2d9812a740140a0d17ae40d17efc9059bfaf40aa Mon Sep 17 00:00:00 2001 From: semjon00 Date: Fri, 28 Jul 2023 23:15:54 +0300 Subject: [PATCH 11/16] Implement experimental smoothening for video mode --- src/common_ui.py | 4 +++- src/video_mode.py | 36 ++++++++++++++++++++++++------------ 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/common_ui.py b/src/common_ui.py index 504ceed..71e41a9 100644 --- a/src/common_ui.py +++ b/src/common_ui.py @@ -228,9 +228,10 @@ def depthmap_mode_video(inp): "use a zoedepth model, since they provide the highest level of coherency between frames.") inp += gr.File(elem_id='depthmap_vm_input', label="Video or animated file",
file_count="single", interactive=True, type="file") - inp += gr.Dropdown(elem_id="depthmap_vm_smoothening_mode", label="Smoothening", type="value", choices=['none']) inp += gr.Checkbox(elem_id="depthmap_vm_custom_checkbox", label="Use custom/pregenerated DepthMap video", value=False) + inp += gr.Dropdown(elem_id="depthmap_vm_smoothening_mode", label="Smoothening", + type="value", choices=['none', 'experimental'], value='experimental') inp += gr.File(elem_id='depthmap_vm_custom', file_count="single", interactive=True, type="file", visible=False) with gr.Row(): @@ -239,6 +240,7 @@ def depthmap_mode_video(inp): minimum=1000, value=15000, maximum=50000, step=250) inp.add_rule('depthmap_vm_custom', 'visible-if', 'depthmap_vm_custom_checkbox') + inp.add_rule('depthmap_vm_smoothening_mode', 'visible-if-not', 'depthmap_vm_custom_checkbox') inp.add_rule('depthmap_vm_compress_bitrate', 'visible-if', 'depthmap_vm_compress_checkbox') return inp diff --git a/src/video_mode.py b/src/video_mode.py index 72b2dd6..fc23aa7 100644 --- a/src/video_mode.py +++ b/src/video_mode.py @@ -70,10 +70,10 @@ def frames_to_video(fps, frames, path, name, colorvids_bitrate=None): priority = [('avi', 'png'), ('avi', 'rawvideo'), ('mp4', 'libx264'), ('webm', 'libvpx')] if colorvids_bitrate: priority = reversed(priority) - for format, codec in priority: + for v_format, codec in priority: try: br = f'{colorvids_bitrate}k' if codec not in ['png', 'rawvideo'] else None - clip.write_videofile(os.path.join(path, f"{name}.{format}"), codec=codec, bitrate=br) + clip.write_videofile(os.path.join(path, f"{name}.{v_format}"), codec=codec, bitrate=br) done = True break except: @@ -83,19 +83,31 @@ def frames_to_video(fps, frames, path, name, colorvids_bitrate=None): def process_predicitons(predictions, smoothening='none'): + def global_scaling(objs, a=None, b=None): + """Normalizes objs, but uses (a, b) instead of (minimum, maximum) value of objs, if supplied""" + normalized = [] + min_value = a if a is not None 
else min([obj.min() for obj in objs]) + max_value = b if b is not None else max([obj.max() for obj in objs]) + for obj in objs: + normalized += [(obj - min_value) / (max_value - min_value)] + return normalized + print('Processing generated depthmaps') - # TODO: Smart normalizing (drop 0.001% of top and bottom values from the video/every cut) - # TODO: Smoothening between frames (use splines) # TODO: Detect cuts and process segments separately - if smoothening == 'none': - input_depths = [] - preds_min_value = min([pred.min() for pred in predictions]) - preds_max_value = max([pred.max() for pred in predictions]) - for pred in predictions: - norm = (pred - preds_min_value) / (preds_max_value - preds_min_value) # normalize to [0; 1] - input_depths += [norm] - return input_depths + return global_scaling(predictions) + elif smoothening == 'experimental': + processed = [] + clip = lambda val: min(max(0, val), len(predictions) - 1) + for i in range(len(predictions)): + f = np.zeros_like(predictions[i]) + for u, mul in enumerate([0.10, 0.20, 0.40, 0.20, 0.10]): # Eyeballed it, math person please fix this + f += mul * predictions[clip(i + (u - 2))] + processed += [f] + # This could have been deterministic monte carlo... Oh well, this version is faster. 
+ a, b = np.percentile(np.stack(processed), [0.5, 99.5]) + return global_scaling(predictions, a, b) + return predictions def gen_video(video, outpath, inp, custom_depthmap=None, colorvids_bitrate=None, smoothening='none'): From ab5bfffd51fc92de10d68393a743161da857ec8b Mon Sep 17 00:00:00 2001 From: semjon00 Date: Sat, 29 Jul 2023 09:59:39 +0300 Subject: [PATCH 12/16] Video mode fixes --- src/core.py | 4 ++-- src/video_mode.py | 20 ++++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/core.py b/src/core.py index d07e273..5001bdd 100644 --- a/src/core.py +++ b/src/core.py @@ -44,7 +44,7 @@ def convert_to_i16(arr): # uint16 conversion uses round-down, therefore values should be [0; 2**16) numbytes = 2 max_val = (2 ** (8 * numbytes)) - out = np.clip(arr * max_val, 0, max_val - 0.1) # -0.1 from above is needed to avoid overflowing + out = np.clip(arr * max_val + 0.0001, 0, max_val - 0.1) # -0.1 from above is needed to avoid overflowing return out.astype("uint16") def convert_i16_to_rgb(image, like): @@ -252,7 +252,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp yield count, 'depth', Image.fromarray(img_output) if inp[go.GEN_STEREO]: - print("Generating stereoscopic images..") + # print("Generating stereoscopic image(s)..") stereoimages = create_stereoimages( inputimages[count], img_output, inp[go.STEREO_DIVERGENCE], inp[go.STEREO_SEPARATION], diff --git a/src/video_mode.py b/src/video_mode.py index fc23aa7..4b40cc0 100644 --- a/src/video_mode.py +++ b/src/video_mode.py @@ -21,9 +21,11 @@ def open_path_as_images(path, maybe_depthvideo=False): frames.append(img.convert('RGB')) return 1000 / img.info['duration'], frames if suffix in ['.avi'] and maybe_depthvideo: - import imageio_ffmpeg - gen = imageio_ffmpeg.read_frames(path) try: + import imageio_ffmpeg + # Suppose there are in fact 16 bits per pixel + # If this is not the case, this is not a 16-bit depthvideo, so no need to process it this way + 
gen = imageio_ffmpeg.read_frames(path, pix_fmt='gray16le', bits_per_pixel=16) video_info = next(gen) if video_info['pix_fmt'] == 'gray16le': width, height = video_info['size'] @@ -31,12 +33,13 @@ def open_path_as_images(path, maybe_depthvideo=False): for frame in gen: # Not sure if this is implemented somewhere else result = np.frombuffer(frame, dtype='uint16') - result.shape = (height, width * 3 // 2) # Why does it work? I don't remotely have any idea. + result.shape = (height, width) # Why does it work? I don't remotely have any idea. frames += [Image.fromarray(result)] # TODO: Wrapping frames into Pillow objects is wasteful return video_info['fps'], frames finally: - gen.close() + if 'gen' in locals(): + gen.close() if suffix in ['.webm', '.mp4', '.avi']: from moviepy.video.io.VideoFileClip import VideoFileClip clip = VideoFileClip(path) @@ -45,7 +48,7 @@ def open_path_as_images(path, maybe_depthvideo=False): return clip.fps, frames else: try: - return 1000, [Image.open(path)] + return 1, [Image.open(path)] except Exception as e: raise Exception(f"Probably an unsupported file format: {suffix}") from e @@ -128,8 +131,8 @@ def gen_video(video, outpath, inp, custom_depthmap=None, colorvids_bitrate=None, first_pass_inp[go.DO_OUTPUT_DEPTH.name] = False gen_obj = core.core_generation_funnel(None, input_images, None, None, first_pass_inp) - predictions = [x[2] for x in list(gen_obj)] - input_depths = process_predicitons(predictions, smoothening) + input_depths = [x[2] for x in list(gen_obj)] + input_depths = process_predicitons(input_depths, smoothening) else: print('Using custom depthmap video') cdm_fps, input_depths = open_path_as_images(os.path.abspath(custom_depthmap.name), maybe_depthvideo=True) @@ -153,4 +156,5 @@ def gen_video(video, outpath, inp, custom_depthmap=None, colorvids_bitrate=None, frames_to_video(fps, imgs, outpath, f"depthmap-{backbone.get_next_sequence_number()}-{basename}", colorvids_bitrate) print('All done. 
Video(s) saved!') - return 'Video generated!' if len(gens) == 1 else 'Videos generated!' + return '

Videos generated

' if len(gens) > 1 else '

Video generated

' if len(gens) == 1 \ + else '

Nothing generated - please check the settings and try again

' From 3fddffb97b8a501b69c137c4b7b93493705f6813 Mon Sep 17 00:00:00 2001 From: grae Date: Fri, 28 Jul 2023 22:52:29 -0600 Subject: [PATCH 13/16] api changes updates Video mode fixes Video mode fixes --- scripts/depthmap_api.py | 72 +----------------------- src/api/api_constants.py | 32 +++++++++++ src/api/api_extension.py | 114 ++++++++++++++++++++++++++++++++++++++ src/api/api_standalone.py | 0 src/core.py | 4 +- src/video_mode.py | 20 ++++--- 6 files changed, 162 insertions(+), 80 deletions(-) create mode 100644 src/api/api_constants.py create mode 100644 src/api/api_extension.py create mode 100644 src/api/api_standalone.py diff --git a/scripts/depthmap_api.py b/scripts/depthmap_api.py index f731dea..46d7bec 100644 --- a/scripts/depthmap_api.py +++ b/scripts/depthmap_api.py @@ -2,80 +2,12 @@ # (will only be on with --api starting option) # Currently no API stability guarantees are provided - API may break on any new commit. -import numpy as np -from fastapi import FastAPI, Body -from fastapi.exceptions import HTTPException -from PIL import Image - -import gradio as gr - -from modules.api.models import List, Dict -from modules.api import api - -from src.core import core_generation_funnel -from src.misc import SCRIPT_VERSION from src import backbone -from src.common_constants import GenerationOptions as go - - -def encode_to_base64(image): - if type(image) is str: - return image - elif type(image) is Image.Image: - return api.encode_pil_to_base64(image) - elif type(image) is np.ndarray: - return encode_np_to_base64(image) - else: - return "" - - -def encode_np_to_base64(image): - pil = Image.fromarray(image) - return api.encode_pil_to_base64(pil) - - -def to_base64_PIL(encoding: str): - return Image.fromarray(np.array(api.decode_base64_to_image(encoding)).astype('uint8')) - - -def depth_api(_: gr.Blocks, app: FastAPI): - @app.get("/depth/version") - async def version(): - return {"version": SCRIPT_VERSION} - - @app.get("/depth/get_options") - async def 
get_options(): - return {"options": sorted([x.name.lower() for x in go])} - - # TODO: some potential inputs not supported (like custom depthmaps) - @app.post("/depth/generate") - async def process( - depth_input_images: List[str] = Body([], title='Input Images'), - options: Dict[str, object] = Body("options", title='Generation options'), - ): - # TODO: restrict mesh options - - if len(depth_input_images) == 0: - raise HTTPException(status_code=422, detail="No images supplied") - print(f"Processing {str(len(depth_input_images))} images trough the API") - - pil_images = [] - for input_image in depth_input_images: - pil_images.append(to_base64_PIL(input_image)) - outpath = backbone.get_outpath() - gen_obj = core_generation_funnel(outpath, pil_images, None, None, options) - - results_based = [] - for count, type, result in gen_obj: - if not isinstance(result, Image.Image): - continue - results_based += [encode_to_base64(result)] - return {"images": results_based, "info": "Success"} - +from src.api import api_extension try: import modules.script_callbacks as script_callbacks if backbone.get_cmd_opt('api', False): - script_callbacks.on_app_started(depth_api) + script_callbacks.on_app_started(api_extension.depth_api) except: print('DepthMap API could not start') diff --git a/src/api/api_constants.py b/src/api/api_constants.py new file mode 100644 index 0000000..0c0517e --- /dev/null +++ b/src/api/api_constants.py @@ -0,0 +1,32 @@ + +api_options = { + 'outputs': ["depth"], # list of outputs to send in response. examples ["depth", "normalmap", 'heatmap', "normal", 'background_removed'] etc + #'conversions': "", #TODO implement. it's a good idea to give some options serverside for because often that's challenging in js/clientside + 'save':"" #TODO implement. To save on local machine. Can be very helpful for debugging. 
+} + +# TODO: These are intended to be temporary +api_defaults={ + "BOOST": False, + "NET_SIZE_MATCH": True +} + +#These are enforced after user inputs +api_forced={ + "GEN_SIMPLE_MESH": False, + "GEN_INPAINTED_MESH": False +} + +#model dictionary. TODO: find a way to remove this without forcing people to know indexes of models +models_to_index = { + 'res101':0, + 'dpt_beit_large_512 (midas 3.1)':1, + 'dpt_beit_large_384 (midas 3.1)':2, + 'dpt_large_384 (midas 3.0)':3, + 'dpt_hybrid_384 (midas 3.0)':4, + 'midas_v21':5, + 'midas_v21_small':6, + 'zoedepth_n (indoor)':7, + 'zoedepth_k (outdoor)':8, + 'zoedepth_nk':9 +} \ No newline at end of file diff --git a/src/api/api_extension.py b/src/api/api_extension.py new file mode 100644 index 0000000..954b60c --- /dev/null +++ b/src/api/api_extension.py @@ -0,0 +1,114 @@ +# Non-public API. Don't host publicly - SECURITY RISKS! +# (will only be on with --api starting option) +# Currently no API stability guarantees are provided - API may break on any new commit.
+ +import numpy as np +from fastapi import FastAPI, Body +from fastapi.exceptions import HTTPException +from PIL import Image +from itertools import tee +import json + +import gradio as gr + +from modules.api.models import List, Dict +from modules.api import api + +from src.common_constants import GenerationOptions as go +from src.core import core_generation_funnel, CoreGenerationFunnelInp +from src import backbone +from src.misc import SCRIPT_VERSION +from src.api.api_constants import api_defaults, api_forced, api_options, models_to_index + +def encode_to_base64(image): + if type(image) is str: + return image + elif type(image) is Image.Image: + return api.encode_pil_to_base64(image) + elif type(image) is np.ndarray: + return encode_np_to_base64(image) + else: + return "" + +def encode_np_to_base64(image): + pil = Image.fromarray(image) + return api.encode_pil_to_base64(pil) + +def to_base64_PIL(encoding: str): + return Image.fromarray(np.array(api.decode_base64_to_image(encoding)).astype('uint8')) + + +def api_gen(input_images, client_options): + + default_options = CoreGenerationFunnelInp(api_defaults).values + + #TODO try-catch type errors here + for key, value in client_options.items(): + if key == "model_type": + default_options[key] = models_to_index(value) + continue + default_options[key] = value + + for key, value in api_forced.items(): + default_options[key.lower()] = value + + print(f"Processing {str(len(input_images))} images through the API") + + print(default_options) + + pil_images = [] + for input_image in input_images: + pil_images.append(to_base64_PIL(input_image)) + outpath = backbone.get_outpath() + gen_obj = core_generation_funnel(outpath, pil_images, None, None, default_options) + return gen_obj + +def depth_api(_: gr.Blocks, app: FastAPI): + @app.get("/depth/version") + async def version(): + return {"version": SCRIPT_VERSION} + + @app.get("/depth/get_options") + async def get_options(): + return { + "gen_options": [x.name.lower() for x in 
go], + "api_options": api_options + } + + @app.post("/depth/generate") + async def process( + input_images: List[str] = Body([], title='Input Images'), + generate_options: Dict[str, object] = Body({}, title='Generation options', options= [x.name.lower() for x in go]), + api_options: Dict[str, object] = Body({'outputs': ["depth"]}, title='Api options', options= api_options) + ): + + if len(input_images)==0: + raise HTTPException(status_code=422, detail="No images supplied") + + gen_obj = api_gen(input_images, generate_options) + + #NOTE Work around yield. (Might not be necessary, not sure if yield caches) + _, gen_obj = tee (gen_obj) + + # If no outputs are specified assume depthmap is expected + if len(api_options["outputs"])==0: + api_options["outputs"] = ["depth"] + + results_based = {} + for output_type in api_options["outputs"]: + results_per_type = [] + + for count, img_type, result in gen_obj: + if img_type == output_type: + results_per_type.append( encode_to_base64(result) ) + + # simpler output for simpler request. + if api_options["outputs"] == ["depth"]: + return {"images": results_per_type, "info": "Success"} + + if len(results_per_type)==0: + results_based[output_type] = "Check options. 
no img-type of " + str(type) + " where generated" + else: + results_based[output_type] = results_per_type + return {"images": results_based, "info": "Success"} + \ No newline at end of file diff --git a/src/api/api_standalone.py b/src/api/api_standalone.py new file mode 100644 index 0000000..e69de29 diff --git a/src/core.py b/src/core.py index d07e273..5001bdd 100644 --- a/src/core.py +++ b/src/core.py @@ -44,7 +44,7 @@ def convert_to_i16(arr): # uint16 conversion uses round-down, therefore values should be [0; 2**16) numbytes = 2 max_val = (2 ** (8 * numbytes)) - out = np.clip(arr * max_val, 0, max_val - 0.1) # -0.1 from above is needed to avoid overflowing + out = np.clip(arr * max_val + 0.0001, 0, max_val - 0.1) # -0.1 from above is needed to avoid overflowing return out.astype("uint16") def convert_i16_to_rgb(image, like): @@ -252,7 +252,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp yield count, 'depth', Image.fromarray(img_output) if inp[go.GEN_STEREO]: - print("Generating stereoscopic images..") + # print("Generating stereoscopic image(s)..") stereoimages = create_stereoimages( inputimages[count], img_output, inp[go.STEREO_DIVERGENCE], inp[go.STEREO_SEPARATION], diff --git a/src/video_mode.py b/src/video_mode.py index fc23aa7..4b40cc0 100644 --- a/src/video_mode.py +++ b/src/video_mode.py @@ -21,9 +21,11 @@ def open_path_as_images(path, maybe_depthvideo=False): frames.append(img.convert('RGB')) return 1000 / img.info['duration'], frames if suffix in ['.avi'] and maybe_depthvideo: - import imageio_ffmpeg - gen = imageio_ffmpeg.read_frames(path) try: + import imageio_ffmpeg + # Suppose there are in fact 16 bits per pixel + # If this is not the case, this is not a 16-bit depthvideo, so no need to process it this way + gen = imageio_ffmpeg.read_frames(path, pix_fmt='gray16le', bits_per_pixel=16) video_info = next(gen) if video_info['pix_fmt'] == 'gray16le': width, height = video_info['size'] @@ -31,12 +33,13 @@ def 
open_path_as_images(path, maybe_depthvideo=False): for frame in gen: # Not sure if this is implemented somewhere else result = np.frombuffer(frame, dtype='uint16') - result.shape = (height, width * 3 // 2) # Why does it work? I don't remotely have any idea. + result.shape = (height, width) # Why does it work? I don't remotely have any idea. frames += [Image.fromarray(result)] # TODO: Wrapping frames into Pillow objects is wasteful return video_info['fps'], frames finally: - gen.close() + if 'gen' in locals(): + gen.close() if suffix in ['.webm', '.mp4', '.avi']: from moviepy.video.io.VideoFileClip import VideoFileClip clip = VideoFileClip(path) @@ -45,7 +48,7 @@ def open_path_as_images(path, maybe_depthvideo=False): return clip.fps, frames else: try: - return 1000, [Image.open(path)] + return 1, [Image.open(path)] except Exception as e: raise Exception(f"Probably an unsupported file format: {suffix}") from e @@ -128,8 +131,8 @@ def gen_video(video, outpath, inp, custom_depthmap=None, colorvids_bitrate=None, first_pass_inp[go.DO_OUTPUT_DEPTH.name] = False gen_obj = core.core_generation_funnel(None, input_images, None, None, first_pass_inp) - predictions = [x[2] for x in list(gen_obj)] - input_depths = process_predicitons(predictions, smoothening) + input_depths = [x[2] for x in list(gen_obj)] + input_depths = process_predicitons(input_depths, smoothening) else: print('Using custom depthmap video') cdm_fps, input_depths = open_path_as_images(os.path.abspath(custom_depthmap.name), maybe_depthvideo=True) @@ -153,4 +156,5 @@ def gen_video(video, outpath, inp, custom_depthmap=None, colorvids_bitrate=None, frames_to_video(fps, imgs, outpath, f"depthmap-{backbone.get_next_sequence_number()}-{basename}", colorvids_bitrate) print('All done. Video(s) saved!') - return 'Video generated!' if len(gens) == 1 else 'Videos generated!' + return '

Videos generated

' if len(gens) > 1 else '

Video generated

' if len(gens) == 1 \ + else '

Nothing generated - please check the settings and try again

' From 3b98706cb06e08df19e7a55a3018af27cba1f83d Mon Sep 17 00:00:00 2001 From: Semjon Kravtsenko Date: Mon, 31 Jul 2023 13:06:57 +0300 Subject: [PATCH 14/16] Minor cleanup --- README.md | 14 +++++++------- src/common_ui.py | 14 ++++++++------ src/core.py | 35 +++++++++-------------------------- 3 files changed, 24 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 1137ae3..56f5e36 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # High Resolution Depth Maps for Stable Diffusion WebUI -This program is an addon for [AUTOMATIC1111's Stable Diffusion WebUI](https://github.com/AUTOMATIC1111/stable-diffusion-webui) that creates depth maps. Using either generated or custom depth maps, it can also create 3D stereo image pairs (as side-by-side or anaglyph), normalmaps and 3D meshes. The outputs of the script can be viewed directly or used as an asset for a 3D engine. Please see [wiki](https://github.com/thygate/stable-diffusion-webui-depthmap-script/wiki/Viewing-Results) to learn more. The program has integration with [Rembg](https://github.com/danielgatis/rembg). It also supports batch processing, processing of videos, and can also be run in standalone mode, without Stable Diffusion WebUI. +This program is an addon for [AUTOMATIC1111's Stable Diffusion WebUI](https://github.com/AUTOMATIC1111/stable-diffusion-webui) that creates depth maps. Using either generated or custom depth maps, it can also create 3D stereo image pairs (side-by-side or anaglyph), normalmaps and 3D meshes. The outputs of the script can be viewed directly or used as an asset for a 3D engine. Please see [wiki](https://github.com/thygate/stable-diffusion-webui-depthmap-script/wiki/Viewing-Results) to learn more. The program has integration with [Rembg](https://github.com/danielgatis/rembg). It also supports batch processing, processing of videos, and can also be run in standalone mode, without Stable Diffusion WebUI. 
To generate realistic depth maps from individual images, this script uses code and models from the [MiDaS](https://github.com/isl-org/MiDaS) and [ZoeDepth](https://github.com/isl-org/ZoeDepth) repositories by Intel ISL, or LeReS from the [AdelaiDepth](https://github.com/aim-uofa/AdelaiDepth) repository by Advanced Intelligent Machines. Multi-resolution merging as implemented by [BoostingMonocularDepth](https://github.com/compphoto/BoostingMonocularDepth) is used to generate high resolution depth maps. @@ -22,7 +22,7 @@ images generated by [@semjon00](https://github.com/semjon00) from CC0 photos, mo ## Install instructions ### As extension -The script can be installed directly from WebUI. Please navigate to `Extensions` tab, then click `Available`, `Load from` and then install the `Depth Maps` extension. Alternatively, the extension can be installed from URL: `https://github.com/thygate/stable-diffusion-webui-depthmap-script`. +The script can be installed directly from WebUI. Please navigate to `Extensions` tab, then click `Available`, `Load from` and then install the `Depth Maps` extension. Alternatively, the extension can be installed from the URL: `https://github.com/thygate/stable-diffusion-webui-depthmap-script`. ### Updating In the WebUI, in the `Extensions` tab, in the `Installed` subtab, click `Check for Updates` and then `Apply and restart UI`. @@ -30,7 +30,7 @@ In the WebUI, in the `Extensions` tab, in the `Installed` subtab, click `Check f ### Standalone Clone the repository, install the requirements from `requirements.txt`, launch using `main.py`. ->Model weights will be downloaded automatically on their first use and saved to /models/midas, /models/leres and /models/pix2pix. Zoedepth models are stored in torch cache folder. +>Model weights will be downloaded automatically on their first use and saved to /models/midas, /models/leres and /models/pix2pix. Zoedepth models are stored in the torch cache folder. 
## Usage @@ -43,7 +43,7 @@ There are ten models available from the `Model` dropdown. For the first model, r Net size can be set with `net width` and `net height`, or will be the same as the input image when `Match input size` is enabled. There is a trade-off between structural consistency and high-frequency details with respect to net size (see [observations](https://github.com/compphoto/BoostingMonocularDepth#observations)). -`Boost` will enable multi-resolution merging as implemented by [BoostingMonocularDepth](https://github.com/compphoto/BoostingMonocularDepth) and will significantly improve the results, mitigating the observations mentioned above, and the cost of much larger compute time. Best results with res101. +`Boost` will enable multi-resolution merging as implemented by [BoostingMonocularDepth](https://github.com/compphoto/BoostingMonocularDepth) and will significantly improve the results, mitigating the observations mentioned above, at the cost of much larger compute time. Best results with res101. `Clip and renormalize` allows for clipping the depthmap on the `near` and `far` side, the values in between will be renormalized to fit the available range. Set both values equal to get a b&w mask of a single depth plane at that value. This option works on the 16-bit depthmap and allows for 1000 steps to select the clip values. @@ -76,17 +76,17 @@ If you often get out of memory errors when computing a depthmap on GPU while usi * `Can I use this on existing images ?` - Yes, you can use the Depth tab to easily process existing images. - Another way of doing this would be to use img2img with denoising strength to 0. This will effectively skip stable diffusion and use the input image. You will still have to set the correct size, and need to select `Crop and resize` instead of `Just resize` when the input image resolution does not match the set size perfectly. 
- * `Can I run this on Google Colab ?` + * `Can I run this on Google Colab?` - You can run the MiDaS network on their colab linked here https://pytorch.org/hub/intelisl_midas_v2/ - You can run BoostingMonocularDepth on their colab linked here : https://colab.research.google.com/github/compphoto/BoostingMonocularDepth/blob/main/Boostmonoculardepth.ipynb - Running this program on Colab is not officially supported, but it may work. Please look for more suitable ways of running this. If you still decide to try, standalone installation may be easier to manage. * `What other depth-related projects could I check out?` - Several [scripts](https://github.com/Extraltodeus?tab=repositories) by [@Extraltodeus](https://github.com/Extraltodeus) using depth maps. - - Geo11 and [Depth3D](https://github.com/BlueSkyDefender/Depth3D) for playing existing games in 3D. + - geo-11 and [Depth3D](https://github.com/BlueSkyDefender/Depth3D) for playing existing games in 3D. * `How can I know what changed in the new version of the script?` - You can see the git history log or refer to the `CHANGELOG.md` file. -### Feel free to comment and share in the discussions! +### Feel free to comment and share in the discussions! Submitting issues and merge requests is heavily appreciated! 
## Acknowledgements diff --git a/src/common_ui.py b/src/common_ui.py index 71e41a9..35be96c 100644 --- a/src/common_ui.py +++ b/src/common_ui.py @@ -122,7 +122,7 @@ def main_ui_panel(is_depth_tab): inp += go.GEN_SIMPLE_MESH, gr.Checkbox(label="Generate simple 3D mesh") with gr.Column(visible=False) as mesh_options: with gr.Row(): - gr.HTML(value="Generates fast, accurate only with ZoeDepth models and no boost, no custom maps") + gr.HTML(value="Generates fast, accurate only with ZoeDepth models and no boost, no custom maps.") with gr.Row(): inp += go.SIMPLE_MESH_OCCLUDE, gr.Checkbox(label="Remove occluded edges") inp += go.SIMPLE_MESH_SPHERICAL, gr.Checkbox(label="Equirectangular projection") @@ -133,10 +133,10 @@ def main_ui_panel(is_depth_tab): inp += go.GEN_INPAINTED_MESH, gr.Checkbox( label="Generate 3D inpainted mesh") with gr.Column(visible=False) as inpaint_options_row_0: - gr.HTML("Generation is sloooow, required for generating videos") + gr.HTML("Generation is sloooow. Required for generating videos from mesh.") inp += go.GEN_INPAINTED_MESH_DEMOS, gr.Checkbox( label="Generate 4 demo videos with 3D inpainted mesh.") - gr.HTML("More options for generating video can be found in the Generate video tab") + gr.HTML("More options for generating video can be found in the Generate video tab.") with gr.Box(): # TODO: it should be clear from the UI that there is an option of the background removal @@ -184,12 +184,14 @@ def update_default_net_size(model_type): inp[go.CLIPDEPTH_FAR].change( fn=lambda a, b: a if b < a else b, inputs=[inp[go.CLIPDEPTH_FAR], inp[go.CLIPDEPTH_NEAR]], - outputs=[inp[go.CLIPDEPTH_NEAR]] + outputs=[inp[go.CLIPDEPTH_NEAR]], + show_progress=False ) inp[go.CLIPDEPTH_NEAR].change( fn=lambda a, b: a if b > a else b, inputs=[inp[go.CLIPDEPTH_NEAR], inp[go.CLIPDEPTH_FAR]], - outputs=[inp[go.CLIPDEPTH_FAR]] + outputs=[inp[go.CLIPDEPTH_FAR]], + show_progress=False ) inp.add_rule(stereo_options, 'visible-if', go.GEN_STEREO) @@ -558,7 +560,7 @@ def 
run_generate(*inputs): # Deciding what mesh to display (and if) display_mesh_fi = None - if not backbone.get_opt('depthmap_script_show_3d', True): + if backbone.get_opt('depthmap_script_show_3d', True): display_mesh_fi = mesh_simple_fi if backbone.get_opt('depthmap_script_show_3d_inpaint', True): if inpainted_mesh_fi is not None and len(inpainted_mesh_fi) > 0: diff --git a/src/core.py b/src/core.py index 5001bdd..3742880 100644 --- a/src/core.py +++ b/src/core.py @@ -125,9 +125,6 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp if not inputdepthmaps_complete: print("Loading model(s) ..") model_holder.ensure_models(inp[go.MODEL_TYPE], device, inp[go.BOOST]) - model = model_holder.depth_model - pix2pix_model = model_holder.pix2pix_model - print("Computing output(s) ..") # iterate over input images for count in trange(0, len(inputimages)): @@ -231,25 +228,17 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp yield count, 'foreground_mask', mask_image # A weird quirk: if user tries to save depthmap, whereas custom depthmap is used, - # depthmap will not be outputed, even if output_depth_combine is used. 
+ # custom depthmap will be outputed if inp[go.DO_OUTPUT_DEPTH]: - if inputdepthmaps[count] is None: - img_depth = cv2.bitwise_not(img_output) if inp[go.OUTPUT_DEPTH_INVERT] else img_output - if inp[go.OUTPUT_DEPTH_COMBINE]: - axis = 1 if inp[go.OUTPUT_DEPTH_COMBINE_AXIS] == 'Horizontal' else 0 - img_concat = Image.fromarray(np.concatenate( - (inputimages[count], convert_i16_to_rgb(img_depth, inputimages[count])), - axis=axis)) - yield count, 'concat_depth', img_concat - else: - yield count, 'depth', Image.fromarray(img_depth) + img_depth = cv2.bitwise_not(img_output) if inp[go.OUTPUT_DEPTH_INVERT] else img_output + if inp[go.OUTPUT_DEPTH_COMBINE]: + axis = 1 if inp[go.OUTPUT_DEPTH_COMBINE_AXIS] == 'Horizontal' else 0 + img_concat = Image.fromarray(np.concatenate( + (inputimages[count], convert_i16_to_rgb(img_depth, inputimages[count])), + axis=axis)) + yield count, 'concat_depth', img_concat else: - # TODO: make it better - # Yes, this seems stupid, but this is, logically, what should happen - - # and this improves clarity of some other code. - # But we won't return it if there is only one image. 
- if len(inputimages) > 1: - yield count, 'depth', Image.fromarray(img_output) + yield count, 'depth', Image.fromarray(img_depth) if inp[go.GEN_STEREO]: # print("Generating stereoscopic image(s)..") @@ -335,17 +324,11 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp if backbone.get_opt('depthmap_script_keepmodels', True): model_holder.offload() # Swap to CPU memory else: - if 'model' in locals(): - del model - if 'pix2pixmodel' in locals(): - del pix2pix_model model_holder.unload_models() - gc.collect() backbone.torch_gc() # TODO: This should not be here - mesh_fi = None if inp[go.GEN_INPAINTED_MESH]: try: mesh_fi = run_3dphoto(device, inpaint_imgs, inpaint_depths, inputnames, outpath, From 8a3abcc1d568f92547c960841791a69c7fb781c2 Mon Sep 17 00:00:00 2001 From: grae Date: Mon, 31 Jul 2023 14:14:32 -0600 Subject: [PATCH 15/16] refractor + standalone api --- scripts/depthmap_api.py | 4 +- src/api/api_core.py | 85 +++++++++++++++++++++ src/api/{api_extension.py => api_routes.py} | 57 ++------------ src/api/api_standalone.py | 0 4 files changed, 92 insertions(+), 54 deletions(-) create mode 100644 src/api/api_core.py rename src/api/{api_extension.py => api_routes.py} (58%) delete mode 100644 src/api/api_standalone.py diff --git a/scripts/depthmap_api.py b/scripts/depthmap_api.py index 46d7bec..fbfac00 100644 --- a/scripts/depthmap_api.py +++ b/scripts/depthmap_api.py @@ -3,11 +3,11 @@ # Currently no API stability guarantees are provided - API may break on any new commit. 
from src import backbone -from src.api import api_extension +from api import api_routes try: import modules.script_callbacks as script_callbacks if backbone.get_cmd_opt('api', False): - script_callbacks.on_app_started(api_extension.depth_api) + script_callbacks.on_app_started(api_routes.depth_api) except: print('DepthMap API could not start') diff --git a/src/api/api_core.py b/src/api/api_core.py new file mode 100644 index 0000000..abdb73e --- /dev/null +++ b/src/api/api_core.py @@ -0,0 +1,85 @@ +import numpy as np +from PIL import PngImagePlugin, Image +import base64 +from io import BytesIO +from fastapi.exceptions import HTTPException + +import gradio as gr + + +from src.core import core_generation_funnel, CoreGenerationFunnelInp +from src import backbone +from src.api.api_constants import api_defaults, api_forced, models_to_index + +# modified from modules/api/api.py auto1111 +def decode_base64_to_image(encoding): + if encoding.startswith("data:image/"): + encoding = encoding.split(";")[1].split(",")[1] + try: + image = Image.open(BytesIO(base64.b64decode(encoding))) + return image + except Exception as e: + raise HTTPException(status_code=500, detail="Invalid encoded image") from e + +# modified from modules/api/api.py auto1111. TODO check that internally we always use png. 
Removed webp and jpeg +def encode_pil_to_base64(image, image_type='png'): + with BytesIO() as output_bytes: + + if image_type == 'png': + use_metadata = False + metadata = PngImagePlugin.PngInfo() + for key, value in image.info.items(): + if isinstance(key, str) and isinstance(value, str): + metadata.add_text(key, value) + use_metadata = True + image.save(output_bytes, format="PNG", pnginfo=(metadata if use_metadata else None)) + + else: + raise HTTPException(status_code=500, detail="Invalid image format") + + bytes_data = output_bytes.getvalue() + + return base64.b64encode(bytes_data) + +def encode_to_base64(image): + if type(image) is str: + return image + elif type(image) is Image.Image: + return encode_pil_to_base64(image) + elif type(image) is np.ndarray: + return encode_np_to_base64(image) + else: + return "" + +def encode_np_to_base64(image): + pil = Image.fromarray(image) + return encode_pil_to_base64(pil) + +def to_base64_PIL(encoding: str): + return Image.fromarray(np.array(decode_base64_to_image(encoding)).astype('uint8')) + + +def api_gen(input_images, client_options): + + default_options = CoreGenerationFunnelInp(api_defaults).values + + #TODO try-catch type errors here + for key, value in client_options.items(): + if key == "model_type": + default_options[key] = models_to_index(value) + continue + default_options[key] = value + + for key, value in api_forced.items(): + default_options[key.lower()] = value + + print(f"Processing {str(len(input_images))} images through the API") + + print(default_options) + + pil_images = [] + for input_image in input_images: + pil_images.append(to_base64_PIL(input_image)) + outpath = backbone.get_outpath() + gen_obj = core_generation_funnel(outpath, pil_images, None, None, default_options) + return gen_obj \ No newline at end of file diff --git a/src/api/api_extension.py b/src/api/api_routes.py similarity index 58% rename from src/api/api_extension.py rename to src/api/api_routes.py index 954b60c..e0dcbb8 100644 --- 
a/src/api/api_extension.py +++ b/src/api/api_routes.py @@ -2,66 +2,18 @@ # (will only be on with --api starting option) # Currently no API stability guarantees are provided - API may break on any new commit. -import numpy as np from fastapi import FastAPI, Body from fastapi.exceptions import HTTPException -from PIL import Image from itertools import tee -import json import gradio as gr -from modules.api.models import List, Dict -from modules.api import api +from typing import Dict, List from src.common_constants import GenerationOptions as go -from src.core import core_generation_funnel, CoreGenerationFunnelInp -from src import backbone from src.misc import SCRIPT_VERSION -from src.api.api_constants import api_defaults, api_forced, api_options, models_to_index - -def encode_to_base64(image): - if type(image) is str: - return image - elif type(image) is Image.Image: - return api.encode_pil_to_base64(image) - elif type(image) is np.ndarray: - return encode_np_to_base64(image) - else: - return "" - -def encode_np_to_base64(image): - pil = Image.fromarray(image) - return api.encode_pil_to_base64(pil) - -def to_base64_PIL(encoding: str): - return Image.fromarray(np.array(api.decode_base64_to_image(encoding)).astype('uint8')) - - -def api_gen(input_images, client_options): - - default_options = CoreGenerationFunnelInp(api_defaults).values - - #TODO try-catch type errors here - for key, value in client_options.items(): - if key == "model_type": - default_options[key] = models_to_index(value) - continue - default_options[key] = value - - for key, value in api_forced.items(): - default_options[key.lower()] = value - - print(f"Processing {str(len(input_images))} images through the API") - - print(default_options) - - pil_images = [] - for input_image in input_images: - pil_images.append(to_base64_PIL(input_image)) - outpath = backbone.get_outpath() - gen_obj = core_generation_funnel(outpath, pil_images, None, None, default_options) - return gen_obj +from 
src.api.api_constants import api_options, models_to_index +from api.api_core import api_gen, encode_to_base64 def depth_api(_: gr.Blocks, app: FastAPI): @app.get("/depth/version") @@ -72,7 +24,8 @@ async def version(): async def get_options(): return { "gen_options": [x.name.lower() for x in go], - "api_options": api_options + "api_options": api_options, + "model_names": models_to_index.keys() } @app.post("/depth/generate") diff --git a/src/api/api_standalone.py b/src/api/api_standalone.py deleted file mode 100644 index e69de29..0000000 From 8dbf9972ce623f86a316549adf6225a3303faf66 Mon Sep 17 00:00:00 2001 From: grae Date: Mon, 31 Jul 2023 16:14:36 -0600 Subject: [PATCH 16/16] clearing up standalone api --- main.py | 17 ++++++++++++++++- src/api/api_core.py | 2 +- src/api/api_routes.py | 3 ++- src/api/api_standalone.py | 22 ++++++++++++++++++++++ 4 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 src/api/api_standalone.py diff --git a/main.py b/main.py index a05cb2d..a929ccc 100644 --- a/main.py +++ b/main.py @@ -29,6 +29,7 @@ def maybe_chdir(): parser = argparse.ArgumentParser() parser.add_argument("--share", help="Create public link", action='store_true') parser.add_argument("--listen", help="Create public link", action='store_true') + parser.add_argument("--api", help="start-up api", action='store_true') parser.add_argument("--no_chdir", help="Do not try to use the root of stable-diffusion-webui", action='store_true') args = parser.parse_args() @@ -37,4 +38,18 @@ def maybe_chdir(): maybe_chdir() server_name = "0.0.0.0" if args.listen else None import src.common_ui - src.common_ui.on_ui_tabs().launch(share=args.share, server_name=server_name) + + ui_block = src.common_ui.on_ui_tabs() + + if args.api is not True: + ui_block.launch(share=args.share, server_name=server_name) + else: + app, _, _ = ui_block.launch(share=args.share, server_name=server_name, prevent_thread_lock=True) + print(f"starting depth api") + from src.api.api_standalone import 
init_api + init_api(ui_block, app) + while True: + pass + + + diff --git a/src/api/api_core.py b/src/api/api_core.py index abdb73e..995b628 100644 --- a/src/api/api_core.py +++ b/src/api/api_core.py @@ -66,7 +66,7 @@ def api_gen(input_images, client_options): #TODO try-catch type errors here for key, value in client_options.items(): if key == "model_type": - default_options[key] = models_to_index(value) + default_options[key] = models_to_index[value] continue default_options[key] = value diff --git a/src/api/api_routes.py b/src/api/api_routes.py index e0dcbb8..c5154d8 100644 --- a/src/api/api_routes.py +++ b/src/api/api_routes.py @@ -13,8 +13,9 @@ from src.common_constants import GenerationOptions as go from src.misc import SCRIPT_VERSION from src.api.api_constants import api_options, models_to_index -from api.api_core import api_gen, encode_to_base64 +from src.api.api_core import api_gen, encode_to_base64 +# gr.Blocks is needed for auto1111 extensions def depth_api(_: gr.Blocks, app: FastAPI): @app.get("/depth/version") async def version(): diff --git a/src/api/api_standalone.py b/src/api/api_standalone.py new file mode 100644 index 0000000..4abc79e --- /dev/null +++ b/src/api/api_standalone.py @@ -0,0 +1,22 @@ +from fastapi import FastAPI +import uvicorn +from fastapi import FastAPI +from src.api.api_routes import depth_api + +import gradio as gr + +#TODO very primitive + +#TODO add CORS + +#TODO enable easy SSL. right now completely unsecured. + +def init_api_no_webui(): + app = FastAPI() + print("setting up endpoints") + depth_api( gr.Blocks(), app) + uvicorn.run('src.api.api_standalone:depth_api', port=7860, host="127.0.0.1") + +def init_api(block, app): + print("setting up endpoints") + depth_api( block, app) \ No newline at end of file