From ef418f96c145e54096e3a92f2ca1e90bd81fb094 Mon Sep 17 00:00:00 2001 From: grae Date: Wed, 4 Jan 2023 20:51:02 -0600 Subject: [PATCH 1/4] add rembg integration --- README.md | 12 ++++++++ install.py | 2 ++ scripts/depthmap.py | 74 +++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 82 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index f69102c..ec1a964 100644 --- a/README.md +++ b/README.md @@ -246,4 +246,16 @@ Boosting Monocular Depth Estimation Models to High-Resolution via Content-Adapti booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, year = {2020} } +``` + +U2-Net: +``` +@InProceedings{Qin_2020_PR, +title = {U2-Net: Going Deeper with Nested U-Structure for Salient Object Detection}, +author = {Qin, Xuebin and Zhang, Zichen and Huang, Chenyang and Dehghan, Masood and Zaiane, Osmar and Jagersand, Martin}, +journal = {Pattern Recognition}, +volume = {106}, +pages = {107404}, +year = {2020} +} ``` \ No newline at end of file diff --git a/install.py b/install.py index 3f0b596..d395a09 100644 --- a/install.py +++ b/install.py @@ -11,6 +11,8 @@ if not launch.is_installed("vispy"): launch.run_pip("install vispy", "vispy requirement for depthmap script") +if not launch.is_installed("rembg"): + launch.run_pip("install rembg", "rembg requirement for depthmap script") if not launch.is_installed("moviepy"): launch.run_pip("install moviepy==1.0.2", "moviepy requirement for depthmap script") diff --git a/scripts/depthmap.py b/scripts/depthmap.py index 1f71f3e..9d28a1e 100644 --- a/scripts/depthmap.py +++ b/scripts/depthmap.py @@ -56,6 +56,9 @@ from inpaint.utils import path_planning from inpaint.bilateral_filtering import sparse_bilateral_filtering +# background removal +from rembg import new_session, remove + whole_size_threshold = 1600 # R_max from the paper pix2pixsize = 1024 scriptname = "DepthMap v0.3.6" @@ -110,7 +113,14 @@ def ui(self, is_img2img): with gr.Box(): gr.HTML("Information, comment and share @ https://github.com/thygate/stable-diffusion-webui-depthmap-script") - + + with gr.Group(): + with gr.Row(): + background_removal_model = gr.Dropdown(label="Model", choices=['u2net','u2netp','u2net_human_seg', 'silueta'], value='u2net', type="value", elem_id="model_type") + with gr.Row(): + background_removal = gr.Checkbox(label="remove background",value=False) + save_background_removal_masks = gr.Checkbox(label="save the foreground masks",value=False) + pre_depth_background_removal = gr.Checkbox(label="pre-depth background removal",value=False) clipthreshold_far.change( fn = lambda a, b: a if b < a else b, @@ -124,10 +134,10 @@ def ui(self, is_img2img): outputs=[clipthreshold_far] ) - return [compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint] + return [compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, background_removal_model, background_removal, pre_depth_background_removal, save_background_removal_masks] # run from script in txt2img or img2img - def run(self, p, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint): + def run(self, p, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, background_removal_model, background_removal, pre_depth_background_removal, save_background_removal_masks): # sd process processed = processing.process_images(p) @@ -140,14 +150,23 @@ def run(self, p, compute_device, model_type, net_width, net_height, match_size, if count == 0 and len(processed.images) > 1: continue inputimages.append(processed.images[count]) + + #remove on base image before depth calculation + if background_removal: + if pre_depth_background_removal: + inputimages = batched_background_removal(inputimages, background_removal_model) + background_removed_images = inputimages + else: + background_removed_images = batched_background_removal(inputimages, background_removal_model) - newmaps, mesh_fi = run_depthmap(processed, p.outpath_samples, inputimages, None, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, "mp4", 0) + newmaps, mesh_fi = run_depthmap(processed, p.outpath_samples, inputimages, None, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, "mp4", 0, background_removal, background_removed_images, save_background_removal_masks) + for img in newmaps: processed.images.append(img) return processed -def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, fnExt, vid_ssaa): +def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, fnExt, vid_ssaa, background_removal, background_removed_images, save_background_removal_masks): if len(inputimages) == 0 or inputimages[0] == None: return [] @@ -379,6 +398,29 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo p = Path(inputnames[count]) basename = p.stem + rgb_image = inputimages[count] + + #applying background masks after depth + if background_removal: + print('applying background masks') + background_removed_image = background_removed_images[count-1] + #maybe a threshold cut would be better on the line below. + background_removed_array = np.array(background_removed_image) + bg_mask = (background_removed_array[:,:,0]==0)|(background_removed_array[:,:,1]==0)|(background_removed_array[:,:,2]==0) + far_value = 255 if invert_depth else 0 + + img_output[bg_mask] = far_value * far_value #255*255 or 0*0 + + #should this be optional + images.save_image(background_removed_image, path=outpath, basename='depthmap', seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=None, forced_filename=None, suffix="_background_removed") + outimages.append(background_removed_image ) + if save_background_removal_masks: + bg_array = (1 - bg_mask.astype('int8'))*255 + mask_array = np.stack( (bg_array, bg_array, bg_array, bg_array), axis=2) + mask_image = Image.fromarray( mask_array.astype(np.uint8)) + images.save_image(mask_image, path=outpath, basename='depthmap', seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=None, forced_filename=None, suffix="_foreground_mask") + outimages.append(mask_image) + if not combine_output: if show_depth: outimages.append(Image.fromarray(img_output)) @@ -396,7 +438,7 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo else: images.save_image(Image.fromarray(img_output2), path=outpath, basename=basename, seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=None, forced_filename=None) else: - img_concat = np.concatenate((inputimages[count], img_output2), axis=combine_output_axis) + img_concat = np.concatenate((rgb_image, img_output2), axis=combine_output_axis) if show_depth: outimages.append(Image.fromarray(img_concat)) if save_depth and processed is not None: @@ -1224,6 +1266,26 @@ def on_ui_tabs(): script_callbacks.on_ui_settings(on_ui_settings) script_callbacks.on_ui_tabs(on_ui_tabs) +def batched_background_removal(inimages, model_name): + print('creating background masks') + outimages = [] + + # model path and name + bg_model_dir = Path.joinpath(Path().resolve(), "models/rem_bg") + os.makedirs(bg_model_dir, exist_ok=True) + os.environ["U2NET_HOME"] = str(bg_model_dir) + + #starting a session + background_removal_session = new_session(model_name) + for count in range(0, len(inimages)): + # skip first grid image + if count == 0 and len(inimages) > 1: + continue + bg_remove_img = np.array(remove(inimages[count], session=background_removal_session)) + outimages.append(Image.fromarray(bg_remove_img)) + #The line below might be redundant + del background_removal_session + return outimages def download_file(filename, url): print("Downloading", url, "to", filename) From b38832ec1cbe1cae3941d07ef39f7a5340025cf0 Mon Sep 17 00:00:00 2001 From: grae Date: Wed, 4 Jan 2023 20:55:08 -0600 Subject: [PATCH 2/4] ui update --- scripts/depthmap.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/depthmap.py b/scripts/depthmap.py index 9d28a1e..f21ca47 100644 --- a/scripts/depthmap.py +++ b/scripts/depthmap.py @@ -111,9 +111,6 @@ def ui(self, is_img2img): with gr.Row(): inpaint = gr.Checkbox(label="Generate 3D inpainted mesh. (Slooooooooow)",value=False, visible=False) - with gr.Box(): - gr.HTML("Information, comment and share @ https://github.com/thygate/stable-diffusion-webui-depthmap-script") - with gr.Group(): with gr.Row(): background_removal_model = gr.Dropdown(label="Model", choices=['u2net','u2netp','u2net_human_seg', 'silueta'], value='u2net', type="value", elem_id="model_type") @@ -122,6 +119,9 @@ def ui(self, is_img2img): save_background_removal_masks = gr.Checkbox(label="save the foreground masks",value=False) pre_depth_background_removal = gr.Checkbox(label="pre-depth background removal",value=False) + with gr.Box(): + gr.HTML("Information, comment and share @ https://github.com/thygate/stable-diffusion-webui-depthmap-script") + clipthreshold_far.change( fn = lambda a, b: a if b < a else b, inputs = [clipthreshold_far, clipthreshold_near], From 39cfa3db41153cc73fbe005377039453bb3938f0 Mon Sep 17 00:00:00 2001 From: grae Date: Wed, 4 Jan 2023 21:10:21 -0600 Subject: [PATCH 3/4] bug fixes --- scripts/depthmap.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/depthmap.py b/scripts/depthmap.py index f21ca47..1e4c3f0 100644 --- a/scripts/depthmap.py +++ b/scripts/depthmap.py @@ -152,6 +152,7 @@ def run(self, p, compute_device, model_type, net_width, net_height, match_size, inputimages.append(processed.images[count]) #remove on base image before depth calculation + background_removed_images = [] if background_removal: if pre_depth_background_removal: inputimages = batched_background_removal(inputimages, background_removal_model) From bb4bf46c652ed8ccd0094eee970e8e25c56911c3 Mon Sep 17 00:00:00 2001 From: grae Date: Wed, 4 Jan 2023 22:06:22 -0600 Subject: [PATCH 4/4] fixing depthtab --- scripts/depthmap.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/scripts/depthmap.py b/scripts/depthmap.py index 1e4c3f0..13db921 100644 --- a/scripts/depthmap.py +++ b/scripts/depthmap.py @@ -1048,6 +1048,10 @@ def run_generate(depthmap_mode, clipthreshold_far, clipthreshold_near, inpaint, + background_removal_model, + background_removal, + pre_depth_background_removal, + save_background_removal_masks, vid_format, vid_ssaa ): @@ -1091,8 +1095,15 @@ def run_generate(depthmap_mode, else: outpath = opts.outdir_samples or opts.outdir_extras_samples + background_removed_images = [] + if background_removal: + if pre_depth_background_removal: + imageArr = batched_background_removal(imageArr, background_removal_model) + background_removed_images = imageArr + else: + background_removed_images = batched_background_removal(imageArr, background_removal_model) - outputs, mesh_fi = run_depthmap(None, outpath, imageArr, imageNameArr, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, fnExt, vid_ssaa) + outputs, mesh_fi = run_depthmap(None, outpath, imageArr, imageNameArr, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, fnExt, vid_ssaa, background_removal, background_removed_images, save_background_removal_masks) return outputs, mesh_fi, plaintext_to_html('info'), '' @@ -1157,6 +1168,14 @@ def on_ui_tabs(): with gr.Row(): inpaint = gr.Checkbox(label="Generate 3D inpainted mesh and demo videos. (Sloooow)",value=False) + with gr.Group(): + with gr.Row(): + background_removal_model = gr.Dropdown(label="Model", choices=['u2net','u2netp','u2net_human_seg', 'silueta'], value='u2net', type="value", elem_id="model_type") + with gr.Row(): + background_removal = gr.Checkbox(label="remove background",value=False) + save_background_removal_masks = gr.Checkbox(label="save the foreground masks",value=False) + pre_depth_background_removal = gr.Checkbox(label="pre-depth background removal",value=False) + with gr.Box(): gr.HTML("Information, comment and share @ https://github.com/thygate/stable-diffusion-webui-depthmap-script") @@ -1231,6 +1250,10 @@ def on_ui_tabs(): clipthreshold_far, clipthreshold_near, inpaint, + background_removal_model, + background_removal, + pre_depth_background_removal, + save_background_removal_masks, vid_format, vid_ssaa ],