diff --git a/README.md b/README.md
index f69102c..ec1a964 100644
--- a/README.md
+++ b/README.md
@@ -246,4 +246,16 @@ Boosting Monocular Depth Estimation Models to High-Resolution via Content-Adapti
booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
year = {2020}
}
+```
+
+U2-Net:
+```
+@article{Qin_2020_PR,
+title = {U2-Net: Going Deeper with Nested U-Structure for Salient Object Detection},
+author = {Qin, Xuebin and Zhang, Zichen and Huang, Chenyang and Dehghan, Masood and Zaiane, Osmar and Jagersand, Martin},
+journal = {Pattern Recognition},
+volume = {106},
+pages = {107404},
+year = {2020}
+}
```
\ No newline at end of file
diff --git a/install.py b/install.py
index 3f0b596..d395a09 100644
--- a/install.py
+++ b/install.py
@@ -11,6 +11,8 @@
if not launch.is_installed("vispy"):
launch.run_pip("install vispy", "vispy requirement for depthmap script")
+if not launch.is_installed("rembg"):
+ launch.run_pip("install rembg", "rembg requirement for depthmap script")
if not launch.is_installed("moviepy"):
launch.run_pip("install moviepy==1.0.2", "moviepy requirement for depthmap script")
diff --git a/scripts/depthmap.py b/scripts/depthmap.py
index 1f71f3e..13db921 100644
--- a/scripts/depthmap.py
+++ b/scripts/depthmap.py
@@ -56,6 +56,9 @@
from inpaint.utils import path_planning
from inpaint.bilateral_filtering import sparse_bilateral_filtering
+# background removal
+from rembg import new_session, remove
+
whole_size_threshold = 1600 # R_max from the paper
pix2pixsize = 1024
scriptname = "DepthMap v0.3.6"
@@ -108,10 +111,17 @@ def ui(self, is_img2img):
with gr.Row():
inpaint = gr.Checkbox(label="Generate 3D inpainted mesh. (Slooooooooow)",value=False, visible=False)
+ with gr.Group():
+ with gr.Row():
+ background_removal_model = gr.Dropdown(label="Model", choices=['u2net','u2netp','u2net_human_seg', 'silueta'], value='u2net', type="value", elem_id="model_type")
+ with gr.Row():
+ background_removal = gr.Checkbox(label="remove background",value=False)
+ save_background_removal_masks = gr.Checkbox(label="save the foreground masks",value=False)
+ pre_depth_background_removal = gr.Checkbox(label="pre-depth background removal",value=False)
+
with gr.Box():
gr.HTML("Information, comment and share @ https://github.com/thygate/stable-diffusion-webui-depthmap-script")
-
clipthreshold_far.change(
fn = lambda a, b: a if b < a else b,
inputs = [clipthreshold_far, clipthreshold_near],
@@ -124,10 +134,10 @@ def ui(self, is_img2img):
outputs=[clipthreshold_far]
)
- return [compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint]
+ return [compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, background_removal_model, background_removal, pre_depth_background_removal, save_background_removal_masks]
# run from script in txt2img or img2img
- def run(self, p, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint):
+ def run(self, p, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, background_removal_model, background_removal, pre_depth_background_removal, save_background_removal_masks):
# sd process
processed = processing.process_images(p)
@@ -140,14 +150,24 @@ def run(self, p, compute_device, model_type, net_width, net_height, match_size,
if count == 0 and len(processed.images) > 1:
continue
inputimages.append(processed.images[count])
+
+ #remove on base image before depth calculation
+ background_removed_images = []
+ if background_removal:
+ if pre_depth_background_removal:
+ inputimages = batched_background_removal(inputimages, background_removal_model)
+ background_removed_images = inputimages
+ else:
+ background_removed_images = batched_background_removal(inputimages, background_removal_model)
- newmaps, mesh_fi = run_depthmap(processed, p.outpath_samples, inputimages, None, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, "mp4", 0)
+ newmaps, mesh_fi = run_depthmap(processed, p.outpath_samples, inputimages, None, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, "mp4", 0, background_removal, background_removed_images, save_background_removal_masks)
+
for img in newmaps:
processed.images.append(img)
return processed
-def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, fnExt, vid_ssaa):
+def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, fnExt, vid_ssaa, background_removal, background_removed_images, save_background_removal_masks):
if len(inputimages) == 0 or inputimages[0] == None:
return []
@@ -379,6 +399,29 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
p = Path(inputnames[count])
basename = p.stem
+ rgb_image = inputimages[count]
+
+ #applying background masks after depth
+ if background_removal:
+ print('applying background masks')
+ background_removed_image = background_removed_images[count-1]
+ #maybe a threshold cut would be better on the line below.
+ background_removed_array = np.array(background_removed_image)
+ bg_mask = (background_removed_array[:,:,0]==0)|(background_removed_array[:,:,1]==0)|(background_removed_array[:,:,2]==0)
+ far_value = 255 if invert_depth else 0
+
+ img_output[bg_mask] = far_value * far_value #255*255 or 0*0
+
+ #should this be optional
+ images.save_image(background_removed_image, path=outpath, basename='depthmap', seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=None, forced_filename=None, suffix="_background_removed")
+ outimages.append(background_removed_image )
+ if save_background_removal_masks:
+ bg_array = (1 - bg_mask.astype('int8'))*255
+ mask_array = np.stack( (bg_array, bg_array, bg_array, bg_array), axis=2)
+ mask_image = Image.fromarray( mask_array.astype(np.uint8))
+ images.save_image(mask_image, path=outpath, basename='depthmap', seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=None, forced_filename=None, suffix="_foreground_mask")
+ outimages.append(mask_image)
+
if not combine_output:
if show_depth:
outimages.append(Image.fromarray(img_output))
@@ -396,7 +439,7 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
else:
images.save_image(Image.fromarray(img_output2), path=outpath, basename=basename, seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=None, forced_filename=None)
else:
- img_concat = np.concatenate((inputimages[count], img_output2), axis=combine_output_axis)
+ img_concat = np.concatenate((rgb_image, img_output2), axis=combine_output_axis)
if show_depth:
outimages.append(Image.fromarray(img_concat))
if save_depth and processed is not None:
@@ -1005,6 +1048,10 @@ def run_generate(depthmap_mode,
clipthreshold_far,
clipthreshold_near,
inpaint,
+ background_removal_model,
+ background_removal,
+ pre_depth_background_removal,
+ save_background_removal_masks,
vid_format,
vid_ssaa
):
@@ -1048,8 +1095,15 @@ def run_generate(depthmap_mode,
else:
outpath = opts.outdir_samples or opts.outdir_extras_samples
+ background_removed_images = []
+ if background_removal:
+ if pre_depth_background_removal:
+ imageArr = batched_background_removal(imageArr, background_removal_model)
+ background_removed_images = imageArr
+ else:
+ background_removed_images = batched_background_removal(imageArr, background_removal_model)
- outputs, mesh_fi = run_depthmap(None, outpath, imageArr, imageNameArr, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, fnExt, vid_ssaa)
+ outputs, mesh_fi = run_depthmap(None, outpath, imageArr, imageNameArr, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, fnExt, vid_ssaa, background_removal, background_removed_images, save_background_removal_masks)
return outputs, mesh_fi, plaintext_to_html('info'), ''
@@ -1114,6 +1168,14 @@ def on_ui_tabs():
with gr.Row():
inpaint = gr.Checkbox(label="Generate 3D inpainted mesh and demo videos. (Sloooow)",value=False)
+ with gr.Group():
+ with gr.Row():
+ background_removal_model = gr.Dropdown(label="Model", choices=['u2net','u2netp','u2net_human_seg', 'silueta'], value='u2net', type="value", elem_id="model_type")
+ with gr.Row():
+ background_removal = gr.Checkbox(label="remove background",value=False)
+ save_background_removal_masks = gr.Checkbox(label="save the foreground masks",value=False)
+ pre_depth_background_removal = gr.Checkbox(label="pre-depth background removal",value=False)
+
with gr.Box():
gr.HTML("Information, comment and share @ https://github.com/thygate/stable-diffusion-webui-depthmap-script")
@@ -1188,6 +1250,10 @@ def on_ui_tabs():
clipthreshold_far,
clipthreshold_near,
inpaint,
+ background_removal_model,
+ background_removal,
+ pre_depth_background_removal,
+ save_background_removal_masks,
vid_format,
vid_ssaa
],
@@ -1224,6 +1290,26 @@ def on_ui_tabs():
script_callbacks.on_ui_settings(on_ui_settings)
script_callbacks.on_ui_tabs(on_ui_tabs)
+def batched_background_removal(inimages, model_name):
+    """Return background-removed versions of `inimages` using the rembg model `model_name`.
+
+    If more than one image is given, the first is treated as the grid and skipped, so the
+    result is one entry shorter than the input. Empty input returns [] without loading a model.
+    """
+    # NOTE(review): run() already filters out the grid image before calling this, so the
+    # skip below may drop a real image (see the `count-1` lookup in run_depthmap) - confirm.
+    pending = inimages[1:] if len(inimages) > 1 else inimages
+    if not pending:
+        return []
+    print('creating background masks')
+
+    # rembg looks for its model files in U2NET_HOME; keep them under <cwd>/models/rem_bg
+    bg_model_dir = Path().resolve() / "models/rem_bg"
+    os.makedirs(bg_model_dir, exist_ok=True)
+    os.environ["U2NET_HOME"] = str(bg_model_dir)
+    session = new_session(model_name)
+    # np.array -> Image.fromarray round trip: each result is a new image built from the cutout
+    return [Image.fromarray(np.array(remove(img, session=session))) for img in pending]
def download_file(filename, url):
print("Downloading", url, "to", filename)