From ef418f96c145e54096e3a92f2ca1e90bd81fb094 Mon Sep 17 00:00:00 2001
From: grae <graen.ai.ar@gmail.com>
Date: Wed, 4 Jan 2023 20:51:02 -0600
Subject: [PATCH 1/4] add rembg integration

---
 README.md           | 12 ++++++++
 install.py          |  2 ++
 scripts/depthmap.py | 74 +++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 82 insertions(+), 6 deletions(-)
diff --git a/README.md b/README.md
index f69102c..ec1a964 100644
--- a/README.md
+++ b/README.md
@@ -246,4 +246,16 @@ Boosting Monocular Depth Estimation Models to High-Resolution via Content-Adapti
 	booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
 	year = {2020}
 }
+```
+
+U2-Net:
+```
+@InProceedings{Qin_2020_PR,
+title = {U2-Net: Going Deeper with Nested U-Structure for Salient Object Detection},
+author = {Qin, Xuebin and Zhang, Zichen and Huang, Chenyang and Dehghan, Masood and Zaiane, Osmar and Jagersand, Martin},
+journal = {Pattern Recognition},
+volume = {106},
+pages = {107404},
+year = {2020}
+}
 ```
\ No newline at end of file
diff --git a/install.py b/install.py
index 3f0b596..d395a09 100644
--- a/install.py
+++ b/install.py
@@ -11,6 +11,8 @@
 if not launch.is_installed("vispy"):
     launch.run_pip("install vispy", "vispy requirement for depthmap script")
 
+if not launch.is_installed("rembg"):
+    launch.run_pip("install rembg", "rembg requirement for depthmap script")
 
 if not launch.is_installed("moviepy"):
     launch.run_pip("install moviepy==1.0.2", "moviepy requirement for depthmap script")
diff --git a/scripts/depthmap.py b/scripts/depthmap.py
index 1f71f3e..9d28a1e 100644
--- a/scripts/depthmap.py
+++ b/scripts/depthmap.py
@@ -56,6 +56,9 @@
 from inpaint.utils import path_planning
 from inpaint.bilateral_filtering import sparse_bilateral_filtering
 
+# background removal
+from rembg import new_session, remove
+
 whole_size_threshold = 1600  # R_max from the paper
 pix2pixsize = 1024
 scriptname = "DepthMap v0.3.6"
@@ -110,7 +113,14 @@ def ui(self, is_img2img):
 
 			with gr.Box():
 				gr.HTML("Information, comment and share @ <a href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>")
-
+			
+			with gr.Group():
+				with gr.Row():
+					background_removal_model = gr.Dropdown(label="Model", choices=['u2net','u2netp','u2net_human_seg', 'silueta'], value='u2net', type="value", elem_id="model_type")
+				with gr.Row():	
+					background_removal = gr.Checkbox(label="remove background",value=False)
+					save_background_removal_masks = gr.Checkbox(label="save the foreground masks",value=False)
+					pre_depth_background_removal = gr.Checkbox(label="pre-depth background removal",value=False)
 
 			clipthreshold_far.change(
 				fn = lambda a, b: a if b < a else b,
@@ -124,10 +134,10 @@ def ui(self, is_img2img):
 				outputs=[clipthreshold_far]
 			)
 
-		return [compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint]
+		return [compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, background_removal_model, background_removal, pre_depth_background_removal, save_background_removal_masks]
 
 	# run from script in txt2img or img2img
-	def run(self, p, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint):
+	def run(self, p, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, background_removal_model, background_removal, pre_depth_background_removal, save_background_removal_masks):
 
 		# sd process 
 		processed = processing.process_images(p)
@@ -140,14 +150,23 @@ def run(self, p, compute_device, model_type, net_width, net_height, match_size,
 			if count == 0 and len(processed.images) > 1:
 				continue
 			inputimages.append(processed.images[count])
+		
+		#remove on base image before depth calculation
+		if background_removal:
+			if pre_depth_background_removal:
+				inputimages = batched_background_removal(inputimages, background_removal_model)
+				background_removed_images = inputimages
+			else:
+				background_removed_images = batched_background_removal(inputimages, background_removal_model)			
 
-		newmaps, mesh_fi = run_depthmap(processed, p.outpath_samples, inputimages, None, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, "mp4", 0)
+		newmaps, mesh_fi = run_depthmap(processed, p.outpath_samples, inputimages, None, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, "mp4", 0, background_removal, background_removed_images, save_background_removal_masks)
+		
 		for img in newmaps:
 			processed.images.append(img)
 
 		return processed
 
-def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, fnExt, vid_ssaa):
+def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, fnExt, vid_ssaa, background_removal, background_removed_images, save_background_removal_masks):
 
 	if len(inputimages) == 0 or inputimages[0] == None:
 		return []
@@ -379,6 +398,29 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
 					p = Path(inputnames[count])
 					basename = p.stem
 
+			rgb_image = inputimages[count]
+
+			#applying background masks after depth
+			if background_removal:
+				print('applying background masks')
+				background_removed_image = background_removed_images[count-1]
+				#maybe a threshold cut would be better on the line below.
+				background_removed_array = np.array(background_removed_image)
+				bg_mask = (background_removed_array[:,:,0]==0)|(background_removed_array[:,:,1]==0)|(background_removed_array[:,:,2]==0)
+				far_value = 255 if invert_depth else 0
+
+				img_output[bg_mask] = far_value * far_value #255*255 or 0*0
+				
+				#should this be optional
+				images.save_image(background_removed_image, path=outpath, basename='depthmap', seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=None, forced_filename=None, suffix="_background_removed")
+				outimages.append(background_removed_image )
+				if save_background_removal_masks:
+					bg_array = (1 - bg_mask.astype('int8'))*255
+					mask_array = np.stack( (bg_array, bg_array, bg_array, bg_array), axis=2)
+					mask_image = Image.fromarray( mask_array.astype(np.uint8))
+					images.save_image(mask_image, path=outpath, basename='depthmap', seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=None, forced_filename=None, suffix="_foreground_mask")
+					outimages.append(mask_image)
+
 			if not combine_output:
 				if show_depth:
 					outimages.append(Image.fromarray(img_output))
@@ -396,7 +438,7 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
 					else:
 						images.save_image(Image.fromarray(img_output2), path=outpath, basename=basename, seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=None, forced_filename=None)
 			else:
-				img_concat = np.concatenate((inputimages[count], img_output2), axis=combine_output_axis)
+				img_concat = np.concatenate((rgb_image, img_output2), axis=combine_output_axis)
 				if show_depth:
 					outimages.append(Image.fromarray(img_concat))
 				if save_depth and processed is not None:
@@ -1224,6 +1266,26 @@ def on_ui_tabs():
 script_callbacks.on_ui_settings(on_ui_settings)
 script_callbacks.on_ui_tabs(on_ui_tabs)
 
+def batched_background_removal(inimages, model_name):
+	"""Strip the background from each image using one shared rembg session.
+
+	Returns a list of PIL images. When more than one image is passed, index 0
+	is treated as the grid image and left out, so the result is one shorter.
+	NOTE(review): run() already drops the grid image before calling this, so
+	the skip here may discard a real image in batches -- confirm with callers.
+	"""
+	print('creating background masks')
+
+	# Export the model directory via U2NET_HOME before the session is opened
+	# (presumably where rembg looks for / stores its weights -- TODO confirm).
+	bg_model_dir = Path().resolve().joinpath("models/rem_bg")
+	os.makedirs(bg_model_dir, exist_ok=True)
+	os.environ["U2NET_HOME"] = str(bg_model_dir)
+
+	session = new_session(model_name)
+	drop_first = len(inimages) > 1  # skip first grid image
+	kept = [img for idx, img in enumerate(inimages) if idx > 0 or not drop_first]
+	return [Image.fromarray(np.array(remove(img, session=session))) for img in kept]
 
 def download_file(filename, url):
 	print("Downloading", url, "to", filename)

From b38832ec1cbe1cae3941d07ef39f7a5340025cf0 Mon Sep 17 00:00:00 2001
From: grae <graen.ai.ar@gmail.com>
Date: Wed, 4 Jan 2023 20:55:08 -0600
Subject: [PATCH 2/4] ui update

---
 scripts/depthmap.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/depthmap.py b/scripts/depthmap.py
index 9d28a1e..f21ca47 100644
--- a/scripts/depthmap.py
+++ b/scripts/depthmap.py
@@ -111,9 +111,6 @@ def ui(self, is_img2img):
 				with gr.Row():
 					inpaint = gr.Checkbox(label="Generate 3D inpainted mesh. (Slooooooooow)",value=False, visible=False)
 
-			with gr.Box():
-				gr.HTML("Information, comment and share @ <a href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>")
-			
 			with gr.Group():
 				with gr.Row():
 					background_removal_model = gr.Dropdown(label="Model", choices=['u2net','u2netp','u2net_human_seg', 'silueta'], value='u2net', type="value", elem_id="model_type")
@@ -122,6 +119,9 @@ def ui(self, is_img2img):
 					save_background_removal_masks = gr.Checkbox(label="save the foreground masks",value=False)
 					pre_depth_background_removal = gr.Checkbox(label="pre-depth background removal",value=False)
 
+			with gr.Box():
+				gr.HTML("Information, comment and share @ <a href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>")
+
 			clipthreshold_far.change(
 				fn = lambda a, b: a if b < a else b,
 				inputs = [clipthreshold_far, clipthreshold_near],

From 39cfa3db41153cc73fbe005377039453bb3938f0 Mon Sep 17 00:00:00 2001
From: grae <graen.ai.ar@gmail.com>
Date: Wed, 4 Jan 2023 21:10:21 -0600
Subject: [PATCH 3/4] bug fixes

---
 scripts/depthmap.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/depthmap.py b/scripts/depthmap.py
index f21ca47..1e4c3f0 100644
--- a/scripts/depthmap.py
+++ b/scripts/depthmap.py
@@ -152,6 +152,7 @@ def run(self, p, compute_device, model_type, net_width, net_height, match_size,
 			inputimages.append(processed.images[count])
 		
 		#remove on base image before depth calculation
+		background_removed_images = []
 		if background_removal:
 			if pre_depth_background_removal:
 				inputimages = batched_background_removal(inputimages, background_removal_model)

From bb4bf46c652ed8ccd0094eee970e8e25c56911c3 Mon Sep 17 00:00:00 2001
From: grae <graen.ai.ar@gmail.com>
Date: Wed, 4 Jan 2023 22:06:22 -0600
Subject: [PATCH 4/4] fixing depthtab

---
 scripts/depthmap.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/scripts/depthmap.py b/scripts/depthmap.py
index 1e4c3f0..13db921 100644
--- a/scripts/depthmap.py
+++ b/scripts/depthmap.py
@@ -1048,6 +1048,10 @@ def run_generate(depthmap_mode,
 				clipthreshold_far,
 				clipthreshold_near,
 				inpaint,
+                background_removal_model, 
+                background_removal, 
+                pre_depth_background_removal, 
+                save_background_removal_masks,
 				vid_format,
 				vid_ssaa
 				):
@@ -1091,8 +1095,15 @@ def run_generate(depthmap_mode,
 	else:
 		outpath = opts.outdir_samples or opts.outdir_extras_samples
 
+	background_removed_images = []
+	if background_removal:
+		if pre_depth_background_removal:
+			imageArr = batched_background_removal(imageArr, background_removal_model)
+			background_removed_images = imageArr
+		else:
+			background_removed_images = batched_background_removal(imageArr, background_removal_model)	
 
-	outputs, mesh_fi = run_depthmap(None, outpath, imageArr, imageNameArr, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, fnExt, vid_ssaa)
+	outputs, mesh_fi = run_depthmap(None, outpath, imageArr, imageNameArr, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance, clipdepth, clipthreshold_far, clipthreshold_near, inpaint, fnExt, vid_ssaa, background_removal, background_removed_images, save_background_removal_masks)
 
 	return outputs, mesh_fi, plaintext_to_html('info'), ''
 
@@ -1157,6 +1168,14 @@ def on_ui_tabs():
                     with gr.Row():
                         inpaint = gr.Checkbox(label="Generate 3D inpainted mesh and demo videos. (Sloooow)",value=False)
 
+                with gr.Group():
+                    with gr.Row():
+                        background_removal_model = gr.Dropdown(label="Model", choices=['u2net','u2netp','u2net_human_seg', 'silueta'], value='u2net', type="value", elem_id="model_type")
+                    with gr.Row():	
+                        background_removal = gr.Checkbox(label="remove background",value=False)
+                        save_background_removal_masks = gr.Checkbox(label="save the foreground masks",value=False)
+                        pre_depth_background_removal = gr.Checkbox(label="pre-depth background removal",value=False)
+
                 with gr.Box():
                     gr.HTML("Information, comment and share @ <a href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>")
 
@@ -1231,6 +1250,10 @@ def on_ui_tabs():
 				clipthreshold_far,
 				clipthreshold_near,
 				inpaint,
+				background_removal_model, 
+				background_removal, 
+				pre_depth_background_removal, 
+				save_background_removal_masks,
 				vid_format,
 				vid_ssaa
             ],