stereo image generation

thygate · thygate · commit 43e1f7a8dec6 · 2022-12-15T10:53:17.000+01:00
diff --git a/README.md b/README.md
@@ -1,12 +1,17 @@
 ﻿# High Resolution Depth Maps for Stable Diffusion WebUI
-This script is an addon for [AUTOMATIC1111's Stable Diffusion WebUI](https://github.com/AUTOMATIC1111/stable-diffusion-webui) that creates `depth maps` from the generated images. The result can be viewed on 3D or holographic devices like VR headsets or [Looking Glass](https://lookingglassfactory.com/) displays, used in Render- or Game- Engines on a plane with a displacement modifier, and maybe even 3D printed.
+This script is an addon for [AUTOMATIC1111's Stable Diffusion WebUI](https://github.com/AUTOMATIC1111/stable-diffusion-webui) that creates `depth maps` from the generated or existing images. The result can be viewed on 3D or holographic devices like VR headsets or [Looking Glass](https://lookingglassfactory.com/) displays, used in Render- or Game- Engines on a plane with a displacement modifier, and maybe even 3D printed.
 
 To generate realistic depth maps from a single image, this script uses code and models from the [MiDaS](https://github.com/isl-org/MiDaS) repository by Intel ISL (see [https://pytorch.org/hub/intelisl_midas_v2/](https://pytorch.org/hub/intelisl_midas_v2/) for more info), or LeReS from the [AdelaiDepth](https://github.com/aim-uofa/AdelaiDepth) repository by Advanced Intelligent Machines. Multi-resolution merging as implemented by [BoostingMonocularDepth](https://github.com/compphoto/BoostingMonocularDepth) is used to generate high resolution depth maps.
 
+3D stereo and red/cyan anaglyph images are generated using code from the [stereo-image-generation](https://github.com/m5823779/stereo-image-generation) repository. Thanks to [@sina-masoud-ansari](https://github.com/sina-masoud-ansari) for the tip! Discussion [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/discussions/45).
+
 ## Examples
 [![screenshot](examples.png)](https://raw.githubusercontent.com/thygate/stable-diffusion-webui-depthmap-script/main/examples.png)
 
 ## Changelog
+* v0.2.9 new feature 
+    * 3D Stereo (side-by-side) and red/cyan anaglyph image generation.   
+    (Thanks to [@sina-masoud-ansari](https://github.com/sina-masoud-ansari) for the tip! Discussion [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/discussions/45))
 * v0.2.8 bugfix
     * boost (pix2pix) now also able to compute on cpu
     * res101 able to compute on cpu
@@ -94,11 +99,15 @@ To see the generated output in the webui `Show DepthMap` should be enabled. When
 To make the depthmap easier to analyze for human eyes, `Show HeatMap` shows an extra image in the WebUI that has a color gradient applied. It is not saved.
 
 When `Combine into one image` is enabled, the depthmap will be combined with the original image, the orientation can be selected with `Combine axis`. When disabled, the depthmap will be saved as a 16 bit single channel PNG as opposed to a three channel (RGB), 8 bit per channel image when the option is enabled.
+
+When either `Generate Stereo side-by-side image` or `Generate Stereo anaglyph image (red/cyan)` is enabled, a stereo image will be generated. The `IPD` (interpupillary distance) and the `Screen Width` are both given in centimeters.
+
 > 💡 Saving as any format other than PNG always produces an 8 bit, 3 channel RGB image. A single channel 16 bit image is only supported when saving as PNG.
 
 ## FAQ
 
  * `Can I use this on existing images ?`
+    - Yes, you can now use the Depth tab to easily process existing images.
     - Yes, in img2img, set denoising strength to 0. This will effectively skip stable diffusion and use the input image. You will still have to set the correct size, and need to select `Crop and resize` instead of `Just resize` when the input image resolution does not match the set size perfectly.
  * `Can I run this on google colab ?`
     - You can run the MiDaS network on their colab linked here https://pytorch.org/hub/intelisl_midas_v2/
diff --git a/scripts/depthmap.py b/scripts/depthmap.py
@@ -51,7 +51,7 @@
 
 whole_size_threshold = 1600  # R_max from the paper
 pix2pixsize = 1024
-scriptname = "DepthMap v0.2.8"
+scriptname = "DepthMap v0.2.9"
 
 class Script(scripts.Script):
 	def title(self):
@@ -78,13 +78,21 @@ def ui(self, is_img2img):
 			save_depth = gr.Checkbox(label="Save DepthMap",value=True)
 			show_depth = gr.Checkbox(label="Show DepthMap",value=True)
 			show_heat = gr.Checkbox(label="Show HeatMap",value=False)
+		with gr.Group():
+			with gr.Row():
+				gen_stereo = gr.Checkbox(label="Generate Stereo side-by-side image",value=False)
+				gen_anaglyph = gr.Checkbox(label="Generate Stereo anaglyph image (red/cyan)",value=False)
+			with gr.Row():
+				stereo_ipd = gr.Slider(minimum=5, maximum=7.5, step=0.1, label='IPD (cm)', value=6.4)
+				stereo_size = gr.Slider(minimum=20, maximum=100, step=0.5, label='Screen Width (cm)', value=38.5)
+
 		with gr.Box():
 			gr.HTML("Instructions, comment and share @ <a href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>")
 
-		return [compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis]
+		return [compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size]
 
 	# run from script in txt2img or img2img
-	def run(self, p, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis):
+	def run(self, p, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size):
 
 		# sd process 
 		processed = processing.process_images(p)
@@ -98,13 +106,13 @@ def run(self, p, compute_device, model_type, net_width, net_height, match_size,
 				continue
 			inputimages.append(processed.images[count])
 
-		newmaps = run_depthmap(processed, p.outpath_samples, inputimages, None, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis)
+		newmaps = run_depthmap(processed, p.outpath_samples, inputimages, None, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size)
 		for img in newmaps:
 			processed.images.append(img)
 
 		return processed
 
-def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis):
+def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size):
 
 	# unload sd model
 	shared.sd_model.cond_stage_model.to(devices.cpu)
@@ -320,6 +328,30 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
 				heatmap = (colormap(img_output2[:,:,0] / 256.0) * 2**16).astype(np.uint16)[:,:,:3]
 				outimages.append(heatmap)
 
+			if gen_stereo or gen_anaglyph:
+				print("Generating Stereo image..")
+				#img_output = cv2.blur(img_output, (3, 3))
+				left_img = np.asarray(inputimages[count])
+				right_img = generate_stereo(left_img, img_output, stereo_ipd, stereo_size)
+				stereo_img = np.hstack([right_img, inputimages[count]])
+				if gen_stereo:
+					outimages.append(stereo_img)
+				if gen_anaglyph:
+					print("Generating Anaglyph image..")
+					anaglyph_img = overlap(right_img, left_img)
+					outimages.append(anaglyph_img)
+				if (processed is not None):
+					if gen_stereo:
+						images.save_image(Image.fromarray(stereo_img), outpath, "", processed.all_seeds[count], processed.all_prompts[count], opts.samples_format, info=info, p=processed, suffix="_stereo")
+					if gen_anaglyph:
+						images.save_image(Image.fromarray(anaglyph_img), outpath, "", processed.all_seeds[count], processed.all_prompts[count], opts.samples_format, info=info, p=processed, suffix="_anaglyph")
+				else:
+					# from tab
+					if gen_stereo:
+						images.save_image(Image.fromarray(stereo_img), path=outpath, basename=basename, seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=None, forced_filename=None, suffix="_stereo")
+					if gen_anaglyph:
+						images.save_image(Image.fromarray(anaglyph_img), path=outpath, basename=basename, seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=None, forced_filename=None, suffix="_anaglyph")
+
 		print("Done.")
 
 	except RuntimeError as e:
@@ -343,6 +375,74 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
 
 	return outimages
 
+
+
+def generate_stereo(left_img, depth, ipd, monitor_w):
+    """Build the second view of a stereo pair by shifting pixels of left_img.
+
+    left_img is an (h, w, c) array and depth a 2-D (h, w) map; each pixel is moved
+    left by int((1 - d ** 2) * deviation) columns, d being depth scaled to [0, 1].
+    ipd and monitor_w set the maximum shift (the UI passes both in cm).
+    Returns a new array shaped like left_img; a flat depth map counts as all zero.
+    """
+    h, w = left_img.shape[:2]
+    depth = np.asarray(depth, dtype=np.float64)
+    depth_min = depth.min()
+    depth_range = depth.max() - depth_min
+    # a constant depth map has zero range; avoid 0/0 -> NaN -> int() failure
+    depth = (depth - depth_min) / depth_range if depth_range > 0 else np.zeros_like(depth)
+
+    deviation = ipd * 0.12 * monitor_w * (w / 1920)
+    print("deviation:", deviation)
+
+    shift = ((1 - depth ** 2) * deviation).astype(int)
+    right = np.zeros_like(left_img)
+    filled = np.zeros((h, w), dtype=bool)  # marks written pixels, so real black is kept
+    all_rows = np.arange(h)
+    for col in range(w):  # left to right, so later source columns overwrite earlier ones
+        col_r = col - shift[:, col]
+        ok = col_r >= 0
+        right[all_rows[ok], col_r[ok]] = left_img[all_rows[ok], col]
+        filled[all_rows[ok], col_r[ok]] = True
+
+    # fill each hole from the nearest written pixel in its row, right side first
+    for row, col in zip(*np.where(~filled)):
+        candidates = (s for k in range(1, int(deviation)) for s in (col + k, col - k))
+        src = next((s for s in candidates if 0 <= s < w and filled[row, s]), None)
+        if src is not None:
+            right[row, col] = right[row, src]
+            filled[row, col] = True  # a filled hole may feed the next hole in the row
+
+    return right
+
+def overlap(im1, im2):
+    """Merge two views into a red/cyan anaglyph.
+
+    The result is a uint8 array with the height and width of im2 and three
+    channels: channel 0 is copied from im1, channels 1 and 2 from im2.
+    If im1 and im2 differ in size, only the region where im1 fits inside the
+    result gets a channel-0 value; the remainder of that channel stays 0.
+
+    Both inputs are expected to be (height, width, channels) arrays with at
+    least three channels.
+    """
+    height1, width1 = im1.shape[:2]
+    height2, width2 = im2.shape[:2]
+
+    # final image
+    composite = np.zeros((height2, width2, 3), np.uint8)
+
+    # channel 0 comes from im1, cropped to the area both images cover; slicing
+    # replaces the per-pixel loop that silently swallowed IndexError
+    rows = min(height1, height2)
+    cols = min(width1, width2)
+    composite[:rows, :cols, 0] = im1[:rows, :cols, 0]
+
+    # channels 1 and 2 come from im2, which has the same size as the result
+    composite[:, :, 1:3] = im2[:, :, 1:3]
+
+    return composite
+
 def run_generate(depthmap_mode, 
 				depthmap_image,
                 image_batch,
@@ -359,7 +459,11 @@ def run_generate(depthmap_mode,
 				show_depth, 
 				show_heat, 
 				combine_output, 
-				combine_output_axis
+				combine_output_axis,
+				gen_stereo, 
+				gen_anaglyph, 
+				stereo_ipd, 
+				stereo_size
 				):
 
 	imageArr = []
@@ -396,7 +500,7 @@ def run_generate(depthmap_mode,
 		outpath = opts.outdir_samples or opts.outdir_extras_samples
 
 
-	outputs = run_depthmap(None, outpath, imageArr, imageNameArr, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis)
+	outputs = run_depthmap(None, outpath, imageArr, imageNameArr, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size)
 
 	return outputs, plaintext_to_html('info'), ''
 
@@ -441,6 +545,14 @@ def on_ui_tabs():
                         save_depth = gr.Checkbox(label="Save DepthMap",value=True)
                         show_depth = gr.Checkbox(label="Show DepthMap",value=True)
                         show_heat = gr.Checkbox(label="Show HeatMap",value=False)
+                with gr.Group():
+                    with gr.Row():
+                        gen_stereo = gr.Checkbox(label="Generate Stereo side-by-side image",value=False)
+                        gen_anaglyph = gr.Checkbox(label="Generate Stereo anaglyph image (red/cyan)",value=False)
+                    with gr.Row():
+                        stereo_ipd = gr.Slider(minimum=5, maximum=7.5, step=0.1, label='IPD (cm)', value=6.4)
+                        stereo_size = gr.Slider(minimum=20, maximum=100, step=0.5, label='Screen Width (cm)', value=38.5)	
+
                 with gr.Box():
                     gr.HTML("Instructions, comment and share @ <a href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>")
 
@@ -474,7 +586,11 @@ def on_ui_tabs():
 				show_depth, 
 				show_heat, 
 				combine_output, 
-				combine_output_axis
+				combine_output_axis,
+				gen_stereo, 
+				gen_anaglyph, 
+				stereo_ipd, 
+				stereo_size
             ],
             outputs=[
                 result_images,