diff --git a/scripts/depthmap.py b/scripts/depthmap.py
index e3125cb..4334b88 100644
--- a/scripts/depthmap.py
+++ b/scripts/depthmap.py
@@ -11,6 +11,7 @@
from modules.processing import create_infotext, process_images, Processed
from modules.shared import opts, cmd_opts, state, Options
from modules import script_callbacks
+from numba import njit
from torchvision.transforms import Compose, transforms
from PIL import Image
from pathlib import Path
@@ -85,14 +86,17 @@ def ui(self, is_img2img):
with gr.Row():
stereo_ipd = gr.Slider(minimum=5, maximum=7.5, step=0.1, label='IPD (cm)', value=6.4)
stereo_size = gr.Slider(minimum=20, maximum=100, step=0.5, label='Screen Width (cm)', value=38.5)
+ with gr.Row():
+ stereo_fill = gr.Checkbox(label="Improve accuracy", value=False)
+ stereo_balance = gr.Slider(minimum=-1.0, maximum=1.0, step=0.05, label='Balance between eyes', value=0.0)
with gr.Box():
gr.HTML("Instructions, comment and share @ https://github.com/thygate/stable-diffusion-webui-depthmap-script")
- return [compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size]
+ return [compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance]
# run from script in txt2img or img2img
- def run(self, p, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size):
+ def run(self, p, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance):
# sd process
processed = processing.process_images(p)
@@ -106,13 +110,13 @@ def run(self, p, compute_device, model_type, net_width, net_height, match_size,
continue
inputimages.append(processed.images[count])
- newmaps = run_depthmap(processed, p.outpath_samples, inputimages, None, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size)
+ newmaps = run_depthmap(processed, p.outpath_samples, inputimages, None, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance)
for img in newmaps:
processed.images.append(img)
return processed
-def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size):
+def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance):
# unload sd model
shared.sd_model.cond_stage_model.to(devices.cpu)
@@ -331,14 +335,20 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
if gen_stereo or gen_anaglyph:
print("Generating Stereo image..")
#img_output = cv2.blur(img_output, (3, 3))
- left_img = np.asarray(inputimages[count])
- right_img = generate_stereo(left_img, img_output, stereo_ipd, stereo_size)
- stereo_img = np.hstack([right_img, inputimages[count]])
+ deviation = calculate_total_deviation(stereo_ipd, stereo_size, inputimages[count].width)
+ balance = (stereo_balance + 1) / 2
+ original_image = np.asarray(inputimages[count])
+ left_image = original_image if balance < 0.001 else \
+ apply_stereo_deviation(original_image, img_output, - deviation * balance, stereo_fill)
+ right_image = original_image if balance > 0.999 else \
+ apply_stereo_deviation(original_image, img_output, deviation * (1 - balance), stereo_fill)
+ stereo_img = np.hstack([left_image, right_image])
+
if gen_stereo:
outimages.append(stereo_img)
if gen_anaglyph:
print("Generating Anaglyph image..")
- anaglyph_img = overlap(right_img, left_img)
+ anaglyph_img = overlap(left_image, right_image)
outimages.append(anaglyph_img)
if (processed is not None):
if gen_stereo:
@@ -375,45 +385,82 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
return outimages
+def calculate_total_deviation(ipd, monitor_w, image_width):
+ deviation_cm = ipd * 0.12
+ deviation = deviation_cm * monitor_w * (image_width / 1920)
+ print("deviation:", deviation)
+ return deviation
+def apply_stereo_deviation(original_image, depth, deviation, fill_technique):
+ import time
+ print("TIME:", time.time())
+ ret = apply_stereo_deviation_core(original_image, depth, deviation, fill_technique)
+ print("TIME:", time.time())
+ return ret
-def generate_stereo(left_img, depth, ipd, monitor_w):
- #MONITOR_W = 38.5 #50 #38.5
- h, w, c = left_img.shape
+@njit
+def apply_stereo_deviation_core(original_image, depth, deviation, fill_technique):
+ #MONITOR_W = 38.5 #50 #38.5
+ h, w, c = original_image.shape
depth_min = depth.min()
depth_max = depth.max()
depth = (depth - depth_min) / (depth_max - depth_min)
- right = np.zeros_like(left_img)
-
- deviation_cm = ipd * 0.12
- deviation = deviation_cm * monitor_w * (w / 1920)
-
- print("deviation:", deviation)
+ derived_image = np.zeros_like(original_image)
+ filled = np.zeros(h * w, dtype=np.uint8)
for row in range(h):
- for col in range(w):
- col_r = col - int((1 - depth[row][col] ** 2) * deviation)
- # col_r = col - int((1 - depth[row][col]) * deviation)
- if col_r >= 0:
- right[row][col_r] = left_img[row][col]
-
- right_fix = np.array(right)
- gray = cv2.cvtColor(right_fix, cv2.COLOR_BGR2GRAY)
- rows, cols = np.where(gray == 0)
- for row, col in zip(rows, cols):
- for offset in range(1, int(deviation)):
- r_offset = col + offset
- l_offset = col - offset
- if r_offset < w and not np.all(right_fix[row][r_offset] == 0):
- right_fix[row][col] = right_fix[row][r_offset]
- break
- if l_offset >= 0 and not np.all(right_fix[row][l_offset] == 0):
- right_fix[row][col] = right_fix[row][l_offset]
- break
-
- return right_fix
+ # Swipe order should ensure that pixels that are closer overwrite
+ # (at their destination) pixels that are less close
+ for col in range(w) if deviation < 0 else range(w - 1, -1, -1):
+ col_d = col + int((1 - depth[row][col] ** 2) * deviation)
+ # col_d = col + int((1 - depth[row][col]) * deviation)
+ if 0 <= col_d < w:
+ derived_image[row][col_d] = original_image[row][col]
+ filled[row * w + col_d] = 1
+
+ # Fill the gaps
+ if fill_technique == 2: # soft_horizontal
+ for row in range(h):
+ for l_pointer in range(w):
+ # This if (and the next if) performs two checks that are almost the same - for performance reasons
+ if sum(derived_image[row][l_pointer]) != 0 or filled[row * w + l_pointer]:
+ continue
+ l_border = derived_image[row][l_pointer - 1] if l_pointer > 0 else np.zeros(3, dtype=np.uint8)
+ r_border = np.zeros(3, dtype=np.uint8)
+ r_pointer = l_pointer + 1
+ while r_pointer != w:
+ if sum(derived_image[row][r_pointer]) != 0 and filled[row * w + r_pointer]:
+ r_border = derived_image[row][r_pointer]
+ break
+ r_pointer += 1
+ if sum(l_border) == 0:
+ l_border = r_border
+ elif sum(r_border) == 0:
+ r_border = l_border
+ total_steps = 1 + r_pointer - l_pointer
+ step = (r_border.astype(np.float_) - l_border) / total_steps
+ for col in range(l_pointer, r_pointer):
+ derived_image[row][col] = l_border + (step * (col - l_pointer + 1)).astype(np.uint8)
+ return derived_image
+ elif fill_technique == 1: # hard_horizontal
+ derived_fix = np.copy(derived_image)
+ for pos in np.where(filled == 0)[0]:
+ row = pos // w
+ col = pos % w
+ for offset in range(1, abs(int(deviation)) + 2):
+ r_offset = col + offset
+ l_offset = col - offset
+ if r_offset < w and filled[row * w + r_offset]:
+ derived_fix[row][col] = derived_image[row][r_offset]
+ break
+ if 0 <= l_offset and filled[row * w + l_offset]:
+ derived_fix[row][col] = derived_image[row][l_offset]
+ break
+ return derived_fix
+ else: # none
+ return derived_image
def overlap(im1, im2):
width1 = im1.shape[1]
@@ -463,7 +510,9 @@ def run_generate(depthmap_mode,
gen_stereo,
gen_anaglyph,
stereo_ipd,
- stereo_size
+ stereo_size,
+ stereo_fill,
+ stereo_balance
):
imageArr = []
@@ -500,7 +549,7 @@ def run_generate(depthmap_mode,
outpath = opts.outdir_samples or opts.outdir_extras_samples
- outputs = run_depthmap(None, outpath, imageArr, imageNameArr, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size)
+ outputs = run_depthmap(None, outpath, imageArr, imageNameArr, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance)
return outputs, plaintext_to_html('info'), ''
@@ -551,7 +600,10 @@ def on_ui_tabs():
gen_anaglyph = gr.Checkbox(label="Generate Stereo anaglyph image (red/cyan)",value=False)
with gr.Row():
stereo_ipd = gr.Slider(minimum=5, maximum=7.5, step=0.1, label='IPD (cm)', value=6.4)
- stereo_size = gr.Slider(minimum=20, maximum=100, step=0.5, label='Screen Width (cm)', value=38.5)
+ stereo_size = gr.Slider(minimum=20, maximum=100, step=0.5, label='Screen Width (cm)', value=38.5)
+ with gr.Row():
+ stereo_fill = gr.Dropdown(label="Gap fill technique", choices=['none', 'hard_horizontal', 'soft_horizontal'], value='soft_horizontal', type="index", elem_id="stereo_fill_type")
+ stereo_balance = gr.Slider(minimum=-1.0, maximum=1.0, step=0.05, label='Balance between eyes', value=0.0)
with gr.Box():
gr.HTML("Instructions, comment and share @ https://github.com/thygate/stable-diffusion-webui-depthmap-script")
@@ -590,7 +642,9 @@ def on_ui_tabs():
gen_stereo,
gen_anaglyph,
stereo_ipd,
- stereo_size
+ stereo_size,
+ stereo_fill,
+ stereo_balance
],
outputs=[
result_images,
@@ -1212,7 +1266,7 @@ def estimateboost(img, model, model_type, pix2pixmodel):
# Generate the base estimate using the double estimation.
whole_estimate = doubleestimate(img, net_receptive_field_size, whole_image_optimal_size, pix2pixsize, model, model_type, pix2pixmodel)
-
+
# Compute the multiplier described in section 6 of the main paper to make sure our initial patch can select
# small high-density regions of the image.
global factor