Skip to content

improvements for stereo image generation #51

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 22, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 97 additions & 43 deletions scripts/depthmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from modules.processing import create_infotext, process_images, Processed
from modules.shared import opts, cmd_opts, state, Options
from modules import script_callbacks
from numba import njit
from torchvision.transforms import Compose, transforms
from PIL import Image
from pathlib import Path
Expand Down Expand Up @@ -85,14 +86,17 @@ def ui(self, is_img2img):
with gr.Row():
stereo_ipd = gr.Slider(minimum=5, maximum=7.5, step=0.1, label='IPD (cm)', value=6.4)
stereo_size = gr.Slider(minimum=20, maximum=100, step=0.5, label='Screen Width (cm)', value=38.5)
with gr.Row():
stereo_fill = gr.Checkbox(label="Improve accuracy", value=False)
stereo_balance = gr.Slider(minimum=-1.0, maximum=1.0, step=0.05, label='Balance between eyes', value=0.0)

with gr.Box():
gr.HTML("Instructions, comment and share @ <a href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>")

return [compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size]
return [compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance]

# run from script in txt2img or img2img
def run(self, p, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size):
def run(self, p, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance):

# sd process
processed = processing.process_images(p)
Expand All @@ -106,13 +110,13 @@ def run(self, p, compute_device, model_type, net_width, net_height, match_size,
continue
inputimages.append(processed.images[count])

newmaps = run_depthmap(processed, p.outpath_samples, inputimages, None, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size)
newmaps = run_depthmap(processed, p.outpath_samples, inputimages, None, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance)
for img in newmaps:
processed.images.append(img)

return processed

def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size):
def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance):

# unload sd model
shared.sd_model.cond_stage_model.to(devices.cpu)
Expand Down Expand Up @@ -331,14 +335,20 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
if gen_stereo or gen_anaglyph:
print("Generating Stereo image..")
#img_output = cv2.blur(img_output, (3, 3))
left_img = np.asarray(inputimages[count])
right_img = generate_stereo(left_img, img_output, stereo_ipd, stereo_size)
stereo_img = np.hstack([right_img, inputimages[count]])
deviation = calculate_total_deviation(stereo_ipd, stereo_size, inputimages[count].width)
balance = (stereo_balance + 1) / 2
original_image = np.asarray(inputimages[count])
left_image = original_image if balance < 0.001 else \
apply_stereo_deviation(original_image, img_output, - deviation * balance, stereo_fill)
right_image = original_image if balance > 0.999 else \
apply_stereo_deviation(original_image, img_output, deviation * (1 - balance), stereo_fill)
stereo_img = np.hstack([left_image, right_image])

if gen_stereo:
outimages.append(stereo_img)
if gen_anaglyph:
print("Generating Anaglyph image..")
anaglyph_img = overlap(right_img, left_img)
anaglyph_img = overlap(left_image, right_image)
outimages.append(anaglyph_img)
if (processed is not None):
if gen_stereo:
Expand Down Expand Up @@ -375,45 +385,82 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo

return outimages

def calculate_total_deviation(ipd, monitor_w, image_width):
    """Compute the total horizontal deviation used to build a stereo pair.

    The result is ``ipd * 0.12 * monitor_w * (image_width / 1920)``. The value
    is also printed to stdout before being returned.

    Args:
        ipd: Numeric value; the visible caller passes the 'IPD (cm)' slider.
        monitor_w: Numeric value; the visible caller passes the
            'Screen Width (cm)' slider.
        image_width: Width of the input image; the visible caller passes the
            image's ``.width``.

    Returns:
        The deviation as a number. The visible caller splits it between the
        two eyes and hands it on as a column offset.
    """
    # NOTE(review): 0.12 and 1920 are unexplained constants inherited from the
    # previous implementation; 1920 looks like a reference image width in
    # pixels - confirm before changing either.
    deviation_cm = ipd * 0.12
    deviation = deviation_cm * monitor_w * (image_width / 1920)
    print("deviation:", deviation)
    return deviation

def apply_stereo_deviation(original_image, depth, deviation, fill_technique):
    """Call ``apply_stereo_deviation_core`` and print a timestamp around it.

    All four arguments are forwarded unchanged, in the same order, to
    ``apply_stereo_deviation_core``. ``time.time()`` is printed with the
    prefix "TIME:" once before and once after that call, so the difference
    between the two printed values is the wall-clock duration of the call.

    Args:
        original_image: Forwarded as-is; the visible caller passes the input
            image converted with ``np.asarray``.
        depth: Forwarded as-is; the visible caller passes the depth output.
        deviation: Forwarded as-is; the visible caller passes a signed share
            of the total deviation.
        fill_technique: Forwarded as-is; selects the gap-fill branch in the
            core routine.

    Returns:
        Whatever ``apply_stereo_deviation_core`` returns.
    """
    # Local import kept from the original: only this timing wrapper needs it.
    import time

    print("TIME:", time.time())
    ret = apply_stereo_deviation_core(original_image, depth, deviation, fill_technique)
    print("TIME:", time.time())
    return ret

def generate_stereo(left_img, depth, ipd, monitor_w):
#MONITOR_W = 38.5 #50 #38.5
h, w, c = left_img.shape
@njit
def apply_stereo_deviation_core(original_image, depth, deviation, fill_technique):
#MONITOR_W = 38.5 #50 #38.5
h, w, c = original_image.shape

depth_min = depth.min()
depth_max = depth.max()
depth = (depth - depth_min) / (depth_max - depth_min)

right = np.zeros_like(left_img)

deviation_cm = ipd * 0.12
deviation = deviation_cm * monitor_w * (w / 1920)

print("deviation:", deviation)
derived_image = np.zeros_like(original_image)
filled = np.zeros(h * w, dtype=np.uint8)

for row in range(h):
for col in range(w):
col_r = col - int((1 - depth[row][col] ** 2) * deviation)
# col_r = col - int((1 - depth[row][col]) * deviation)
if col_r >= 0:
right[row][col_r] = left_img[row][col]

right_fix = np.array(right)
gray = cv2.cvtColor(right_fix, cv2.COLOR_BGR2GRAY)
rows, cols = np.where(gray == 0)
for row, col in zip(rows, cols):
for offset in range(1, int(deviation)):
r_offset = col + offset
l_offset = col - offset
if r_offset < w and not np.all(right_fix[row][r_offset] == 0):
right_fix[row][col] = right_fix[row][r_offset]
break
if l_offset >= 0 and not np.all(right_fix[row][l_offset] == 0):
right_fix[row][col] = right_fix[row][l_offset]
break

return right_fix
# Swipe order should ensure that pixels that are closer overwrite
# (at their destination) pixels that are less close
for col in range(w) if deviation < 0 else range(w - 1, -1, -1):
col_d = col + int((1 - depth[row][col] ** 2) * deviation)
# col_d = col + int((1 - depth[row][col]) * deviation)
if 0 <= col_d < w:
derived_image[row][col_d] = original_image[row][col]
filled[row * w + col_d] = 1

# Fill the gaps
if fill_technique == 2: # soft_horizontal
for row in range(h):
for l_pointer in range(w):
# This if (and the next if) performs two checks that are almost the same - for performance reasons
if sum(derived_image[row][l_pointer]) != 0 or filled[row * w + l_pointer]:
continue
l_border = derived_image[row][l_pointer - 1] if l_pointer > 0 else np.zeros(3, dtype=np.uint8)
r_border = np.zeros(3, dtype=np.uint8)
r_pointer = l_pointer + 1
while r_pointer != w:
if sum(derived_image[row][r_pointer]) != 0 and filled[row * w + r_pointer]:
r_border = derived_image[row][r_pointer]
break
r_pointer += 1
if sum(l_border) == 0:
l_border = r_border
elif sum(r_border) == 0:
r_border = l_border
total_steps = 1 + r_pointer - l_pointer
step = (r_border.astype(np.float_) - l_border) / total_steps
for col in range(l_pointer, r_pointer):
derived_image[row][col] = l_border + (step * (col - l_pointer + 1)).astype(np.uint8)
return derived_image
elif fill_technique == 1: # hard_horizontal
derived_fix = np.copy(derived_image)
for pos in np.where(filled == 0)[0]:
row = pos // w
col = pos % w
for offset in range(1, abs(int(deviation)) + 2):
r_offset = col + offset
l_offset = col - offset
if r_offset < w and filled[row * w + r_offset]:
derived_fix[row][col] = derived_image[row][r_offset]
break
if 0 <= l_offset and filled[row * w + l_offset]:
derived_fix[row][col] = derived_image[row][l_offset]
break
return derived_fix
else: # none
return derived_image

def overlap(im1, im2):
width1 = im1.shape[1]
Expand Down Expand Up @@ -463,7 +510,9 @@ def run_generate(depthmap_mode,
gen_stereo,
gen_anaglyph,
stereo_ipd,
stereo_size
stereo_size,
stereo_fill,
stereo_balance
):

imageArr = []
Expand Down Expand Up @@ -500,7 +549,7 @@ def run_generate(depthmap_mode,
outpath = opts.outdir_samples or opts.outdir_extras_samples


outputs = run_depthmap(None, outpath, imageArr, imageNameArr, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size)
outputs = run_depthmap(None, outpath, imageArr, imageNameArr, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance)

return outputs, plaintext_to_html('info'), ''

Expand Down Expand Up @@ -551,7 +600,10 @@ def on_ui_tabs():
gen_anaglyph = gr.Checkbox(label="Generate Stereo anaglyph image (red/cyan)",value=False)
with gr.Row():
stereo_ipd = gr.Slider(minimum=5, maximum=7.5, step=0.1, label='IPD (cm)', value=6.4)
stereo_size = gr.Slider(minimum=20, maximum=100, step=0.5, label='Screen Width (cm)', value=38.5)
stereo_size = gr.Slider(minimum=20, maximum=100, step=0.5, label='Screen Width (cm)', value=38.5)
with gr.Row():
stereo_fill = gr.Dropdown(label="Gap fill technique", choices=['none', 'hard_horizontal', 'soft_horizontal'], value='soft_horizontal', type="index", elem_id="stereo_fill_type")
stereo_balance = gr.Slider(minimum=-1.0, maximum=1.0, step=0.05, label='Balance between eyes', value=0.0)

with gr.Box():
gr.HTML("Instructions, comment and share @ <a href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>")
Expand Down Expand Up @@ -590,7 +642,9 @@ def on_ui_tabs():
gen_stereo,
gen_anaglyph,
stereo_ipd,
stereo_size
stereo_size,
stereo_fill,
stereo_balance
],
outputs=[
result_images,
Expand Down Expand Up @@ -1212,7 +1266,7 @@ def estimateboost(img, model, model_type, pix2pixmodel):

# Generate the base estimate using the double estimation.
whole_estimate = doubleestimate(img, net_receptive_field_size, whole_image_optimal_size, pix2pixsize, model, model_type, pix2pixmodel)

# Compute the multiplier described in section 6 of the main paper to make sure our initial patch can select
# small high-density regions of the image.
global factor
Expand Down