Skip to content

Commit a79d487

Browse files
authored
Merge pull request #56 from semjon00/stereo
[WIP] new stereo image generation technique (polylines)
2 parents 761f790 + 125fb32 commit a79d487

File tree

1 file changed

+178
-44
lines changed

1 file changed

+178
-44
lines changed

scripts/depthmap.py

Lines changed: 178 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -89,20 +89,19 @@ def ui(self, is_img2img):
8989
gen_stereo = gr.Checkbox(label="Generate Stereo side-by-side image",value=False)
9090
gen_anaglyph = gr.Checkbox(label="Generate Stereo anaglyph image (red/cyan)",value=False)
9191
with gr.Row():
92-
stereo_ipd = gr.Slider(minimum=5, maximum=7.5, step=0.1, label='IPD (cm)', value=6.4)
93-
stereo_size = gr.Slider(minimum=20, maximum=100, step=0.5, label='Screen Width (cm)', value=38.5)
92+
stereo_divergence = gr.Slider(minimum=0.05, maximum=10.005, step=0.01, label='Divergence (3D effect)', value=2.5)
9493
with gr.Row():
95-
stereo_fill = gr.Dropdown(label="Gap fill technique", choices=['none', 'hard_horizontal', 'soft_horizontal'], value='soft_horizontal', type="index", elem_id="stereo_fill_type")
94+
stereo_fill = gr.Dropdown(label="Gap fill technique", choices=['none', 'naive', 'naive_interpolating', 'polylines_soft', 'polylines_sharp'], value='naive_interpolating', type="index", elem_id="stereo_fill_type")
9695
stereo_balance = gr.Slider(minimum=-1.0, maximum=1.0, step=0.05, label='Balance between eyes', value=0.0)
9796

9897

9998
with gr.Box():
10099
gr.HTML("Instructions, comment and share @ <a href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>")
101100

102-
return [compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance]
101+
return [compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance]
103102

104103
# run from script in txt2img or img2img
105-
def run(self, p, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance):
104+
def run(self, p, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance):
106105

107106
# sd process
108107
processed = processing.process_images(p)
@@ -116,13 +115,13 @@ def run(self, p, compute_device, model_type, net_width, net_height, match_size,
116115
continue
117116
inputimages.append(processed.images[count])
118117

119-
newmaps = run_depthmap(processed, p.outpath_samples, inputimages, None, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance)
118+
newmaps = run_depthmap(processed, p.outpath_samples, inputimages, None, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance)
120119
for img in newmaps:
121120
processed.images.append(img)
122121

123122
return processed
124123

125-
def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance):
124+
def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance):
126125

127126
# unload sd model
128127
shared.sd_model.cond_stage_model.to(devices.cpu)
@@ -250,7 +249,7 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
250249
# iterate over input (generated) images
251250
numimages = len(inputimages)
252251
for count in trange(0, numimages):
253-
252+
254253
#if numimages > 1:
255254
# print("\nDepthmap", count+1, '/', numimages)
256255
print('\n')
@@ -357,13 +356,12 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
357356
if gen_stereo or gen_anaglyph:
358357
print("Generating Stereo image..")
359358
#img_output = cv2.blur(img_output, (3, 3))
360-
deviation = calculate_total_deviation(stereo_ipd, stereo_size, inputimages[count].width)
361359
balance = (stereo_balance + 1) / 2
362360
original_image = np.asarray(inputimages[count])
363361
left_image = original_image if balance < 0.001 else \
364-
apply_stereo_deviation(original_image, img_output, - deviation * balance, stereo_fill)
362+
apply_stereo_divergence(original_image, img_output, - stereo_divergence * balance, stereo_fill)
365363
right_image = original_image if balance > 0.999 else \
366-
apply_stereo_deviation(original_image, img_output, deviation * (1 - balance), stereo_fill)
364+
apply_stereo_divergence(original_image, img_output, stereo_divergence * (1 - balance), stereo_fill)
367365
stereo_img = np.hstack([left_image, right_image])
368366

369367
if gen_stereo:
@@ -407,35 +405,35 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
407405

408406
return outimages
409407

410-
def calculate_total_deviation(ipd, monitor_w, image_width):
411-
deviation_cm = ipd * 0.12
412-
deviation = deviation_cm * monitor_w * (image_width / 1920)
413-
print("deviation:", deviation)
414-
return deviation
415-
416-
@njit
417-
def apply_stereo_deviation(original_image, depth, deviation, fill_technique):
418-
h, w, c = original_image.shape
419-
408+
def apply_stereo_divergence(original_image, depth, divergence, fill_technique):
    """Shift the pixels of ``original_image`` horizontally according to ``depth``.

    The depth map is min-max normalised to [0, 1] and the divergence, given as
    a percentage of the image width, is converted to pixels. The actual work is
    delegated to the implementation selected by ``fill_technique``.

    Args:
        original_image: image array; ``shape[1]`` is used as the width.
        depth: depth array providing ``min()`` and ``max()``.
        divergence: signed divergence in percent of the image width.
        fill_technique: index into the UI dropdown
            (0 none, 1 naive, 2 naive_interpolating,
            3 polylines_soft, 4 polylines_sharp).

    Returns:
        Whatever the selected implementation returns (the derived image).

    Raises:
        ValueError: if ``fill_technique`` is not one of 0..4.
    """
    depth_min = depth.min()
    depth_max = depth.max()
    depth_range = depth_max - depth_min
    if depth_range > 0:
        normalized_depth = (depth - depth_min) / depth_range
    else:
        # A perfectly flat depth map would divide by zero and fill the map
        # with NaN. Every pixel lies on the same plane, so treat them all as
        # depth 1.0, for which the shift term (1 - depth ** 2) is zero.
        normalized_depth = np.ones(depth.shape, dtype=np.float64)
    divergence_px = (divergence / 100.0) * original_image.shape[1]

    if fill_technique in [0, 1, 2]:
        return apply_stereo_divergence_naive(original_image, normalized_depth, divergence_px, fill_technique)
    if fill_technique in [3, 4]:
        return apply_stereo_divergence_polylines(original_image, normalized_depth, divergence_px, fill_technique)
    # Previously this fell through and returned None without any diagnostic.
    raise ValueError(f"Unknown stereo fill technique index: {fill_technique!r}")
418+
419+
@njit
420+
def apply_stereo_divergence_naive(original_image, normalized_depth, divergence_px: float, fill_technique):
421+
h, w, c = original_image.shape
423422

424423
derived_image = np.zeros_like(original_image)
425424
filled = np.zeros(h * w, dtype=np.uint8)
426425

427-
for row in range(h):
426+
for row in prange(h):
428427
# Swipe order should ensure that pixels that are closer overwrite
429428
# (at their destination) pixels that are less close
430-
for col in range(w) if deviation < 0 else range(w - 1, -1, -1):
431-
col_d = col + int((1 - depth[row][col] ** 2) * deviation)
432-
# col_d = col + int((1 - depth[row][col]) * deviation)
429+
for col in range(w) if divergence_px < 0 else range(w - 1, -1, -1):
430+
col_d = col + int((1 - normalized_depth[row][col] ** 2) * divergence_px)
433431
if 0 <= col_d < w:
434432
derived_image[row][col_d] = original_image[row][col]
435433
filled[row * w + col_d] = 1
436434

437435
# Fill the gaps
438-
if fill_technique == 2: # soft_horizontal
436+
if fill_technique == 2: # naive_interpolating
439437
for row in range(h):
440438
for l_pointer in range(w):
441439
# This if (and the next if) performs two checks that are almost the same - for performance reasons
@@ -444,7 +442,7 @@ def apply_stereo_deviation(original_image, depth, deviation, fill_technique):
444442
l_border = derived_image[row][l_pointer - 1] if l_pointer > 0 else np.zeros(3, dtype=np.uint8)
445443
r_border = np.zeros(3, dtype=np.uint8)
446444
r_pointer = l_pointer + 1
447-
while r_pointer != w:
445+
while r_pointer < w:
448446
if sum(derived_image[row][r_pointer]) != 0 and filled[row * w + r_pointer]:
449447
r_border = derived_image[row][r_pointer]
450448
break
@@ -453,30 +451,169 @@ def apply_stereo_deviation(original_image, depth, deviation, fill_technique):
453451
l_border = r_border
454452
elif sum(r_border) == 0:
455453
r_border = l_border
454+
# Example illustrating positions of pointers at this point in code:
455+
# is filled? : + - - - - +
456+
# pointers : l r
457+
# interpolated: 0 1 2 3 4 5
458+
# In total: 5 steps between two filled pixels
456459
total_steps = 1 + r_pointer - l_pointer
457460
step = (r_border.astype(np.float_) - l_border) / total_steps
458461
for col in range(l_pointer, r_pointer):
459462
derived_image[row][col] = l_border + (step * (col - l_pointer + 1)).astype(np.uint8)
460463
return derived_image
461-
elif fill_technique == 1: # hard_horizontal
464+
elif fill_technique == 1: # naive
462465
derived_fix = np.copy(derived_image)
463466
for pos in np.where(filled == 0)[0]:
464467
row = pos // w
465468
col = pos % w
466-
for offset in range(1, abs(int(deviation)) + 2):
469+
row_times_w = row * w
470+
for offset in range(1, abs(int(divergence_px)) + 2):
467471
r_offset = col + offset
468472
l_offset = col - offset
469-
if r_offset < w and filled[row * w + r_offset]:
473+
if r_offset < w and filled[row_times_w + r_offset]:
470474
derived_fix[row][col] = derived_image[row][r_offset]
471475
break
472-
if 0 <= l_offset and filled[row * w + l_offset]:
476+
if 0 <= l_offset and filled[row_times_w + l_offset]:
473477
derived_fix[row][col] = derived_image[row][l_offset]
474478
break
475479
return derived_fix
476480
else: # none
477481
return derived_image
478482

479-
@njit(parallel=True)
483+
@njit(fastmath=True, parallel=True)
484+
def apply_stereo_divergence_polylines(original_image, normalized_depth, divergence_px: float, fill_technique):
    """Build a horizontally shifted view of an image by rasterising row polylines.

    Each image row is turned into a list of segments, every segment end point is
    moved by ``(1 - depth ** 2) * divergence_px``, the segments are sorted by
    their first x coordinate, and each output pixel is then coloured by sampling
    several sub-pixel positions and taking, at each, the segment with the
    largest interpolated "closeness" (absolute shift).

    Args:
        original_image: array of shape (h, w, c).
        normalized_depth: per-pixel depth, indexed as ``[row][col]``.
        divergence_px: signed maximum shift in pixels.
        fill_technique: 3 for polylines_soft (segments join adjacent pixels),
            4 for polylines_sharp (one segment per pixel plus one per gap).

    Returns:
        A new array shaped and typed like ``original_image``.
    """
    EPSILON = 1e-7
    h, w, c = original_image.shape
    derived_image = np.zeros_like(original_image)
    # Sub-pixel sample offsets used when rasterising one output pixel.
    SAMPLES = [1/6, 3/6, 5/6] if fill_technique == 3 else [0.1, 0.3, 0.5, 0.7, 0.9]

    for row in prange(h):
        # Generating the polyline.
        # Format of each segment:
        #   [0] new coordinate of first point,  [1] its divergence,
        #   [2] new coordinate of second point, [3] its divergence,
        #   [4] original column of the first pixel,
        #   [5] original column of the second pixel.
        # It is not guaranteed that the first point is the left point.
        sg = np.zeros((0, 6), dtype=np.float64)
        sg_end = 0
        if fill_technique == 3:  # polylines_soft
            sg = np.zeros((w + 3, 6), dtype=np.float64)
            # Leading sentinel segment; its second point is patched below once
            # the first real segment is known.
            sg[sg_end] = [-3.0 * abs(divergence_px), -0.1, -1337.0, -0.1, 0.0, 0.0]
            sg_end += 1
            for col in range(0, w - 1):
                ld = (1 - normalized_depth[row][col] ** 2) * divergence_px
                rd = (1 - normalized_depth[row][col + 1] ** 2) * divergence_px
                lx, rx = ld + col, rd + (col + 1)
                sg[sg_end] = [lx, abs(ld), rx, abs(rd), float(col), float(col + 1)]
                sg_end += 1
                if col == 0:
                    sg[0][2] = sg[1][0] + EPSILON
            # Trailing sentinel segment extending past the right image border.
            sg[sg_end] = [sg[sg_end - 1][2] - EPSILON, -0.1, w + 3.0 * abs(divergence_px), -0.1, w - 1, w - 1]
            sg_end += 1
        if fill_technique == 4:  # polylines_sharp
            PIXEL_HALF_WIDTH = 0.45
            sg = np.zeros((2 * w + 5, 6), dtype=np.float64)
            # Leading sentinel segment; its second point is patched at col == 0.
            sg[sg_end] = [-3.0 * abs(divergence_px), -0.1, -1337.0, -0.1, 0, 0]
            sg_end += 1
            for col in range(0, w):
                d = (1 - normalized_depth[row][col] ** 2) * divergence_px
                center = col + d
                fx = center - PIXEL_HALF_WIDTH - EPSILON
                sx = center + PIXEL_HALF_WIDTH + EPSILON

                if col == 0:
                    sg[0][2] = fx + EPSILON
                else:
                    # Each space between two adjacent pixels gets a segment,
                    # running from the middle of the previous pixel's segment
                    # to the centre of this pixel.
                    sg[sg_end] = [(sg[sg_end - 1][0] + sg[sg_end - 1][2]) / 2, sg[sg_end - 1][3] - EPSILON,
                                  center, abs(d) - EPSILON,
                                  col - 1, col]
                    sg_end += 1

                # Each pixel gets a segment.
                sg[sg_end] = [fx, abs(d), sx, abs(d), col, col]
                sg_end += 1

            # Trailing sentinel segment extending past the right image border.
            sg[sg_end] = [sg[sg_end - 1][2] - EPSILON, -0.1, w + 3.0 * abs(divergence_px), -0.1, w - 1, w - 1]
            sg_end += 1

        # Sort segments by first coordinate using insertion sort; the author
        # notes the segments are almost sorted to begin with.
        for i in range(1, sg_end):
            u = i - 1
            # The bounds check comes first so that sg[-1] is never read.
            while 0 <= u and sg[u][0] > sg[u + 1][0]:
                sg[u], sg[u + 1] = np.copy(sg[u + 1]), np.copy(sg[u])
                u -= 1

        # Possible improvement: a more accurate logic instead of just sampling
        # a region multiple times.
        # Rasterizing: at each column we keep track of the segments that are
        # "active" (or "current").
        # NOTE(review): the capacity below is a fixed estimate and writes to
        # cs[cs_end] are not bounds-checked here - confirm it cannot overflow.
        cs = np.zeros((5 * int(abs(divergence_px)) + 25, 6), dtype=np.float64)
        cs_end = 0
        seg_pointer = 0
        for col in range(w):
            # Remove segments whose second point lies left of this column by
            # overwriting them with the last active segment.
            cs_i = 0
            while cs_i < cs_end:
                if cs[cs_i][2] < col:
                    cs[cs_i] = cs[cs_end - 1]
                    cs_end -= 1
                else:
                    cs_i += 1

            # Add segments whose first point lies before the end of this column.
            while seg_pointer < sg_end and sg[seg_pointer][0] < col + 1.0:
                cs[cs_end] = sg[seg_pointer]
                seg_pointer += 1
                cs_end += 1

            # We start with 0.5 because of how floats are converted to ints.
            color = np.full(c, 0.5, dtype=np.float64)
            for sample_i in range(len(SAMPLES)):
                # Find the segment that is the closest at the sample position.
                sample = SAMPLES[sample_i]
                pos = col + sample
                best_i = 0
                best_closeness = -1.1
                for cs_i in range(cs_end):
                    # Interpolating; works regardless of whether the first
                    # point is the left point.
                    ip_k = (pos - cs[cs_i][0]) / (cs[cs_i][2] - cs[cs_i][0])
                    closeness = (1.0 - ip_k) * cs[cs_i][1] + ip_k * cs[cs_i][3]
                    if best_closeness < closeness and 0.0 < ip_k < 1.0:
                        best_closeness = closeness
                        best_i = cs_i

                # Blend the two source pixels of the winning segment.
                col_l, col_r = int(cs[best_i][4] + 0.001), int(cs[best_i][5] + 0.001)
                ip_k = (pos - cs[best_i][0]) / (cs[best_i][2] - cs[best_i][0])
                color += (original_image[row][col_l] * (1.0 - ip_k) + original_image[row][col_r] * ip_k) / len(SAMPLES)

            derived_image[row][col] = np.asarray(color, dtype=np.uint8)

    return derived_image
615+
616+
@njit(parallel=True)
480617
def overlap(im1, im2):
481618
width1 = im1.shape[1]
482619
height1 = im1.shape[0]
@@ -523,9 +660,8 @@ def run_generate(depthmap_mode,
523660
combine_output,
524661
combine_output_axis,
525662
gen_stereo,
526-
gen_anaglyph,
527-
stereo_ipd,
528-
stereo_size,
663+
gen_anaglyph,
664+
stereo_divergence,
529665
stereo_fill,
530666
stereo_balance
531667
):
@@ -564,7 +700,7 @@ def run_generate(depthmap_mode,
564700
outpath = opts.outdir_samples or opts.outdir_extras_samples
565701

566702

567-
outputs = run_depthmap(None, outpath, imageArr, imageNameArr, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_ipd, stereo_size, stereo_fill, stereo_balance)
703+
outputs = run_depthmap(None, outpath, imageArr, imageNameArr, compute_device, model_type, net_width, net_height, match_size, invert_depth, boost, save_depth, show_depth, show_heat, combine_output, combine_output_axis, gen_stereo, gen_anaglyph, stereo_divergence, stereo_fill, stereo_balance)
568704

569705
return outputs, plaintext_to_html('info'), ''
570706

@@ -614,10 +750,9 @@ def on_ui_tabs():
614750
gen_stereo = gr.Checkbox(label="Generate Stereo side-by-side image",value=False)
615751
gen_anaglyph = gr.Checkbox(label="Generate Stereo anaglyph image (red/cyan)",value=False)
616752
with gr.Row():
617-
stereo_ipd = gr.Slider(minimum=5, maximum=7.5, step=0.1, label='IPD (cm)', value=6.4)
618-
stereo_size = gr.Slider(minimum=20, maximum=100, step=0.5, label='Screen Width (cm)', value=38.5)
753+
stereo_divergence = gr.Slider(minimum=0.05, maximum=10.005, step=0.01, label='Divergence (3D effect)', value=2.5)
619754
with gr.Row():
620-
stereo_fill = gr.Dropdown(label="Gap fill technique", choices=['none', 'hard_horizontal', 'soft_horizontal'], value='soft_horizontal', type="index", elem_id="stereo_fill_type")
755+
stereo_fill = gr.Dropdown(label="Gap fill technique", choices=['none', 'naive', 'naive_interpolating', 'polylines_soft', 'polylines_sharp'], value='naive_interpolating', type="index", elem_id="stereo_fill_type")
621756
stereo_balance = gr.Slider(minimum=-1.0, maximum=1.0, step=0.05, label='Balance between eyes', value=0.0)
622757

623758
with gr.Box():
@@ -655,9 +790,8 @@ def on_ui_tabs():
655790
combine_output,
656791
combine_output_axis,
657792
gen_stereo,
658-
gen_anaglyph,
659-
stereo_ipd,
660-
stereo_size,
793+
gen_anaglyph,
794+
stereo_divergence,
661795
stereo_fill,
662796
stereo_balance
663797
],
@@ -1404,7 +1538,7 @@ def estimateboost(img, model, model_type, pix2pixmodel):
14041538

14051539
# output
14061540
return cv2.resize(imageandpatchs.estimation_updated_image, (input_resolution[1], input_resolution[0]), interpolation=cv2.INTER_CUBIC)
1407-
1541+
14081542
# taken from 3d-photo-inpainting and modified
14091543
def sparse_bilateral_filtering(
14101544
depth, image, filter_size, depth_threshold, sigma_s, sigma_r, HR=False, mask=None, gsHR=True, edge_id=None, num_iter=None, num_gs_iter=None, spdb=False
@@ -1443,7 +1577,7 @@ def sparse_bilateral_filtering(
14431577
def vis_depth_discontinuity(depth, depth_threshold, vis_diff=False, label=False, mask=None):
14441578
"""
14451579
config:
1446-
-
1580+
-
14471581
"""
14481582
if label == False:
14491583
disp = 1./depth

0 commit comments

Comments
 (0)