Skip to content

Commit 3674d87

Browse files
committed
Refactor stereoimage stuff into a separate file
1 parent 3cb2460 commit 3674d87

File tree

2 files changed

+270
-221
lines changed

2 files changed

+270
-221
lines changed

scripts/depthmap.py

Lines changed: 5 additions & 221 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636

3737
sys.path.append('extensions/stable-diffusion-webui-depthmap-script/scripts')
3838

39+
from stereoimage_generation import *
40+
3941
# midas imports
4042
from midas.dpt_depth import DPTDepthModel
4143
from midas.midas_net import MidasNet
@@ -106,7 +108,7 @@ def ui(self, is_img2img):
106108
with gr.Row():
107109
stereo_divergence = gr.Slider(minimum=0.05, maximum=10.005, step=0.01, label='Divergence (3D effect)', value=2.5)
108110
with gr.Row():
109-
stereo_fill = gr.Dropdown(label="Gap fill technique", choices=['none', 'naive', 'naive_interpolating', 'polylines_soft', 'polylines_sharp'], value='polylines_sharp', type="index", elem_id="stereo_fill_type")
111+
stereo_fill = gr.Dropdown(label="Gap fill technique", choices=['none', 'naive', 'naive_interpolating', 'polylines_soft', 'polylines_sharp'], value='polylines_sharp', type="value", elem_id="stereo_fill_type")
110112
stereo_balance = gr.Slider(minimum=-1.0, maximum=1.0, step=0.05, label='Balance between eyes', value=0.0)
111113
with gr.Group():
112114
with gr.Row():
@@ -486,7 +488,7 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
486488
outimages.append(stereotb_img)
487489
if gen_anaglyph:
488490
print("Generating Anaglyph image..")
489-
anaglyph_img = overlap(left_image, right_image)
491+
anaglyph_img = overlap_red_cyan(left_image, right_image)
490492
outimages.append(anaglyph_img)
491493
if (processed is not None):
492494
if gen_stereo:
@@ -853,224 +855,6 @@ def run_makevideo(fn_mesh, vid_numframes, vid_fps, vid_traj, vid_shift, vid_bord
853855
return fn_saved[-1], fn_saved[-1], ''
854856

855857

856-
def apply_stereo_divergence(original_image, depth, divergence, fill_technique):
    """Normalize the depth map and dispatch to the selected divergence kernel.

    Args:
        original_image: Image array; ``shape[1]`` is used as its width in pixels.
        depth: Depth map array. It is min-max normalized to the 0..1 range here
            before being handed to the kernel.
        divergence: Strength of the effect, expressed in percent of the image width.
        fill_technique: Index of the gap-filling technique. 0, 1 and 2 are handled
            by ``apply_stereo_divergence_naive``; 3 and 4 are handled by
            ``apply_stereo_divergence_polylines``.

    Returns:
        The value returned by the selected kernel.

    Raises:
        ValueError: If ``fill_technique`` is not one of 0, 1, 2, 3, 4. Previously
            such a value fell through both branches and silently returned None.
    """
    # Validate first so that a bad argument fails fast, before any array work.
    if fill_technique not in (0, 1, 2, 3, 4):
        raise ValueError(f"Unknown fill technique: {fill_technique!r}")

    depth_min = depth.min()
    depth_max = depth.max()
    depth_range = depth_max - depth_min
    if depth_range == 0:
        # A completely flat depth map would otherwise divide by zero and feed
        # NaNs into the kernels; dividing by 1 yields an all-zero normalized map.
        depth_range = 1
    depth = (depth - depth_min) / depth_range
    divergence_px = (divergence / 100.0) * original_image.shape[1]

    if fill_technique in (0, 1, 2):
        return apply_stereo_divergence_naive(original_image, depth, divergence_px, fill_technique)
    return apply_stereo_divergence_polylines(original_image, depth, divergence_px, fill_technique)
866-
867-
@njit
def apply_stereo_divergence_naive(original_image, normalized_depth, divergence_px: float, fill_technique):
    """Shift every pixel horizontally by a depth-dependent offset, then fill gaps.

    Args:
        original_image: Image array whose shape unpacks into (h, w, c).
            NOTE(review): the gap-filling code builds uint8 arrays, so a uint8
            image is presumably expected - confirm against the caller.
        normalized_depth: Per-pixel depth indexed as ``[row][col]``. The shift of a
            pixel is ``int((1 - depth ** 2) * divergence_px)``.
        divergence_px: Maximum shift in pixels; its sign selects the direction.
        fill_technique: 2 interpolates linearly across gaps, 1 copies the nearest
            filled pixel found within ``abs(int(divergence_px)) + 1`` columns,
            any other value leaves the gaps untouched (zeros).

    Returns:
        A new array shaped like ``original_image`` holding the shifted image.
    """
    h, w, c = original_image.shape

    derived_image = np.zeros_like(original_image)
    # Flat (row * w + col) map of destination pixels that received a source pixel
    filled = np.zeros(h * w, dtype=np.uint8)

    # The decorator does not request parallel=True; per the Numba documentation
    # prange then behaves like the builtin range.
    for row in prange(h):
        # Swipe order should ensure that pixels that are closer overwrite
        # (at their destination) pixels that are less close
        for col in range(w) if divergence_px < 0 else range(w - 1, -1, -1):
            col_d = col + int((1 - normalized_depth[row][col] ** 2) * divergence_px)
            if 0 <= col_d < w:
                derived_image[row][col_d] = original_image[row][col]
                filled[row * w + col_d] = 1

    # Fill the gaps
    if fill_technique == 2:  # naive_interpolating
        for row in range(h):
            for l_pointer in range(w):
                # This check and the one in the while loop below are almost the same
                # (note "or" here versus "and" there) - for performance reasons
                if sum(derived_image[row][l_pointer]) != 0 or filled[row * w + l_pointer]:
                    continue
                # Placeholders are sized by the channel count of the image rather than
                # a hard-coded 3, so they always match the pixels they stand in for.
                l_border = derived_image[row][l_pointer - 1] if l_pointer > 0 else np.zeros(c, dtype=np.uint8)
                r_border = np.zeros(c, dtype=np.uint8)
                r_pointer = l_pointer + 1
                while r_pointer < w:
                    if sum(derived_image[row][r_pointer]) != 0 and filled[row * w + r_pointer]:
                        r_border = derived_image[row][r_pointer]
                        break
                    r_pointer += 1
                # An all-zero border counts as missing: reuse the opposite border
                if sum(l_border) == 0:
                    l_border = r_border
                elif sum(r_border) == 0:
                    r_border = l_border
                # Example illustrating positions of pointers at this point in code:
                # is filled?  : + - - - - +
                # pointers    :   l       r
                # interpolated: 0 1 2 3 4 5
                # In total: 5 steps between two filled pixels
                total_steps = 1 + r_pointer - l_pointer
                # np.float64 instead of np.float_: the alias was removed in NumPy 2.0
                step = (r_border.astype(np.float64) - l_border) / total_steps
                for col in range(l_pointer, r_pointer):
                    derived_image[row][col] = l_border + (step * (col - l_pointer + 1)).astype(np.uint8)
        return derived_image
    elif fill_technique == 1:  # naive
        # Read from derived_image and write to a copy, so that pixels filled in
        # this pass are never used as a source for other gaps
        derived_fix = np.copy(derived_image)
        for pos in np.where(filled == 0)[0]:
            row = pos // w
            col = pos % w
            row_times_w = row * w
            # Look for the nearest filled pixel; at equal distance the right one wins
            for offset in range(1, abs(int(divergence_px)) + 2):
                r_offset = col + offset
                l_offset = col - offset
                if r_offset < w and filled[row_times_w + r_offset]:
                    derived_fix[row][col] = derived_image[row][r_offset]
                    break
                if 0 <= l_offset and filled[row_times_w + l_offset]:
                    derived_fix[row][col] = derived_image[row][l_offset]
                    break
        return derived_fix
    else:  # none
        return derived_image
930-
931-
@njit(parallel=True)  # fastmath=True does not reasonably improve performance
def apply_stereo_divergence_polylines(original_image, normalized_depth, divergence_px: float, fill_technique):
    """Shift pixels horizontally by treating every image row as a polyline.

    For each row the function generates a polyline, morphs it (applies the
    divergence to its vertices) and then rasterizes it back into pixels.

    Args:
        original_image: Image array whose shape unpacks into (h, w, c).
        normalized_depth: Per-pixel depth indexed as ``[row][col]``. The shift of a
            pixel is ``(1 - depth ** 2) * divergence_px``.
        divergence_px: Maximum shift in pixels; its sign selects the direction.
        fill_technique: 4 gives every pixel two vertices placed 0.45 to either side
            of its centre; any other value gives every pixel a single vertex.

    Returns:
        A new array shaped like ``original_image`` (same dtype, via
        ``np.zeros_like``) with the rasterized result.
    """
    EPSILON = 1e-7
    PIXEL_HALF_WIDTH = 0.45 if fill_technique == 4 else 0.0

    h, w, c = original_image.shape
    derived_image = np.zeros_like(original_image)
    for row in prange(h):
        # generating the vertices of the morphed polyline
        # format: new coordinate of the vertex, divergence (closeness), column of pixel that contains the point's color
        # (np.float64 instead of np.float_: the alias was removed in NumPy 2.0)
        pt = np.zeros((5 + 2 * w, 3), dtype=np.float64)
        pt_end: int = 0
        # sentinel vertex placed to the left of every possible morphed vertex
        pt[pt_end] = [-3.0 * abs(divergence_px), 0.0, 0.0]
        pt_end += 1
        for col in range(0, w):
            coord_d = (1 - normalized_depth[row][col] ** 2) * divergence_px
            coord_x = col + 0.5 + coord_d
            if PIXEL_HALF_WIDTH < EPSILON:
                pt[pt_end] = [coord_x, abs(coord_d), col]
                pt_end += 1
            else:
                pt[pt_end] = [coord_x - PIXEL_HALF_WIDTH, abs(coord_d), col]
                pt[pt_end + 1] = [coord_x + PIXEL_HALF_WIDTH, abs(coord_d), col]
                pt_end += 2
        # sentinel vertex placed to the right of every possible morphed vertex
        pt[pt_end] = [w + 3.0 * abs(divergence_px), 0.0, w - 1]
        pt_end += 1

        # generating the segments of the morphed polyline
        # format: coord_x, coord_d, color_i of the first point, then the same for the second point
        sg_end: int = pt_end - 1
        sg = np.zeros((sg_end, 6), dtype=np.float64)
        for i in range(sg_end):
            sg[i] += np.concatenate((pt[i], pt[i + 1]))
        # Here is an informal proof that this (morphed) polyline does not self-intersect:
        # Draw a plot with two axes: coord_x and coord_d. Now draw the original line - it will be positioned at the
        # bottom of the graph (that is, for every point coord_d == 0). Now draw the morphed line using the vertices of
        # the original polyline. Observe that for each vertex in the new polyline, its increments
        # (from the corresponding vertex in the old polyline) over coord_x and coord_d are in direct proportion.
        # In fact, this proportion is equal for all the vertices and it is equal to either -1 or +1,
        # depending on the sign of divergence_px. Now draw the lines from each old vertex to a corresponding new vertex.
        # Since the proportions are equal, these lines have the same angle with an axis and are parallel.
        # So, these lines do not intersect. Now rotate the plot by 45 or -45 degrees and observe that
        # each dot of the polyline is further right from the last dot,
        # which makes it impossible for the polyline to self-intersect. QED.

        # sort segments and points using insertion sort
        # has a very good performance in practice, since these are almost sorted to begin with
        for i in range(1, sg_end):
            u = i - 1
            # the bounds check comes first so that pt is never indexed with -1
            while 0 <= u and pt[u][0] > pt[u + 1][0]:
                pt[u], pt[u + 1] = np.copy(pt[u + 1]), np.copy(pt[u])
                sg[u], sg[u + 1] = np.copy(sg[u + 1]), np.copy(sg[u])
                u -= 1

        # rasterizing
        # at each point in time we keep track of segments that are "active" (or "current")
        # NOTE(review): the capacity below is not checked when segments are appended -
        # confirm it is large enough for every input.
        csg = np.zeros((5 * int(abs(divergence_px)) + 25, 6), dtype=np.float64)
        csg_end: int = 0
        sg_pointer: int = 0
        # and index of the point that should be processed next
        pt_i: int = 0
        for col in range(w):  # iterate over regions (that will be rasterized into pixels)
            color = np.full(c, 0.5, dtype=np.float64)  # we start with 0.5 because of how floats are converted to ints
            while pt[pt_i][0] < col:
                pt_i += 1
            pt_i -= 1  # pt_i now points to the dot before the region start
            # Finding the parts of segments that contribute color to the region
            while pt[pt_i][0] < col + 1:
                coord_from = max(col, pt[pt_i][0]) + EPSILON
                coord_to = min(col + 1, pt[pt_i + 1][0]) - EPSILON
                significance = coord_to - coord_from
                # the color at center point is the same as the average of color of segment part
                coord_center = coord_from + 0.5 * significance

                # adding segments that now may contribute
                while sg_pointer < sg_end and sg[sg_pointer][0] < coord_center:
                    csg[csg_end] = sg[sg_pointer]
                    sg_pointer += 1
                    csg_end += 1
                # removing segments that will no longer contribute
                # (a removed entry is overwritten with the last active one)
                csg_i = 0
                while csg_i < csg_end:
                    if csg[csg_i][3] < coord_center:
                        csg[csg_i] = csg[csg_end - 1]
                        csg_end -= 1
                    else:
                        csg_i += 1
                # finding the closest segment (segment with most divergence)
                # note that this segment will be the closest from coord_from right up to coord_to, since there
                # are no new segments "appearing" in between these two and _the polyline does not self-intersect_
                best_csg_i: int = 0
                if csg_end != 1:
                    best_csg_closeness: float = -EPSILON
                    for csg_i in range(csg_end):
                        # interpolation factor of coord_center along the segment
                        ip_k = (coord_center - csg[csg_i][0]) / (csg[csg_i][3] - csg[csg_i][0])
                        closeness = (1.0 - ip_k) * csg[csg_i][1] + ip_k * csg[csg_i][4]
                        if best_csg_closeness < closeness and 0.0 < ip_k < 1.0:
                            best_csg_closeness = closeness
                            best_csg_i = csg_i
                # getting the color
                col_l: int = int(csg[best_csg_i][2] + EPSILON)
                col_r: int = int(csg[best_csg_i][5] + EPSILON)
                if col_l == col_r:
                    color += original_image[row][col_l] * significance
                else:
                    ip_k = (coord_center - csg[best_csg_i][0]) / (csg[best_csg_i][3] - csg[best_csg_i][0])
                    color += (original_image[row][col_l] * (1.0 - ip_k) + original_image[row][col_r] * ip_k) \
                             * significance
                pt_i += 1
            derived_image[row][col] = np.asarray(color, dtype=np.uint8)
    return derived_image
1050-
1051-
@njit(parallel=True)
def overlap(im1, im2):
    """Combine two images into one by mixing their color channels.

    Channel 0 (red) is taken from ``im1`` (the "left" image); channels 1 and 2
    (green and blue) are taken from ``im2`` (the "right" image).

    Args:
        im1: Array indexed as ``[row, col, channel]``; only channel 0 is read.
        im2: Array indexed as ``[row, col, channel]``; channels 1 and 2 are read.
            Its height and width define the size of the result.

    Returns:
        A new uint8 array of shape ``(im2 height, im2 width, 3)``. Pixels outside
        the area covered by ``im1`` keep a red value of 0.
    """
    height1, width1 = im1.shape[0], im1.shape[1]
    height2, width2 = im2.shape[0], im2.shape[1]

    # final image
    composite = np.zeros((height2, width2, 3), np.uint8)

    # The result is sized after im2, so the red channel may only be copied inside
    # the region both images share; otherwise a larger im1 would make the loop
    # write past the bounds of composite.
    red_rows = min(height1, height2)
    red_cols = min(width1, width2)

    # iterate through "left" image, filling in red values of final image
    for i in prange(red_rows):
        for j in range(red_cols):
            composite[i, j, 0] = im1[i, j, 0]

    # iterate through "right" image, filling in blue/green values of final image
    for i in prange(height2):
        for j in range(width2):
            composite[i, j, 1] = im2[i, j, 1]
            composite[i, j, 2] = im2[i, j, 2]

    return composite
1073-
1074858
# called from depth tab
1075859
def run_generate(depthmap_mode,
1076860
depthmap_image,
@@ -1216,7 +1000,7 @@ def on_ui_tabs():
12161000
with gr.Row():
12171001
stereo_divergence = gr.Slider(minimum=0.05, maximum=10.005, step=0.01, label='Divergence (3D effect)', value=2.5)
12181002
with gr.Row():
1219-
stereo_fill = gr.Dropdown(label="Gap fill technique", choices=['none', 'naive', 'naive_interpolating', 'polylines_soft', 'polylines_sharp'], value='polylines_sharp', type="index", elem_id="stereo_fill_type")
1003+
stereo_fill = gr.Dropdown(label="Gap fill technique", choices=['none', 'naive', 'naive_interpolating', 'polylines_soft', 'polylines_sharp'], value='polylines_sharp', type="value", elem_id="stereo_fill_type")
12201004
stereo_balance = gr.Slider(minimum=-1.0, maximum=1.0, step=0.05, label='Balance between eyes', value=0.0)
12211005
with gr.Group():
12221006
with gr.Row():

0 commit comments

Comments
 (0)