@@ -89,20 +89,19 @@ def ui(self, is_img2img):
89
89
gen_stereo = gr .Checkbox (label = "Generate Stereo side-by-side image" ,value = False )
90
90
gen_anaglyph = gr .Checkbox (label = "Generate Stereo anaglyph image (red/cyan)" ,value = False )
91
91
with gr .Row ():
92
- stereo_ipd = gr .Slider (minimum = 5 , maximum = 7.5 , step = 0.1 , label = 'IPD (cm)' , value = 6.4 )
93
- stereo_size = gr .Slider (minimum = 20 , maximum = 100 , step = 0.5 , label = 'Screen Width (cm)' , value = 38.5 )
92
+ stereo_divergence = gr .Slider (minimum = 0.05 , maximum = 10.005 , step = 0.01 , label = 'Divergence (3D effect)' , value = 2.5 )
94
93
with gr .Row ():
95
- stereo_fill = gr .Dropdown (label = "Gap fill technique" , choices = ['none' , 'hard_horizontal ' , 'soft_horizontal' ], value = 'soft_horizontal ' , type = "index" , elem_id = "stereo_fill_type" )
94
+ stereo_fill = gr .Dropdown (label = "Gap fill technique" , choices = ['none' , 'naive ' , 'naive_interpolating' , 'polylines_soft' , 'polylines_sharp' ], value = 'naive_interpolating ' , type = "index" , elem_id = "stereo_fill_type" )
96
95
stereo_balance = gr .Slider (minimum = - 1.0 , maximum = 1.0 , step = 0.05 , label = 'Balance between eyes' , value = 0.0 )
97
96
98
97
99
98
with gr .Box ():
100
99
gr .HTML ("Instructions, comment and share @ <a href='https://github.com/thygate/stable-diffusion-webui-depthmap-script'>https://github.com/thygate/stable-diffusion-webui-depthmap-script</a>" )
101
100
102
- return [compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance ]
101
+ return [compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_divergence , stereo_fill , stereo_balance ]
103
102
104
103
# run from script in txt2img or img2img
105
- def run (self , p , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance ):
104
+ def run (self , p , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_divergence , stereo_fill , stereo_balance ):
106
105
107
106
# sd process
108
107
processed = processing .process_images (p )
@@ -116,13 +115,13 @@ def run(self, p, compute_device, model_type, net_width, net_height, match_size,
116
115
continue
117
116
inputimages .append (processed .images [count ])
118
117
119
- newmaps = run_depthmap (processed , p .outpath_samples , inputimages , None , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance )
118
+ newmaps = run_depthmap (processed , p .outpath_samples , inputimages , None , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_divergence , stereo_fill , stereo_balance )
120
119
for img in newmaps :
121
120
processed .images .append (img )
122
121
123
122
return processed
124
123
125
- def run_depthmap (processed , outpath , inputimages , inputnames , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance ):
124
+ def run_depthmap (processed , outpath , inputimages , inputnames , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_divergence , stereo_fill , stereo_balance ):
126
125
127
126
# unload sd model
128
127
shared .sd_model .cond_stage_model .to (devices .cpu )
@@ -250,7 +249,7 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
250
249
# iterate over input (generated) images
251
250
numimages = len (inputimages )
252
251
for count in trange (0 , numimages ):
253
-
252
+
254
253
#if numimages > 1:
255
254
# print("\nDepthmap", count+1, '/', numimages)
256
255
print ('\n ' )
@@ -357,13 +356,12 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
357
356
if gen_stereo or gen_anaglyph :
358
357
print ("Generating Stereo image.." )
359
358
#img_output = cv2.blur(img_output, (3, 3))
360
- deviation = calculate_total_deviation (stereo_ipd , stereo_size , inputimages [count ].width )
361
359
balance = (stereo_balance + 1 ) / 2
362
360
original_image = np .asarray (inputimages [count ])
363
361
left_image = original_image if balance < 0.001 else \
364
- apply_stereo_deviation (original_image , img_output , - deviation * balance , stereo_fill )
362
+ apply_stereo_divergence (original_image , img_output , - stereo_divergence * balance , stereo_fill )
365
363
right_image = original_image if balance > 0.999 else \
366
- apply_stereo_deviation (original_image , img_output , deviation * (1 - balance ), stereo_fill )
364
+ apply_stereo_divergence (original_image , img_output , stereo_divergence * (1 - balance ), stereo_fill )
367
365
stereo_img = np .hstack ([left_image , right_image ])
368
366
369
367
if gen_stereo :
@@ -407,35 +405,35 @@ def run_depthmap(processed, outpath, inputimages, inputnames, compute_device, mo
407
405
408
406
return outimages
409
407
410
- def calculate_total_deviation (ipd , monitor_w , image_width ):
411
- deviation_cm = ipd * 0.12
412
- deviation = deviation_cm * monitor_w * (image_width / 1920 )
413
- print ("deviation:" , deviation )
414
- return deviation
415
-
416
- @njit
417
- def apply_stereo_deviation (original_image , depth , deviation , fill_technique ):
418
- h , w , c = original_image .shape
419
-
408
def apply_stereo_divergence(original_image, depth, divergence, fill_technique):
    """Normalize the depth map and dispatch to the selected stereo-shift routine.

    Args:
        original_image: Image array. Its ``shape[1]`` (width) scales the
            divergence; the helpers unpack the shape as (height, width, channels).
        depth: Depth map array. It is min-max normalized to [0, 1] here
            before being handed to the helpers.
        divergence: Shift strength expressed as a percentage of the image
            width. The sign is passed through to the helpers unchanged.
        fill_technique: Index of the gap-fill technique as produced by the
            "Gap fill technique" dropdown (0 none, 1 naive,
            2 naive_interpolating, 3 polylines_soft, 4 polylines_sharp).

    Returns:
        The array returned by ``apply_stereo_divergence_naive`` (techniques
        0-2) or ``apply_stereo_divergence_polylines`` (techniques 3-4).

    Raises:
        ValueError: If ``fill_technique`` is not one of the indices above.
    """
    # Fail loudly instead of silently returning None, which would only
    # surface later as an obscure error when the two eye images are stacked.
    if fill_technique not in (0, 1, 2, 3, 4):
        raise ValueError(f"Unknown gap fill technique index: {fill_technique!r}")

    depth_min = depth.min()
    depth_range = depth.max() - depth_min
    if depth_range == 0:
        # Flat depth map: the numerator below is all zeros, so any non-zero
        # divisor yields an all-zero normalized map and avoids 0/0 -> NaN.
        depth_range = 1
    normalized_depth = (depth - depth_min) / depth_range

    # Divergence is given in percent of the image width; convert to pixels.
    divergence_px = (divergence / 100.0) * original_image.shape[1]

    if fill_technique in (0, 1, 2):
        return apply_stereo_divergence_naive(original_image, normalized_depth, divergence_px, fill_technique)
    return apply_stereo_divergence_polylines(original_image, normalized_depth, divergence_px, fill_technique)
418
+
419
+ @njit
420
+ def apply_stereo_divergence_naive (original_image , normalized_depth , divergence_px : float , fill_technique ):
421
+ h , w , c = original_image .shape
423
422
424
423
derived_image = np .zeros_like (original_image )
425
424
filled = np .zeros (h * w , dtype = np .uint8 )
426
425
427
- for row in range (h ):
426
+ for row in prange (h ):
428
427
# Swipe order should ensure that pixels that are closer overwrite
429
428
# (at their destination) pixels that are less close
430
- for col in range (w ) if deviation < 0 else range (w - 1 , - 1 , - 1 ):
431
- col_d = col + int ((1 - depth [row ][col ] ** 2 ) * deviation )
432
- # col_d = col + int((1 - depth[row][col]) * deviation)
429
+ for col in range (w ) if divergence_px < 0 else range (w - 1 , - 1 , - 1 ):
430
+ col_d = col + int ((1 - normalized_depth [row ][col ] ** 2 ) * divergence_px )
433
431
if 0 <= col_d < w :
434
432
derived_image [row ][col_d ] = original_image [row ][col ]
435
433
filled [row * w + col_d ] = 1
436
434
437
435
# Fill the gaps
438
- if fill_technique == 2 : # soft_horizontal
436
+ if fill_technique == 2 : # naive_interpolating
439
437
for row in range (h ):
440
438
for l_pointer in range (w ):
441
439
# This if (and the next if) performs two checks that are almost the same - for performance reasons
@@ -444,7 +442,7 @@ def apply_stereo_deviation(original_image, depth, deviation, fill_technique):
444
442
l_border = derived_image [row ][l_pointer - 1 ] if l_pointer > 0 else np .zeros (3 , dtype = np .uint8 )
445
443
r_border = np .zeros (3 , dtype = np .uint8 )
446
444
r_pointer = l_pointer + 1
447
- while r_pointer != w :
445
+ while r_pointer < w :
448
446
if sum (derived_image [row ][r_pointer ]) != 0 and filled [row * w + r_pointer ]:
449
447
r_border = derived_image [row ][r_pointer ]
450
448
break
@@ -453,30 +451,169 @@ def apply_stereo_deviation(original_image, depth, deviation, fill_technique):
453
451
l_border = r_border
454
452
elif sum (r_border ) == 0 :
455
453
r_border = l_border
454
+ # Example illustrating positions of pointers at this point in code:
455
+ # is filled? : + - - - - +
456
+ # pointers : l r
457
+ # interpolated: 0 1 2 3 4 5
458
+ # In total: 5 steps between two filled pixels
456
459
total_steps = 1 + r_pointer - l_pointer
457
460
step = (r_border .astype (np .float_ ) - l_border ) / total_steps
458
461
for col in range (l_pointer , r_pointer ):
459
462
derived_image [row ][col ] = l_border + (step * (col - l_pointer + 1 )).astype (np .uint8 )
460
463
return derived_image
461
- elif fill_technique == 1 : # hard_horizontal
464
+ elif fill_technique == 1 : # naive
462
465
derived_fix = np .copy (derived_image )
463
466
for pos in np .where (filled == 0 )[0 ]:
464
467
row = pos // w
465
468
col = pos % w
466
- for offset in range (1 , abs (int (deviation )) + 2 ):
469
+ row_times_w = row * w
470
+ for offset in range (1 , abs (int (divergence_px )) + 2 ):
467
471
r_offset = col + offset
468
472
l_offset = col - offset
469
- if r_offset < w and filled [row * w + r_offset ]:
473
+ if r_offset < w and filled [row_times_w + r_offset ]:
470
474
derived_fix [row ][col ] = derived_image [row ][r_offset ]
471
475
break
472
- if 0 <= l_offset and filled [row * w + l_offset ]:
476
+ if 0 <= l_offset and filled [row_times_w + l_offset ]:
473
477
derived_fix [row ][col ] = derived_image [row ][l_offset ]
474
478
break
475
479
return derived_fix
476
480
else : # none
477
481
return derived_image
478
482
479
- @njit (parallel = True )
483
@njit(fastmath=True, parallel=True)
def apply_stereo_divergence_polylines(original_image, normalized_depth, divergence_px: float, fill_technique):
    """Shift an image horizontally by treating every row as a polyline.

    For each row a polyline is generated, morphed by the per-pixel divergence,
    sorted by its new starting coordinate and rasterized back into pixels by
    sampling each output column several times and picking, per sample, the
    covering segment with the largest interpolated absolute divergence.

    Args:
        original_image: Array unpacked as (height, width, channels).
        normalized_depth: Per-pixel depth indexed as ``[row][col]``; the
            shift of a pixel is ``(1 - depth ** 2) * divergence_px``.
        divergence_px: Maximum shift in pixels; the sign selects direction.
        fill_technique: 3 builds segments between adjacent pixels
            (polylines_soft); 4 builds one segment per pixel plus one per gap
            between adjacent pixels (polylines_sharp).
            NOTE(review): other values leave the segment list empty and are
            not handled here - callers are expected to pass only 3 or 4.

    Returns:
        A new array shaped and typed like ``original_image``.
    """
    EPSILON = 1e-7
    h, w, c = original_image.shape
    derived_image = np.zeros_like(original_image)
    # Sub-pixel sample offsets used when rasterizing each output column.
    SAMPLES = [1 / 6, 3 / 6, 5 / 6] if fill_technique == 3 else [0.1, 0.3, 0.5, 0.7, 0.9]

    for row in prange(h):
        # Generating the polyline.
        # Format of each segment: new coordinate of first point, its divergence,
        #                         new coordinate of second point, its divergence,
        #                         original column of the first pixel, original column of the second pixel.
        # It is not guaranteed that the first point is the left point.
        sg = np.zeros((0, 6), dtype=np.float64)
        sg_end = 0
        if fill_technique == 3:  # polylines_soft
            sg = np.zeros((w + 3, 6), dtype=np.float64)
            # Leading sentinel segment; its end coordinate is patched below
            # once the first real segment is known. Divergence -0.1 makes it
            # lose against every real segment (whose divergence is >= 0).
            sg[sg_end] = [-3.0 * abs(divergence_px), -0.1, -1337.0, -0.1, 0.0, 0.0]
            sg_end += 1
            for col in range(0, w - 1):
                ld = (1 - normalized_depth[row][col] ** 2) * divergence_px
                rd = (1 - normalized_depth[row][col + 1] ** 2) * divergence_px
                lx, rx = ld + col, rd + (col + 1)
                sg[sg_end] = [lx, abs(ld), rx, abs(rd), float(col), float(col + 1)]
                sg_end += 1
                if col == 0:
                    sg[0][2] = sg[1][0] + EPSILON
            # Trailing sentinel segment extending past the right image border.
            sg[sg_end] = [sg[sg_end - 1][2] - EPSILON, -0.1, w + 3.0 * abs(divergence_px), -0.1, w - 1, w - 1]
            sg_end += 1
        if fill_technique == 4:  # polylines_sharp
            PIXEL_HALF_WIDTH = 0.45
            sg = np.zeros((2 * w + 5, 6), dtype=np.float64)
            # Leading sentinel segment, patched when col == 0 below.
            sg[sg_end] = [-3.0 * abs(divergence_px), -0.1, -1337.0, -0.1, 0, 0]
            sg_end += 1
            for col in range(0, w):
                # Each pixel gets a segment centered on its shifted position.
                d = (1 - normalized_depth[row][col] ** 2) * divergence_px
                center = col + d
                fx = center - PIXEL_HALF_WIDTH - EPSILON
                sx = center + PIXEL_HALF_WIDTH + EPSILON

                if col == 0:
                    sg[0][2] = fx + EPSILON
                else:
                    # Each space between two adjacent pixels gets a segment.
                    sg[sg_end] = [(sg[sg_end - 1][0] + sg[sg_end - 1][2]) / 2, sg[sg_end - 1][3] - EPSILON,
                                  center, abs(d) - EPSILON,
                                  col - 1, col]
                    sg_end += 1

                sg[sg_end] = [fx, abs(d), sx, abs(d), col, col]
                sg_end += 1

            # Trailing sentinel segment extending past the right image border.
            sg[sg_end] = [sg[sg_end - 1][2] - EPSILON, -0.1, w + 3.0 * abs(divergence_px), -0.1, w - 1, w - 1]
            sg_end += 1

        # Sort segments by their first coordinate using insertion sort.
        # Performs very well in practice, since segments are almost sorted to begin with.
        for i in range(1, sg_end):
            u = i - 1
            # Bounds check first, so sg[u] is never read with u == -1.
            while 0 <= u and sg[u][0] > sg[u + 1][0]:
                sg[u], sg[u + 1] = np.copy(sg[u + 1]), np.copy(sg[u])
                u -= 1

        # Rasterizing.
        # Possible improvement: a more accurate logic instead of just sampling a region multiple times.
        # At each point in time we keep track of segments that are "active" (or "current").
        cs = np.zeros((5 * int(abs(divergence_px)) + 25, 6), dtype=np.float64)
        cs_end = 0
        seg_pointer = 0
        for col in range(w):
            # Removing from current segments those that end before this column
            # (swap-with-last removal; order of active segments is irrelevant).
            cs_i = 0
            while cs_i < cs_end:
                if cs[cs_i][2] < col:
                    cs[cs_i] = cs[cs_end - 1]
                    cs_end -= 1
                else:
                    cs_i += 1

            # Adding to current segments those that start before the next column.
            while seg_pointer < sg_end and sg[seg_pointer][0] < col + 1.0:
                cs[cs_end] = sg[seg_pointer]
                seg_pointer += 1
                cs_end += 1

            # We start with 0.5 because of how floats are converted to ints.
            color = np.full(c, 0.5, dtype=np.float64)
            for sample_i in range(len(SAMPLES)):
                # Finding the segment that is the closest at the sample position.
                sample = SAMPLES[sample_i]
                pos = col + sample
                best_i = 0
                best_closeness = -1.1
                for cs_i in range(cs_end):
                    # Interpolating; works regardless of whether the first point is the left point.
                    ip_k = (pos - cs[cs_i][0]) / (cs[cs_i][2] - cs[cs_i][0])
                    closeness = (1.0 - ip_k) * cs[cs_i][1] + ip_k * cs[cs_i][3]
                    if best_closeness < closeness and 0.0 < ip_k < 1.0:
                        best_closeness = closeness
                        best_i = cs_i

                # Getting the color: blend the two source pixels of the winning segment.
                col_l, col_r = int(cs[best_i][4] + 0.001), int(cs[best_i][5] + 0.001)
                ip_k = (pos - cs[best_i][0]) / (cs[best_i][2] - cs[best_i][0])
                color += (original_image[row][col_l] * (1.0 - ip_k) + original_image[row][col_r] * ip_k) / len(SAMPLES)

            derived_image[row][col] = np.asarray(color, dtype=np.uint8)

    return derived_image
615
+
616
+ @njit (parallel = True )
480
617
def overlap (im1 , im2 ):
481
618
width1 = im1 .shape [1 ]
482
619
height1 = im1 .shape [0 ]
@@ -523,9 +660,8 @@ def run_generate(depthmap_mode,
523
660
combine_output ,
524
661
combine_output_axis ,
525
662
gen_stereo ,
526
- gen_anaglyph ,
527
- stereo_ipd ,
528
- stereo_size ,
663
+ gen_anaglyph ,
664
+ stereo_divergence ,
529
665
stereo_fill ,
530
666
stereo_balance
531
667
):
@@ -564,7 +700,7 @@ def run_generate(depthmap_mode,
564
700
outpath = opts .outdir_samples or opts .outdir_extras_samples
565
701
566
702
567
- outputs = run_depthmap (None , outpath , imageArr , imageNameArr , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_ipd , stereo_size , stereo_fill , stereo_balance )
703
+ outputs = run_depthmap (None , outpath , imageArr , imageNameArr , compute_device , model_type , net_width , net_height , match_size , invert_depth , boost , save_depth , show_depth , show_heat , combine_output , combine_output_axis , gen_stereo , gen_anaglyph , stereo_divergence , stereo_fill , stereo_balance )
568
704
569
705
return outputs , plaintext_to_html ('info' ), ''
570
706
@@ -614,10 +750,9 @@ def on_ui_tabs():
614
750
gen_stereo = gr .Checkbox (label = "Generate Stereo side-by-side image" ,value = False )
615
751
gen_anaglyph = gr .Checkbox (label = "Generate Stereo anaglyph image (red/cyan)" ,value = False )
616
752
with gr .Row ():
617
- stereo_ipd = gr .Slider (minimum = 5 , maximum = 7.5 , step = 0.1 , label = 'IPD (cm)' , value = 6.4 )
618
- stereo_size = gr .Slider (minimum = 20 , maximum = 100 , step = 0.5 , label = 'Screen Width (cm)' , value = 38.5 )
753
+ stereo_divergence = gr .Slider (minimum = 0.05 , maximum = 10.005 , step = 0.01 , label = 'Divergence (3D effect)' , value = 2.5 )
619
754
with gr .Row ():
620
- stereo_fill = gr .Dropdown (label = "Gap fill technique" , choices = ['none' , 'hard_horizontal ' , 'soft_horizontal' ], value = 'soft_horizontal ' , type = "index" , elem_id = "stereo_fill_type" )
755
+ stereo_fill = gr .Dropdown (label = "Gap fill technique" , choices = ['none' , 'naive ' , 'naive_interpolating' , 'polylines_soft' , 'polylines_sharp' ], value = 'naive_interpolating ' , type = "index" , elem_id = "stereo_fill_type" )
621
756
stereo_balance = gr .Slider (minimum = - 1.0 , maximum = 1.0 , step = 0.05 , label = 'Balance between eyes' , value = 0.0 )
622
757
623
758
with gr .Box ():
@@ -655,9 +790,8 @@ def on_ui_tabs():
655
790
combine_output ,
656
791
combine_output_axis ,
657
792
gen_stereo ,
658
- gen_anaglyph ,
659
- stereo_ipd ,
660
- stereo_size ,
793
+ gen_anaglyph ,
794
+ stereo_divergence ,
661
795
stereo_fill ,
662
796
stereo_balance
663
797
],
@@ -1404,7 +1538,7 @@ def estimateboost(img, model, model_type, pix2pixmodel):
1404
1538
1405
1539
# output
1406
1540
return cv2 .resize (imageandpatchs .estimation_updated_image , (input_resolution [1 ], input_resolution [0 ]), interpolation = cv2 .INTER_CUBIC )
1407
-
1541
+
1408
1542
# taken from 3d-photo-inpainting and modified
1409
1543
def sparse_bilateral_filtering (
1410
1544
depth , image , filter_size , depth_threshold , sigma_s , sigma_r , HR = False , mask = None , gsHR = True , edge_id = None , num_iter = None , num_gs_iter = None , spdb = False
@@ -1443,7 +1577,7 @@ def sparse_bilateral_filtering(
1443
1577
def vis_depth_discontinuity (depth , depth_threshold , vis_diff = False , label = False , mask = None ):
1444
1578
"""
1445
1579
config:
1446
- -
1580
+ -
1447
1581
"""
1448
1582
if label == False :
1449
1583
disp = 1. / depth
0 commit comments