Skip to content

Commit 928f6d1

Browse files
committed
implemented binary ply file support
1 parent 3014f1a commit 928f6d1

File tree

3 files changed

+176
-62
lines changed

3 files changed

+176
-62
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ To generate realistic depth maps `from a single image`, this script uses code an
1111
[![screenshot](examples.png)](https://raw.githubusercontent.com/thygate/stable-diffusion-webui-depthmap-script/main/examples.png)
1212

1313
## Changelog
14+
* v0.3.6 new feature
15+
* implemented binary PLY file format for the inpainted 3D mesh, giving a big reduction in file size and save/load times.
16+
* added progress indicators to the inpainting process
1417
* v0.3.5 bugfix
1518
* create path to 3dphoto models before download (see [issue](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/76))
1619
* v0.3.4 new features

scripts/depthmap.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858

5959
whole_size_threshold = 1600 # R_max from the paper
6060
pix2pixsize = 1024
61-
scriptname = "DepthMap v0.3.5"
61+
scriptname = "DepthMap v0.3.6"
6262

6363
class Script(scripts.Script):
6464
def title(self):
@@ -549,6 +549,8 @@ def run_3dphoto(device, img_rgb, img_depth, inputnames, outpath, fnExt, vid_ssaa
549549
config['largest_size'] = 512
550550
config['save_ply'] = True
551551

552+
config['ply_fmt'] = "bin"
553+
552554
if device == torch.device("cpu"):
553555
config["gpu_ids"] = -1
554556

@@ -1138,8 +1140,8 @@ def on_ui_tabs():
11381140
vid_ssaa = gr.Dropdown(label="SSAA", choices=['1', '2', '3', '4'], value='3', type="index", elem_id="video_ssaa")
11391141
with gr.Row():
11401142
vid_traj = gr.Dropdown(label="Trajectory", choices=['straight-line', 'double-straight-line', 'circle'], value='double-straight-line', type="index", elem_id="video_trajectory")
1141-
vid_border = gr.Textbox(label="Crop: top, left, bottom, right", value="0.03, 0.03, 0.05, 0.03")
11421143
vid_shift = gr.Textbox(label="Translate: x, y, z", value="-0.015, 0.0, -0.05")
1144+
vid_border = gr.Textbox(label="Crop: top, left, bottom, right", value="0.03, 0.03, 0.05, 0.03")
11431145
vid_dolly = gr.Checkbox(label="Dolly",value=False)
11441146
with gr.Row():
11451147
submit_vid = gr.Button('Generate Video', elem_id="depthmap_generatevideo", variant='primary')

scripts/inpaint/mesh.py

Lines changed: 169 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@
2727
import transforms3d
2828
import random
2929
from functools import reduce
30+
import struct
31+
import tqdm
32+
import sys
3033

3134
def create_mesh(depth, image, int_mtx, config):
3235
H, W, C = image.shape
@@ -1834,6 +1837,9 @@ def write_ply(image,
18341837

18351838
mean_loc_depth = depth[depth.shape[0]//2, depth.shape[1]//2]
18361839

1840+
pbar = tqdm.tqdm(total = 7 if config['extrapolate_border'] is True else 6)
1841+
pbar.set_description("Creating mesh")
1842+
18371843
depth = depth.astype(np.float64)
18381844
input_mesh, xy2depth, image, depth = create_mesh(depth, image, int_mtx, config)
18391845

@@ -1856,6 +1862,9 @@ def write_ply(image,
18561862

18571863
mesh, info_on_pix, depth = fill_missing_node(input_mesh, info_on_pix, image, depth)
18581864
if config['extrapolate_border'] is True:
1865+
pbar.update(1)
1866+
pbar.set_description("Extrapolating border")
1867+
18591868
pre_depth = depth.copy()
18601869
input_mesh, info_on_pix, depth = refresh_bord_depth(input_mesh, info_on_pix, image, depth)
18611870
input_mesh = remove_node_feat(input_mesh, 'edge_id')
@@ -1899,6 +1908,10 @@ def write_ply(image,
18991908
depth_edge_model, depth_feat_model, rgb_model, config, direc="left-up")
19001909
info_on_pix, input_mesh, image, depth, edge_ccs = extrapolate(input_mesh, info_on_pix, image, depth, other_edge_with_id, edge_map, edge_ccs,
19011910
depth_edge_model, depth_feat_model, rgb_model, config, direc="left-down")
1911+
1912+
pbar.update(1)
1913+
pbar.set_description("Context and holes")
1914+
19021915
specific_edge_loc = None
19031916
specific_edge_id = []
19041917
vis_edge_id = None
@@ -1912,6 +1925,10 @@ def write_ply(image,
19121925
depth_feat_model,
19131926
inpaint_iter=0,
19141927
vis_edge_id=vis_edge_id)
1928+
1929+
pbar.update(1)
1930+
pbar.set_description("Inpaint 1")
1931+
19151932
edge_canvas = np.zeros((H, W))
19161933
mask = np.zeros((H, W))
19171934
context = np.zeros((H, W))
@@ -1946,6 +1963,10 @@ def write_ply(image,
19461963
specific_edge_id,
19471964
specific_edge_loc,
19481965
inpaint_iter=0)
1966+
1967+
pbar.update(1)
1968+
pbar.set_description("Inpaint 2")
1969+
19491970
specific_edge_id = []
19501971
edge_canvas = np.zeros((input_mesh.graph['H'], input_mesh.graph['W']))
19511972
connect_points_ccs = [set() for _ in connect_points_ccs]
@@ -1983,6 +2004,10 @@ def write_ply(image,
19832004
specific_edge_id,
19842005
specific_edge_loc,
19852006
inpaint_iter=1)
2007+
2008+
pbar.update(1)
2009+
pbar.set_description("Reproject mesh")
2010+
19862011
vertex_id = 0
19872012
input_mesh.graph['H'], input_mesh.graph['W'] = input_mesh.graph['noext_H'], input_mesh.graph['noext_W']
19882013
background_canvas = np.zeros((input_mesh.graph['H'],
@@ -2038,30 +2063,79 @@ def write_ply(image,
20382063
else:
20392064
node_str_color.append(str_color)
20402065
node_str_point.append(str_pt)
2066+
2067+
2068+
pbar.update(1)
2069+
pbar.set_description("Generating faces")
20412070
str_faces = generate_face(input_mesh, info_on_pix, config)
2071+
pbar.update(1)
2072+
pbar.close()
2073+
20422074
if config['save_ply'] is True:
20432075
print("Writing mesh file %s ..." % ply_name)
2044-
with open(ply_name, 'w') as ply_fi:
2045-
ply_fi.write('ply\n' + 'format ascii 1.0\n')
2046-
ply_fi.write('comment H ' + str(int(input_mesh.graph['H'])) + '\n')
2047-
ply_fi.write('comment W ' + str(int(input_mesh.graph['W'])) + '\n')
2048-
ply_fi.write('comment hFov ' + str(float(input_mesh.graph['hFov'])) + '\n')
2049-
ply_fi.write('comment vFov ' + str(float(input_mesh.graph['vFov'])) + '\n')
2050-
ply_fi.write('comment meanLoc ' + str(float(mean_loc_depth)) + '\n')
2051-
ply_fi.write('element vertex ' + str(len(node_str_list)) + '\n')
2052-
ply_fi.write('property float x\n' + \
2053-
'property float y\n' + \
2054-
'property float z\n' + \
2055-
'property uchar red\n' + \
2056-
'property uchar green\n' + \
2057-
'property uchar blue\n' + \
2058-
'property uchar alpha\n')
2059-
ply_fi.write('element face ' + str(len(str_faces)) + '\n')
2060-
ply_fi.write('property list uchar int vertex_index\n')
2061-
ply_fi.write('end_header\n')
2062-
ply_fi.writelines(node_str_list)
2063-
ply_fi.writelines(str_faces)
2064-
ply_fi.close()
2076+
#bty: implement binary ply
2077+
if config['ply_fmt'] == "bin":
2078+
with open(ply_name, 'wb') as ply_fi:
2079+
if 'little' == sys.byteorder:
2080+
ply_fi.write(('ply\n' + 'format binary_little_endian 1.0\n').encode('ascii'))
2081+
else:
2082+
ply_fi.write(('ply\n' + 'format binary_big_endian 1.0\n').encode('ascii'))
2083+
ply_fi.write(('comment H ' + str(int(input_mesh.graph['H'])) + '\n').encode('ascii'))
2084+
ply_fi.write(('comment W ' + str(int(input_mesh.graph['W'])) + '\n').encode('ascii'))
2085+
ply_fi.write(('comment hFov ' + str(float(input_mesh.graph['hFov'])) + '\n').encode('ascii'))
2086+
ply_fi.write(('comment vFov ' + str(float(input_mesh.graph['vFov'])) + '\n').encode('ascii'))
2087+
ply_fi.write(('comment meanLoc ' + str(float(mean_loc_depth)) + '\n').encode('ascii'))
2088+
ply_fi.write(('element vertex ' + str(len(node_str_list)) + '\n').encode('ascii'))
2089+
ply_fi.write(('property float x\n' + \
2090+
'property float y\n' + \
2091+
'property float z\n' + \
2092+
'property uchar red\n' + \
2093+
'property uchar green\n' + \
2094+
'property uchar blue\n' + \
2095+
'property uchar alpha\n').encode('ascii'))
2096+
ply_fi.write(('element face ' + str(len(str_faces)) + '\n').encode('ascii'))
2097+
ply_fi.write(('property list uchar int vertex_index\n').encode('ascii'))
2098+
ply_fi.write(('end_header\n').encode('ascii'))
2099+
2100+
pbar = tqdm.tqdm(total = len(node_str_list)+len(str_faces))
2101+
pbar.set_description("Saving vertices")
2102+
2103+
for v in node_str_list:
2104+
x, y, z, r, g, b, a = v.split(' ')
2105+
ply_fi.write(struct.pack('fffBBBB', float(x), float(y), float(z), int(r), int(g), int(b), int(a)))
2106+
pbar.update(1)
2107+
2108+
pbar.set_description("Saving faces")
2109+
for f in str_faces:
2110+
n, a, b, c = f.split(' ')
2111+
ply_fi.write(bytearray([int(n)]))
2112+
ply_fi.write(struct.pack('III', int(a), int(b), int(c)))
2113+
pbar.update(1)
2114+
pbar.close()
2115+
ply_fi.close()
2116+
2117+
else:
2118+
with open(ply_name, 'w') as ply_fi:
2119+
ply_fi.write('ply\n' + 'format ascii 1.0\n')
2120+
ply_fi.write('comment H ' + str(int(input_mesh.graph['H'])) + '\n')
2121+
ply_fi.write('comment W ' + str(int(input_mesh.graph['W'])) + '\n')
2122+
ply_fi.write('comment hFov ' + str(float(input_mesh.graph['hFov'])) + '\n')
2123+
ply_fi.write('comment vFov ' + str(float(input_mesh.graph['vFov'])) + '\n')
2124+
ply_fi.write('comment meanLoc ' + str(float(mean_loc_depth)) + '\n')
2125+
ply_fi.write('element vertex ' + str(len(node_str_list)) + '\n')
2126+
ply_fi.write('property float x\n' + \
2127+
'property float y\n' + \
2128+
'property float z\n' + \
2129+
'property uchar red\n' + \
2130+
'property uchar green\n' + \
2131+
'property uchar blue\n' + \
2132+
'property uchar alpha\n')
2133+
ply_fi.write('element face ' + str(len(str_faces)) + '\n')
2134+
ply_fi.write('property list uchar int vertex_index\n')
2135+
ply_fi.write('end_header\n')
2136+
ply_fi.writelines(node_str_list)
2137+
ply_fi.writelines(str_faces)
2138+
ply_fi.close()
20652139
return input_mesh
20662140
else:
20672141
H = int(input_mesh.graph['H'])
@@ -2076,12 +2150,16 @@ def write_ply(image,
20762150
return node_str_point, node_str_color, str_faces, H, W, hFov, vFov
20772151

20782152
def read_ply(mesh_fi):
2079-
ply_fi = open(mesh_fi, 'r')
2153+
#bty: implement binary support (assume same endianness for now)
2154+
# read header in text mode
2155+
ply_fi = open(mesh_fi, 'r', encoding="utf8", errors='ignore') # required to readline in bin file
20802156
Height = None
20812157
Width = None
20822158
hFov = None
20832159
vFov = None
20842160
mean_loc_depth = None
2161+
isBinary = True
2162+
# read ascii header
20852163
while True:
20862164
line = ply_fi.readline().split('\n')[0]
20872165
if line.startswith('element vertex'):
@@ -2097,48 +2175,78 @@ def read_ply(mesh_fi):
20972175
hFov = float(line.split(' ')[-1].split('\n')[0])
20982176
if line.split(' ')[1] == 'vFov':
20992177
vFov = float(line.split(' ')[-1].split('\n')[0])
2100-
#bty: this was the only value for which it needed the depthmap, so it stores it in the ply when generated
2178+
#bty: this was the only value for which it needed the depthmap, so store it in the ply too
21012179
if line.split(' ')[1] == 'meanLoc':
21022180
mean_loc_depth = float(line.split(' ')[-1].split('\n')[0])
2181+
# check format
2182+
elif line.startswith('format ascii'):
2183+
isBinary = False
21032184
elif line.startswith('end_header'):
21042185
break
2105-
contents = ply_fi.readlines()
2106-
vertex_infos = contents[:num_vertex]
2107-
face_infos = contents[num_vertex:]
2108-
#bty: try to optimize by pre-allocating
2109-
#verts = []
2110-
#colors = []
2111-
#faces = []
2112-
verts = [None] * num_vertex
2113-
colors = [None] * num_vertex
2114-
faces = [None] * num_face
2115-
i = 0
2116-
for v_info in vertex_infos:
2117-
str_info = [float(v) for v in v_info.split('\n')[0].split(' ')]
2118-
if len(str_info) == 6:
2119-
vx, vy, vz, r, g, b = str_info
2120-
else:
2121-
vx, vy, vz, r, g, b, hi = str_info
2122-
#verts.append([vx, vy, vz])
2123-
#colors.append([r, g, b, hi])
2124-
verts[i] = [vx, vy, vz]
2125-
colors[i] = [r, g, b, hi]
2126-
i = i + 1
2127-
verts = np.array(verts)
2128-
#try:
2129-
colors = np.array(colors)
2130-
colors[..., :3] = colors[..., :3]/255.
2131-
#except:
2132-
# import pdb
2133-
# pdb.set_trace()
2134-
2135-
i = 0
2136-
for f_info in face_infos:
2137-
_, v1, v2, v3 = [int(f) for f in f_info.split('\n')[0].split(' ')]
2138-
#faces.append([v1, v2, v3])
2139-
faces[i] = [v1, v2, v3]
2140-
i = i + 1
2141-
faces = np.array(faces)
2186+
2187+
if isBinary:
2188+
# grab current file offset and re-open in binary mode
2189+
endheader = ply_fi.tell()
2190+
ply_fi.close()
2191+
ply_fi = open(mesh_fi, 'rb')
2192+
ply_fi.seek(endheader)
2193+
verts = [None] * num_vertex
2194+
colors = [None] * num_vertex
2195+
faces = [None] * num_face
2196+
2197+
pbar = tqdm.tqdm(total = num_vertex+num_face)
2198+
pbar.set_description("Loading vertices")
2199+
for i in range(num_vertex):
2200+
x, y, z, r, g, b, a = struct.unpack('fffBBBB', ply_fi.read(16))
2201+
verts[i] = [x, y, z]
2202+
colors[i] = [float(r), float(g), float(b), float(a)]
2203+
pbar.update(1)
2204+
verts = np.array(verts)
2205+
colors = np.array(colors)
2206+
colors[..., :3] = colors[..., :3] / 255.
2207+
2208+
pbar.set_description("Loading faces")
2209+
for i in range(num_face):
2210+
c = int.from_bytes(ply_fi.read(1), "little")
2211+
if c == 3:
2212+
v1, v2, v3 = struct.unpack('III', ply_fi.read(12))
2213+
faces[i] = [v1, v2, v3]
2214+
pbar.update(1)
2215+
faces = np.array(faces)
2216+
ply_fi.close()
2217+
pbar.close()
2218+
2219+
else:
2220+
# read ascii mode file
2221+
contents = ply_fi.readlines()
2222+
ply_fi.close()
2223+
vertex_infos = contents[:num_vertex]
2224+
face_infos = contents[num_vertex:]
2225+
#bty: optimize by pre-allocating
2226+
verts = [None] * num_vertex
2227+
colors = [None] * num_vertex
2228+
faces = [None] * num_face
2229+
i = 0
2230+
for v_info in vertex_infos:
2231+
str_info = [float(v) for v in v_info.split('\n')[0].split(' ')]
2232+
if len(str_info) == 6:
2233+
vx, vy, vz, r, g, b = str_info
2234+
else:
2235+
vx, vy, vz, r, g, b, hi = str_info
2236+
2237+
verts[i] = [vx, vy, vz]
2238+
colors[i] = [r, g, b, hi]
2239+
i = i + 1
2240+
verts = np.array(verts)
2241+
colors = np.array(colors)
2242+
colors[..., :3] = colors[..., :3]/255.
2243+
2244+
i = 0
2245+
for f_info in face_infos:
2246+
_, v1, v2, v3 = [int(f) for f in f_info.split('\n')[0].split(' ')]
2247+
faces[i] = [v1, v2, v3]
2248+
i = i + 1
2249+
faces = np.array(faces)
21422250

21432251
return verts, colors, faces, Height, Width, hFov, vFov, mean_loc_depth
21442252

@@ -2257,6 +2365,7 @@ def output_3d_photo(verts, colors, faces, Height, Width, hFov, vFov, tgt_poses,
22572365
plane_width = np.tan(fov_in_rad/2.) * np.abs(mean_loc_depth)
22582366
fn_saved = []
22592367
for video_pose, video_traj_type in zip(videos_poses, video_traj_types):
2368+
print("Rendering frames ..")
22602369
stereos = []
22612370
#tops = []; buttoms = []; lefts = []; rights = []
22622371
for tp_id, tp in enumerate(video_pose):

0 commit comments

Comments
 (0)