Skip to content

Commit d98a05a

Browse files
davidmartinriussemjon00
authored andcommitted
Add video depth generation endpoint
This commit adds a new API endpoint for generating depth maps from input images in a video format. The endpoint supports various depth model options, including different pre-trained models. It also validates and processes video parameters such as number of frames, frames per second, trajectory, shift, border, dolly, format, and super-sampling anti-aliasing. The commit includes error handling for missing input images, invalid model types, and required video parameters. Additionally, it checks if a mesh file already exists, and if not, it generates a new one. The generated mesh is then used to create a depth video based on the specified parameters.
1 parent b8120b4 commit d98a05a

File tree

2 files changed

+121
-21
lines changed

2 files changed

+121
-21
lines changed

scripts/depthmap_api.py

Lines changed: 101 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# Non-public API. Don't host publicly - SECURITY RISKS!
2-
# (will only be on with --api starting option)
3-
# Currently no API stability guarantees are provided - API may break on any new commit.
1+
# DO NOT HOST PUBLICLY - SECURITY RISKS!
2+
# (the API will only be on with --api starting option)
3+
# Currently no API stability guarantees are provided - API may break on any new commit (but hopefully won't).
44

55
import numpy as np
66
from fastapi import FastAPI, Body
@@ -70,12 +70,110 @@ async def process(
7070
if not isinstance(result, Image.Image):
7171
continue
7272
results_based += [encode_to_base64(result)]
73+
7374
return {"images": results_based, "info": "Success"}
7475

76+
@app.post("/depth/generate/video")
async def process_video(
    depth_input_images: List[str] = Body([], title='Input Images'),
    options: Dict[str, object] = Body("options", title='Generation options'),
):
    """Generate a depth video from the supplied images.

    The images are decoded, an inpainted mesh is either reused (when
    ``video_parameters["mesh_fi_filename"]`` points at an existing file) or
    generated through ``core_generation_funnel``, and the mesh is then handed
    to ``run_makevideo`` together with the requested video settings.

    Args:
        depth_input_images: Encoded input images; each one is decoded with
            ``to_base64_PIL``. Must not be empty.
        options: Generation options. Keys read here:
            ``model_type`` - a model name from ``available_models`` or an
            integer model id; it is replaced in ``options`` by the numeric id.
            ``video_parameters`` - mapping that must contain ``vid_numframes``,
            ``vid_fps``, ``vid_traj``, ``vid_shift``, ``vid_border``,
            ``dolly``, ``vid_format``, ``vid_ssaa`` and ``output_filename``,
            and may contain ``mesh_fi_filename``.

    Returns:
        ``{"info": "Success"}`` once ``run_makevideo`` has returned.

    Raises:
        HTTPException: 422 when no images are supplied; 400 when ``options``
            or any of the parameters above is missing or malformed, or when no
            mesh could be produced.
    """
    if len(depth_input_images) == 0:
        raise HTTPException(status_code=422, detail="No images supplied")
    print(f"Processing {str(len(depth_input_images))} images trough the API")

    # The Body default is the plain string "options", so a request that omits
    # the field would otherwise fail with a TypeError (HTTP 500) further down.
    if not isinstance(options, dict):
        raise HTTPException(status_code=400, detail={'error': "Missing required parameter(s): options"})

    available_models = {
        'res101': 0,
        'dpt_beit_large_512': 1,  # midas 3.1
        'dpt_beit_large_384': 2,  # midas 3.1
        'dpt_large_384': 3,  # midas 3.0
        'dpt_hybrid_384': 4,  # midas 3.0
        'midas_v21': 5,
        'midas_v21_small': 6,
        'zoedepth_n': 7,  # indoor
        'zoedepth_k': 8,  # outdoor
        'zoedepth_nk': 9,
    }

    if "model_type" not in options:
        raise HTTPException(status_code=400, detail={'error': "Missing required parameter(s): model_type"})
    model_type = options["model_type"]

    if isinstance(model_type, str):
        if model_type not in available_models:
            available_strings = list(available_models.keys())
            raise HTTPException(status_code=400, detail={'error': 'Invalid model string', 'available_models': available_strings})
        model_id = available_models[model_type]
    elif isinstance(model_type, int) and not isinstance(model_type, bool):
        # bool is a subclass of int; True/False are not meaningful model ids.
        model_id = model_type
    else:
        raise HTTPException(status_code=400, detail={'error': 'Invalid model parameter type'})

    # Downstream generation receives the numeric id, not the name.
    options["model_type"] = model_id

    video_parameters = options.get("video_parameters")
    if not isinstance(video_parameters, dict):
        raise HTTPException(status_code=400, detail={'error': "Missing required parameter(s): video_parameters"})

    required_params = ["vid_numframes", "vid_fps", "vid_traj", "vid_shift", "vid_border", "dolly", "vid_format", "vid_ssaa", "output_filename"]
    missing_params = [param for param in required_params if param not in video_parameters]
    if missing_params:
        raise HTTPException(status_code=400, detail={'error': f"Missing required parameter(s): {', '.join(missing_params)}"})

    vid_numframes = video_parameters["vid_numframes"]
    vid_fps = video_parameters["vid_fps"]
    vid_traj = video_parameters["vid_traj"]
    vid_shift = video_parameters["vid_shift"]
    vid_border = video_parameters["vid_border"]
    dolly = video_parameters["dolly"]
    vid_format = video_parameters["vid_format"]
    try:
        vid_ssaa = int(video_parameters["vid_ssaa"])
    except (TypeError, ValueError):
        raise HTTPException(status_code=400, detail={'error': "Parameter 'vid_ssaa' must be an integer"}) from None

    # NOTE(review): output_filename and mesh_fi_filename are caller-supplied
    # filesystem paths that are used as-is - one more reason this API must not
    # be hosted publicly.
    output_filename = video_parameters["output_filename"]
    if not isinstance(output_filename, str):
        raise HTTPException(status_code=400, detail={'error': "Parameter 'output_filename' must be a string"})
    output_path = os.path.dirname(output_filename)
    basename, extension = os.path.splitext(os.path.basename(output_filename))

    # Comparing video_format with the extension (extension includes the dot)
    if vid_format != extension[1:]:
        raise HTTPException(status_code=400, detail={'error': f"Video format '{vid_format}' does not match with the extension '{extension}'."})

    pil_images = [to_base64_PIL(input_image) for input_image in depth_input_images]
    outpath = backbone.get_outpath()

    mesh_fi_filename = video_parameters.get('mesh_fi_filename', None)

    if isinstance(mesh_fi_filename, str) and mesh_fi_filename and os.path.exists(mesh_fi_filename):
        mesh_fi = mesh_fi_filename
        print("Loaded existing mesh from: ", mesh_fi)
    else:
        # If there is no mesh file generate it.
        options["GEN_INPAINTED_MESH"] = True

        gen_obj = core_generation_funnel(outpath, pil_images, None, None, options)

        # Consume results only until the first inpainted mesh is yielded.
        mesh_fi = None
        for _, result_type, result in gen_obj:
            if result_type == 'inpainted_mesh':
                mesh_fi = result
                break

        if mesh_fi:
            print("Created mesh in: ", mesh_fi)
        else:
            raise HTTPException(status_code=400, detail={'error': "The mesh has not been created"})

    # NOTE(review): this call runs synchronously inside the coroutine, so the
    # request is not answered until rendering is finished.
    run_makevideo(mesh_fi, vid_numframes, vid_fps, vid_traj, vid_shift, vid_border, dolly, vid_format, vid_ssaa, output_path, basename)

    return {"info": "Success"}
171+
75172

76173
# Hook the depthmap API into the host application, but only when the 'api'
# command-line option is set.
try:
    import modules.script_callbacks as script_callbacks
    if backbone.get_cmd_opt('api', False):
        script_callbacks.on_app_started(depth_api)
        print("Started the depthmap API. DO NOT HOST PUBLICLY - SECURITY RISKS!")
except Exception:
    # Best-effort: a missing script_callbacks module or a failed registration
    # must not stop the rest of the program from loading. `except Exception`
    # (rather than a bare `except`) lets KeyboardInterrupt/SystemExit through.
    print('DepthMap API could not start')

src/core.py

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -578,9 +578,8 @@ def run_3dphoto_videos(mesh_fi, basename, outpath, num_frames, fps, crop_border,
578578
fnExt=vid_format)
579579
return fn_saved
580580

581-
582-
# called from gen vid tab button
583-
def run_makevideo(fn_mesh, vid_numframes, vid_fps, vid_traj, vid_shift, vid_border, dolly, vid_format, vid_ssaa):
581+
def run_makevideo(fn_mesh, vid_numframes, vid_fps, vid_traj, vid_shift, vid_border, dolly, vid_format, vid_ssaa,
582+
outpath=None, basename=None):
584583
if len(fn_mesh) == 0 or not os.path.exists(fn_mesh):
585584
raise Exception("Could not open mesh.")
586585

@@ -608,20 +607,24 @@ def run_makevideo(fn_mesh, vid_numframes, vid_fps, vid_traj, vid_shift, vid_bord
608607
raise Exception("Crop Border requires 4 elements.")
609608
crop_border = [float(borders[0]), float(borders[1]), float(borders[2]), float(borders[3])]
610609

611-
# output path and filename mess ..
612-
basename = Path(fn_mesh).stem
613-
outpath = backbone.get_outpath()
614-
# unique filename
615-
basecount = backbone.get_next_sequence_number(outpath, basename)
616-
if basecount > 0: basecount = basecount - 1
617-
fullfn = None
618-
for i in range(500):
619-
fn = f"{basecount + i:05}" if basename == '' else f"{basename}-{basecount + i:04}"
620-
fullfn = os.path.join(outpath, f"{fn}_." + vid_format)
621-
if not os.path.exists(fullfn):
622-
break
623-
basename = Path(fullfn).stem
624-
basename = basename[:-1]
610+
if not outpath:
611+
outpath = backbone.get_outpath()
612+
613+
if not basename:
614+
# output path and filename mess ..
615+
basename = Path(fn_mesh).stem
616+
617+
# unique filename
618+
basecount = backbone.get_next_sequence_number(outpath, basename)
619+
if basecount > 0: basecount = basecount - 1
620+
fullfn = None
621+
for i in range(500):
622+
fn = f"{basecount + i:05}" if basename == '' else f"{basename}-{basecount + i:04}"
623+
fullfn = os.path.join(outpath, f"{fn}_." + vid_format)
624+
if not os.path.exists(fullfn):
625+
break
626+
basename = Path(fullfn).stem
627+
basename = basename[:-1]
625628

626629
print("Loading mesh ..")
627630

@@ -630,7 +633,6 @@ def run_makevideo(fn_mesh, vid_numframes, vid_fps, vid_traj, vid_shift, vid_bord
630633

631634
return fn_saved[-1], fn_saved[-1], ''
632635

633-
634636
def unload_models():
635637
model_holder.unload_models()
636638

0 commit comments

Comments
 (0)