api changes updates

graemeniedermayer · graemeniedermayer · commit 3fddffb97b8a · 2023-07-29T02:35:20.000-06:00
Video mode fixes

Video mode fixes
diff --git a/scripts/depthmap_api.py b/scripts/depthmap_api.py
@@ -2,80 +2,12 @@
 # (will only be on with --api starting option)
 # Currently no API stability guarantees are provided - API may break on any new commit.
 
-import numpy as np
-from fastapi import FastAPI, Body
-from fastapi.exceptions import HTTPException
-from PIL import Image
-
-import gradio as gr
-
-from modules.api.models import List, Dict
-from modules.api import api
-
-from src.core import core_generation_funnel
-from src.misc import SCRIPT_VERSION
 from src import backbone
-from src.common_constants import GenerationOptions as go
-
-
-def encode_to_base64(image):
-    if type(image) is str:
-        return image
-    elif type(image) is Image.Image:
-        return api.encode_pil_to_base64(image)
-    elif type(image) is np.ndarray:
-        return encode_np_to_base64(image)
-    else:
-        return ""
-
-
-def encode_np_to_base64(image):
-    pil = Image.fromarray(image)
-    return api.encode_pil_to_base64(pil)
-
-
-def to_base64_PIL(encoding: str):
-    return Image.fromarray(np.array(api.decode_base64_to_image(encoding)).astype('uint8'))
-
-
-def depth_api(_: gr.Blocks, app: FastAPI):
-    @app.get("/depth/version")
-    async def version():
-        return {"version": SCRIPT_VERSION}
-
-    @app.get("/depth/get_options")
-    async def get_options():
-        return {"options": sorted([x.name.lower() for x in go])}
-
-    # TODO: some potential inputs not supported (like custom depthmaps)
-    @app.post("/depth/generate")
-    async def process(
-        depth_input_images: List[str] = Body([], title='Input Images'),
-        options: Dict[str, object] = Body("options", title='Generation options'),
-    ):
-        # TODO: restrict mesh options
-
-        if len(depth_input_images) == 0:
-            raise HTTPException(status_code=422, detail="No images supplied")
-        print(f"Processing {str(len(depth_input_images))} images trough the API")
-
-        pil_images = []
-        for input_image in depth_input_images:
-            pil_images.append(to_base64_PIL(input_image))
-        outpath = backbone.get_outpath()
-        gen_obj = core_generation_funnel(outpath, pil_images, None, None, options)
-
-        results_based = []
-        for count, type, result in gen_obj:
-            if not isinstance(result, Image.Image):
-                continue
-            results_based += [encode_to_base64(result)]
-        return {"images": results_based, "info": "Success"}
-
+from src.api import api_extension
 
 try:
     import modules.script_callbacks as script_callbacks
     if backbone.get_cmd_opt('api', False):
-        script_callbacks.on_app_started(depth_api)
+        script_callbacks.on_app_started(api_extension.depth_api)
 except:
     print('DepthMap API could not start')
diff --git a/src/api/api_constants.py b/src/api/api_constants.py
@@ -0,0 +1,32 @@
+
+# Options that control the API layer itself rather than the generation.
+# Exposed to clients through the /depth/get_options endpoint.
+api_options = {
+    'outputs': ["depth"], # List of outputs to send in the response, e.g. ["depth", "normalmap", 'heatmap', "normal", 'background_removed'], etc.
+    #'conversions': "", # TODO: implement. Server-side conversions are worth offering because they are often hard to do in JS on the client side.
+    'save':"" # TODO: implement. Save results on the local machine; can be very helpful for debugging.
+}
+
+# Generation options the API starts from, before client options are applied.
+# TODO: These are intended to be temporary
+api_defaults={
+    "BOOST": False,
+    "NET_SIZE_MATCH": True
+}
+
+# These are enforced after the client's options have been applied,
+# so the client cannot override them.
+api_forced={
+    "GEN_SIMPLE_MESH": False,
+    "GEN_INPAINTED_MESH": False
+}
+
+# Model name -> model index dictionary.
+# TODO: find a way to remove this without forcing people to know the indexes of the models.
+models_to_index = {
+    'res101':0, 
+    'dpt_beit_large_512 (midas 3.1)':1,
+    'dpt_beit_large_384 (midas 3.1)':2, 
+    'dpt_large_384 (midas 3.0)':3,
+    'dpt_hybrid_384 (midas 3.0)':4,
+    'midas_v21':5, 
+    'midas_v21_small':6,
+    'zoedepth_n (indoor)':7, 
+    'zoedepth_k (outdoor)':8, 
+    'zoedepth_nk':9
+}
diff --git a/src/api/api_extension.py b/src/api/api_extension.py
@@ -0,0 +1,114 @@
+# Non-public API. Don't host publicly - SECURITY RISKS!
+# (will only be on with --api starting option)
+# Currently no API stability guarantees are provided - API may break on any new commit.
+
+import numpy as np
+from fastapi import FastAPI, Body
+from fastapi.exceptions import HTTPException
+from PIL import Image
+from itertools import tee
+import json
+
+import gradio as gr
+
+from modules.api.models import List, Dict
+from modules.api import api
+
+from src.common_constants import GenerationOptions as go
+from src.core import core_generation_funnel, CoreGenerationFunnelInp
+from src import backbone
+from src.misc import SCRIPT_VERSION
+from src.api.api_constants import api_defaults, api_forced, api_options, models_to_index
+
+def encode_to_base64(image):
+    """Return a base64 encoding of `image`, or "" if its type is unsupported.
+
+    Strings are returned unchanged (assumed to be encoded already), PIL images
+    are encoded with `api.encode_pil_to_base64`, and numpy arrays are passed
+    to `encode_np_to_base64`.
+    """
+    # isinstance() instead of an exact type() match: PIL returns Image.Image
+    # subclasses for images opened from files, and an exact match would
+    # silently turn those into "".
+    if isinstance(image, str):
+        return image
+    if isinstance(image, Image.Image):
+        return api.encode_pil_to_base64(image)
+    if isinstance(image, np.ndarray):
+        return encode_np_to_base64(image)
+    return ""
+
+def encode_np_to_base64(image):
+    """Convert a numpy array to a PIL image and return its base64 encoding."""
+    return api.encode_pil_to_base64(Image.fromarray(image))
+
+def to_base64_PIL(encoding: str):
+    """Decode a base64-encoded image string into a PIL image.
+
+    The decoded image is converted to a uint8 numpy array and then wrapped
+    in a PIL image again.
+    """
+    decoded = api.decode_base64_to_image(encoding)
+    pixels = np.array(decoded).astype('uint8')
+    return Image.fromarray(pixels)
+
+
+def api_gen(input_images, client_options):
+    """Run the core generation funnel for an API request.
+
+    The options start from `api_defaults` (wrapped by CoreGenerationFunnelInp),
+    are overridden by `client_options`, and finally by `api_forced`.
+
+    Args:
+        input_images: base64-encoded input images.
+        client_options: generation options supplied by the client. The value
+            of "model_type" is a model name, translated to its index through
+            `models_to_index`.
+
+    Returns:
+        The object returned by `core_generation_funnel` for the decoded images.
+
+    Raises:
+        HTTPException: status 422 if "model_type" is not a known model name.
+    """
+    default_options = CoreGenerationFunnelInp(api_defaults).values
+
+    #TODO try-catch type errors here
+    for key, value in client_options.items():
+        if key == "model_type":
+            # models_to_index is a dict: it has to be indexed, not called.
+            try:
+                value = models_to_index[value]
+            except KeyError:
+                raise HTTPException(
+                    status_code=422,
+                    detail=f"Unknown model_type: {value!r}") from None
+        default_options[key] = value
+
+    # Forced options go last so that the client cannot override them.
+    for key, value in api_forced.items():
+        default_options[key.lower()] = value
+
+    print(f"Processing {str(len(input_images))} images through the API")
+
+    print(default_options)
+
+    pil_images = [to_base64_PIL(input_image) for input_image in input_images]
+    outpath = backbone.get_outpath()
+    return core_generation_funnel(outpath, pil_images, None, None, default_options)
+
+def depth_api(_: gr.Blocks, app: FastAPI):
+    """Register the /depth/* endpoints on `app`.
+
+    Endpoints:
+        GET  /depth/version      - the script version.
+        GET  /depth/get_options  - names of the generation options and the
+                                   API options.
+        POST /depth/generate     - run the generation on base64-encoded images
+                                   and return the requested outputs, encoded
+                                   as base64.
+    """
+    @app.get("/depth/version")
+    async def version():
+        return {"version": SCRIPT_VERSION}
+
+    @app.get("/depth/get_options")
+    async def get_options():
+        return {
+            "gen_options": [x.name.lower() for x in go],
+            "api_options": api_options
+        }
+
+    @app.post("/depth/generate")
+    async def process(
+        input_images: List[str] = Body([], title='Input Images'),
+        generate_options: Dict[str, object] = Body({}, title='Generation options', options= [x.name.lower() for x in go]),
+        api_options: Dict[str, object] = Body({'outputs': ["depth"]}, title='Api options', options= api_options)
+    ):
+        if len(input_images)==0:
+            raise HTTPException(status_code=422, detail="No images supplied")
+
+        # The generation results can only be iterated once. Keep them in a
+        # list so that they can be scanned again for every requested output
+        # type (otherwise only the first type would ever get any results).
+        generated = list(api_gen(input_images, generate_options))
+
+        # If no outputs are specified (missing or empty) assume depthmap is expected
+        requested_outputs = api_options.get("outputs") or ["depth"]
+
+        results_based = {}
+        for output_type in requested_outputs:
+            results_per_type = [
+                encode_to_base64(result)
+                for _count, img_type, result in generated
+                if img_type == output_type
+            ]
+
+            # simpler output for simpler request.
+            if requested_outputs == ["depth"]:
+                return {"images": results_per_type, "info": "Success"}
+
+            if len(results_per_type)==0:
+                # Report the requested output type (not the builtin `type`).
+                results_based[output_type] = "Check options. no img-type of " + str(output_type) + " where generated"
+            else:
+                results_based[output_type] = results_per_type
+        return {"images": results_based, "info": "Success"}
+        
diff --git a/src/api/api_standalone.py b/src/api/api_standalone.py
diff --git a/src/core.py b/src/core.py
@@ -44,7 +44,7 @@ def convert_to_i16(arr):
     # uint16 conversion uses round-down, therefore values should be [0; 2**16)
     numbytes = 2
     max_val = (2 ** (8 * numbytes))
-    out = np.clip(arr * max_val, 0, max_val - 0.1)  # -0.1 from above is needed to avoid overflowing
+    out = np.clip(arr * max_val + 0.0001, 0, max_val - 0.1)  # -0.1 from above is needed to avoid overflowing
     return out.astype("uint16")
 
 def convert_i16_to_rgb(image, like):
@@ -252,7 +252,7 @@ def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp
                         yield count, 'depth', Image.fromarray(img_output)
 
             if inp[go.GEN_STEREO]:
-                print("Generating stereoscopic images..")
+                # print("Generating stereoscopic image(s)..")
                 stereoimages = create_stereoimages(
                     inputimages[count], img_output,
                     inp[go.STEREO_DIVERGENCE], inp[go.STEREO_SEPARATION],
diff --git a/src/video_mode.py b/src/video_mode.py
@@ -21,22 +21,25 @@ def open_path_as_images(path, maybe_depthvideo=False):
             frames.append(img.convert('RGB'))
         return 1000 / img.info['duration'], frames
     if suffix in ['.avi'] and maybe_depthvideo:
-        import imageio_ffmpeg
-        gen = imageio_ffmpeg.read_frames(path)
         try:
+            import imageio_ffmpeg
+            # Suppose there are in fact 16 bits per pixel
+            # If this is not the case, this is not a 16-bit depthvideo, so no need to process it this way
+            gen = imageio_ffmpeg.read_frames(path, pix_fmt='gray16le', bits_per_pixel=16)
             video_info = next(gen)
             if video_info['pix_fmt'] == 'gray16le':
                 width, height = video_info['size']
                 frames = []
                 for frame in gen:
                     # Not sure if this is implemented somewhere else
                     result = np.frombuffer(frame, dtype='uint16')
-                    result.shape = (height, width * 3 // 2)  # Why does it work? I don't remotely have any idea.
+                    result.shape = (height, width)  # Why does it work? I don't remotely have any idea.
                     frames += [Image.fromarray(result)]
                     # TODO: Wrapping frames into Pillow objects is wasteful
                 return video_info['fps'], frames
         finally:
-            gen.close()
+            if 'gen' in locals():
+                gen.close()
     if suffix in ['.webm', '.mp4', '.avi']:
         from moviepy.video.io.VideoFileClip import VideoFileClip
         clip = VideoFileClip(path)
@@ -45,7 +48,7 @@ def open_path_as_images(path, maybe_depthvideo=False):
         return clip.fps, frames
     else:
         try:
-            return 1000, [Image.open(path)]
+            return 1, [Image.open(path)]
         except Exception as e:
             raise Exception(f"Probably an unsupported file format: {suffix}") from e
 
@@ -128,8 +131,8 @@ def gen_video(video, outpath, inp, custom_depthmap=None, colorvids_bitrate=None,
         first_pass_inp[go.DO_OUTPUT_DEPTH.name] = False
 
         gen_obj = core.core_generation_funnel(None, input_images, None, None, first_pass_inp)
-        predictions = [x[2] for x in list(gen_obj)]
-        input_depths = process_predicitons(predictions, smoothening)
+        input_depths = [x[2] for x in list(gen_obj)]
+        input_depths = process_predicitons(input_depths, smoothening)
     else:
         print('Using custom depthmap video')
         cdm_fps, input_depths = open_path_as_images(os.path.abspath(custom_depthmap.name), maybe_depthvideo=True)
@@ -153,4 +156,5 @@ def gen_video(video, outpath, inp, custom_depthmap=None, colorvids_bitrate=None,
         frames_to_video(fps, imgs, outpath, f"depthmap-{backbone.get_next_sequence_number()}-{basename}",
                         colorvids_bitrate)
     print('All done. Video(s) saved!')
-    return 'Video generated!' if len(gens) == 1 else 'Videos generated!'
+    return '<h3>Videos generated</h3>' if len(gens) > 1 else '<h3>Video generated</h3>' if len(gens) == 1 \
+        else '<h3>Nothing generated - please check the settings and try again</h3>'