Commit 3d102b0

add docs for ONNX upscaling, rename lookup table
1 parent 75cadf2 commit 3d102b0

File tree

2 files changed: +33 −2 lines changed


docs/source/en/optimization/onnx.mdx

Lines changed: 31 additions & 0 deletions
@@ -37,6 +37,37 @@ prompt = "a photo of an astronaut riding a horse on mars"
 image = pipe(prompt).images[0]
 ```
 
+The snippet below demonstrates how to use the ONNX runtime with the Stable Diffusion upscaling pipeline.
+
+```python
+import torch
+
+from diffusers import OnnxStableDiffusionUpscalePipeline, StableDiffusionOnnxPipeline
+
+prompt = "a photo of an astronaut riding a horse on mars"
+steps = 50
+
+txt2img = StableDiffusionOnnxPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5",
+    revision="onnx",
+    provider="CUDAExecutionProvider",
+)
+small_image = txt2img(
+    prompt,
+    num_inference_steps=steps,
+).images[0]
+
+generator = torch.manual_seed(0)
+upscale = OnnxStableDiffusionUpscalePipeline.from_pretrained(
+    "ssube/stable-diffusion-x4-upscaler-onnx",
+    provider="CUDAExecutionProvider",
+)
+large_image = upscale(
+    prompt,
+    small_image,
+    generator=generator,
+    num_inference_steps=steps,
+).images[0]
+```
+
 ## Known Issues
 
 - Generating multiple prompts in a batch seems to take too much memory. While we look into it, you may need to iterate instead of batching.
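
The known issue above recommends iterating instead of batching. A minimal sketch of that workaround (not part of this commit) reuses the same txt2img pipeline as the docs snippet and runs one prompt per call, keeping peak memory close to the single-prompt case; the prompt list is illustrative:

```python
from diffusers import StableDiffusionOnnxPipeline

pipe = StableDiffusionOnnxPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    revision="onnx",
    provider="CUDAExecutionProvider",
)

# Illustrative prompts; instead of passing the whole list to a single
# pipeline call, run one prompt per call to limit memory use.
prompts = [
    "a photo of an astronaut riding a horse on mars",
    "a photo of an astronaut riding a horse on the moon",
]

images = [pipe(p, num_inference_steps=50).images[0] for p in prompts]
```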

src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py

Lines changed: 2 additions & 2 deletions
@@ -17,7 +17,7 @@
 NUM_LATENT_CHANNELS = 4
 NUM_UNET_INPUT_CHANNELS = 7
 
-TORCH_DTYPES = {
+ORT_TO_PT_TYPE = {
     "float16": torch.float16,
     "float32": torch.float32,
 }
@@ -91,7 +91,7 @@ def __call__(
             prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt
         )
 
-        latents_dtype = TORCH_DTYPES[str(text_embeddings.dtype)]
+        latents_dtype = ORT_TO_PT_TYPE[str(text_embeddings.dtype)]
 
         # 4. Preprocess image
         image = preprocess(image)
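
For context on the rename: the table maps the string form of an ONNX Runtime (numpy) dtype to the matching PyTorch dtype, so the latents can be created with the same precision as the text embeddings. A standalone sketch of the assumed lookup behavior, with a dummy numpy array standing in for the text-encoder output:

```python
import numpy as np
import torch

# Same mapping as the pipeline constant: ONNX Runtime returns numpy arrays,
# and str(array.dtype) is "float16" or "float32".
ORT_TO_PT_TYPE = {
    "float16": torch.float16,
    "float32": torch.float32,
}

# Dummy stand-in for the text embeddings returned by the ONNX text encoder.
text_embeddings = np.zeros((1, 77, 768), dtype=np.float32)

latents_dtype = ORT_TO_PT_TYPE[str(text_embeddings.dtype)]
print(latents_dtype)  # torch.float32
```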
