Closed
Description
Describe the bug
File "...\diffusers\loaders\unet.py", line 780, in _load_ip_adapter_weights
num_image_text_embeds = state_dict["image_proj"]["latents"].shape[1]
KeyError: 'latents'
Reproduction
import cv2
from insightface.app import FaceAnalysis
import numpy as np
from PIL import Image
import torch
from diffusers import StableDiffusionPipeline, DDIMScheduler
from diffusers.utils import load_image
def image_grid(imgs, rows, cols):
    """Tile images into a single ``rows`` x ``cols`` grid image.

    Images are pasted in row-major order. Every cell uses the size of the
    first image, so the images are expected to share one size.

    Args:
        imgs: Sequence of images exposing ``.size`` as ``(width, height)``
            and accepted by ``Image.paste``.
        rows: Number of grid rows.
        cols: Number of grid columns.

    Returns:
        A new RGB image of size ``(cols * w, rows * h)``, where ``(w, h)``
        is the size of the first image.

    Raises:
        ValueError: If ``imgs`` is empty or ``len(imgs) != rows * cols``.
    """
    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if len(imgs) != rows * cols:
        raise ValueError(
            f"expected rows * cols = {rows * cols} images, got {len(imgs)}"
        )
    # The cell size comes from the first image, so an empty grid is undefined.
    if not imgs:
        raise ValueError("imgs must contain at least one image")

    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols * w, rows * h))
    for i, img in enumerate(imgs):
        # Column index is i % cols, row index is i // cols.
        grid.paste(img, box=(i % cols * w, i // cols * h))
    return grid
# Reproduction script body: build a Stable Diffusion pipeline, compute a face
# embedding with insightface, then load the IP-Adapter FaceID weights.

# DDIM scheduler with explicit settings; handed to the pipeline below.
noise_scheduler = DDIMScheduler(
num_train_timesteps=1000,
beta_start=0.00085,
beta_end=0.012,
beta_schedule="scaled_linear",
clip_sample=False,
set_alpha_to_one=False,
steps_offset=1
)
# Pipeline built from a single local checkpoint file in float16, with no
# feature extractor and the safety checker disabled, then moved to CUDA.
pipeline = StableDiffusionPipeline.from_single_file(
r"Realistic_Vision_V4.0.safetensors",
torch_dtype=torch.float16,
scheduler=noise_scheduler,
feature_extractor=None,
load_safety_checker=False
).to("cuda")
# Seeded CPU generator and image count; neither is used in the visible part
# of this script.
generator = torch.Generator(device="cpu").manual_seed(42)
num_images = 4
# `image` is rebound three times below: loaded image -> numpy array ->
# face-embedding tensor.
image = load_image("zly.jpg")
# insightface face analysis using the "buffalo_l" model pack on the CPU provider.
app = FaceAnalysis(name="buffalo_l", providers=['CPUExecutionProvider'], root='./')
app.prepare(ctx_id=0, det_size=(640, 640))
# NOTE(review): load_image presumably returns an RGB image, in which case this
# channel swap yields BGR despite the constant's name (BGR2RGB and RGB2BGR do
# the same swap) - confirm the channel order insightface expects.
image = cv2.cvtColor(np.asarray(image), cv2.COLOR_BGR2RGB)
faces = app.get(image)
# Take the first detected face's normalized embedding and add a leading
# dimension with unsqueeze(0). Raises IndexError if no face was detected.
image = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
# NOTE(review): presumably the call that raises the reported
# KeyError: 'latents' (the traceback points at _load_ip_adapter_weights) - confirm.
pipeline.load_ip_adapter(r'./data/models/ip_adapter/', subfolder='models', weight_name="ip-adapter-faceid_sd15.bin")
# pipeline.load_ip_adapter("h94/IP-Adapter-FaceID", subfolder='',weight_name="ip-adapter-faceid_sd15.bin")
pipeline.set_ip_adapter_scale(0.7)
Logs
No response
System Info
Diffusers 0.25.0