1
0
mirror of https://github.com/huggingface/diffusers.git synced 2026-01-27 17:22:53 +03:00
This commit is contained in:
sayakpaul
2025-12-18 11:37:01 +05:30
parent 2f0b35fd84
commit f70010ca5d

View File

@@ -882,27 +882,21 @@ the image\n<|vision_start|><|image_pad|><|vision_end|><|im_end|>\n<|im_start|>as
latents = latents / latents_std + latents_mean
b, c, f, h, w = latents.shape
latents = latents[:, :, 1:] # remove the first frame as it is the orgin input
frames = latents.shape[2]
latents = latents.permute(0, 2, 1, 3, 4).view(-1, c, 1, h, w)
latents = latents.permute(0, 2, 1, 3, 4).reshape(-1, c, 1, h, w)
img = self.vae.decode(latents, return_dict=False)[0] # (b f) c 1 h w
img = img.squeeze(2)
image = self.vae.decode(latents, return_dict=False)[0] # (b f) c 1 h w
image = image.squeeze(2)
image = self.image_processor.postprocess(image, output_type=output_type)
images = []
img = self.image_processor.postprocess(img, output_type=output_type)
image = []
for bidx in range(b):
start = bidx * frames
end = (bidx + 1) * frames
images.append(image[start:end])
image.append(img[bidx * f : (bidx + 1) * f])
# Offload all models
self.maybe_free_model_hooks()
if not return_dict:
return (images,)
return (image,)
return QwenImagePipelineOutput(images=images)
return QwenImagePipelineOutput(images=image)