From f70010ca5d4ad39f7d211a9742949bb804d08b00 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 18 Dec 2025 11:37:01 +0530 Subject: [PATCH] up --- .../qwenimage/pipeline_qwenimage_layered.py | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_layered.py b/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_layered.py index 8f24c8a2a3..ef63aafbde 100644 --- a/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_layered.py +++ b/src/diffusers/pipelines/qwenimage/pipeline_qwenimage_layered.py @@ -882,27 +882,21 @@ the image\n<|vision_start|><|image_pad|><|vision_end|><|im_end|>\n<|im_start|>as latents = latents / latents_std + latents_mean b, c, f, h, w = latents.shape - latents = latents[:, :, 1:] # remove the first frame as it is the orgin input - frames = latents.shape[2] + latents = latents.permute(0, 2, 1, 3, 4).view(-1, c, 1, h, w) - latents = latents.permute(0, 2, 1, 3, 4).reshape(-1, c, 1, h, w) + img = self.vae.decode(latents, return_dict=False)[0] # (b f) c 1 h w + img = img.squeeze(2) - image = self.vae.decode(latents, return_dict=False)[0] # (b f) c 1 h w - - image = image.squeeze(2) - - image = self.image_processor.postprocess(image, output_type=output_type) - images = [] + img = self.image_processor.postprocess(img, output_type=output_type) + image = [] for bidx in range(b): - start = bidx * frames - end = (bidx + 1) * frames - images.append(image[start:end]) + image.append(img[bidx * f : (bidx + 1) * f]) # Offload all models self.maybe_free_model_hooks() if not return_dict: - return (images,) + return (image,) - return QwenImagePipelineOutput(images=images) + return QwenImagePipelineOutput(images=image)