From f0707751efd8e47883282861d5305604b320ac32 Mon Sep 17 00:00:00 2001
From: Aryan
Date: Fri, 21 Feb 2025 03:37:07 +0530
Subject: [PATCH] Some consistency-related fixes for HunyuanVideo (#10835)

* update

* update
---
 .../pipelines/hunyuan_video/pipeline_hunyuan_video.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py b/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py
index d15ef18e14..bafe8c8834 100644
--- a/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py
+++ b/src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py
@@ -387,7 +387,7 @@ class HunyuanVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
     def prepare_latents(
         self,
         batch_size: int,
-        num_channels_latents: 32,
+        num_channels_latents: int = 32,
         height: int = 720,
         width: int = 1280,
         num_frames: int = 129,
@@ -402,7 +402,7 @@ class HunyuanVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
         shape = (
             batch_size,
             num_channels_latents,
-            num_frames,
+            (num_frames - 1) // self.vae_scale_factor_temporal + 1,
             int(height) // self.vae_scale_factor_spatial,
             int(width) // self.vae_scale_factor_spatial,
         )
@@ -624,13 +624,12 @@ class HunyuanVideoPipeline(DiffusionPipeline, HunyuanVideoLoraLoaderMixin):
 
         # 5. Prepare latent variables
         num_channels_latents = self.transformer.config.in_channels
-        num_latent_frames = (num_frames - 1) // self.vae_scale_factor_temporal + 1
         latents = self.prepare_latents(
             batch_size * num_videos_per_prompt,
             num_channels_latents,
             height,
             width,
-            num_latent_frames,
+            num_frames,
             torch.float32,
             device,
             generator,