From f295e2eefcebf21781f888b407eefadb5e121f7b Mon Sep 17 00:00:00 2001
From: Hanch Han <51526347+hanchchch@users.noreply.github.com>
Date: Tue, 28 Jan 2025 10:21:27 +0900
Subject: [PATCH] [fix] refer use_framewise_encoding on
 AutoencoderKLHunyuanVideo._encode (#10600)

* fix: refer to use_framewise_encoding on AutoencoderKLHunyuanVideo._encode

* fix: comment about tile_sample_min_num_frames

---------

Co-authored-by: Aryan
---
 .../models/autoencoders/autoencoder_kl_hunyuan_video.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py b/src/diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py
index e2236a7f20..9be79cfe7d 100644
--- a/src/diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py
+++ b/src/diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py
@@ -786,7 +786,7 @@ class AutoencoderKLHunyuanVideo(ModelMixin, ConfigMixin):
         self.use_tiling = False
 
         # When decoding temporally long video latents, the memory requirement is very high. By decoding latent frames
-        # at a fixed frame batch size (based on `self.num_latent_frames_batch_sizes`), the memory requirement can be lowered.
+        # at a fixed frame batch size (based on `self.tile_sample_min_num_frames`), the memory requirement can be lowered.
         self.use_framewise_encoding = True
         self.use_framewise_decoding = True
@@ -868,7 +868,7 @@ class AutoencoderKLHunyuanVideo(ModelMixin, ConfigMixin):
     def _encode(self, x: torch.Tensor) -> torch.Tensor:
         batch_size, num_channels, num_frames, height, width = x.shape
 
-        if self.use_framewise_decoding and num_frames > self.tile_sample_min_num_frames:
+        if self.use_framewise_encoding and num_frames > self.tile_sample_min_num_frames:
            return self._temporal_tiled_encode(x)
 
        if self.use_tiling and (width > self.tile_sample_min_width or height > self.tile_sample_min_height):
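
A minimal usage sketch of the code path this fix restores (not part of the patch itself). The class, `encode`, and `tile_sample_min_num_frames` are existing diffusers API; the checkpoint id "hunyuanvideo-community/HunyuanVideo" and the tensor sizes are illustrative assumptions. Before the fix, `_encode` read `use_framewise_decoding`, so framewise encoding behavior could not be controlled through its own flag.

import torch
from diffusers import AutoencoderKLHunyuanVideo

# Assumed checkpoint id, used here only for illustration.
vae = AutoencoderKLHunyuanVideo.from_pretrained(
    "hunyuanvideo-community/HunyuanVideo", subfolder="vae", torch_dtype=torch.float16
).to("cuda")

# `use_framewise_encoding` is True by default (set in __init__, see the first
# hunk), so a 5D input (batch, channels, frames, height, width) with more
# frames than `vae.tile_sample_min_num_frames` now takes the
# `_temporal_tiled_encode` path, bounding peak memory instead of encoding
# the whole clip in one pass.
video = torch.randn(1, 3, 129, 256, 256, dtype=torch.float16, device="cuda")
with torch.no_grad():
    latents = vae.encode(video).latent_dist.sample()
print(latents.shape)  # temporally compressed latents

# Setting vae.use_framewise_encoding = False requests single-pass encoding;
# before this patch that assignment had no effect, because _encode consulted
# the decoding flag instead.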