mirror of
https://github.com/huggingface/diffusers.git
synced 2026-01-27 17:22:53 +03:00
[fix] refer to use_framewise_encoding in AutoencoderKLHunyuanVideo._encode (#10600)
* fix: refer to `use_framewise_encoding` (not `use_framewise_decoding`) in `AutoencoderKLHunyuanVideo._encode`
* fix: correct the comment to reference `tile_sample_min_num_frames`

Co-authored-by: Aryan <aryan@huggingface.co>
This commit is contained in:
@@ -786,7 +786,7 @@ class AutoencoderKLHunyuanVideo(ModelMixin, ConfigMixin):
|
||||
self.use_tiling = False
|
||||
|
||||
# When decoding temporally long video latents, the memory requirement is very high. By decoding latent frames
|
||||
# at a fixed frame batch size (based on `self.num_latent_frames_batch_sizes`), the memory requirement can be lowered.
|
||||
# at a fixed frame batch size (based on `self.tile_sample_min_num_frames`), the memory requirement can be lowered.
|
||||
self.use_framewise_encoding = True
|
||||
self.use_framewise_decoding = True
|
||||
|
||||
@@ -868,7 +868,7 @@ class AutoencoderKLHunyuanVideo(ModelMixin, ConfigMixin):
|
||||
def _encode(self, x: torch.Tensor) -> torch.Tensor:
|
||||
batch_size, num_channels, num_frames, height, width = x.shape
|
||||
|
||||
if self.use_framewise_decoding and num_frames > self.tile_sample_min_num_frames:
|
||||
if self.use_framewise_encoding and num_frames > self.tile_sample_min_num_frames:
|
||||
return self._temporal_tiled_encode(x)
|
||||
|
||||
if self.use_tiling and (width > self.tile_sample_min_width or height > self.tile_sample_min_height):
|
||||
|
||||
Reference in New Issue
Block a user