From 5b950d6fefae4035d835e539c7b2676008ba43fc Mon Sep 17 00:00:00 2001 From: Daniel Gu Date: Wed, 17 Dec 2025 11:30:15 +0100 Subject: [PATCH] Add initial LTX 2.0 video VAE tests --- src/diffusers/models/autoencoders/autoencoder_kl_ltx2.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/diffusers/models/autoencoders/autoencoder_kl_ltx2.py b/src/diffusers/models/autoencoders/autoencoder_kl_ltx2.py index 9f65c9980d..755b92c10a 100644 --- a/src/diffusers/models/autoencoders/autoencoder_kl_ltx2.py +++ b/src/diffusers/models/autoencoders/autoencoder_kl_ltx2.py @@ -633,7 +633,7 @@ class LTX2VideoUpBlock3d(nn.Module): # Like LTX 1.0 LTXVideoEncoder3d but with different default args - the spatiotemporal downsampling pattern is # different, as is the layers_per_block (the 2.0 VAE is bigger) -class LTXVideoEncoder3d(nn.Module): +class LTX2VideoEncoder3d(nn.Module): r""" The `LTXVideoEncoder3d` layer of a variational autoencoder that encodes input video samples to its latent representation. @@ -779,7 +779,7 @@ class LTXVideoEncoder3d(nn.Module): # Like LTX 1.0 LTXVideoDecoder3d, but has only 3 symmetric up blocks which are causal and residual with upsample_factor 2 -class LTXVideoDecoder3d(nn.Module): +class LTX2VideoDecoder3d(nn.Module): r""" The `LTXVideoDecoder3d` layer of a variational autoencoder that decodes its latent representation into an output sample. @@ -1011,7 +1011,7 @@ class AutoencoderKLLTX2Video(ModelMixin, AutoencoderMixin, ConfigMixin, FromOrig ) -> None: super().__init__() - self.encoder = LTXVideoEncoder3d( + self.encoder = LTX2VideoEncoder3d( in_channels=in_channels, out_channels=latent_channels, block_out_channels=block_out_channels, @@ -1024,7 +1024,7 @@ class AutoencoderKLLTX2Video(ModelMixin, AutoencoderMixin, ConfigMixin, FromOrig resnet_norm_eps=resnet_norm_eps, is_causal=encoder_causal, ) - self.decoder = LTXVideoDecoder3d( + self.decoder = LTX2VideoDecoder3d( in_channels=latent_channels, out_channels=out_channels, block_out_channels=decoder_block_out_channels,