Add initial LTX 2.0 video VAE tests

2026-01-27 17:22:53 +03:00 · 2025-12-17 11:30:15 +01:00
parent baf23e2da3
commit 5b950d6fef
1 changed files with 4 additions and 4 deletions
--- a/src/diffusers/models/autoencoders/autoencoder_kl_ltx2.py
+++ b/src/diffusers/models/autoencoders/autoencoder_kl_ltx2.py
@@ -633,7 +633,7 @@ class LTX2VideoUpBlock3d(nn.Module):

 # Like LTX 1.0 LTXVideoEncoder3d but with different default args - the spatiotemporal downsampling pattern is
 # different, as is the layers_per_block (the 2.0 VAE is bigger)
-class LTXVideoEncoder3d(nn.Module):
+class LTX2VideoEncoder3d(nn.Module):
    r"""
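-    The `LTXVideoEncoder3d` layer of a variational autoencoder that encodes input video samples to its latent
+    The `LTX2VideoEncoder3d` layer of a variational autoencoder that encodes input video samples to its latent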
    representation.
@@ -779,7 +779,7 @@ class LTXVideoEncoder3d(nn.Module):


 # Like LTX 1.0 LTXVideoDecoder3d, but has only 3 symmetric up blocks which are causal and residual with upsample_factor 2
-class LTXVideoDecoder3d(nn.Module):
+class LTX2VideoDecoder3d(nn.Module):
    r"""
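-    The `LTXVideoDecoder3d` layer of a variational autoencoder that decodes its latent representation into an output
+    The `LTX2VideoDecoder3d` layer of a variational autoencoder that decodes its latent representation into an output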
    sample.
@@ -1011,7 +1011,7 @@ class AutoencoderKLLTX2Video(ModelMixin, AutoencoderMixin, ConfigMixin, FromOrig
    ) -> None:
        super().__init__()

-        self.encoder = LTXVideoEncoder3d(
+        self.encoder = LTX2VideoEncoder3d(
            in_channels=in_channels,
            out_channels=latent_channels,
            block_out_channels=block_out_channels,
@@ -1024,7 +1024,7 @@ class AutoencoderKLLTX2Video(ModelMixin, AutoencoderMixin, ConfigMixin, FromOrig
            resnet_norm_eps=resnet_norm_eps,
            is_causal=encoder_causal,
        )
-        self.decoder = LTXVideoDecoder3d(
+        self.decoder = LTX2VideoDecoder3d(
            in_channels=latent_channels,
            out_channels=out_channels,
            block_out_channels=decoder_block_out_channels,