mirror of
https://github.com/huggingface/diffusers.git
synced 2026-01-27 17:22:53 +03:00
Remove CogVideoX mentions from single file docs; Test updates (#9444)
* remove mentions from single file * update tests * update
This commit is contained in:
@@ -22,9 +22,6 @@ The [`~loaders.FromSingleFileMixin.from_single_file`] method allows you to load:
|
||||
|
||||
## Supported pipelines
|
||||
|
||||
- [`CogVideoXPipeline`]
|
||||
- [`CogVideoXImageToVideoPipeline`]
|
||||
- [`CogVideoXVideoToVideoPipeline`]
|
||||
- [`StableDiffusionPipeline`]
|
||||
- [`StableDiffusionImg2ImgPipeline`]
|
||||
- [`StableDiffusionInpaintPipeline`]
|
||||
@@ -52,7 +49,6 @@ The [`~loaders.FromSingleFileMixin.from_single_file`] method allows you to load:
|
||||
- [`UNet2DConditionModel`]
|
||||
- [`StableCascadeUNet`]
|
||||
- [`AutoencoderKL`]
|
||||
- [`AutoencoderKLCogVideoX`]
|
||||
- [`ControlNetModel`]
|
||||
- [`SD3Transformer2DModel`]
|
||||
- [`FluxTransformer2DModel`]
|
||||
|
||||
@@ -57,6 +57,7 @@ class CogVideoXPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
"callback_on_step_end_tensor_inputs",
|
||||
]
|
||||
)
|
||||
test_xformers_attention = False
|
||||
|
||||
def get_dummy_components(self):
|
||||
torch.manual_seed(0)
|
||||
@@ -71,8 +72,8 @@ class CogVideoXPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
time_embed_dim=2,
|
||||
text_embed_dim=32, # Must match with tiny-random-t5
|
||||
num_layers=1,
|
||||
sample_width=16, # latent width: 2 -> final width: 16
|
||||
sample_height=16, # latent height: 2 -> final height: 16
|
||||
sample_width=2, # latent width: 2 -> final width: 16
|
||||
sample_height=2, # latent height: 2 -> final height: 16
|
||||
sample_frames=9, # latent frames: (9 - 1) / 4 + 1 = 3 -> final frames: 9
|
||||
patch_size=2,
|
||||
temporal_compression_ratio=4,
|
||||
@@ -280,10 +281,6 @@ class CogVideoXPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
"VAE tiling should not affect the inference results",
|
||||
)
|
||||
|
||||
@unittest.skip("xformers attention processor does not exist for CogVideoX")
|
||||
def test_xformers_attention_forwardGenerator_pass(self):
|
||||
pass
|
||||
|
||||
def test_fused_qkv_projections(self):
|
||||
device = "cpu" # ensure determinism for the device-dependent torch.Generator
|
||||
components = self.get_dummy_components()
|
||||
|
||||
@@ -269,8 +269,9 @@ class CogVideoXPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
|
||||
generator_device = "cpu"
|
||||
components = self.get_dummy_components()
|
||||
|
||||
# The reason to modify it this way is because I2V Transformer limits the generation to resolutions.
|
||||
# See the if-statement on "self.use_learned_positional_embeddings"
|
||||
# The reason to modify it this way is because I2V Transformer limits the generation to resolutions used during initialization.
|
||||
# This limitation comes from using learned positional embeddings which cannot be generated on-the-fly like sincos or RoPE embeddings.
|
||||
# See the if-statement on "self.use_learned_positional_embeddings" in diffusers/models/embeddings.py
|
||||
components["transformer"] = CogVideoXTransformer3DModel.from_config(
|
||||
components["transformer"].config,
|
||||
sample_height=16,
|
||||
|
||||
@@ -51,6 +51,7 @@ class CogVideoXVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
|
||||
"callback_on_step_end_tensor_inputs",
|
||||
]
|
||||
)
|
||||
test_xformers_attention = False
|
||||
|
||||
def get_dummy_components(self):
|
||||
torch.manual_seed(0)
|
||||
@@ -65,8 +66,8 @@ class CogVideoXVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
|
||||
time_embed_dim=2,
|
||||
text_embed_dim=32, # Must match with tiny-random-t5
|
||||
num_layers=1,
|
||||
sample_width=16, # latent width: 2 -> final width: 16
|
||||
sample_height=16, # latent height: 2 -> final height: 16
|
||||
sample_width=2, # latent width: 2 -> final width: 16
|
||||
sample_height=2, # latent height: 2 -> final height: 16
|
||||
sample_frames=9, # latent frames: (9 - 1) / 4 + 1 = 3 -> final frames: 9
|
||||
patch_size=2,
|
||||
temporal_compression_ratio=4,
|
||||
@@ -285,10 +286,6 @@ class CogVideoXVideoToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
|
||||
"VAE tiling should not affect the inference results",
|
||||
)
|
||||
|
||||
@unittest.skip("xformers attention processor does not exist for CogVideoX")
|
||||
def test_xformers_attention_forwardGenerator_pass(self):
|
||||
pass
|
||||
|
||||
def test_fused_qkv_projections(self):
|
||||
device = "cpu" # ensure determinism for the device-dependent torch.Generator
|
||||
components = self.get_dummy_components()
|
||||
|
||||
Reference in New Issue
Block a user