diff --git a/src/diffusers/modular_pipelines/wan/modular_blocks.py b/src/diffusers/modular_pipelines/wan/modular_blocks.py
index 6894d15d0f..dea61c227c 100644
--- a/src/diffusers/modular_pipelines/wan/modular_blocks.py
+++ b/src/diffusers/modular_pipelines/wan/modular_blocks.py
@@ -28,6 +28,22 @@ from .encoders import WanImageEncoderStep, WanTextEncoderStep, WanVaeEncoderStep
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
+class WanAutoImageEncoderStep(AutoPipelineBlocks):
+    block_classes = [WanImageEncoderStep]
+    block_names = ["image_encoder"]
+    # `image` is the trigger input: when it is not provided the image encoder is skipped.
+    block_trigger_inputs = ["image"]
+
+    @property
+    def description(self):
+        return (
+            "Image encoder step that encodes the image inputs into a conditioning embedding.\n"
+            + "This is an auto pipeline block that works for both first-frame and first-last-frame conditioning tasks.\n"
+            + " - `WanImageEncoderStep` (image_encoder) is used when `image`, and possibly `last_image` is provided.\n"
+            + " - if `image` is not provided, this step will be skipped."
+        )
+
+
 class WanAutoVaeEncoderStep(AutoPipelineBlocks):
     block_classes = [WanVaeEncoderStep]
     block_names = ["img2vid"]
@@ -39,7 +55,7 @@ class WanAutoVaeEncoderStep(AutoPipelineBlocks):
             "Vae encoder step that encode the image inputs into their latent representations.\n"
             + "This is an auto pipeline block that works for both first-frame and first-last-frame conditioning tasks.\n"
-            + " - `WanVaeEncoderStep` (img2vid) is used when `image`, and possibly `last_image` is provided."
-            + " - if `image` is provided, this step will be skipped."
+            + " - `WanVaeEncoderStep` (img2vid) is used when `image`, and possibly `last_image` is provided.\n"
+            + " - if `image` is not provided, this step will be skipped."
         )
 
 
@@ -215,7 +231,7 @@ IMAGE2VIDEO_BLOCKS = InsertableDict(
 AUTO_BLOCKS = InsertableDict(
     [
         ("text_encoder", WanTextEncoderStep),
-        ("image_encoder", WanImageEncoderStep),
+        ("image_encoder", WanAutoImageEncoderStep),
         ("vae_encoder", WanAutoVaeEncoderStep),
         ("before_denoise", WanAutoBeforeDenoiseStep),
         ("denoise", WanAutoDenoiseStep),