mirror of https://github.com/huggingface/diffusers.git synced 2026-01-27 17:22:53 +03:00

make style

This commit is contained in:
yiyixuxu
2026-01-19 09:27:40 +01:00
parent 8d45ff5bf6
commit f056af1fbb
10 changed files with 497 additions and 432 deletions

View File

@@ -438,7 +438,7 @@ INPUT_PARAM_TEMPLATES = {
"description": "Number of layers to extract from the image",
},
# common intermediate inputs
"prompt_embeds":{
"prompt_embeds": {
"type_hint": torch.Tensor,
"required": True,
"description": "text embeddings used to guide the image generation. Can be generated from text_encoder step.",
@@ -531,16 +531,16 @@ class InputParam:
raise ValueError(f"InputParam template for {template_name} not found")
template_kwargs = INPUT_PARAM_TEMPLATES[template_name].copy()
# Determine the actual param name:
# 1. From overrides if provided
# 2. From template if present
# 3. Fall back to template_name
name = overrides.pop("name", template_kwargs.pop("name", template_name))
if note and "description" in template_kwargs:
template_kwargs["description"] = f"{template_kwargs['description']} ({note})"
template_kwargs.update(overrides)
return cls(name=name, **template_kwargs)
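As a usage sketch of the resolution order above (assuming the `template` classmethod signature implied by this hunk; illustrative only):

param = InputParam.template("prompt_embeds")
# name falls back to the template key: "prompt_embeds", required=True

param = InputParam.template("prompt_embeds", note="updated in input step")
# note is appended to the template description: "... (updated in input step)"

param = InputParam.template("prompt_embeds", name="negative_prompt_embeds", required=False)
# overrides win over the template, including the name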
@@ -564,18 +564,18 @@ class OutputParam:
"""Get template for name if exists, otherwise raise ValueError."""
if template_name not in OUTPUT_PARAM_TEMPLATES:
raise ValueError(f"OutputParam template for {template_name} not found")
template_kwargs = OUTPUT_PARAM_TEMPLATES[template_name].copy()
# Determine the actual param name:
# 1. From overrides if provided
# 2. From template if present
# 3. Fall back to template_name
name = overrides.pop("name", template_kwargs.pop("name", template_name))
if note and "description" in template_kwargs:
template_kwargs["description"] = f"{template_kwargs['description']} ({note})"
template_kwargs.update(overrides)
return cls(name=name, **template_kwargs)
@@ -913,4 +913,4 @@ def make_doc_string(
output += "\n\n"
output += format_output_params(outputs, indent_level=2)
return output
return output

View File

@@ -117,6 +117,7 @@ def get_timesteps(scheduler, num_inference_steps, strength):
# 1. PREPARE LATENTS
# ====================
# auto_docstring
class QwenImagePrepareLatentsStep(ModularPipelineBlocks):
"""
@@ -137,8 +138,8 @@ class QwenImagePrepareLatentsStep(ModularPipelineBlocks):
generator (`Generator`, *optional*):
Torch generator for deterministic generation.
batch_size (`int`, *optional*, defaults to 1):
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
generated in input step.
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
be generated in input step.
dtype (`dtype`, *optional*, defaults to torch.float32):
The dtype of the model inputs, can be generated in input step.
@@ -150,6 +151,7 @@ class QwenImagePrepareLatentsStep(ModularPipelineBlocks):
latents (`Tensor`):
The initial latents to use for the denoising process
"""
model_name = "qwenimage"
@property
@@ -254,8 +256,8 @@ class QwenImageLayeredPrepareLatentsStep(ModularPipelineBlocks):
generator (`Generator`, *optional*):
Torch generator for deterministic generation.
batch_size (`int`, *optional*, defaults to 1):
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
generated in input step.
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
be generated in input step.
dtype (`dtype`, *optional*, defaults to torch.float32):
The dtype of the model inputs, can be generated in input step.
@@ -267,6 +269,7 @@ class QwenImageLayeredPrepareLatentsStep(ModularPipelineBlocks):
latents (`Tensor`):
The initial latents to use for the denoising process
"""
model_name = "qwenimage-layered"
@property
@@ -353,7 +356,8 @@ class QwenImageLayeredPrepareLatentsStep(ModularPipelineBlocks):
# auto_docstring
class QwenImagePrepareLatentsWithStrengthStep(ModularPipelineBlocks):
"""
Step that adds noise to image latents for image-to-image/inpainting. Should be run after set_timesteps, prepare_latents. Both noise and image latents should already be patchified.
Step that adds noise to image latents for image-to-image/inpainting. Should be run after set_timesteps,
prepare_latents. Both noise and image latents should already be patchified.
Components:
scheduler (`FlowMatchEulerDiscreteScheduler`)
@@ -362,8 +366,8 @@ class QwenImagePrepareLatentsWithStrengthStep(ModularPipelineBlocks):
latents (`Tensor`):
The initial random noise, can be generated in prepare latents step.
image_latents (`Tensor`):
image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be generated from
vae encoder and updated in input step.)
image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be
generated from vae encoder and updated in input step.)
timesteps (`Tensor`):
The timesteps to use for the denoising process. Can be generated in set_timesteps step.
@@ -373,6 +377,7 @@ class QwenImagePrepareLatentsWithStrengthStep(ModularPipelineBlocks):
latents (`Tensor`):
The scaled noisy latents to use for inpainting/image-to-image denoising.
"""
model_name = "qwenimage"
@property
@@ -396,10 +401,10 @@ class QwenImagePrepareLatentsWithStrengthStep(ModularPipelineBlocks):
),
InputParam.template("image_latents", note="Can be generated from vae encoder and updated in input step."),
InputParam(
name="timesteps",
required=True,
type_hint=torch.Tensor,
description="The timesteps to use for the denoising process. Can be generated in set_timesteps step."
name="timesteps",
required=True,
type_hint=torch.Tensor,
description="The timesteps to use for the denoising process. Can be generated in set_timesteps step.",
),
]
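What this step computes can be sketched as follows, assuming the flow-match convention where `scale_noise` interpolates x_t = (1 - sigma_t) * x_0 + sigma_t * noise (a sketch, not the block's literal body):

def add_noise_with_strength(scheduler, image_latents, noise, timesteps):
    # the first retained timestep (selected via strength in set_timesteps)
    # decides how much of the original image survives
    latent_timestep = timesteps[:1].repeat(image_latents.shape[0])
    return scheduler.scale_noise(image_latents, latent_timestep, noise)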
@@ -475,6 +480,7 @@ class QwenImageCreateMaskLatentsStep(ModularPipelineBlocks):
mask (`Tensor`):
The mask to use for the inpainting process.
"""
model_name = "qwenimage"
@property
@@ -541,10 +547,12 @@ class QwenImageCreateMaskLatentsStep(ModularPipelineBlocks):
# 2. SET TIMESTEPS
# ====================
# auto_docstring
class QwenImageSetTimestepsStep(ModularPipelineBlocks):
"""
Step that sets the scheduler's timesteps for text-to-image generation. Should be run after prepare latents step.
Step that sets the scheduler's timesteps for text-to-image generation. Should be run after prepare latents
step.
Components:
scheduler (`FlowMatchEulerDiscreteScheduler`)
@@ -561,6 +569,7 @@ class QwenImageSetTimestepsStep(ModularPipelineBlocks):
timesteps (`Tensor`):
The timesteps to use for the denoising process
"""
model_name = "qwenimage"
@property
@@ -579,10 +588,10 @@ class QwenImageSetTimestepsStep(ModularPipelineBlocks):
InputParam.template("num_inference_steps"),
InputParam.template("sigmas"),
InputParam(
name="latents",
name="latents",
required=True,
type_hint=torch.Tensor,
description="The initial random noised latents for the denoising process. Can be generated in prepare latents step."
description="The initial random noised latents for the denoising process. Can be generated in prepare latents step.",
),
]
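A hedged sketch of the scheduler call this step builds up to; deriving `mu` from the packed sequence length is an assumption modeled on other flow-match diffusers pipelines, and `calculate_shift` is a hypothetical helper name:

import numpy as np

def set_flow_match_timesteps(scheduler, latents, num_inference_steps, device):
    sigmas = np.linspace(1.0, 1.0 / num_inference_steps, num_inference_steps).tolist()
    image_seq_len = latents.shape[1]  # latents arrive already packed to (B, seq, C)
    mu = calculate_shift(image_seq_len)  # hypothetical helper for dynamic shifting
    scheduler.set_timesteps(sigmas=sigmas, mu=mu, device=device)
    return scheduler.timesteps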
@@ -640,13 +649,14 @@ class QwenImageLayeredSetTimestepsStep(ModularPipelineBlocks):
sigmas (`List`, *optional*):
Custom sigmas for the denoising process.
image_latents (`Tensor`):
image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be generated from
vae encoder and packed in input step.)
image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be
generated from vae encoder and packed in input step.)
Outputs:
timesteps (`Tensor`):
The timesteps to use for the denoising process.
"""
model_name = "qwenimage-layered"
@property
@@ -671,9 +681,7 @@ class QwenImageLayeredSetTimestepsStep(ModularPipelineBlocks):
def intermediate_outputs(self) -> List[OutputParam]:
return [
OutputParam(
name="timesteps",
type_hint=torch.Tensor,
description="The timesteps to use for the denoising process."
name="timesteps", type_hint=torch.Tensor, description="The timesteps to use for the denoising process."
),
]
@@ -711,7 +719,8 @@ class QwenImageLayeredSetTimestepsStep(ModularPipelineBlocks):
# auto_docstring
class QwenImageSetTimestepsWithStrengthStep(ModularPipelineBlocks):
"""
Step that sets the scheduler's timesteps for image-to-image generation and inpainting. Should be run after prepare latents step.
Step that sets the scheduler's timesteps for image-to-image generation and inpainting. Should be run after
prepare latents step.
Components:
scheduler (`FlowMatchEulerDiscreteScheduler`)
@@ -732,6 +741,7 @@ class QwenImageSetTimestepsWithStrengthStep(ModularPipelineBlocks):
num_inference_steps (`int`):
The number of denoising steps to perform at inference time. Updated based on strength.
"""
model_name = "qwenimage"
@property
@@ -750,10 +760,10 @@ class QwenImageSetTimestepsWithStrengthStep(ModularPipelineBlocks):
InputParam.template("num_inference_steps"),
InputParam.template("sigmas"),
InputParam(
"latents",
required=True,
"latents",
required=True,
type_hint=torch.Tensor,
description="The latents to use for the denoising process. Can be generated in prepare latents step."
description="The latents to use for the denoising process. Can be generated in prepare latents step.",
),
InputParam.template("strength", default=0.9),
]
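For reference, the strength handling usually follows the standard img2img truncation; a sketch of the `get_timesteps` helper referenced in the hunk header near the top of this file (details may differ):

def get_timesteps(scheduler, num_inference_steps, strength):
    # strength=1.0 keeps the full schedule; smaller values drop early steps
    init_timestep = min(num_inference_steps * strength, num_inference_steps)
    t_start = int(max(num_inference_steps - init_timestep, 0))
    timesteps = scheduler.timesteps[t_start * scheduler.order :]
    return timesteps, num_inference_steps - t_start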
@@ -815,6 +825,7 @@ class QwenImageSetTimestepsWithStrengthStep(ModularPipelineBlocks):
## RoPE inputs for denoiser
# auto_docstring
class QwenImageRoPEInputsStep(ModularPipelineBlocks):
"""
@@ -822,8 +833,8 @@ class QwenImageRoPEInputsStep(ModularPipelineBlocks):
Inputs:
batch_size (`int`, *optional*, defaults to 1):
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
generated in input step.
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
be generated in input step.
height (`int`):
The height in pixels of the generated image.
width (`int`):
@@ -841,6 +852,7 @@ class QwenImageRoPEInputsStep(ModularPipelineBlocks):
negative_txt_seq_lens (`List`):
The sequence lengths of the negative prompt embeds, used for RoPE calculation
"""
model_name = "qwenimage"
@property
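A sketch of how these RoPE inputs are typically derived (names assumed from the non-modular QwenImage pipeline):

def prepare_rope_inputs(prompt_embeds_mask, height, width, vae_scale_factor, batch_size):
    # sequence lengths from the attention mask; image shapes from the target resolution
    txt_seq_lens = prompt_embeds_mask.sum(dim=1).tolist()
    img_shapes = [(1, height // vae_scale_factor // 2, width // vae_scale_factor // 2)] * batch_size
    return img_shapes, txt_seq_lens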
@@ -911,12 +923,13 @@ class QwenImageRoPEInputsStep(ModularPipelineBlocks):
# auto_docstring
class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
"""
Step that prepares the RoPE inputs for denoising process. This is used in QwenImage Edit. Should be placed after prepare_latents step
Step that prepares the RoPE inputs for denoising process. This is used in QwenImage Edit. Should be placed after
prepare_latents step
Inputs:
batch_size (`int`, *optional*, defaults to 1):
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
generated in input step.
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
be generated in input step.
image_height (`int`):
The height of the reference image. Can be generated in input step.
image_width (`int`):
@@ -938,6 +951,7 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
negative_txt_seq_lens (`List`):
The sequence lengths of the negative prompt embeds, used for RoPE calculation
"""
model_name = "qwenimage"
@property
@@ -948,8 +962,18 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
def inputs(self) -> List[InputParam]:
return [
InputParam.template("batch_size"),
InputParam(name="image_height", required=True, type_hint=int, description="The height of the reference image. Can be generated in input step."),
InputParam(name="image_width", required=True, type_hint=int, description="The width of the reference image. Can be generated in input step."),
InputParam(
name="image_height",
required=True,
type_hint=int,
description="The height of the reference image. Can be generated in input step.",
),
InputParam(
name="image_width",
required=True,
type_hint=int,
description="The width of the reference image. Can be generated in input step.",
),
InputParam.template("height", required=True),
InputParam.template("width", required=True),
InputParam.template("prompt_embeds_mask"),
@@ -1016,13 +1040,13 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
class QwenImageEditPlusRoPEInputsStep(ModularPipelineBlocks):
"""
Step that prepares the RoPE inputs for denoising process. This is used in QwenImage Edit Plus.
Unlike Edit, Edit Plus handles lists of image_height/image_width for multiple reference images.
Should be placed after prepare_latents step.
Unlike Edit, Edit Plus handles lists of image_height/image_width for multiple reference images. Should be placed
after prepare_latents step.
Inputs:
batch_size (`int`, *optional*, defaults to 1):
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
generated in input step.
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
be generated in input step.
image_height (`List`):
The heights of the reference images. Can be generated in input step.
image_width (`List`):
@@ -1044,6 +1068,7 @@ class QwenImageEditPlusRoPEInputsStep(ModularPipelineBlocks):
negative_txt_seq_lens (`List`):
The sequence lengths of the negative prompt embeds, used for RoPE calculation
"""
model_name = "qwenimage-edit-plus"
@property
@@ -1058,8 +1083,18 @@ class QwenImageEditPlusRoPEInputsStep(ModularPipelineBlocks):
def inputs(self) -> List[InputParam]:
return [
InputParam.template("batch_size"),
InputParam(name="image_height", required=True, type_hint=List[int], description="The heights of the reference images. Can be generated in input step."),
InputParam(name="image_width", required=True, type_hint=List[int], description="The widths of the reference images. Can be generated in input step."),
InputParam(
name="image_height",
required=True,
type_hint=List[int],
description="The heights of the reference images. Can be generated in input step.",
),
InputParam(
name="image_width",
required=True,
type_hint=List[int],
description="The widths of the reference images. Can be generated in input step.",
),
InputParam.template("height", required=True),
InputParam.template("width", required=True),
InputParam.template("prompt_embeds_mask"),
@@ -1126,8 +1161,8 @@ class QwenImageLayeredRoPEInputsStep(ModularPipelineBlocks):
Inputs:
batch_size (`int`, *optional*, defaults to 1):
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
generated in input step.
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
be generated in input step.
layers (`int`, *optional*, defaults to 4):
Number of layers to extract from the image
height (`int`):
@@ -1149,6 +1184,7 @@ class QwenImageLayeredRoPEInputsStep(ModularPipelineBlocks):
additional_t_cond (`Tensor`):
The additional t cond, used for RoPE calculation
"""
model_name = "qwenimage-layered"
@property
@@ -1231,6 +1267,7 @@ class QwenImageLayeredRoPEInputsStep(ModularPipelineBlocks):
## ControlNet inputs for denoiser
# auto_docstring
class QwenImageControlNetBeforeDenoiserStep(ModularPipelineBlocks):
"""
@@ -1247,7 +1284,8 @@ class QwenImageControlNetBeforeDenoiserStep(ModularPipelineBlocks):
controlnet_conditioning_scale (`float`, *optional*, defaults to 1.0):
Scale for ControlNet conditioning.
control_image_latents (`Tensor`):
The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.
The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
step.
timesteps (`Tensor`):
The timesteps to use for the denoising process. Can be generated in set_timesteps step.
@@ -1255,6 +1293,7 @@ class QwenImageControlNetBeforeDenoiserStep(ModularPipelineBlocks):
controlnet_keep (`List`):
The controlnet keep values
"""
model_name = "qwenimage"
@property
@@ -1274,16 +1313,16 @@ class QwenImageControlNetBeforeDenoiserStep(ModularPipelineBlocks):
InputParam.template("control_guidance_end"),
InputParam.template("controlnet_conditioning_scale"),
InputParam(
name="control_image_latents",
required=True,
type_hint=torch.Tensor,
description="The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step."
name="control_image_latents",
required=True,
type_hint=torch.Tensor,
description="The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.",
),
InputParam(
name="timesteps",
required=True,
type_hint=torch.Tensor,
description="The timesteps to use for the denoising process. Can be generated in set_timesteps step."
name="timesteps",
required=True,
type_hint=torch.Tensor,
description="The timesteps to use for the denoising process. Can be generated in set_timesteps step.",
),
]

View File

@@ -30,10 +30,12 @@ logger = logging.get_logger(__name__)
# after denoising loop (unpack latents)
#auto_docstring
# auto_docstring
class QwenImageAfterDenoiseStep(ModularPipelineBlocks):
"""
Step that unpacks the latents from a 3D tensor (batch_size, sequence_length, channels) into a 5D tensor (batch_size, channels, 1, height, width)
Step that unpacks the latents from a 3D tensor (batch_size, sequence_length, channels) into a 5D tensor
(batch_size, channels, 1, height, width)
Components:
pachifier (`QwenImagePachifier`)
@@ -50,6 +52,7 @@ class QwenImageAfterDenoiseStep(ModularPipelineBlocks):
latents (`Tensor`):
The denoised latents unpacked to B, C, 1, H, W
"""
model_name = "qwenimage"
@property
@@ -70,10 +73,10 @@ class QwenImageAfterDenoiseStep(ModularPipelineBlocks):
InputParam.template("height", required=True),
InputParam.template("width", required=True),
InputParam(
name="latents",
required=True,
type_hint=torch.Tensor,
description="The latents to decode, can be generated in the denoise step."
name="latents",
required=True,
type_hint=torch.Tensor,
description="The latents to decode, can be generated in the denoise step.",
),
]
@@ -81,9 +84,7 @@ class QwenImageAfterDenoiseStep(ModularPipelineBlocks):
def intermediate_outputs(self) -> List[OutputParam]:
return [
OutputParam(
name="latents",
type_hint=torch.Tensor,
description="The denoisedlatents unpacked to B, C, 1, H, W"
name="latents", type_hint=torch.Tensor, description="The denoisedlatents unpacked to B, C, 1, H, W"
),
]
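The unpatchify this step delegates to the pachifier can be sketched as follows, modeled on the QwenImage pipelines' latent unpacking (illustrative only):

def unpack_latents(latents, height, width, vae_scale_factor):
    # (B, seq, C*4) -> (B, C, 1, H, W)
    batch_size, num_patches, channels = latents.shape
    height = 2 * (int(height) // (vae_scale_factor * 2))
    width = 2 * (int(width) // (vae_scale_factor * 2))
    latents = latents.view(batch_size, height // 2, width // 2, channels // 4, 2, 2)
    latents = latents.permute(0, 3, 1, 4, 2, 5)
    return latents.reshape(batch_size, channels // 4, 1, height, width)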
@@ -100,7 +101,7 @@ class QwenImageAfterDenoiseStep(ModularPipelineBlocks):
return components, state
#auto_docstring
# auto_docstring
class QwenImageLayeredAfterDenoiseStep(ModularPipelineBlocks):
"""
Unpack latents from (B, seq, C*4) to (B, C, layers+1, H, W) after denoising.
@@ -122,6 +123,7 @@ class QwenImageLayeredAfterDenoiseStep(ModularPipelineBlocks):
latents (`Tensor`):
Denoised latents. (unpacked to B, C, layers+1, H, W)
"""
model_name = "qwenimage-layered"
@property
@@ -138,10 +140,10 @@ class QwenImageLayeredAfterDenoiseStep(ModularPipelineBlocks):
def inputs(self) -> List[InputParam]:
return [
InputParam(
name="latents",
required=True,
type_hint=torch.Tensor,
description="The denoised latents to decode, can be generated in the denoise step."
name="latents",
required=True,
type_hint=torch.Tensor,
description="The denoised latents to decode, can be generated in the denoise step.",
),
InputParam.template("height", required=True),
InputParam.template("width", required=True),
@@ -173,7 +175,8 @@ class QwenImageLayeredAfterDenoiseStep(ModularPipelineBlocks):
# decode step
#auto_docstring
# auto_docstring
class QwenImageDecoderStep(ModularPipelineBlocks):
"""
Step that decodes the latents to images
@@ -183,12 +186,14 @@ class QwenImageDecoderStep(ModularPipelineBlocks):
Inputs:
latents (`Tensor`):
The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.
The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
step.
Outputs:
images (`List`):
Generated images. (tensor output of the vae decoder.)
"""
model_name = "qwenimage"
@property
@@ -207,10 +212,10 @@ class QwenImageDecoderStep(ModularPipelineBlocks):
def inputs(self) -> List[InputParam]:
return [
InputParam(
name="latents",
required=True,
type_hint=torch.Tensor,
description="The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step."
name="latents",
required=True,
type_hint=torch.Tensor,
description="The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.",
),
]
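The decode itself typically denormalizes with the VAE's channel statistics first; a sketch assuming AutoencoderKLQwenImage's latents_mean/latents_std config (the exact convention is an assumption):

def decode_latents(vae, latents):
    latents_mean = torch.tensor(vae.config.latents_mean).view(1, vae.config.z_dim, 1, 1, 1).to(latents)
    latents_std = torch.tensor(vae.config.latents_std).view(1, vae.config.z_dim, 1, 1, 1).to(latents)
    latents = latents * latents_std + latents_mean
    return vae.decode(latents, return_dict=False)[0][:, :, 0]  # drop the frame dim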
@@ -246,18 +251,18 @@ class QwenImageDecoderStep(ModularPipelineBlocks):
return components, state
#auto_docstring
# auto_docstring
class QwenImageLayeredDecoderStep(ModularPipelineBlocks):
"""
Decode unpacked latents (B, C, layers+1, H, W) into layer images.
Components:
vae (`AutoencoderKLQwenImage`)
image_processor (`VaeImageProcessor`)
vae (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`)
Inputs:
latents (`Tensor`):
The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.
The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
step.
output_type (`str`, *optional*, defaults to pil):
Output format: 'pil', 'np', 'pt'.
@@ -265,6 +270,7 @@ class QwenImageLayeredDecoderStep(ModularPipelineBlocks):
images (`List`):
Generated images.
"""
model_name = "qwenimage-layered"
@property
@@ -287,10 +293,10 @@ class QwenImageLayeredDecoderStep(ModularPipelineBlocks):
def inputs(self) -> List[InputParam]:
return [
InputParam(
name="latents",
required=True,
type_hint=torch.Tensor,
description="The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step."
name="latents",
required=True,
type_hint=torch.Tensor,
description="The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.",
),
InputParam.template("output_type"),
]
@@ -345,7 +351,8 @@ class QwenImageLayeredDecoderStep(ModularPipelineBlocks):
# postprocess the decoded images
#auto_docstring
# auto_docstring
class QwenImageProcessImagesOutputStep(ModularPipelineBlocks):
"""
postprocess the generated image
@@ -363,6 +370,7 @@ class QwenImageProcessImagesOutputStep(ModularPipelineBlocks):
images (`List`):
Generated images.
"""
model_name = "qwenimage"
@property
@@ -384,10 +392,10 @@ class QwenImageProcessImagesOutputStep(ModularPipelineBlocks):
def inputs(self) -> List[InputParam]:
return [
InputParam(
name="images",
required=True,
type_hint=torch.Tensor,
description="the generated image tensor from decoders step"
name="images",
required=True,
type_hint=torch.Tensor,
description="the generated image tensor from decoders step",
),
InputParam.template("output_type"),
]
@@ -416,7 +424,7 @@ class QwenImageProcessImagesOutputStep(ModularPipelineBlocks):
return components, state
#auto_docstring
# auto_docstring
class QwenImageInpaintProcessImagesOutputStep(ModularPipelineBlocks):
"""
postprocess the generated image, optionally apply the mask overlay to the original image.
@@ -430,12 +438,14 @@ class QwenImageInpaintProcessImagesOutputStep(ModularPipelineBlocks):
output_type (`str`, *optional*, defaults to pil):
Output format: 'pil', 'np', 'pt'.
mask_overlay_kwargs (`Dict`, *optional*):
The kwargs for the postprocess step to apply the mask overlay. Generated in InpaintProcessImagesInputStep.
The kwargs for the postprocess step to apply the mask overlay. Generated in
InpaintProcessImagesInputStep.
Outputs:
images (`List`):
Generated images.
"""
model_name = "qwenimage"
@property
@@ -457,16 +467,17 @@ class QwenImageInpaintProcessImagesOutputStep(ModularPipelineBlocks):
def inputs(self) -> List[InputParam]:
return [
InputParam(
name="images",
required=True,
type_hint=torch.Tensor,
description="the generated image tensor from decoders step"
name="images",
required=True,
type_hint=torch.Tensor,
description="the generated image tensor from decoders step",
),
InputParam.template("output_type"),
InputParam(
name="mask_overlay_kwargs",
name="mask_overlay_kwargs",
type_hint=Dict[str, Any],
description="The kwargs for the postprocess step to apply the mask overlay. generated in InpaintProcessImagesInputStep."),
description="The kwargs for the postprocess step to apply the mask overlay. generated in InpaintProcessImagesInputStep.",
),
]
@property

View File

@@ -50,10 +50,10 @@ class QwenImageLoopBeforeDenoiser(ModularPipelineBlocks):
def inputs(self) -> List[InputParam]:
return [
InputParam(
name="latents",
required=True,
type_hint=torch.Tensor,
description="The initial latents to use for the denoising process. Can be generated in prepare_latent step."
name="latents",
required=True,
type_hint=torch.Tensor,
description="The initial latents to use for the denoising process. Can be generated in prepare_latent step.",
),
]
@@ -80,10 +80,10 @@ class QwenImageEditLoopBeforeDenoiser(ModularPipelineBlocks):
def inputs(self) -> List[InputParam]:
return [
InputParam(
name="latents",
required=True,
type_hint=torch.Tensor,
description="The initial latents to use for the denoising process. Can be generated in prepare_latent step."
name="latents",
required=True,
type_hint=torch.Tensor,
description="The initial latents to use for the denoising process. Can be generated in prepare_latent step.",
),
InputParam.template("image_latents"),
]
@@ -131,10 +131,10 @@ class QwenImageLoopBeforeDenoiserControlNet(ModularPipelineBlocks):
),
InputParam.template("controlnet_conditioning_scale", note="updated in prepare_controlnet_inputs step."),
InputParam(
name="controlnet_keep",
required=True,
type_hint=List[float],
description="The controlnet keep values. Can be generated in prepare_controlnet_inputs step."
name="controlnet_keep",
required=True,
type_hint=List[float],
description="The controlnet keep values. Can be generated in prepare_controlnet_inputs step.",
),
]
@@ -467,10 +467,10 @@ class QwenImageDenoiseLoopWrapper(LoopSequentialPipelineBlocks):
def loop_inputs(self) -> List[InputParam]:
return [
InputParam(
name="timesteps",
required=True,
type_hint=torch.Tensor,
description="The timesteps to use for the denoising process. Can be generated in set_timesteps step."
name="timesteps",
required=True,
type_hint=torch.Tensor,
description="The timesteps to use for the denoising process. Can be generated in set_timesteps step.",
),
InputParam.template("num_inference_steps", required=True),
]
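The loop logic all the denoise steps below inherit reduces to roughly the following (a sketch; the real `__call__` also manages the progress bar, and the exact sub-block call signature is an assumption):

def denoise_loop(components, block_state, timesteps, sub_blocks):
    for i, t in enumerate(timesteps):
        for block in sub_blocks.values():
            # before-denoiser -> (controlnet) -> denoiser -> after-denoiser,
            # all reading and writing the shared block state
            components, block_state = block(components, block_state, i=i, t=t)
    return components, block_state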
@@ -505,21 +505,21 @@ class QwenImageDenoiseLoopWrapper(LoopSequentialPipelineBlocks):
# Qwen Image (text2image, image2image)
# auto_docstring
class QwenImageDenoiseStep(QwenImageDenoiseLoopWrapper):
"""
Denoise step that iteratively denoises the latents.
Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method
At each iteration, it runs blocks defined in `sub_blocks` sequentially:
Denoise step that iteratively denoises the latents.
Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method At each iteration, it runs blocks
defined in `sub_blocks` sequentially:
- `QwenImageLoopBeforeDenoiser`
- `QwenImageLoopDenoiser`
- `QwenImageLoopAfterDenoiser`
This block supports text2image and image2image tasks for QwenImage.
Components:
guider (`ClassifierFreeGuidance`)
transformer (`QwenImageTransformer2DModel`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`) scheduler
(`FlowMatchEulerDiscreteScheduler`)
Inputs:
timesteps (`Tensor`):
@@ -539,6 +539,7 @@ class QwenImageDenoiseStep(QwenImageDenoiseLoopWrapper):
latents (`Tensor`):
Denoised latents.
"""
model_name = "qwenimage"
block_classes = [
@@ -551,8 +552,8 @@ class QwenImageDenoiseStep(QwenImageDenoiseLoopWrapper):
@property
def description(self) -> str:
return (
"Denoise step that iteratively denoise the latents. \n"
"Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method \n"
"Denoise step that iteratively denoise the latents.\n"
"Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method\n"
"At each iteration, it runs blocks defined in `sub_blocks` sequencially:\n"
" - `QwenImageLoopBeforeDenoiser`\n"
" - `QwenImageLoopDenoiser`\n"
@@ -565,9 +566,9 @@ class QwenImageDenoiseStep(QwenImageDenoiseLoopWrapper):
# auto_docstring
class QwenImageInpaintDenoiseStep(QwenImageDenoiseLoopWrapper):
"""
Denoise step that iteratively denoises the latents.
Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method
At each iteration, it runs blocks defined in `sub_blocks` sequentially:
Denoise step that iteratively denoises the latents.
Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method At each iteration, it runs blocks
defined in `sub_blocks` sequentially:
- `QwenImageLoopBeforeDenoiser`
- `QwenImageLoopDenoiser`
- `QwenImageLoopAfterDenoiser`
@@ -575,9 +576,8 @@ class QwenImageInpaintDenoiseStep(QwenImageDenoiseLoopWrapper):
This block supports inpainting tasks for QwenImage.
Components:
guider (`ClassifierFreeGuidance`)
transformer (`QwenImageTransformer2DModel`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`) scheduler
(`FlowMatchEulerDiscreteScheduler`)
Inputs:
timesteps (`Tensor`):
@@ -603,6 +603,7 @@ class QwenImageInpaintDenoiseStep(QwenImageDenoiseLoopWrapper):
latents (`Tensor`):
Denoised latents.
"""
model_name = "qwenimage"
block_classes = [
QwenImageLoopBeforeDenoiser,
@@ -630,9 +631,9 @@ class QwenImageInpaintDenoiseStep(QwenImageDenoiseLoopWrapper):
# auto_docstring
class QwenImageControlNetDenoiseStep(QwenImageDenoiseLoopWrapper):
"""
Denoise step that iteratively denoises the latents.
Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method
At each iteration, it runs blocks defined in `sub_blocks` sequentially:
Denoise step that iteratively denoises the latents.
Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method At each iteration, it runs blocks
defined in `sub_blocks` sequentially:
- `QwenImageLoopBeforeDenoiser`
- `QwenImageLoopBeforeDenoiserControlNet`
- `QwenImageLoopDenoiser`
@@ -640,10 +641,8 @@ class QwenImageControlNetDenoiseStep(QwenImageDenoiseLoopWrapper):
This block supports text2img/img2img tasks with controlnet for QwenImage.
Components:
guider (`ClassifierFreeGuidance`)
controlnet (`QwenImageControlNetModel`)
transformer (`QwenImageTransformer2DModel`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
guider (`ClassifierFreeGuidance`) controlnet (`QwenImageControlNetModel`) transformer
(`QwenImageTransformer2DModel`) scheduler (`FlowMatchEulerDiscreteScheduler`)
Inputs:
timesteps (`Tensor`):
@@ -669,6 +668,7 @@ class QwenImageControlNetDenoiseStep(QwenImageDenoiseLoopWrapper):
latents (`Tensor`):
Denoised latents.
"""
model_name = "qwenimage"
block_classes = [
QwenImageLoopBeforeDenoiser,
@@ -696,9 +696,9 @@ class QwenImageControlNetDenoiseStep(QwenImageDenoiseLoopWrapper):
# auto_docstring
class QwenImageInpaintControlNetDenoiseStep(QwenImageDenoiseLoopWrapper):
"""
Denoise step that iteratively denoises the latents.
Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method
At each iteration, it runs blocks defined in `sub_blocks` sequentially:
Denoise step that iteratively denoises the latents.
Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method At each iteration, it runs blocks
defined in `sub_blocks` sequentially:
- `QwenImageLoopBeforeDenoiser`
- `QwenImageLoopBeforeDenoiserControlNet`
- `QwenImageLoopDenoiser`
@@ -707,10 +707,8 @@ class QwenImageInpaintControlNetDenoiseStep(QwenImageDenoiseLoopWrapper):
This block supports inpainting tasks with controlnet for QwenImage.
Components:
guider (`ClassifierFreeGuidance`)
controlnet (`QwenImageControlNetModel`)
transformer (`QwenImageTransformer2DModel`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
guider (`ClassifierFreeGuidance`) controlnet (`QwenImageControlNetModel`) transformer
(`QwenImageTransformer2DModel`) scheduler (`FlowMatchEulerDiscreteScheduler`)
Inputs:
timesteps (`Tensor`):
@@ -742,6 +740,7 @@ class QwenImageInpaintControlNetDenoiseStep(QwenImageDenoiseLoopWrapper):
latents (`Tensor`):
Denoised latents.
"""
model_name = "qwenimage"
block_classes = [
QwenImageLoopBeforeDenoiser,
@@ -777,18 +776,17 @@ class QwenImageInpaintControlNetDenoiseStep(QwenImageDenoiseLoopWrapper):
# auto_docstring
class QwenImageEditDenoiseStep(QwenImageDenoiseLoopWrapper):
"""
Denoise step that iteratively denoises the latents.
Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method
At each iteration, it runs blocks defined in `sub_blocks` sequentially:
Denoise step that iteratively denoises the latents.
Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method At each iteration, it runs blocks
defined in `sub_blocks` sequentially:
- `QwenImageEditLoopBeforeDenoiser`
- `QwenImageEditLoopDenoiser`
- `QwenImageLoopAfterDenoiser`
This block supports QwenImage Edit.
Components:
guider (`ClassifierFreeGuidance`)
transformer (`QwenImageTransformer2DModel`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`) scheduler
(`FlowMatchEulerDiscreteScheduler`)
Inputs:
timesteps (`Tensor`):
@@ -810,6 +808,7 @@ class QwenImageEditDenoiseStep(QwenImageDenoiseLoopWrapper):
latents (`Tensor`):
Denoised latents.
"""
model_name = "qwenimage-edit"
block_classes = [
QwenImageEditLoopBeforeDenoiser,
@@ -835,9 +834,9 @@ class QwenImageEditDenoiseStep(QwenImageDenoiseLoopWrapper):
# auto_docstring
class QwenImageEditInpaintDenoiseStep(QwenImageDenoiseLoopWrapper):
"""
Denoise step that iteratively denoises the latents.
Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method
At each iteration, it runs blocks defined in `sub_blocks` sequentially:
Denoise step that iteratively denoises the latents.
Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method At each iteration, it runs blocks
defined in `sub_blocks` sequentially:
- `QwenImageEditLoopBeforeDenoiser`
- `QwenImageEditLoopDenoiser`
- `QwenImageLoopAfterDenoiser`
@@ -845,9 +844,8 @@ class QwenImageEditInpaintDenoiseStep(QwenImageDenoiseLoopWrapper):
This block supports inpainting tasks for QwenImage Edit.
Components:
guider (`ClassifierFreeGuidance`)
transformer (`QwenImageTransformer2DModel`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`) scheduler
(`FlowMatchEulerDiscreteScheduler`)
Inputs:
timesteps (`Tensor`):
@@ -873,6 +871,7 @@ class QwenImageEditInpaintDenoiseStep(QwenImageDenoiseLoopWrapper):
latents (`Tensor`):
Denoised latents.
"""
model_name = "qwenimage-edit"
block_classes = [
QwenImageEditLoopBeforeDenoiser,
@@ -900,18 +899,17 @@ class QwenImageEditInpaintDenoiseStep(QwenImageDenoiseLoopWrapper):
# auto_docstring
class QwenImageLayeredDenoiseStep(QwenImageDenoiseLoopWrapper):
"""
Denoise step that iteratively denoises the latents.
Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method
At each iteration, it runs blocks defined in `sub_blocks` sequentially:
Denoise step that iteratively denoises the latents.
Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method At each iteration, it runs blocks
defined in `sub_blocks` sequentially:
- `QwenImageEditLoopBeforeDenoiser`
- `QwenImageEditLoopDenoiser`
- `QwenImageLoopAfterDenoiser`
This block supports QwenImage Layered.
Components:
guider (`ClassifierFreeGuidance`)
transformer (`QwenImageTransformer2DModel`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`) scheduler
(`FlowMatchEulerDiscreteScheduler`)
Inputs:
timesteps (`Tensor`):
@@ -933,6 +931,7 @@ class QwenImageLayeredDenoiseStep(QwenImageDenoiseLoopWrapper):
latents (`Tensor`):
Denoised latents.
"""
model_name = "qwenimage-layered"
block_classes = [
QwenImageEditLoopBeforeDenoiser,

View File

@@ -30,7 +30,7 @@ from ...pipelines.qwenimage.pipeline_qwenimage_edit import calculate_dimensions
from ...utils import logging
from ...utils.torch_utils import unwrap_module
from ..modular_pipeline import ModularPipelineBlocks, PipelineState
from ..modular_pipeline_utils import ComponentSpec, ConfigSpec, InputParam, OutputParam
from ..modular_pipeline_utils import ComponentSpec, InputParam, OutputParam
from .modular_pipeline import QwenImageModularPipeline
from .prompt_templates import (
QWENIMAGE_EDIT_PLUS_IMG_TEMPLATE,
@@ -277,6 +277,7 @@ def encode_vae_image(
# In most of our other pipelines, resizing is done as part of the image preprocessing step.
# ====================
# auto_docstring
class QwenImageEditResizeStep(ModularPipelineBlocks):
"""
@@ -293,8 +294,8 @@ class QwenImageEditResizeStep(ModularPipelineBlocks):
resized_image (`List`):
The resized images
"""
model_name = "qwenimage-edit"
model_name = "qwenimage-edit"
@property
def description(self) -> str:
@@ -319,8 +320,8 @@ class QwenImageEditResizeStep(ModularPipelineBlocks):
def intermediate_outputs(self) -> List[OutputParam]:
return [
OutputParam(
name="resized_image",
type_hint=List[PIL.Image.Image],
name="resized_image",
type_hint=List[PIL.Image.Image],
description="The resized images",
),
]
@@ -353,7 +354,8 @@ class QwenImageEditResizeStep(ModularPipelineBlocks):
# auto_docstring
class QwenImageLayeredResizeStep(ModularPipelineBlocks):
"""
Image Resize step that resizes the image to a target area (defined by the resolution parameter from the user) while maintaining the aspect ratio.
Image Resize step that resizes the image to a target area (defined by the resolution parameter from the user)
while maintaining the aspect ratio.
Components:
image_resize_processor (`VaeImageProcessor`)
@@ -368,11 +370,12 @@ class QwenImageLayeredResizeStep(ModularPipelineBlocks):
resized_image (`List`):
The resized images
"""
model_name = "qwenimage-layered"
@property
def description(self) -> str:
return f"Image Resize step that resize the image to a target area (defined by the resolution parameter from user) while maintaining the aspect ratio."
return "Image Resize step that resize the image to a target area (defined by the resolution parameter from user) while maintaining the aspect ratio."
@property
def expected_components(self) -> List[ComponentSpec]:
@@ -399,11 +402,13 @@ class QwenImageLayeredResizeStep(ModularPipelineBlocks):
@property
def intermediate_outputs(self) -> List[OutputParam]:
return [OutputParam(
name="resized_image",
type_hint=List[PIL.Image.Image],
description="The resized images",
)]
return [
OutputParam(
name="resized_image",
type_hint=List[PIL.Image.Image],
description="The resized images",
)
]
@staticmethod
def check_inputs(resolution: int):
@@ -442,8 +447,8 @@ class QwenImageLayeredResizeStep(ModularPipelineBlocks):
class QwenImageEditPlusResizeStep(ModularPipelineBlocks):
"""
Resize images for QwenImage Edit Plus pipeline.
Produces two outputs: resized_image (1024x1024) for VAE encoding, resized_cond_image (384x384) for VL text encoding.
Each image is resized independently based on its own aspect ratio.
Produces two outputs: resized_image (1024x1024) for VAE encoding, resized_cond_image (384x384) for VL text
encoding. Each image is resized independently based on its own aspect ratio.
Components:
image_resize_processor (`VaeImageProcessor`)
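The per-image resizing leans on the `calculate_dimensions` helper imported at the top of this file; roughly (a sketch, with the rounding convention assumed):

import math

def calculate_dimensions(target_area, ratio):
    # pick width/height with the requested aspect ratio whose product is
    # approximately target_area
    width = math.sqrt(target_area * ratio)
    height = width / ratio
    # snap to multiples of 32 (rounding convention assumed)
    width = round(width / 32) * 32
    height = round(height / 32) * 32
    return width, height, None  # third return value unused by the callers here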
@@ -484,7 +489,7 @@ class QwenImageEditPlusResizeStep(ModularPipelineBlocks):
@property
def inputs(self) -> List[InputParam]:
# image
return [InputParam.template("image")]
return [InputParam.template("image")]
@property
def intermediate_outputs(self) -> List[OutputParam]:
@@ -518,13 +523,11 @@ class QwenImageEditPlusResizeStep(ModularPipelineBlocks):
resized_cond_images = []
for image in images:
image_width, image_height = image.size
# For VAE encoder (1024x1024 target area)
vae_width, vae_height, _ = calculate_dimensions(1024 * 1024, image_width / image_height)
resized_images.append(
components.image_resize_processor.resize(image, height=vae_height, width=vae_width)
)
resized_images.append(components.image_resize_processor.resize(image, height=vae_height, width=vae_width))
# For VL text encoder (384x384 target area)
vl_width, vl_height, _ = calculate_dimensions(384 * 384, image_width / image_height)
resized_cond_images.append(
@@ -541,16 +544,16 @@ class QwenImageEditPlusResizeStep(ModularPipelineBlocks):
# 2. GET IMAGE PROMPT
# ====================
# auto_docstring
class QwenImageLayeredGetImagePromptStep(ModularPipelineBlocks):
"""
Auto-caption step that generates a text prompt from the input image if none is provided.
Uses the VL model (text_encoder) to generate a description of the image.
If prompt is already provided, this step passes through unchanged.
Uses the VL model (text_encoder) to generate a description of the image. If prompt is already provided, this step
passes through unchanged.
Components:
text_encoder (`Qwen2_5_VLForConditionalGeneration`)
processor (`Qwen2VLProcessor`)
text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor (`Qwen2VLProcessor`)
Inputs:
prompt (`str`, *optional*):
@@ -590,7 +593,9 @@ class QwenImageLayeredGetImagePromptStep(ModularPipelineBlocks):
@property
def inputs(self) -> List[InputParam]:
return [
InputParam.template("prompt", required=False), # it is not required for qwenimage-layered, unlike other pipelines
InputParam.template(
"prompt", required=False
), # it is not required for qwenimage-layered, unlike other pipelines
InputParam(
name="resized_image",
required=True,
@@ -653,15 +658,15 @@ class QwenImageLayeredGetImagePromptStep(ModularPipelineBlocks):
# 3. TEXT ENCODER
# ====================
# auto_docstring
class QwenImageTextEncoderStep(ModularPipelineBlocks):
"""
Text Encoder step that generates text embeddings to guide the image generation.
Components:
text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use
tokenizer (`Qwen2Tokenizer`): The tokenizer to use
guider (`ClassifierFreeGuidance`)
text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use tokenizer (`Qwen2Tokenizer`):
The tokenizer to use guider (`ClassifierFreeGuidance`)
Inputs:
prompt (`str`):
@@ -681,6 +686,7 @@ class QwenImageTextEncoderStep(ModularPipelineBlocks):
negative_prompt_embeds_mask (`Tensor`):
The negative prompt embeddings mask.
"""
model_name = "qwenimage"
def __init__(self):
@@ -706,7 +712,6 @@ class QwenImageTextEncoderStep(ModularPipelineBlocks):
),
]
@property
def inputs(self) -> List[InputParam]:
return [
@@ -786,12 +791,12 @@ class QwenImageTextEncoderStep(ModularPipelineBlocks):
# auto_docstring
class QwenImageEditTextEncoderStep(ModularPipelineBlocks):
"""
Text Encoder step that processes both prompt and image together to generate text embeddings for guiding image generation.
Text Encoder step that processes both prompt and image together to generate text embeddings for guiding image
generation.
Components:
text_encoder (`Qwen2_5_VLForConditionalGeneration`)
processor (`Qwen2VLProcessor`)
guider (`ClassifierFreeGuidance`)
text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor (`Qwen2VLProcessor`) guider
(`ClassifierFreeGuidance`)
Inputs:
prompt (`str`):
@@ -811,6 +816,7 @@ class QwenImageEditTextEncoderStep(ModularPipelineBlocks):
negative_prompt_embeds_mask (`Tensor`):
The negative prompt embeddings mask.
"""
model_name = "qwenimage"
def __init__(self):
@@ -835,7 +841,6 @@ class QwenImageEditTextEncoderStep(ModularPipelineBlocks):
),
]
@property
def inputs(self) -> List[InputParam]:
return [
@@ -909,12 +914,12 @@ class QwenImageEditTextEncoderStep(ModularPipelineBlocks):
# auto_docstring
class QwenImageEditPlusTextEncoderStep(ModularPipelineBlocks):
"""
Text Encoder step for QwenImage Edit Plus that processes prompt and multiple images together to generate text embeddings for guiding image generation.
Text Encoder step for QwenImage Edit Plus that processes prompt and multiple images together to generate text
embeddings for guiding image generation.
Components:
text_encoder (`Qwen2_5_VLForConditionalGeneration`)
processor (`Qwen2VLProcessor`)
guider (`ClassifierFreeGuidance`)
text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor (`Qwen2VLProcessor`) guider
(`ClassifierFreeGuidance`)
Inputs:
prompt (`str`):
@@ -922,7 +927,8 @@ class QwenImageEditPlusTextEncoderStep(ModularPipelineBlocks):
negative_prompt (`str`, *optional*):
The prompt or prompts not to guide the image generation.
resized_cond_image (`Tensor`):
The image(s) to encode, can be a single image or list of images, should be resized to 384x384 using resize step
The image(s) to encode, can be a single image or list of images, should be resized to 384x384 using
resize step
Outputs:
prompt_embeds (`Tensor`):
@@ -963,7 +969,6 @@ class QwenImageEditPlusTextEncoderStep(ModularPipelineBlocks):
),
]
@property
def inputs(self) -> List[InputParam]:
return [
@@ -1042,10 +1047,12 @@ class QwenImageEditPlusTextEncoderStep(ModularPipelineBlocks):
# 4. IMAGE PREPROCESS
# ====================
# auto_docstring
class QwenImageInpaintProcessImagesInputStep(ModularPipelineBlocks):
"""
Image Preprocess step for inpainting task. This processes the image and mask inputs together. Images will be resized to the given height and width.
Image Preprocess step for inpainting task. This processes the image and mask inputs together. Images will be
resized to the given height and width.
Components:
image_mask_processor (`InpaintProcessor`)
@@ -1070,6 +1077,7 @@ class QwenImageInpaintProcessImagesInputStep(ModularPipelineBlocks):
mask_overlay_kwargs (`Dict`):
The kwargs for the postprocess step to apply the mask overlay
"""
model_name = "qwenimage"
@property
@@ -1152,7 +1160,8 @@ class QwenImageInpaintProcessImagesInputStep(ModularPipelineBlocks):
# auto_docstring
class QwenImageEditInpaintProcessImagesInputStep(ModularPipelineBlocks):
"""
Image Preprocess step for inpainting task. This processes the image and mask inputs together. Images should be resized first.
Image Preprocess step for inpainting task. This processes the image and mask inputs together. Images should be
resized first.
Components:
image_mask_processor (`InpaintProcessor`)
@@ -1173,6 +1182,7 @@ class QwenImageEditInpaintProcessImagesInputStep(ModularPipelineBlocks):
mask_overlay_kwargs (`Dict`):
The kwargs for the postprocess step to apply the mask overlay
"""
model_name = "qwenimage-edit"
@property
@@ -1206,11 +1216,7 @@ class QwenImageEditInpaintProcessImagesInputStep(ModularPipelineBlocks):
@property
def intermediate_outputs(self) -> List[OutputParam]:
return [
OutputParam(
name="processed_image",
type_hint=torch.Tensor,
description="The processed image"
),
OutputParam(name="processed_image", type_hint=torch.Tensor, description="The processed image"),
OutputParam(
name="processed_mask_image",
type_hint=torch.Tensor,
@@ -1263,6 +1269,7 @@ class QwenImageProcessImagesInputStep(ModularPipelineBlocks):
processed_image (`Tensor`):
The processed image
"""
model_name = "qwenimage"
@property
@@ -1290,11 +1297,13 @@ class QwenImageProcessImagesInputStep(ModularPipelineBlocks):
@property
def intermediate_outputs(self) -> List[OutputParam]:
return [OutputParam(
name="processed_image",
type_hint=torch.Tensor,
description="The processed image",
)]
return [
OutputParam(
name="processed_image",
type_hint=torch.Tensor,
description="The processed image",
)
]
@staticmethod
def check_inputs(height, width, vae_scale_factor):
@@ -1340,6 +1349,7 @@ class QwenImageEditProcessImagesInputStep(ModularPipelineBlocks):
processed_image (`Tensor`):
The processed image
"""
model_name = "qwenimage-edit"
@property
@@ -1361,7 +1371,7 @@ class QwenImageEditProcessImagesInputStep(ModularPipelineBlocks):
def inputs(self) -> List[InputParam]:
return [
InputParam(
name="resized_image",
name="resized_image",
required=True,
type_hint=List[PIL.Image.Image],
description="The resized image. should be generated using a resize step",
@@ -1370,11 +1380,13 @@ class QwenImageEditProcessImagesInputStep(ModularPipelineBlocks):
@property
def intermediate_outputs(self) -> List[OutputParam]:
return [OutputParam(
name="processed_image",
type_hint=torch.Tensor,
description="The processed image",
)]
return [
OutputParam(
name="processed_image",
type_hint=torch.Tensor,
description="The processed image",
)
]
@torch.no_grad()
def __call__(self, components: QwenImageModularPipeline, state: PipelineState):
@@ -1395,7 +1407,8 @@ class QwenImageEditProcessImagesInputStep(ModularPipelineBlocks):
# auto_docstring
class QwenImageEditPlusProcessImagesInputStep(ModularPipelineBlocks):
"""
Image Preprocess step. Images can be resized first. If a list of images is provided, will return a list of processed images.
Image Preprocess step. Images can be resized first. If a list of images is provided, will return a list of
processed images.
Components:
image_processor (`VaeImageProcessor`)
@@ -1408,6 +1421,7 @@ class QwenImageEditPlusProcessImagesInputStep(ModularPipelineBlocks):
processed_image (`Tensor`):
The processed image
"""
model_name = "qwenimage-edit-plus"
@property
@@ -1427,20 +1441,24 @@ class QwenImageEditPlusProcessImagesInputStep(ModularPipelineBlocks):
@property
def inputs(self) -> List[InputParam]:
return [InputParam(
name="resized_image",
required=True,
type_hint=List[PIL.Image.Image],
description="The resized image. should be generated using a resize step",
)]
return [
InputParam(
name="resized_image",
required=True,
type_hint=List[PIL.Image.Image],
description="The resized image. should be generated using a resize step",
)
]
@property
def intermediate_outputs(self) -> List[OutputParam]:
return [OutputParam(
name="processed_image",
type_hint=torch.Tensor,
description="The processed image",
)]
return [
OutputParam(
name="processed_image",
type_hint=torch.Tensor,
description="The processed image",
)
]
@torch.no_grad()
def __call__(self, components: QwenImageModularPipeline, state: PipelineState):
@@ -1472,6 +1490,7 @@ class QwenImageEditPlusProcessImagesInputStep(ModularPipelineBlocks):
# 5. VAE ENCODER
# ====================
# auto_docstring
class QwenImageVaeEncoderStep(ModularPipelineBlocks):
"""
@@ -1509,7 +1528,9 @@ class QwenImageVaeEncoderStep(ModularPipelineBlocks):
output (OutputParam, optional): Output parameter for the image latents. Defaults to "image_latents".
"""
if input is None:
input = InputParam(name="processed_image", required=True, type_hint=torch.Tensor, description="The image tensor to encode")
input = InputParam(
name="processed_image", required=True, type_hint=torch.Tensor, description="The image tensor to encode"
)
if output is None:
output = OutputParam.template("image_latents")
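For orientation, the `encode_vae_image` helper shown in an earlier hunk typically looks like this (a sketch modeled on the QwenImage pipelines; the normalization names are assumptions):

def encode_vae_image(vae, image, generator):
    # add a frame dim for the 3D VAE: (B, C, H, W) -> (B, C, 1, H, W)
    latents = vae.encode(image.unsqueeze(2)).latent_dist.sample(generator=generator)
    latents_mean = torch.tensor(vae.config.latents_mean).view(1, vae.config.z_dim, 1, 1, 1).to(latents)
    latents_std = torch.tensor(vae.config.latents_std).view(1, vae.config.z_dim, 1, 1, 1).to(latents)
    return (latents - latents_mean) / latents_std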
@@ -1539,13 +1560,13 @@ class QwenImageVaeEncoderStep(ModularPipelineBlocks):
@property
def inputs(self) -> List[InputParam]:
return [
self._input, # default is "processed_image"
self._input, # default is "processed_image"
InputParam.template("generator"),
]
@property
def intermediate_outputs(self) -> List[OutputParam]:
return [self._output] # default is "image_latents"
return [self._output] # default is "image_latents"
@torch.no_grad()
def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -> PipelineState:
@@ -1588,9 +1609,8 @@ class QwenImageControlNetVaeEncoderStep(ModularPipelineBlocks):
VAE Encoder step that converts `control_image` into latent representations control_image_latents.
Components:
vae (`AutoencoderKLQwenImage`)
controlnet (`QwenImageControlNetModel`)
control_image_processor (`VaeImageProcessor`)
vae (`AutoencoderKLQwenImage`) controlnet (`QwenImageControlNetModel`) control_image_processor
(`VaeImageProcessor`)
Inputs:
control_image (`Image`):
@@ -1606,6 +1626,7 @@ class QwenImageControlNetVaeEncoderStep(ModularPipelineBlocks):
control_image_latents (`Tensor`):
The latents representing the control image
"""
model_name = "qwenimage"
@property
@@ -1720,6 +1741,7 @@ class QwenImageControlNetVaeEncoderStep(ModularPipelineBlocks):
# 6. PERMUTE LATENTS
# ====================
# auto_docstring
class QwenImageLayeredPermuteLatentsStep(ModularPipelineBlocks):
"""
@@ -1733,11 +1755,12 @@ class QwenImageLayeredPermuteLatentsStep(ModularPipelineBlocks):
image_latents (`Tensor`):
The latent representation of the input image. (permuted from [B, C, 1, H, W] to [B, 1, C, H, W])
"""
model_name = "qwenimage-layered"
@property
def description(self) -> str:
return f"Permute image latents from (B, C, 1, H, W) to (B, 1, C, H, W) for Layered packing."
return "Permute image latents from (B, C, 1, H, W) to (B, 1, C, H, W) for Layered packing."
@property
def inputs(self) -> List[InputParam]:
@@ -1760,4 +1783,4 @@ class QwenImageLayeredPermuteLatentsStep(ModularPipelineBlocks):
block_state.image_latents = latents.permute(0, 2, 1, 3, 4)
self.set_block_state(state, block_state)
return components, state
return components, state

View File

@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Tuple, Optional
from typing import List, Optional, Tuple
import torch
@@ -117,7 +117,8 @@ class QwenImageTextInputsStep(ModularPipelineBlocks):
1. Determines `batch_size` and `dtype` based on `prompt_embeds`
2. Ensures all text embeddings have consistent batch sizes (batch_size * num_images_per_prompt)
This block should be placed after all encoder steps to process the text embeddings before they are used in subsequent pipeline steps.
This block should be placed after all encoder steps to process the text embeddings before they are used in
subsequent pipeline steps.
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -145,6 +146,7 @@ class QwenImageTextInputsStep(ModularPipelineBlocks):
negative_prompt_embeds_mask (`Tensor`):
The negative prompt embeddings mask. (batch-expanded)
"""
model_name = "qwenimage"
@property
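The two jobs listed in the docstring amount to roughly the following (a sketch mirroring the description, not the block's literal code):

def expand_text_inputs(prompt_embeds, prompt_embeds_mask, num_images_per_prompt):
    batch_size = prompt_embeds.shape[0]  # number of prompts
    dtype = prompt_embeds.dtype
    # expand every text embedding to batch_size * num_images_per_prompt
    prompt_embeds = prompt_embeds.repeat_interleave(num_images_per_prompt, dim=0)
    if prompt_embeds_mask is not None:
        prompt_embeds_mask = prompt_embeds_mask.repeat_interleave(num_images_per_prompt, dim=0)
    return prompt_embeds, prompt_embeds_mask, batch_size, dtype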
@@ -271,8 +273,8 @@ class QwenImageAdditionalInputsStep(ModularPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
batch_size (`int`, *optional*, defaults to 1):
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
generated in input step.
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
be generated in input step.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
@@ -300,7 +302,7 @@ class QwenImageAdditionalInputsStep(ModularPipelineBlocks):
self,
image_latent_inputs: Optional[List[InputParam]] = None,
additional_batch_inputs: Optional[List[InputParam]] = None,
):
):
# by default, process `image_latents`
if image_latent_inputs is None:
image_latent_inputs = [InputParam.template("image_latents")]
@@ -319,7 +321,9 @@ class QwenImageAdditionalInputsStep(ModularPipelineBlocks):
else:
for input_param in additional_batch_inputs:
if not isinstance(input_param, InputParam):
raise ValueError(f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}")
raise ValueError(
f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}"
)
self._image_latent_inputs = image_latent_inputs
self._additional_batch_inputs = additional_batch_inputs
@@ -376,13 +380,17 @@ class QwenImageAdditionalInputsStep(ModularPipelineBlocks):
name="image_width",
type_hint=int,
description="The image width calculated from the image latents dimension",
)
),
]
# `height`/`width` are not new outputs, but they will be updated if any image latent inputs are provided
if len(self._image_latent_inputs) > 0:
outputs.append(OutputParam(name="height", type_hint=int, description="if not provided, updated to image height"))
outputs.append(OutputParam(name="width", type_hint=int, description="if not provided, updated to image width"))
outputs.append(
OutputParam(name="height", type_hint=int, description="if not provided, updated to image height")
)
outputs.append(
OutputParam(name="width", type_hint=int, description="if not provided, updated to image width")
)
# image latent inputs are modified in place (patchified and batch-expanded)
for input_param in self._image_latent_inputs:
@@ -479,8 +487,8 @@ class QwenImageEditPlusAdditionalInputsStep(ModularPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
batch_size (`int`, *optional*, defaults to 1):
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
generated in input step.
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
be generated in input step.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
@@ -526,7 +534,9 @@ class QwenImageEditPlusAdditionalInputsStep(ModularPipelineBlocks):
else:
for input_param in additional_batch_inputs:
if not isinstance(input_param, InputParam):
raise ValueError(f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}")
raise ValueError(
f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}"
)
self._image_latent_inputs = image_latent_inputs
self._additional_batch_inputs = additional_batch_inputs
@@ -587,11 +597,15 @@ class QwenImageEditPlusAdditionalInputsStep(ModularPipelineBlocks):
description="The image widths calculated from the image latents dimension",
),
]
# `height`/`width` are updated if any image latent inputs are provided
if len(self._image_latent_inputs) > 0:
outputs.append(OutputParam(name="height", type_hint=int, description="if not provided, updated to image height"))
outputs.append(OutputParam(name="width", type_hint=int, description="if not provided, updated to image width"))
outputs.append(
OutputParam(name="height", type_hint=int, description="if not provided, updated to image height")
)
outputs.append(
OutputParam(name="width", type_hint=int, description="if not provided, updated to image width")
)
# image latent inputs are modified in place (patchified, concatenated, and batch-expanded)
for input_param in self._image_latent_inputs:
@@ -686,11 +700,13 @@ class QwenImageEditPlusAdditionalInputsStep(ModularPipelineBlocks):
# same as QwenImageAdditionalInputsStep, but with layered pachifier.
# auto_docstring
class QwenImageLayeredAdditionalInputsStep(ModularPipelineBlocks):
"""
Input processing step for Layered that:
1. For image latent inputs: Updates height/width if None, patchifies with layered pachifier, and expands batch size
1. For image latent inputs: Updates height/width if None, patchifies with layered pachifier, and expands batch
size
2. For additional batch inputs: Expands batch dimensions to match final batch size
Configured inputs:
@@ -705,8 +721,8 @@ class QwenImageLayeredAdditionalInputsStep(ModularPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
batch_size (`int`, *optional*, defaults to 1):
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
generated in input step.
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
be generated in input step.
image_latents (`Tensor`):
image latents used to guide the image generation. Can be generated from vae_encoder step.
@@ -720,8 +736,8 @@ class QwenImageLayeredAdditionalInputsStep(ModularPipelineBlocks):
width (`int`):
if not provided, updated to image width
image_latents (`Tensor`):
image latents used to guide the image generation. Can be generated from vae_encoder step. (patchified with layered
pachifier and batch-expanded)
image latents used to guide the image generation. Can be generated from vae_encoder step. (patchified
with layered pachifier and batch-expanded)
"""
model_name = "qwenimage-layered"
@@ -748,7 +764,9 @@ class QwenImageLayeredAdditionalInputsStep(ModularPipelineBlocks):
else:
for input_param in additional_batch_inputs:
if not isinstance(input_param, InputParam):
raise ValueError(f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}")
raise ValueError(
f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}"
)
self._image_latent_inputs = image_latent_inputs
self._additional_batch_inputs = additional_batch_inputs
@@ -808,8 +826,12 @@ class QwenImageLayeredAdditionalInputsStep(ModularPipelineBlocks):
]
if len(self._image_latent_inputs) > 0:
outputs.append(OutputParam(name="height", type_hint=int, description="if not provided, updated to image height"))
outputs.append(OutputParam(name="width", type_hint=int, description="if not provided, updated to image width"))
outputs.append(
OutputParam(name="height", type_hint=int, description="if not provided, updated to image height")
)
outputs.append(
OutputParam(name="width", type_hint=int, description="if not provided, updated to image width")
)
# Add outputs for image latent inputs (patchified with layered pachifier and batch-expanded)
for input_param in self._image_latent_inputs:
@@ -895,10 +917,11 @@ class QwenImageControlNetInputsStep(ModularPipelineBlocks):
Inputs:
control_image_latents (`Tensor`):
The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.
The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
step.
batch_size (`int`, *optional*, defaults to 1):
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
generated in input step.
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
be generated in input step.
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
height (`int`, *optional*):
@@ -914,6 +937,7 @@ class QwenImageControlNetInputsStep(ModularPipelineBlocks):
width (`int`):
if not provided, updated to control image width
"""
model_name = "qwenimage"
@property
@@ -923,17 +947,26 @@ class QwenImageControlNetInputsStep(ModularPipelineBlocks):
@property
def inputs(self) -> List[InputParam]:
return [
InputParam(name="control_image_latents", required=True, type_hint=torch.Tensor, description="The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step."),
InputParam(
name="control_image_latents",
required=True,
type_hint=torch.Tensor,
description="The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.",
),
InputParam.template("batch_size"),
InputParam.template("num_images_per_prompt"),
InputParam.template("height"),
InputParam.template("width"),
]
@property
def intermediate_outputs(self) -> List[OutputParam]:
return [
OutputParam(name="control_image_latents", type_hint=torch.Tensor, description="The control image latents (patchified and batch-expanded)."),
OutputParam(
name="control_image_latents",
type_hint=torch.Tensor,
description="The control image latents (patchified and batch-expanded).",
),
OutputParam(name="height", type_hint=int, description="if not provided, updated to control image height"),
OutputParam(name="width", type_hint=int, description="if not provided, updated to control image width"),
]
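The `InputParam.template(...)` calls above pull shared parameter definitions from a registry; a hedged sketch of the pattern (the import path is an assumption):

# Import path assumed; adjust to your diffusers version.
from diffusers.modular_pipelines.modular_pipeline_utils import InputParam

height = InputParam.template("height")  # reuse the shared definition as-is
batch_size = InputParam.template("batch_size", note="computed in the input step")  # append a note
# Unknown template names raise ValueError.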

View File

@@ -13,9 +13,10 @@
# limitations under the License.
import torch
from ...utils import logging
from ..modular_pipeline import AutoPipelineBlocks, ConditionalPipelineBlocks, SequentialPipelineBlocks
from ..modular_pipeline_utils import InsertableDict, OutputParam, InputParam
from ..modular_pipeline_utils import InputParam, InsertableDict, OutputParam
from .before_denoise import (
QwenImageControlNetBeforeDenoiserStep,
QwenImageCreateMaskLatentsStep,
@@ -65,9 +66,8 @@ class QwenImageAutoTextEncoderStep(AutoPipelineBlocks):
Text encoder step that encodes the text prompt into a text embedding. This is an auto pipeline block.
Components:
text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use
tokenizer (`Qwen2Tokenizer`): The tokenizer to use
guider (`ClassifierFreeGuidance`)
text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use tokenizer (`Qwen2Tokenizer`):
The tokenizer to use guider (`ClassifierFreeGuidance`)
Inputs:
prompt (`str`, *optional*):
@@ -114,8 +114,7 @@ class QwenImageInpaintVaeEncoderStep(SequentialPipelineBlocks):
- Creates `image_latents`.
Components:
image_mask_processor (`InpaintProcessor`)
vae (`AutoencoderKLQwenImage`)
image_mask_processor (`InpaintProcessor`) vae (`AutoencoderKLQwenImage`)
Inputs:
mask_image (`Image`):
@@ -162,8 +161,7 @@ class QwenImageImg2ImgVaeEncoderStep(SequentialPipelineBlocks):
Vae encoder step that preprocesses and encodes the image inputs into their latent representations.
Components:
image_processor (`VaeImageProcessor`)
vae (`AutoencoderKLQwenImage`)
image_processor (`VaeImageProcessor`) vae (`AutoencoderKLQwenImage`)
Inputs:
image (`Union[Image, List]`):
@@ -218,9 +216,8 @@ class QwenImageOptionalControlNetVaeEncoderStep(AutoPipelineBlocks):
- if `control_image` is not provided, the step will be skipped.
Components:
vae (`AutoencoderKLQwenImage`)
controlnet (`QwenImageControlNetModel`)
control_image_processor (`VaeImageProcessor`)
vae (`AutoencoderKLQwenImage`) controlnet (`QwenImageControlNetModel`) control_image_processor
(`VaeImageProcessor`)
Inputs:
control_image (`Image`, *optional*):
@@ -380,7 +377,9 @@ class QwenImageInpaintInputStep(SequentialPipelineBlocks):
block_classes = [
QwenImageTextInputsStep(),
QwenImageAdditionalInputsStep(
additional_batch_inputs=[InputParam(name="processed_mask_image", type_hint=torch.Tensor, description="The processed mask image")]
additional_batch_inputs=[
InputParam(name="processed_mask_image", type_hint=torch.Tensor, description="The processed mask image")
]
),
]
block_names = ["text_inputs", "additional_inputs"]
@@ -401,15 +400,14 @@ class QwenImageInpaintPrepareLatentsStep(SequentialPipelineBlocks):
- Create the patchified latents `mask` based on the processed mask image.
Components:
scheduler (`FlowMatchEulerDiscreteScheduler`)
pachifier (`QwenImagePachifier`)
scheduler (`FlowMatchEulerDiscreteScheduler`) pachifier (`QwenImagePachifier`)
Inputs:
latents (`Tensor`):
The initial random noise, can be generated in the prepare latents step.
image_latents (`Tensor`):
image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be generated from
vae encoder and updated in input step.)
image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be
generated from vae encoder and updated in input step.)
timesteps (`Tensor`):
The timesteps to use for the denoising process. Can be generated in set_timesteps step.
processed_mask_image (`Tensor`):
@@ -450,13 +448,12 @@ class QwenImageInpaintPrepareLatentsStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
"""
Step that denoises noise into an image for the text2image task. It includes the denoise loop, as well as preparing the inputs (timesteps, latents, rope inputs etc.).
Step that denoises noise into an image for the text2image task. It includes the denoise loop, as well as preparing
the inputs (timesteps, latents, rope inputs etc.).
Components:
pachifier (`QwenImagePachifier`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
guider (`ClassifierFreeGuidance`)
transformer (`QwenImageTransformer2DModel`)
pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
(`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -524,13 +521,12 @@ class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageInpaintCoreDenoiseStep(SequentialPipelineBlocks):
"""
Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for the inpaint task.
Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for the
inpaint task.
Components:
pachifier (`QwenImagePachifier`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
guider (`ClassifierFreeGuidance`)
transformer (`QwenImageTransformer2DModel`)
pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
(`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -606,13 +602,12 @@ class QwenImageInpaintCoreDenoiseStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
"""
Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for the img2img task.
Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for the
img2img task.
Components:
pachifier (`QwenImagePachifier`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
guider (`ClassifierFreeGuidance`)
transformer (`QwenImageTransformer2DModel`)
pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
(`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -686,14 +681,12 @@ class QwenImageImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
"""
Step that denoises noise into an image for the text2image task. It includes the denoise loop, as well as preparing the inputs (timesteps, latents, rope inputs etc.).
Step that denoises noise into an image for the text2image task. It includes the denoise loop, as well as preparing
the inputs (timesteps, latents, rope inputs etc.).
Components:
pachifier (`QwenImagePachifier`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
controlnet (`QwenImageControlNetModel`)
guider (`ClassifierFreeGuidance`)
transformer (`QwenImageTransformer2DModel`)
pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) controlnet
(`QwenImageControlNetModel`) guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -707,7 +700,8 @@ class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
negative_prompt_embeds_mask (`Tensor`, *optional*):
mask for the negative text embeddings. Can be generated from text_encoder step.
control_image_latents (`Tensor`):
The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.
The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
step.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
@@ -773,14 +767,12 @@ class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
"""
Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for the inpaint task.
Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for the
inpaint task.
Components:
pachifier (`QwenImagePachifier`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
controlnet (`QwenImageControlNetModel`)
guider (`ClassifierFreeGuidance`)
transformer (`QwenImageTransformer2DModel`)
pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) controlnet
(`QwenImageControlNetModel`) guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -802,7 +794,8 @@ class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
processed_mask_image (`Tensor`, *optional*):
The processed mask image
control_image_latents (`Tensor`):
The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.
The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
step.
latents (`Tensor`, *optional*):
Pre-generated noisy latents for image generation.
generator (`Generator`, *optional*):
@@ -868,14 +861,12 @@ class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageControlNetImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
"""
Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for the img2img task.
Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for the
img2img task.
Components:
pachifier (`QwenImagePachifier`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
controlnet (`QwenImageControlNetModel`)
guider (`ClassifierFreeGuidance`)
transformer (`QwenImageTransformer2DModel`)
pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) controlnet
(`QwenImageControlNetModel`) guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -895,7 +886,8 @@ class QwenImageControlNetImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
image_latents (`Tensor`):
image latents used to guide the image generation. Can be generated from vae_encoder step.
control_image_latents (`Tensor`):
The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.
The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
step.
latents (`Tensor`, *optional*):
Pre-generated noisy latents for image generation.
generator (`Generator`, *optional*):
@@ -1030,12 +1022,12 @@ class QwenImageDecodeStep(SequentialPipelineBlocks):
Decode step that decodes the latents to images and postprocesses the generated image.
Components:
vae (`AutoencoderKLQwenImage`)
image_processor (`VaeImageProcessor`)
vae (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`)
Inputs:
latents (`Tensor`):
The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.
The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
step.
output_type (`str`, *optional*, defaults to pil):
Output format: 'pil', 'np', 'pt'.
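Schematically, a decode step unpacks the patchified latents back to a spatial grid, runs the VAE decoder, and post-processes to the requested `output_type`. A simplified sketch under the same 8x/2x2 assumptions as earlier; the real QwenImage latents carry an extra frame dimension that is elided here:

import torch

def decode_sketch(vae, image_processor, latents, height, width, output_type="pil"):
    # latents: (batch, seq_len, 4 * channels) -> (batch, channels, height/8, width/8)
    b, seq_len, dim = latents.shape
    h, w = height // 16, width // 16  # latent grid before unpatchify (8x VAE, 2x2 patches)
    latents = (
        latents.reshape(b, h, w, dim // 4, 2, 2)
        .permute(0, 3, 1, 4, 2, 5)
        .reshape(b, dim // 4, h * 2, w * 2)
    )
    image = vae.decode(latents).sample  # standard diffusers VAE call
    return image_processor.postprocess(image, output_type=output_type)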
@@ -1057,19 +1049,21 @@ class QwenImageDecodeStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageInpaintDecodeStep(SequentialPipelineBlocks):
"""
Decode step that decodes the latents to images and postprocesses the generated image, optionally applying the mask overlay to the original image.
Decode step that decodes the latents to images and postprocesses the generated image, optionally applying the mask
overlay to the original image.
Components:
vae (`AutoencoderKLQwenImage`)
image_mask_processor (`InpaintProcessor`)
vae (`AutoencoderKLQwenImage`) image_mask_processor (`InpaintProcessor`)
Inputs:
latents (`Tensor`):
The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.
The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
step.
output_type (`str`, *optional*, defaults to pil):
Output format: 'pil', 'np', 'pt'.
mask_overlay_kwargs (`Dict`, *optional*):
The kwargs for the postprocess step to apply the mask overlay. Generated in InpaintProcessImagesInputStep.
The kwargs for the postprocess step to apply the mask overlay. Generated in
InpaintProcessImagesInputStep.
Outputs:
images (`List`):
@@ -1125,17 +1119,11 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
- for text-to-image generation, all you need to provide is `prompt`
Components:
text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use
tokenizer (`Qwen2Tokenizer`): The tokenizer to use
guider (`ClassifierFreeGuidance`)
image_mask_processor (`InpaintProcessor`)
vae (`AutoencoderKLQwenImage`)
image_processor (`VaeImageProcessor`)
controlnet (`QwenImageControlNetModel`)
control_image_processor (`VaeImageProcessor`)
pachifier (`QwenImagePachifier`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
transformer (`QwenImageTransformer2DModel`)
text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use tokenizer (`Qwen2Tokenizer`):
The tokenizer to use guider (`ClassifierFreeGuidance`) image_mask_processor (`InpaintProcessor`) vae
(`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`) controlnet (`QwenImageControlNetModel`)
control_image_processor (`VaeImageProcessor`) pachifier (`QwenImagePachifier`) scheduler
(`FlowMatchEulerDiscreteScheduler`) transformer (`QwenImageTransformer2DModel`)
Inputs:
prompt (`str`, *optional*):
@@ -1185,7 +1173,8 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
strength (`float`, *optional*, defaults to 0.9):
Strength for img2img/inpainting.
control_image_latents (`Tensor`, *optional*):
The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.
The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
step.
control_guidance_start (`float`, *optional*, defaults to 0.0):
When to start applying ControlNet.
control_guidance_end (`float`, *optional*, defaults to 1.0):
@@ -1195,7 +1184,8 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
output_type (`str`, *optional*, defaults to pil):
Output format: 'pil', 'np', 'pt'.
mask_overlay_kwargs (`Dict`, *optional*):
The kwargs for the postprocess step to apply the mask overlay. Generated in InpaintProcessImagesInputStep.
The kwargs for the postprocess step to apply the mask overlay. Generated in
InpaintProcessImagesInputStep.
Outputs:
images (`List`):
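Tying the auto blocks together, a hedged end-to-end sketch; `init_pipeline`, `load_components`, the `output="images"` call convention, and the repo id are assumptions about the modular-pipelines API and may differ across diffusers versions:

import torch

blocks = QwenImageAutoBlocks()
pipe = blocks.init_pipeline("Qwen/Qwen-Image")  # repo id assumed
pipe.load_components(torch_dtype=torch.bfloat16)
pipe.to("cuda")

# text-to-image: only `prompt` is required
image = pipe(prompt="a red fox in the snow", output="images")[0]
# img2img/inpainting would additionally pass image=, mask_image=, and strength=.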

View File

@@ -13,11 +13,12 @@
# limitations under the License.
from typing import Optional
import torch
from ...utils import logging
from ..modular_pipeline import AutoPipelineBlocks, ConditionalPipelineBlocks, SequentialPipelineBlocks
from ..modular_pipeline_utils import InsertableDict, OutputParam, InputParam
from ..modular_pipeline_utils import InputParam, InsertableDict, OutputParam
from .before_denoise import (
QwenImageCreateMaskLatentsStep,
QwenImageEditRoPEInputsStep,
@@ -63,10 +64,8 @@ class QwenImageEditVLEncoderStep(SequentialPipelineBlocks):
QwenImage-Edit VL encoder step that encodes the image and text prompts together.
Components:
image_resize_processor (`VaeImageProcessor`)
text_encoder (`Qwen2_5_VLForConditionalGeneration`)
processor (`Qwen2VLProcessor`)
guider (`ClassifierFreeGuidance`)
image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor
(`Qwen2VLProcessor`) guider (`ClassifierFreeGuidance`)
Inputs:
image (`Union[Image, List]`):
@@ -113,9 +112,8 @@ class QwenImageEditVaeEncoderStep(SequentialPipelineBlocks):
Vae encoder step that encodes the image inputs into their latent representations.
Components:
image_resize_processor (`VaeImageProcessor`)
image_processor (`VaeImageProcessor`)
vae (`AutoencoderKLQwenImage`)
image_resize_processor (`VaeImageProcessor`) image_processor (`VaeImageProcessor`) vae
(`AutoencoderKLQwenImage`)
Inputs:
image (`Union[Image, List]`):
@@ -155,9 +153,8 @@ class QwenImageEditInpaintVaeEncoderStep(SequentialPipelineBlocks):
- create image latents.
Components:
image_resize_processor (`VaeImageProcessor`)
image_mask_processor (`InpaintProcessor`)
vae (`AutoencoderKLQwenImage`)
image_resize_processor (`VaeImageProcessor`) image_mask_processor (`InpaintProcessor`) vae
(`AutoencoderKLQwenImage`)
Inputs:
image (`Union[Image, List]`):
@@ -354,7 +351,10 @@ class QwenImageEditInpaintInputStep(SequentialPipelineBlocks):
model_name = "qwenimage-edit"
block_classes = [
QwenImageTextInputsStep(),
QwenImageAdditionalInputsStep(additional_batch_inputs=[InputParam(name="processed_mask_image", type_hint=torch.Tensor, description="The processed mask image")]
QwenImageAdditionalInputsStep(
additional_batch_inputs=[
InputParam(name="processed_mask_image", type_hint=torch.Tensor, description="The processed mask image")
]
),
]
block_names = ["text_inputs", "additional_inputs"]
@@ -377,15 +377,14 @@ class QwenImageEditInpaintPrepareLatentsStep(SequentialPipelineBlocks):
- Create the patchified latents `mask` based on the processed mask image.
Components:
scheduler (`FlowMatchEulerDiscreteScheduler`)
pachifier (`QwenImagePachifier`)
scheduler (`FlowMatchEulerDiscreteScheduler`) pachifier (`QwenImagePachifier`)
Inputs:
latents (`Tensor`):
The initial random noise, can be generated in the prepare latents step.
image_latents (`Tensor`):
image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be generated from
vae encoder and updated in input step.)
image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be
generated from vae encoder and updated in input step.)
timesteps (`Tensor`):
The timesteps to use for the denoising process. Can be generated in set_timesteps step.
processed_mask_image (`Tensor`):
@@ -426,10 +425,8 @@ class QwenImageEditCoreDenoiseStep(SequentialPipelineBlocks):
Core denoising workflow for QwenImage-Edit edit (img2img) task.
Components:
pachifier (`QwenImagePachifier`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
guider (`ClassifierFreeGuidance`)
transformer (`QwenImageTransformer2DModel`)
pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
(`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -502,10 +499,8 @@ class QwenImageEditInpaintCoreDenoiseStep(SequentialPipelineBlocks):
Core denoising workflow for QwenImage-Edit edit inpaint task.
Components:
pachifier (`QwenImagePachifier`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
guider (`ClassifierFreeGuidance`)
transformer (`QwenImageTransformer2DModel`)
pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
(`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -623,12 +618,12 @@ class QwenImageEditDecodeStep(SequentialPipelineBlocks):
Decode step that decodes the latents to images and postprocesses the generated image.
Components:
vae (`AutoencoderKLQwenImage`)
image_processor (`VaeImageProcessor`)
vae (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`)
Inputs:
latents (`Tensor`):
The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.
The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
step.
output_type (`str`, *optional*, defaults to pil):
Output format: 'pil', 'np', 'pt'.
@@ -650,19 +645,21 @@ class QwenImageEditDecodeStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageEditInpaintDecodeStep(SequentialPipelineBlocks):
"""
Decode step that decodes the latents to images and postprocesses the generated image, optionally applying the mask overlay to the original image.
Decode step that decodes the latents to images and postprocesses the generated image, optionally applying the mask
overlay to the original image.
Components:
vae (`AutoencoderKLQwenImage`)
image_mask_processor (`InpaintProcessor`)
vae (`AutoencoderKLQwenImage`) image_mask_processor (`InpaintProcessor`)
Inputs:
latents (`Tensor`):
The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.
The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
step.
output_type (`str`, *optional*, defaults to pil):
Output format: 'pil', 'np', 'pt'.
mask_overlay_kwargs (`Dict`, *optional*):
The kwargs for the postprocess step to apply the mask overlay. Generated in InpaintProcessImagesInputStep.
The kwargs for the postprocess step to apply the mask overlay. Generated in
InpaintProcessImagesInputStep.
Outputs:
images (`List`):
@@ -719,19 +716,14 @@ class QwenImageEditAutoBlocks(SequentialPipelineBlocks):
"""
Auto Modular pipeline for edit (img2img) and edit inpaint tasks using QwenImage-Edit.
- for edit (img2img) generation, you need to provide `image`
- for edit inpainting, you need to provide `mask_image` and `image`, optionally you can provide `padding_mask_crop`
- for edit inpainting, you need to provide `mask_image` and `image`, optionally you can provide
`padding_mask_crop`
Components:
image_resize_processor (`VaeImageProcessor`)
text_encoder (`Qwen2_5_VLForConditionalGeneration`)
processor (`Qwen2VLProcessor`)
guider (`ClassifierFreeGuidance`)
image_mask_processor (`InpaintProcessor`)
vae (`AutoencoderKLQwenImage`)
image_processor (`VaeImageProcessor`)
pachifier (`QwenImagePachifier`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
transformer (`QwenImageTransformer2DModel`)
image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor
(`Qwen2VLProcessor`) guider (`ClassifierFreeGuidance`) image_mask_processor (`InpaintProcessor`) vae
(`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`) pachifier (`QwenImagePachifier`) scheduler
(`FlowMatchEulerDiscreteScheduler`) transformer (`QwenImageTransformer2DModel`)
Inputs:
image (`Union[Image, List]`):
@@ -771,7 +763,8 @@ class QwenImageEditAutoBlocks(SequentialPipelineBlocks):
output_type (`str`, *optional*, defaults to pil):
Output format: 'pil', 'np', 'pt'.
mask_overlay_kwargs (`Dict`, *optional*):
The kwargs for the postprocess step to apply the mask overlay. Generated in InpaintProcessImagesInputStep.
The kwargs for the postprocess step to apply the mask overlay. Generated in
InpaintProcessImagesInputStep.
Outputs:
images (`List`):
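Analogous hedged usage for the edit blocks, under the same API assumptions as the earlier sketch:

import torch
from PIL import Image

blocks = QwenImageEditAutoBlocks()
pipe = blocks.init_pipeline("Qwen/Qwen-Image-Edit")  # repo id assumed
pipe.load_components(torch_dtype=torch.bfloat16)

src = Image.open("input.png")
edited = pipe(image=src, prompt="make it nighttime", output="images")[0]  # edit (img2img)
# edit inpaint: additionally pass mask_image= (and optionally padding_mask_crop=)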

View File

@@ -12,10 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
from ...utils import logging
from ..modular_pipeline import SequentialPipelineBlocks
from ..modular_pipeline_utils import InsertableDict, OutputParam, InputParam
from ..modular_pipeline_utils import InsertableDict, OutputParam
from .before_denoise import (
QwenImageEditPlusRoPEInputsStep,
QwenImagePrepareLatentsStep,
@@ -55,10 +54,8 @@ class QwenImageEditPlusVLEncoderStep(SequentialPipelineBlocks):
QwenImage-Edit Plus VL encoder step that encodes the image and text prompts together.
Components:
image_resize_processor (`VaeImageProcessor`)
text_encoder (`Qwen2_5_VLForConditionalGeneration`)
processor (`Qwen2VLProcessor`)
guider (`ClassifierFreeGuidance`)
image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor
(`Qwen2VLProcessor`) guider (`ClassifierFreeGuidance`)
Inputs:
image (`Union[Image, List]`):
@@ -107,9 +104,8 @@ class QwenImageEditPlusVaeEncoderStep(SequentialPipelineBlocks):
Each image is resized independently based on its own aspect ratio to 1024x1024 target area.
Components:
image_resize_processor (`VaeImageProcessor`)
image_processor (`VaeImageProcessor`)
vae (`AutoencoderKLQwenImage`)
image_resize_processor (`VaeImageProcessor`) image_processor (`VaeImageProcessor`) vae
(`AutoencoderKLQwenImage`)
Inputs:
image (`Union[Image, List]`):
@@ -231,10 +227,8 @@ class QwenImageEditPlusCoreDenoiseStep(SequentialPipelineBlocks):
Core denoising workflow for QwenImage-Edit Plus edit (img2img) task.
Components:
pachifier (`QwenImagePachifier`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
guider (`ClassifierFreeGuidance`)
transformer (`QwenImageTransformer2DModel`)
pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
(`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -311,12 +305,12 @@ class QwenImageEditPlusDecodeStep(SequentialPipelineBlocks):
Decode step that decodes the latents to images and postprocesses the generated image.
Components:
vae (`AutoencoderKLQwenImage`)
image_processor (`VaeImageProcessor`)
vae (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`)
Inputs:
latents (`Tensor`):
The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.
The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
step.
output_type (`str`, *optional*, defaults to pil):
Output format: 'pil', 'np', 'pt'.
@@ -357,14 +351,9 @@ class QwenImageEditPlusAutoBlocks(SequentialPipelineBlocks):
- VL encoder uses 384x384 target area, VAE encoder uses 1024x1024 target area.
Components:
image_resize_processor (`VaeImageProcessor`)
text_encoder (`Qwen2_5_VLForConditionalGeneration`)
processor (`Qwen2VLProcessor`)
guider (`ClassifierFreeGuidance`)
image_processor (`VaeImageProcessor`)
vae (`AutoencoderKLQwenImage`)
pachifier (`QwenImagePachifier`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor
(`Qwen2VLProcessor`) guider (`ClassifierFreeGuidance`) image_processor (`VaeImageProcessor`) vae
(`AutoencoderKLQwenImage`) pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`)
transformer (`QwenImageTransformer2DModel`)
Inputs:
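Because Edit Plus resizes each reference independently (384x384 target area for the VL encoder, 1024x1024 for the VAE encoder), a list of images is the natural input; a hedged sketch under the same API assumptions, with the repo id assumed:

from PIL import Image

blocks = QwenImageEditPlusAutoBlocks()
pipe = blocks.init_pipeline("Qwen/Qwen-Image-Edit-Plus")  # repo id assumed
pipe.load_components()

refs = [Image.open("subject.png"), Image.open("background.png")]
out = pipe(image=refs, prompt="place the subject in the background", output="images")[0]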

View File

@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
from ...utils import logging
from ..modular_pipeline import SequentialPipelineBlocks
from ..modular_pipeline_utils import InsertableDict, OutputParam
@@ -53,14 +52,12 @@ logger = logging.get_logger(__name__)
# auto_docstring
class QwenImageLayeredTextEncoderStep(SequentialPipelineBlocks):
"""
QwenImage-Layered Text encoder step that encodes the text prompt; a prompt will be generated based on the image if not provided.
QwenImage-Layered Text encoder step that encodes the text prompt; a prompt will be generated based on the image if
not provided.
Components:
image_resize_processor (`VaeImageProcessor`)
text_encoder (`Qwen2_5_VLForConditionalGeneration`)
processor (`Qwen2VLProcessor`)
tokenizer (`Qwen2Tokenizer`): The tokenizer to use
guider (`ClassifierFreeGuidance`)
image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor
(`Qwen2VLProcessor`) tokenizer (`Qwen2Tokenizer`): The tokenizer to use guider (`ClassifierFreeGuidance`)
Inputs:
image (`Union[Image, List]`):
@@ -116,9 +113,8 @@ class QwenImageLayeredVaeEncoderStep(SequentialPipelineBlocks):
Vae encoder step that encodes the image inputs into their latent representations.
Components:
image_resize_processor (`VaeImageProcessor`)
image_processor (`VaeImageProcessor`)
vae (`AutoencoderKLQwenImage`)
image_resize_processor (`VaeImageProcessor`) image_processor (`VaeImageProcessor`) vae
(`AutoencoderKLQwenImage`)
Inputs:
image (`Union[Image, List]`):
@@ -203,8 +199,8 @@ class QwenImageLayeredInputStep(SequentialPipelineBlocks):
width (`int`):
if not provided, updated to image width
image_latents (`Tensor`):
image latents used to guide the image generation. Can be generated from vae_encoder step. (patchified with layered
pachifier and batch-expanded)
image latents used to guide the image generation. Can be generated from vae_encoder step. (patchified
with layered pachifier and batch-expanded)
"""
model_name = "qwenimage-layered"
@@ -230,10 +226,8 @@ class QwenImageLayeredCoreDenoiseStep(SequentialPipelineBlocks):
Core denoising workflow for QwenImage-Layered img2img task.
Components:
pachifier (`QwenImageLayeredPachifier`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
guider (`ClassifierFreeGuidance`)
transformer (`QwenImageTransformer2DModel`)
pachifier (`QwenImageLayeredPachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
(`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -317,16 +311,10 @@ class QwenImageLayeredAutoBlocks(SequentialPipelineBlocks):
Auto Modular pipeline for layered denoising tasks using QwenImage-Layered.
Components:
image_resize_processor (`VaeImageProcessor`)
text_encoder (`Qwen2_5_VLForConditionalGeneration`)
processor (`Qwen2VLProcessor`)
tokenizer (`Qwen2Tokenizer`): The tokenizer to use
guider (`ClassifierFreeGuidance`)
image_processor (`VaeImageProcessor`)
vae (`AutoencoderKLQwenImage`)
pachifier (`QwenImageLayeredPachifier`)
scheduler (`FlowMatchEulerDiscreteScheduler`)
transformer (`QwenImageTransformer2DModel`)
image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor
(`Qwen2VLProcessor`) tokenizer (`Qwen2Tokenizer`): The tokenizer to use guider (`ClassifierFreeGuidance`)
image_processor (`VaeImageProcessor`) vae (`AutoencoderKLQwenImage`) pachifier (`QwenImageLayeredPachifier`)
scheduler (`FlowMatchEulerDiscreteScheduler`) transformer (`QwenImageTransformer2DModel`)
Inputs:
image (`Union[Image, List]`):