From f056af1fbb24b79c6cc5360ea782abacd63c34fd Mon Sep 17 00:00:00 2001 From: yiyixuxu Date: Mon, 19 Jan 2026 09:27:40 +0100 Subject: [PATCH] make style --- .../modular_pipeline_utils.py | 18 +- .../qwenimage/before_denoise.py | 133 ++++++++----- .../modular_pipelines/qwenimage/decoders.py | 93 +++++---- .../modular_pipelines/qwenimage/denoise.py | 123 ++++++------ .../modular_pipelines/qwenimage/encoders.py | 177 ++++++++++-------- .../modular_pipelines/qwenimage/inputs.py | 91 ++++++--- .../qwenimage/modular_blocks_qwenimage.py | 136 +++++++------- .../modular_blocks_qwenimage_edit.py | 81 ++++---- .../modular_blocks_qwenimage_edit_plus.py | 37 ++-- .../modular_blocks_qwenimage_layered.py | 40 ++-- 10 files changed, 497 insertions(+), 432 deletions(-) diff --git a/src/diffusers/modular_pipelines/modular_pipeline_utils.py b/src/diffusers/modular_pipelines/modular_pipeline_utils.py index 6f1010daf2..a57212988e 100644 --- a/src/diffusers/modular_pipelines/modular_pipeline_utils.py +++ b/src/diffusers/modular_pipelines/modular_pipeline_utils.py @@ -438,7 +438,7 @@ INPUT_PARAM_TEMPLATES = { "description": "Number of layers to extract from the image", }, # common intermediate inputs - "prompt_embeds":{ + "prompt_embeds": { "type_hint": torch.Tensor, "required": True, "description": "text embeddings used to guide the image generation. Can be generated from text_encoder step.", @@ -531,16 +531,16 @@ class InputParam: raise ValueError(f"InputParam template for {template_name} not found") template_kwargs = INPUT_PARAM_TEMPLATES[template_name].copy() - + # Determine the actual param name: # 1. From overrides if provided # 2. From template if present # 3. Fall back to template_name name = overrides.pop("name", template_kwargs.pop("name", template_name)) - + if note and "description" in template_kwargs: template_kwargs["description"] = f"{template_kwargs['description']} ({note})" - + template_kwargs.update(overrides) return cls(name=name, **template_kwargs) @@ -564,18 +564,18 @@ class OutputParam: """Get template for name if exists, otherwise raise ValueError.""" if template_name not in OUTPUT_PARAM_TEMPLATES: raise ValueError(f"OutputParam template for {template_name} not found") - + template_kwargs = OUTPUT_PARAM_TEMPLATES[template_name].copy() - + # Determine the actual param name: # 1. From overrides if provided # 2. From template if present # 3. Fall back to template_name name = overrides.pop("name", template_kwargs.pop("name", template_name)) - + if note and "description" in template_kwargs: template_kwargs["description"] = f"{template_kwargs['description']} ({note})" - + template_kwargs.update(overrides) return cls(name=name, **template_kwargs) @@ -913,4 +913,4 @@ def make_doc_string( output += "\n\n" output += format_output_params(outputs, indent_level=2) - return output \ No newline at end of file + return output diff --git a/src/diffusers/modular_pipelines/qwenimage/before_denoise.py b/src/diffusers/modular_pipelines/qwenimage/before_denoise.py index 0b8cd0f4b2..418d927f4f 100644 --- a/src/diffusers/modular_pipelines/qwenimage/before_denoise.py +++ b/src/diffusers/modular_pipelines/qwenimage/before_denoise.py @@ -117,6 +117,7 @@ def get_timesteps(scheduler, num_inference_steps, strength): # 1. PREPARE LATENTS # ==================== + # auto_docstring class QwenImagePrepareLatentsStep(ModularPipelineBlocks): """ @@ -137,8 +138,8 @@ class QwenImagePrepareLatentsStep(ModularPipelineBlocks): generator (`Generator`, *optional*): Torch generator for deterministic generation. 
batch_size (`int`, *optional*, defaults to 1): - Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be - generated in input step. + Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can + be generated in input step. dtype (`dtype`, *optional*, defaults to torch.float32): The dtype of the model inputs, can be generated in input step. @@ -150,6 +151,7 @@ class QwenImagePrepareLatentsStep(ModularPipelineBlocks): latents (`Tensor`): The initial latents to use for the denoising process """ + model_name = "qwenimage" @property @@ -254,8 +256,8 @@ class QwenImageLayeredPrepareLatentsStep(ModularPipelineBlocks): generator (`Generator`, *optional*): Torch generator for deterministic generation. batch_size (`int`, *optional*, defaults to 1): - Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be - generated in input step. + Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can + be generated in input step. dtype (`dtype`, *optional*, defaults to torch.float32): The dtype of the model inputs, can be generated in input step. @@ -267,6 +269,7 @@ class QwenImageLayeredPrepareLatentsStep(ModularPipelineBlocks): latents (`Tensor`): The initial latents to use for the denoising process """ + model_name = "qwenimage-layered" @property @@ -353,7 +356,8 @@ class QwenImageLayeredPrepareLatentsStep(ModularPipelineBlocks): # auto_docstring class QwenImagePrepareLatentsWithStrengthStep(ModularPipelineBlocks): """ - Step that adds noise to image latents for image-to-image/inpainting. Should be run after set_timesteps, prepare_latents. Both noise and image latents should alreadybe patchified. + Step that adds noise to image latents for image-to-image/inpainting. Should be run after set_timesteps, + prepare_latents. Both noise and image latents should alreadybe patchified. Components: scheduler (`FlowMatchEulerDiscreteScheduler`) @@ -362,8 +366,8 @@ class QwenImagePrepareLatentsWithStrengthStep(ModularPipelineBlocks): latents (`Tensor`): The initial random noised, can be generated in prepare latent step. image_latents (`Tensor`): - image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be generated from - vae encoder and updated in input step.) + image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be + generated from vae encoder and updated in input step.) timesteps (`Tensor`): The timesteps to use for the denoising process. Can be generated in set_timesteps step. @@ -373,6 +377,7 @@ class QwenImagePrepareLatentsWithStrengthStep(ModularPipelineBlocks): latents (`Tensor`): The scaled noisy latents to use for inpainting/image-to-image denoising. """ + model_name = "qwenimage" @property @@ -396,10 +401,10 @@ class QwenImagePrepareLatentsWithStrengthStep(ModularPipelineBlocks): ), InputParam.template("image_latents", note="Can be generated from vae encoder and updated in input step."), InputParam( - name="timesteps", - required=True, - type_hint=torch.Tensor, - description="The timesteps to use for the denoising process. Can be generated in set_timesteps step." + name="timesteps", + required=True, + type_hint=torch.Tensor, + description="The timesteps to use for the denoising process. 
Can be generated in set_timesteps step.", ), ] @@ -475,6 +480,7 @@ class QwenImageCreateMaskLatentsStep(ModularPipelineBlocks): mask (`Tensor`): The mask to use for the inpainting process. """ + model_name = "qwenimage" @property @@ -541,10 +547,12 @@ class QwenImageCreateMaskLatentsStep(ModularPipelineBlocks): # 2. SET TIMESTEPS # ==================== + # auto_docstring class QwenImageSetTimestepsStep(ModularPipelineBlocks): """ - Step that sets the the scheduler's timesteps for text-to-image generation. Should be run after prepare latents step. + Step that sets the the scheduler's timesteps for text-to-image generation. Should be run after prepare latents + step. Components: scheduler (`FlowMatchEulerDiscreteScheduler`) @@ -561,6 +569,7 @@ class QwenImageSetTimestepsStep(ModularPipelineBlocks): timesteps (`Tensor`): The timesteps to use for the denoising process """ + model_name = "qwenimage" @property @@ -579,10 +588,10 @@ class QwenImageSetTimestepsStep(ModularPipelineBlocks): InputParam.template("num_inference_steps"), InputParam.template("sigmas"), InputParam( - name="latents", + name="latents", required=True, type_hint=torch.Tensor, - description="The initial random noised latents for the denoising process. Can be generated in prepare latents step." + description="The initial random noised latents for the denoising process. Can be generated in prepare latents step.", ), ] @@ -640,13 +649,14 @@ class QwenImageLayeredSetTimestepsStep(ModularPipelineBlocks): sigmas (`List`, *optional*): Custom sigmas for the denoising process. image_latents (`Tensor`): - image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be generated from - vae encoder and packed in input step.) + image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be + generated from vae encoder and packed in input step.) Outputs: timesteps (`Tensor`): The timesteps to use for the denoising process. """ + model_name = "qwenimage-layered" @property @@ -671,9 +681,7 @@ class QwenImageLayeredSetTimestepsStep(ModularPipelineBlocks): def intermediate_outputs(self) -> List[OutputParam]: return [ OutputParam( - name="timesteps", - type_hint=torch.Tensor, - description="The timesteps to use for the denoising process." + name="timesteps", type_hint=torch.Tensor, description="The timesteps to use for the denoising process." ), ] @@ -711,7 +719,8 @@ class QwenImageLayeredSetTimestepsStep(ModularPipelineBlocks): # auto_docstring class QwenImageSetTimestepsWithStrengthStep(ModularPipelineBlocks): """ - Step that sets the the scheduler's timesteps for image-to-image generation, and inpainting. Should be run after prepare latents step. + Step that sets the the scheduler's timesteps for image-to-image generation, and inpainting. Should be run after + prepare latents step. Components: scheduler (`FlowMatchEulerDiscreteScheduler`) @@ -732,6 +741,7 @@ class QwenImageSetTimestepsWithStrengthStep(ModularPipelineBlocks): num_inference_steps (`int`): The number of denoising steps to perform at inference time. Updated based on strength. """ + model_name = "qwenimage" @property @@ -750,10 +760,10 @@ class QwenImageSetTimestepsWithStrengthStep(ModularPipelineBlocks): InputParam.template("num_inference_steps"), InputParam.template("sigmas"), InputParam( - "latents", - required=True, + "latents", + required=True, type_hint=torch.Tensor, - description="The latents to use for the denoising process. Can be generated in prepare latents step." 
+ description="The latents to use for the denoising process. Can be generated in prepare latents step.", ), InputParam.template("strength", default=0.9), ] @@ -815,6 +825,7 @@ class QwenImageSetTimestepsWithStrengthStep(ModularPipelineBlocks): ## RoPE inputs for denoiser + # auto_docstring class QwenImageRoPEInputsStep(ModularPipelineBlocks): """ @@ -822,8 +833,8 @@ class QwenImageRoPEInputsStep(ModularPipelineBlocks): Inputs: batch_size (`int`, *optional*, defaults to 1): - Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be - generated in input step. + Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can + be generated in input step. height (`int`): The height in pixels of the generated image. width (`int`): @@ -841,6 +852,7 @@ class QwenImageRoPEInputsStep(ModularPipelineBlocks): negative_txt_seq_lens (`List`): The sequence lengths of the negative prompt embeds, used for RoPE calculation """ + model_name = "qwenimage" @property @@ -911,12 +923,13 @@ class QwenImageRoPEInputsStep(ModularPipelineBlocks): # auto_docstring class QwenImageEditRoPEInputsStep(ModularPipelineBlocks): """ - Step that prepares the RoPE inputs for denoising process. This is used in QwenImage Edit. Should be placed after prepare_latents step + Step that prepares the RoPE inputs for denoising process. This is used in QwenImage Edit. Should be placed after + prepare_latents step Inputs: batch_size (`int`, *optional*, defaults to 1): - Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be - generated in input step. + Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can + be generated in input step. image_height (`int`): The height of the reference image. Can be generated in input step. image_width (`int`): @@ -938,6 +951,7 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks): negative_txt_seq_lens (`List`): The sequence lengths of the negative prompt embeds, used for RoPE calculation """ + model_name = "qwenimage" @property @@ -948,8 +962,18 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks): def inputs(self) -> List[InputParam]: return [ InputParam.template("batch_size"), - InputParam(name="image_height", required=True, type_hint=int, description="The height of the reference image. Can be generated in input step."), - InputParam(name="image_width", required=True, type_hint=int, description="The width of the reference image. Can be generated in input step."), + InputParam( + name="image_height", + required=True, + type_hint=int, + description="The height of the reference image. Can be generated in input step.", + ), + InputParam( + name="image_width", + required=True, + type_hint=int, + description="The width of the reference image. Can be generated in input step.", + ), InputParam.template("height", required=True), InputParam.template("width", required=True), InputParam.template("prompt_embeds_mask"), @@ -1016,13 +1040,13 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks): class QwenImageEditPlusRoPEInputsStep(ModularPipelineBlocks): """ Step that prepares the RoPE inputs for denoising process. This is used in QwenImage Edit Plus. - Unlike Edit, Edit Plus handles lists of image_height/image_width for multiple reference images. - Should be placed after prepare_latents step. + Unlike Edit, Edit Plus handles lists of image_height/image_width for multiple reference images. 
Should be placed + after prepare_latents step. Inputs: batch_size (`int`, *optional*, defaults to 1): - Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be - generated in input step. + Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can + be generated in input step. image_height (`List`): The heights of the reference images. Can be generated in input step. image_width (`List`): @@ -1044,6 +1068,7 @@ class QwenImageEditPlusRoPEInputsStep(ModularPipelineBlocks): negative_txt_seq_lens (`List`): The sequence lengths of the negative prompt embeds, used for RoPE calculation """ + model_name = "qwenimage-edit-plus" @property @@ -1058,8 +1083,18 @@ class QwenImageEditPlusRoPEInputsStep(ModularPipelineBlocks): def inputs(self) -> List[InputParam]: return [ InputParam.template("batch_size"), - InputParam(name="image_height", required=True, type_hint=List[int], description="The heights of the reference images. Can be generated in input step."), - InputParam(name="image_width", required=True, type_hint=List[int], description="The widths of the reference images. Can be generated in input step."), + InputParam( + name="image_height", + required=True, + type_hint=List[int], + description="The heights of the reference images. Can be generated in input step.", + ), + InputParam( + name="image_width", + required=True, + type_hint=List[int], + description="The widths of the reference images. Can be generated in input step.", + ), InputParam.template("height", required=True), InputParam.template("width", required=True), InputParam.template("prompt_embeds_mask"), @@ -1126,8 +1161,8 @@ class QwenImageLayeredRoPEInputsStep(ModularPipelineBlocks): Inputs: batch_size (`int`, *optional*, defaults to 1): - Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be - generated in input step. + Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can + be generated in input step. layers (`int`, *optional*, defaults to 4): Number of layers to extract from the image height (`int`): @@ -1149,6 +1184,7 @@ class QwenImageLayeredRoPEInputsStep(ModularPipelineBlocks): additional_t_cond (`Tensor`): The additional t cond, used for RoPE calculation """ + model_name = "qwenimage-layered" @property @@ -1231,6 +1267,7 @@ class QwenImageLayeredRoPEInputsStep(ModularPipelineBlocks): ## ControlNet inputs for denoiser + # auto_docstring class QwenImageControlNetBeforeDenoiserStep(ModularPipelineBlocks): """ @@ -1247,7 +1284,8 @@ class QwenImageControlNetBeforeDenoiserStep(ModularPipelineBlocks): controlnet_conditioning_scale (`float`, *optional*, defaults to 1.0): Scale for ControlNet conditioning. control_image_latents (`Tensor`): - The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step. + The control image latents to use for the denoising process. Can be generated in controlnet vae encoder + step. timesteps (`Tensor`): The timesteps to use for the denoising process. Can be generated in set_timesteps step. 
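For reference, the `controlnet_keep` output documented below follows the convention used
across diffusers ControlNet pipelines: one conditioning-scale multiplier per denoising
step, zeroed outside the `control_guidance_start`/`control_guidance_end` window. A
minimal sketch of that convention (names taken from the docstring; the block's actual
`__call__` body is not part of this hunk):

    # One keep value per timestep; the ControlNet is only active while the
    # current fraction of the schedule lies inside [start, end].
    controlnet_keep = []
    for i in range(len(timesteps)):
        keep = 1.0 - float(
            i / len(timesteps) < control_guidance_start
            or (i + 1) / len(timesteps) > control_guidance_end
        )
        controlnet_keep.append(keep)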
@@ -1255,6 +1293,7 @@
     Outputs:
         controlnet_keep (`List`):
             The controlnet keep values
     """
+
     model_name = "qwenimage"

     @property
@@ -1274,16 +1313,16 @@
             InputParam.template("control_guidance_end"),
             InputParam.template("controlnet_conditioning_scale"),
             InputParam(
-                name="control_image_latents",
-                required=True,
-                type_hint=torch.Tensor,
-                description="The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step."
+                name="control_image_latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.",
             ),
             InputParam(
-                name="timesteps",
-                required=True,
-                type_hint=torch.Tensor,
-                description="The timesteps to use for the denoising process. Can be generated in set_timesteps step."
+                name="timesteps",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The timesteps to use for the denoising process. Can be generated in set_timesteps step.",
             ),
         ]

diff --git a/src/diffusers/modular_pipelines/qwenimage/decoders.py b/src/diffusers/modular_pipelines/qwenimage/decoders.py
index 650bf34da7..1adbf6bdd3 100644
--- a/src/diffusers/modular_pipelines/qwenimage/decoders.py
+++ b/src/diffusers/modular_pipelines/qwenimage/decoders.py
@@ -30,10 +30,12 @@ logger = logging.get_logger(__name__)


 # after denoising loop (unpack latents)
-#auto_docstring
+
+# auto_docstring
 class QwenImageAfterDenoiseStep(ModularPipelineBlocks):
     """
-    Step that unpack the latents from 3D tensor (batch_size, sequence_length, channels) into 5D tensor (batch_size, channels, 1, height, width)
+    Step that unpacks the latents from a 3D tensor (batch_size, sequence_length, channels) into a 5D tensor
+    (batch_size, channels, 1, height, width)

     Components:
         pachifier (`QwenImagePachifier`)
@@ -50,6 +52,7 @@ class QwenImageAfterDenoiseStep(ModularPipelineBlocks):
         latents (`Tensor`):
             The denoised latents unpacked to B, C, 1, H, W
     """
+
     model_name = "qwenimage"

     @property
@@ -70,10 +73,10 @@ class QwenImageAfterDenoiseStep(ModularPipelineBlocks):
         InputParam.template("height", required=True),
         InputParam.template("width", required=True),
         InputParam(
-            name="latents",
-            required=True,
-            type_hint=torch.Tensor,
-            description="The latents to decode, can be generated in the denoise step."
+            name="latents",
+            required=True,
+            type_hint=torch.Tensor,
+            description="The latents to decode, can be generated in the denoise step.",
         ),
     ]

@@ -81,9 +84,7 @@ class QwenImageAfterDenoiseStep(ModularPipelineBlocks):
     def intermediate_outputs(self) -> List[OutputParam]:
         return [
             OutputParam(
-                name="latents",
-                type_hint=torch.Tensor,
-                description="The denoisedlatents unpacked to B, C, 1, H, W"
+                name="latents", type_hint=torch.Tensor, description="The denoised latents unpacked to B, C, 1, H, W"
             ),
         ]

@@ -100,7 +101,7 @@ class QwenImageAfterDenoiseStep(ModularPipelineBlocks):
         return components, state


-#auto_docstring
+# auto_docstring
 class QwenImageLayeredAfterDenoiseStep(ModularPipelineBlocks):
     """
     Unpack latents from (B, seq, C*4) to (B, C, layers+1, H, W) after denoising.
@@ -122,6 +123,7 @@ class QwenImageLayeredAfterDenoiseStep(ModularPipelineBlocks):
         latents (`Tensor`):
             Denoised latents.
(unpacked to B, C, layers+1, H, W) """ + model_name = "qwenimage-layered" @property @@ -138,10 +140,10 @@ class QwenImageLayeredAfterDenoiseStep(ModularPipelineBlocks): def inputs(self) -> List[InputParam]: return [ InputParam( - name="latents", - required=True, - type_hint=torch.Tensor, - description="The denoised latents to decode, can be generated in the denoise step." + name="latents", + required=True, + type_hint=torch.Tensor, + description="The denoised latents to decode, can be generated in the denoise step.", ), InputParam.template("height", required=True), InputParam.template("width", required=True), @@ -173,7 +175,8 @@ class QwenImageLayeredAfterDenoiseStep(ModularPipelineBlocks): # decode step -#auto_docstring + +# auto_docstring class QwenImageDecoderStep(ModularPipelineBlocks): """ Step that decodes the latents to images @@ -183,12 +186,14 @@ class QwenImageDecoderStep(ModularPipelineBlocks): Inputs: latents (`Tensor`): - The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step. + The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise + step. Outputs: images (`List`): Generated images. (tensor output of the vae decoder.) """ + model_name = "qwenimage" @property @@ -207,10 +212,10 @@ class QwenImageDecoderStep(ModularPipelineBlocks): def inputs(self) -> List[InputParam]: return [ InputParam( - name="latents", - required=True, - type_hint=torch.Tensor, - description="The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step." + name="latents", + required=True, + type_hint=torch.Tensor, + description="The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.", ), ] @@ -246,18 +251,18 @@ class QwenImageDecoderStep(ModularPipelineBlocks): return components, state -#auto_docstring +# auto_docstring class QwenImageLayeredDecoderStep(ModularPipelineBlocks): """ Decode unpacked latents (B, C, layers+1, H, W) into layer images. Components: - vae (`AutoencoderKLQwenImage`) - image_processor (`VaeImageProcessor`) + vae (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`) Inputs: latents (`Tensor`): - The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step. + The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise + step. output_type (`str`, *optional*, defaults to pil): Output format: 'pil', 'np', 'pt'. @@ -265,6 +270,7 @@ class QwenImageLayeredDecoderStep(ModularPipelineBlocks): images (`List`): Generated images. """ + model_name = "qwenimage-layered" @property @@ -287,10 +293,10 @@ class QwenImageLayeredDecoderStep(ModularPipelineBlocks): def inputs(self) -> List[InputParam]: return [ InputParam( - name="latents", - required=True, - type_hint=torch.Tensor, - description="The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step." 
+ name="latents", + required=True, + type_hint=torch.Tensor, + description="The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.", ), InputParam.template("output_type"), ] @@ -345,7 +351,8 @@ class QwenImageLayeredDecoderStep(ModularPipelineBlocks): # postprocess the decoded images -#auto_docstring + +# auto_docstring class QwenImageProcessImagesOutputStep(ModularPipelineBlocks): """ postprocess the generated image @@ -363,6 +370,7 @@ class QwenImageProcessImagesOutputStep(ModularPipelineBlocks): images (`List`): Generated images. """ + model_name = "qwenimage" @property @@ -384,10 +392,10 @@ class QwenImageProcessImagesOutputStep(ModularPipelineBlocks): def inputs(self) -> List[InputParam]: return [ InputParam( - name="images", - required=True, - type_hint=torch.Tensor, - description="the generated image tensor from decoders step" + name="images", + required=True, + type_hint=torch.Tensor, + description="the generated image tensor from decoders step", ), InputParam.template("output_type"), ] @@ -416,7 +424,7 @@ class QwenImageProcessImagesOutputStep(ModularPipelineBlocks): return components, state -#auto_docstring +# auto_docstring class QwenImageInpaintProcessImagesOutputStep(ModularPipelineBlocks): """ postprocess the generated image, optional apply the mask overally to the original image.. @@ -430,12 +438,14 @@ class QwenImageInpaintProcessImagesOutputStep(ModularPipelineBlocks): output_type (`str`, *optional*, defaults to pil): Output format: 'pil', 'np', 'pt'. mask_overlay_kwargs (`Dict`, *optional*): - The kwargs for the postprocess step to apply the mask overlay. generated in InpaintProcessImagesInputStep. + The kwargs for the postprocess step to apply the mask overlay. generated in + InpaintProcessImagesInputStep. Outputs: images (`List`): Generated images. """ + model_name = "qwenimage" @property @@ -457,16 +467,17 @@ class QwenImageInpaintProcessImagesOutputStep(ModularPipelineBlocks): def inputs(self) -> List[InputParam]: return [ InputParam( - name="images", - required=True, - type_hint=torch.Tensor, - description="the generated image tensor from decoders step" + name="images", + required=True, + type_hint=torch.Tensor, + description="the generated image tensor from decoders step", ), InputParam.template("output_type"), InputParam( - name="mask_overlay_kwargs", + name="mask_overlay_kwargs", type_hint=Dict[str, Any], - description="The kwargs for the postprocess step to apply the mask overlay. generated in InpaintProcessImagesInputStep."), + description="The kwargs for the postprocess step to apply the mask overlay. generated in InpaintProcessImagesInputStep.", + ), ] @property diff --git a/src/diffusers/modular_pipelines/qwenimage/denoise.py b/src/diffusers/modular_pipelines/qwenimage/denoise.py index ff6e411d76..3b00fcb274 100644 --- a/src/diffusers/modular_pipelines/qwenimage/denoise.py +++ b/src/diffusers/modular_pipelines/qwenimage/denoise.py @@ -50,10 +50,10 @@ class QwenImageLoopBeforeDenoiser(ModularPipelineBlocks): def inputs(self) -> List[InputParam]: return [ InputParam( - name="latents", - required=True, - type_hint=torch.Tensor, - description="The initial latents to use for the denoising process. Can be generated in prepare_latent step." + name="latents", + required=True, + type_hint=torch.Tensor, + description="The initial latents to use for the denoising process. 
Can be generated in prepare_latent step.", ), ] @@ -80,10 +80,10 @@ class QwenImageEditLoopBeforeDenoiser(ModularPipelineBlocks): def inputs(self) -> List[InputParam]: return [ InputParam( - name="latents", - required=True, - type_hint=torch.Tensor, - description="The initial latents to use for the denoising process. Can be generated in prepare_latent step." + name="latents", + required=True, + type_hint=torch.Tensor, + description="The initial latents to use for the denoising process. Can be generated in prepare_latent step.", ), InputParam.template("image_latents"), ] @@ -131,10 +131,10 @@ class QwenImageLoopBeforeDenoiserControlNet(ModularPipelineBlocks): ), InputParam.template("controlnet_conditioning_scale", note="updated in prepare_controlnet_inputs step."), InputParam( - name="controlnet_keep", - required=True, - type_hint=List[float], - description="The controlnet keep values. Can be generated in prepare_controlnet_inputs step." + name="controlnet_keep", + required=True, + type_hint=List[float], + description="The controlnet keep values. Can be generated in prepare_controlnet_inputs step.", ), ] @@ -467,10 +467,10 @@ class QwenImageDenoiseLoopWrapper(LoopSequentialPipelineBlocks): def loop_inputs(self) -> List[InputParam]: return [ InputParam( - name="timesteps", - required=True, - type_hint=torch.Tensor, - description="The timesteps to use for the denoising process. Can be generated in set_timesteps step." + name="timesteps", + required=True, + type_hint=torch.Tensor, + description="The timesteps to use for the denoising process. Can be generated in set_timesteps step.", ), InputParam.template("num_inference_steps", required=True), ] @@ -505,21 +505,21 @@ class QwenImageDenoiseLoopWrapper(LoopSequentialPipelineBlocks): # Qwen Image (text2image, image2image) + # auto_docstring class QwenImageDenoiseStep(QwenImageDenoiseLoopWrapper): """ - Denoise step that iteratively denoise the latents. - Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method - At each iteration, it runs blocks defined in `sub_blocks` sequencially: + Denoise step that iteratively denoise the latents. + Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method At each iteration, it runs blocks + defined in `sub_blocks` sequencially: - `QwenImageLoopBeforeDenoiser` - `QwenImageLoopDenoiser` - `QwenImageLoopAfterDenoiser` This block supports text2image and image2image tasks for QwenImage. Components: - guider (`ClassifierFreeGuidance`) - transformer (`QwenImageTransformer2DModel`) - scheduler (`FlowMatchEulerDiscreteScheduler`) + guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`) scheduler + (`FlowMatchEulerDiscreteScheduler`) Inputs: timesteps (`Tensor`): @@ -539,6 +539,7 @@ class QwenImageDenoiseStep(QwenImageDenoiseLoopWrapper): latents (`Tensor`): Denoised latents. """ + model_name = "qwenimage" block_classes = [ @@ -551,8 +552,8 @@ class QwenImageDenoiseStep(QwenImageDenoiseLoopWrapper): @property def description(self) -> str: return ( - "Denoise step that iteratively denoise the latents. 
\n" - "Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method \n" + "Denoise step that iteratively denoise the latents.\n" + "Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method\n" "At each iteration, it runs blocks defined in `sub_blocks` sequencially:\n" " - `QwenImageLoopBeforeDenoiser`\n" " - `QwenImageLoopDenoiser`\n" @@ -565,9 +566,9 @@ class QwenImageDenoiseStep(QwenImageDenoiseLoopWrapper): # auto_docstring class QwenImageInpaintDenoiseStep(QwenImageDenoiseLoopWrapper): """ - Denoise step that iteratively denoise the latents. - Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method - At each iteration, it runs blocks defined in `sub_blocks` sequencially: + Denoise step that iteratively denoise the latents. + Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method At each iteration, it runs blocks + defined in `sub_blocks` sequencially: - `QwenImageLoopBeforeDenoiser` - `QwenImageLoopDenoiser` - `QwenImageLoopAfterDenoiser` @@ -575,9 +576,8 @@ class QwenImageInpaintDenoiseStep(QwenImageDenoiseLoopWrapper): This block supports inpainting tasks for QwenImage. Components: - guider (`ClassifierFreeGuidance`) - transformer (`QwenImageTransformer2DModel`) - scheduler (`FlowMatchEulerDiscreteScheduler`) + guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`) scheduler + (`FlowMatchEulerDiscreteScheduler`) Inputs: timesteps (`Tensor`): @@ -603,6 +603,7 @@ class QwenImageInpaintDenoiseStep(QwenImageDenoiseLoopWrapper): latents (`Tensor`): Denoised latents. """ + model_name = "qwenimage" block_classes = [ QwenImageLoopBeforeDenoiser, @@ -630,9 +631,9 @@ class QwenImageInpaintDenoiseStep(QwenImageDenoiseLoopWrapper): # auto_docstring class QwenImageControlNetDenoiseStep(QwenImageDenoiseLoopWrapper): """ - Denoise step that iteratively denoise the latents. - Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method - At each iteration, it runs blocks defined in `sub_blocks` sequencially: + Denoise step that iteratively denoise the latents. + Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method At each iteration, it runs blocks + defined in `sub_blocks` sequencially: - `QwenImageLoopBeforeDenoiser` - `QwenImageLoopBeforeDenoiserControlNet` - `QwenImageLoopDenoiser` @@ -640,10 +641,8 @@ class QwenImageControlNetDenoiseStep(QwenImageDenoiseLoopWrapper): This block supports text2img/img2img tasks with controlnet for QwenImage. Components: - guider (`ClassifierFreeGuidance`) - controlnet (`QwenImageControlNetModel`) - transformer (`QwenImageTransformer2DModel`) - scheduler (`FlowMatchEulerDiscreteScheduler`) + guider (`ClassifierFreeGuidance`) controlnet (`QwenImageControlNetModel`) transformer + (`QwenImageTransformer2DModel`) scheduler (`FlowMatchEulerDiscreteScheduler`) Inputs: timesteps (`Tensor`): @@ -669,6 +668,7 @@ class QwenImageControlNetDenoiseStep(QwenImageDenoiseLoopWrapper): latents (`Tensor`): Denoised latents. """ + model_name = "qwenimage" block_classes = [ QwenImageLoopBeforeDenoiser, @@ -696,9 +696,9 @@ class QwenImageControlNetDenoiseStep(QwenImageDenoiseLoopWrapper): # auto_docstring class QwenImageInpaintControlNetDenoiseStep(QwenImageDenoiseLoopWrapper): """ - Denoise step that iteratively denoise the latents. - Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method - At each iteration, it runs blocks defined in `sub_blocks` sequencially: + Denoise step that iteratively denoise the latents. 
+ Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method At each iteration, it runs blocks + defined in `sub_blocks` sequencially: - `QwenImageLoopBeforeDenoiser` - `QwenImageLoopBeforeDenoiserControlNet` - `QwenImageLoopDenoiser` @@ -707,10 +707,8 @@ class QwenImageInpaintControlNetDenoiseStep(QwenImageDenoiseLoopWrapper): This block supports inpainting tasks with controlnet for QwenImage. Components: - guider (`ClassifierFreeGuidance`) - controlnet (`QwenImageControlNetModel`) - transformer (`QwenImageTransformer2DModel`) - scheduler (`FlowMatchEulerDiscreteScheduler`) + guider (`ClassifierFreeGuidance`) controlnet (`QwenImageControlNetModel`) transformer + (`QwenImageTransformer2DModel`) scheduler (`FlowMatchEulerDiscreteScheduler`) Inputs: timesteps (`Tensor`): @@ -742,6 +740,7 @@ class QwenImageInpaintControlNetDenoiseStep(QwenImageDenoiseLoopWrapper): latents (`Tensor`): Denoised latents. """ + model_name = "qwenimage" block_classes = [ QwenImageLoopBeforeDenoiser, @@ -777,18 +776,17 @@ class QwenImageInpaintControlNetDenoiseStep(QwenImageDenoiseLoopWrapper): # auto_docstring class QwenImageEditDenoiseStep(QwenImageDenoiseLoopWrapper): """ - Denoise step that iteratively denoise the latents. - Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method - At each iteration, it runs blocks defined in `sub_blocks` sequencially: + Denoise step that iteratively denoise the latents. + Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method At each iteration, it runs blocks + defined in `sub_blocks` sequencially: - `QwenImageEditLoopBeforeDenoiser` - `QwenImageEditLoopDenoiser` - `QwenImageLoopAfterDenoiser` This block supports QwenImage Edit. Components: - guider (`ClassifierFreeGuidance`) - transformer (`QwenImageTransformer2DModel`) - scheduler (`FlowMatchEulerDiscreteScheduler`) + guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`) scheduler + (`FlowMatchEulerDiscreteScheduler`) Inputs: timesteps (`Tensor`): @@ -810,6 +808,7 @@ class QwenImageEditDenoiseStep(QwenImageDenoiseLoopWrapper): latents (`Tensor`): Denoised latents. """ + model_name = "qwenimage-edit" block_classes = [ QwenImageEditLoopBeforeDenoiser, @@ -835,9 +834,9 @@ class QwenImageEditDenoiseStep(QwenImageDenoiseLoopWrapper): # auto_docstring class QwenImageEditInpaintDenoiseStep(QwenImageDenoiseLoopWrapper): """ - Denoise step that iteratively denoise the latents. - Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method - At each iteration, it runs blocks defined in `sub_blocks` sequencially: + Denoise step that iteratively denoise the latents. + Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method At each iteration, it runs blocks + defined in `sub_blocks` sequencially: - `QwenImageEditLoopBeforeDenoiser` - `QwenImageEditLoopDenoiser` - `QwenImageLoopAfterDenoiser` @@ -845,9 +844,8 @@ class QwenImageEditInpaintDenoiseStep(QwenImageDenoiseLoopWrapper): This block supports inpainting tasks for QwenImage Edit. Components: - guider (`ClassifierFreeGuidance`) - transformer (`QwenImageTransformer2DModel`) - scheduler (`FlowMatchEulerDiscreteScheduler`) + guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`) scheduler + (`FlowMatchEulerDiscreteScheduler`) Inputs: timesteps (`Tensor`): @@ -873,6 +871,7 @@ class QwenImageEditInpaintDenoiseStep(QwenImageDenoiseLoopWrapper): latents (`Tensor`): Denoised latents. 
""" + model_name = "qwenimage-edit" block_classes = [ QwenImageEditLoopBeforeDenoiser, @@ -900,18 +899,17 @@ class QwenImageEditInpaintDenoiseStep(QwenImageDenoiseLoopWrapper): # auto_docstring class QwenImageLayeredDenoiseStep(QwenImageDenoiseLoopWrapper): """ - Denoise step that iteratively denoise the latents. - Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method - At each iteration, it runs blocks defined in `sub_blocks` sequencially: + Denoise step that iteratively denoise the latents. + Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method At each iteration, it runs blocks + defined in `sub_blocks` sequencially: - `QwenImageEditLoopBeforeDenoiser` - `QwenImageEditLoopDenoiser` - `QwenImageLoopAfterDenoiser` This block supports QwenImage Layered. Components: - guider (`ClassifierFreeGuidance`) - transformer (`QwenImageTransformer2DModel`) - scheduler (`FlowMatchEulerDiscreteScheduler`) + guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`) scheduler + (`FlowMatchEulerDiscreteScheduler`) Inputs: timesteps (`Tensor`): @@ -933,6 +931,7 @@ class QwenImageLayeredDenoiseStep(QwenImageDenoiseLoopWrapper): latents (`Tensor`): Denoised latents. """ + model_name = "qwenimage-layered" block_classes = [ QwenImageEditLoopBeforeDenoiser, diff --git a/src/diffusers/modular_pipelines/qwenimage/encoders.py b/src/diffusers/modular_pipelines/qwenimage/encoders.py index 083ee507cc..5e1821cca5 100644 --- a/src/diffusers/modular_pipelines/qwenimage/encoders.py +++ b/src/diffusers/modular_pipelines/qwenimage/encoders.py @@ -30,7 +30,7 @@ from ...pipelines.qwenimage.pipeline_qwenimage_edit import calculate_dimensions from ...utils import logging from ...utils.torch_utils import unwrap_module from ..modular_pipeline import ModularPipelineBlocks, PipelineState -from ..modular_pipeline_utils import ComponentSpec, ConfigSpec, InputParam, OutputParam +from ..modular_pipeline_utils import ComponentSpec, InputParam, OutputParam from .modular_pipeline import QwenImageModularPipeline from .prompt_templates import ( QWENIMAGE_EDIT_PLUS_IMG_TEMPLATE, @@ -277,6 +277,7 @@ def encode_vae_image( # In most of our other pipelines, resizing is done as part of the image preprocessing step. # ==================== + # auto_docstring class QwenImageEditResizeStep(ModularPipelineBlocks): """ @@ -293,8 +294,8 @@ class QwenImageEditResizeStep(ModularPipelineBlocks): resized_image (`List`): The resized images """ - model_name = "qwenimage-edit" + model_name = "qwenimage-edit" @property def description(self) -> str: @@ -319,8 +320,8 @@ class QwenImageEditResizeStep(ModularPipelineBlocks): def intermediate_outputs(self) -> List[OutputParam]: return [ OutputParam( - name="resized_image", - type_hint=List[PIL.Image.Image], + name="resized_image", + type_hint=List[PIL.Image.Image], description="The resized images", ), ] @@ -353,7 +354,8 @@ class QwenImageEditResizeStep(ModularPipelineBlocks): # auto_docstring class QwenImageLayeredResizeStep(ModularPipelineBlocks): """ - Image Resize step that resize the image to a target area (defined by the resolution parameter from user) while maintaining the aspect ratio. + Image Resize step that resize the image to a target area (defined by the resolution parameter from user) while + maintaining the aspect ratio. 
Components: image_resize_processor (`VaeImageProcessor`) @@ -368,11 +370,12 @@ class QwenImageLayeredResizeStep(ModularPipelineBlocks): resized_image (`List`): The resized images """ + model_name = "qwenimage-layered" @property def description(self) -> str: - return f"Image Resize step that resize the image to a target area (defined by the resolution parameter from user) while maintaining the aspect ratio." + return "Image Resize step that resize the image to a target area (defined by the resolution parameter from user) while maintaining the aspect ratio." @property def expected_components(self) -> List[ComponentSpec]: @@ -399,11 +402,13 @@ class QwenImageLayeredResizeStep(ModularPipelineBlocks): @property def intermediate_outputs(self) -> List[OutputParam]: - return [OutputParam( - name="resized_image", - type_hint=List[PIL.Image.Image], - description="The resized images", - )] + return [ + OutputParam( + name="resized_image", + type_hint=List[PIL.Image.Image], + description="The resized images", + ) + ] @staticmethod def check_inputs(resolution: int): @@ -442,8 +447,8 @@ class QwenImageLayeredResizeStep(ModularPipelineBlocks): class QwenImageEditPlusResizeStep(ModularPipelineBlocks): """ Resize images for QwenImage Edit Plus pipeline. - Produces two outputs: resized_image (1024x1024) for VAE encoding, resized_cond_image (384x384) for VL text encoding. - Each image is resized independently based on its own aspect ratio. + Produces two outputs: resized_image (1024x1024) for VAE encoding, resized_cond_image (384x384) for VL text + encoding. Each image is resized independently based on its own aspect ratio. Components: image_resize_processor (`VaeImageProcessor`) @@ -484,7 +489,7 @@ class QwenImageEditPlusResizeStep(ModularPipelineBlocks): @property def inputs(self) -> List[InputParam]: # image - return [InputParam.template("image")] + return [InputParam.template("image")] @property def intermediate_outputs(self) -> List[OutputParam]: @@ -518,13 +523,11 @@ class QwenImageEditPlusResizeStep(ModularPipelineBlocks): resized_cond_images = [] for image in images: image_width, image_height = image.size - + # For VAE encoder (1024x1024 target area) vae_width, vae_height, _ = calculate_dimensions(1024 * 1024, image_width / image_height) - resized_images.append( - components.image_resize_processor.resize(image, height=vae_height, width=vae_width) - ) - + resized_images.append(components.image_resize_processor.resize(image, height=vae_height, width=vae_width)) + # For VL text encoder (384x384 target area) vl_width, vl_height, _ = calculate_dimensions(384 * 384, image_width / image_height) resized_cond_images.append( @@ -541,16 +544,16 @@ class QwenImageEditPlusResizeStep(ModularPipelineBlocks): # 2. GET IMAGE PROMPT # ==================== + # auto_docstring class QwenImageLayeredGetImagePromptStep(ModularPipelineBlocks): """ Auto-caption step that generates a text prompt from the input image if none is provided. - Uses the VL model (text_encoder) to generate a description of the image. - If prompt is already provided, this step passes through unchanged. + Uses the VL model (text_encoder) to generate a description of the image. If prompt is already provided, this step + passes through unchanged. 
Components: - text_encoder (`Qwen2_5_VLForConditionalGeneration`) - processor (`Qwen2VLProcessor`) + text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor (`Qwen2VLProcessor`) Inputs: prompt (`str`, *optional*): @@ -590,7 +593,9 @@ class QwenImageLayeredGetImagePromptStep(ModularPipelineBlocks): @property def inputs(self) -> List[InputParam]: return [ - InputParam.template("prompt", required=False), # it is not required for qwenimage-layered, unlike other pipelines + InputParam.template( + "prompt", required=False + ), # it is not required for qwenimage-layered, unlike other pipelines InputParam( name="resized_image", required=True, @@ -653,15 +658,15 @@ class QwenImageLayeredGetImagePromptStep(ModularPipelineBlocks): # 3. TEXT ENCODER # ==================== + # auto_docstring class QwenImageTextEncoderStep(ModularPipelineBlocks): """ Text Encoder step that generates text embeddings to guide the image generation. Components: - text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use - tokenizer (`Qwen2Tokenizer`): The tokenizer to use - guider (`ClassifierFreeGuidance`) + text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use tokenizer (`Qwen2Tokenizer`): + The tokenizer to use guider (`ClassifierFreeGuidance`) Inputs: prompt (`str`): @@ -681,6 +686,7 @@ class QwenImageTextEncoderStep(ModularPipelineBlocks): negative_prompt_embeds_mask (`Tensor`): The negative prompt embeddings mask. """ + model_name = "qwenimage" def __init__(self): @@ -706,7 +712,6 @@ class QwenImageTextEncoderStep(ModularPipelineBlocks): ), ] - @property def inputs(self) -> List[InputParam]: return [ @@ -786,12 +791,12 @@ class QwenImageTextEncoderStep(ModularPipelineBlocks): # auto_docstring class QwenImageEditTextEncoderStep(ModularPipelineBlocks): """ - Text Encoder step that processes both prompt and image together to generate text embeddings for guiding image generation. + Text Encoder step that processes both prompt and image together to generate text embeddings for guiding image + generation. Components: - text_encoder (`Qwen2_5_VLForConditionalGeneration`) - processor (`Qwen2VLProcessor`) - guider (`ClassifierFreeGuidance`) + text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor (`Qwen2VLProcessor`) guider + (`ClassifierFreeGuidance`) Inputs: prompt (`str`): @@ -811,6 +816,7 @@ class QwenImageEditTextEncoderStep(ModularPipelineBlocks): negative_prompt_embeds_mask (`Tensor`): The negative prompt embeddings mask. """ + model_name = "qwenimage" def __init__(self): @@ -835,7 +841,6 @@ class QwenImageEditTextEncoderStep(ModularPipelineBlocks): ), ] - @property def inputs(self) -> List[InputParam]: return [ @@ -909,12 +914,12 @@ class QwenImageEditTextEncoderStep(ModularPipelineBlocks): # auto_docstring class QwenImageEditPlusTextEncoderStep(ModularPipelineBlocks): """ - Text Encoder step for QwenImage Edit Plus that processes prompt and multiple images together to generate text embeddings for guiding image generation. + Text Encoder step for QwenImage Edit Plus that processes prompt and multiple images together to generate text + embeddings for guiding image generation. 
Components: - text_encoder (`Qwen2_5_VLForConditionalGeneration`) - processor (`Qwen2VLProcessor`) - guider (`ClassifierFreeGuidance`) + text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor (`Qwen2VLProcessor`) guider + (`ClassifierFreeGuidance`) Inputs: prompt (`str`): @@ -922,7 +927,8 @@ class QwenImageEditPlusTextEncoderStep(ModularPipelineBlocks): negative_prompt (`str`, *optional*): The prompt or prompts not to guide the image generation. resized_cond_image (`Tensor`): - The image(s) to encode, can be a single image or list of images, should be resized to 384x384 using resize step + The image(s) to encode, can be a single image or list of images, should be resized to 384x384 using + resize step Outputs: prompt_embeds (`Tensor`): @@ -963,7 +969,6 @@ class QwenImageEditPlusTextEncoderStep(ModularPipelineBlocks): ), ] - @property def inputs(self) -> List[InputParam]: return [ @@ -1042,10 +1047,12 @@ class QwenImageEditPlusTextEncoderStep(ModularPipelineBlocks): # 4. IMAGE PREPROCESS # ==================== + # auto_docstring class QwenImageInpaintProcessImagesInputStep(ModularPipelineBlocks): """ - Image Preprocess step for inpainting task. This processes the image and mask inputs together. Images will be resized to the given height and width. + Image Preprocess step for inpainting task. This processes the image and mask inputs together. Images will be + resized to the given height and width. Components: image_mask_processor (`InpaintProcessor`) @@ -1070,6 +1077,7 @@ class QwenImageInpaintProcessImagesInputStep(ModularPipelineBlocks): mask_overlay_kwargs (`Dict`): The kwargs for the postprocess step to apply the mask overlay """ + model_name = "qwenimage" @property @@ -1152,7 +1160,8 @@ class QwenImageInpaintProcessImagesInputStep(ModularPipelineBlocks): # auto_docstring class QwenImageEditInpaintProcessImagesInputStep(ModularPipelineBlocks): """ - Image Preprocess step for inpainting task. This processes the image and mask inputs together. Images should be resized first. + Image Preprocess step for inpainting task. This processes the image and mask inputs together. Images should be + resized first. 
Components: image_mask_processor (`InpaintProcessor`) @@ -1173,6 +1182,7 @@ class QwenImageEditInpaintProcessImagesInputStep(ModularPipelineBlocks): mask_overlay_kwargs (`Dict`): The kwargs for the postprocess step to apply the mask overlay """ + model_name = "qwenimage-edit" @property @@ -1206,11 +1216,7 @@ class QwenImageEditInpaintProcessImagesInputStep(ModularPipelineBlocks): @property def intermediate_outputs(self) -> List[OutputParam]: return [ - OutputParam( - name="processed_image", - type_hint=torch.Tensor, - description="The processed image" - ), + OutputParam(name="processed_image", type_hint=torch.Tensor, description="The processed image"), OutputParam( name="processed_mask_image", type_hint=torch.Tensor, @@ -1263,6 +1269,7 @@ class QwenImageProcessImagesInputStep(ModularPipelineBlocks): processed_image (`Tensor`): The processed image """ + model_name = "qwenimage" @property @@ -1290,11 +1297,13 @@ class QwenImageProcessImagesInputStep(ModularPipelineBlocks): @property def intermediate_outputs(self) -> List[OutputParam]: - return [OutputParam( - name="processed_image", - type_hint=torch.Tensor, - description="The processed image", - )] + return [ + OutputParam( + name="processed_image", + type_hint=torch.Tensor, + description="The processed image", + ) + ] @staticmethod def check_inputs(height, width, vae_scale_factor): @@ -1340,6 +1349,7 @@ class QwenImageEditProcessImagesInputStep(ModularPipelineBlocks): processed_image (`Tensor`): The processed image """ + model_name = "qwenimage-edit" @property @@ -1361,7 +1371,7 @@ class QwenImageEditProcessImagesInputStep(ModularPipelineBlocks): def inputs(self) -> List[InputParam]: return [ InputParam( - name="resized_image", + name="resized_image", required=True, type_hint=List[PIL.Image.Image], description="The resized image. should be generated using a resize step", @@ -1370,11 +1380,13 @@ class QwenImageEditProcessImagesInputStep(ModularPipelineBlocks): @property def intermediate_outputs(self) -> List[OutputParam]: - return [OutputParam( - name="processed_image", - type_hint=torch.Tensor, - description="The processed image", - )] + return [ + OutputParam( + name="processed_image", + type_hint=torch.Tensor, + description="The processed image", + ) + ] @torch.no_grad() def __call__(self, components: QwenImageModularPipeline, state: PipelineState): @@ -1395,7 +1407,8 @@ class QwenImageEditProcessImagesInputStep(ModularPipelineBlocks): # auto_docstring class QwenImageEditPlusProcessImagesInputStep(ModularPipelineBlocks): """ - Image Preprocess step. Images can be resized first. If a list of images is provided, will return a list of processed images. + Image Preprocess step. Images can be resized first. If a list of images is provided, will return a list of + processed images. Components: image_processor (`VaeImageProcessor`) @@ -1408,6 +1421,7 @@ class QwenImageEditPlusProcessImagesInputStep(ModularPipelineBlocks): processed_image (`Tensor`): The processed image """ + model_name = "qwenimage-edit-plus" @property @@ -1427,20 +1441,24 @@ class QwenImageEditPlusProcessImagesInputStep(ModularPipelineBlocks): @property def inputs(self) -> List[InputParam]: - return [InputParam( - name="resized_image", - required=True, - type_hint=List[PIL.Image.Image], - description="The resized image. should be generated using a resize step", - )] + return [ + InputParam( + name="resized_image", + required=True, + type_hint=List[PIL.Image.Image], + description="The resized image. 
should be generated using a resize step", + ) + ] @property def intermediate_outputs(self) -> List[OutputParam]: - return [OutputParam( - name="processed_image", - type_hint=torch.Tensor, - description="The processed image", - )] + return [ + OutputParam( + name="processed_image", + type_hint=torch.Tensor, + description="The processed image", + ) + ] @torch.no_grad() def __call__(self, components: QwenImageModularPipeline, state: PipelineState): @@ -1472,6 +1490,7 @@ class QwenImageEditPlusProcessImagesInputStep(ModularPipelineBlocks): # 5. VAE ENCODER # ==================== + # auto_docstring class QwenImageVaeEncoderStep(ModularPipelineBlocks): """ @@ -1509,7 +1528,9 @@ class QwenImageVaeEncoderStep(ModularPipelineBlocks): output (OutputParam, optional): Output parameter for the image latents. Defaults to "image_latents". """ if input is None: - input = InputParam(name="processed_image", required=True, type_hint=torch.Tensor, description="The image tensor to encode") + input = InputParam( + name="processed_image", required=True, type_hint=torch.Tensor, description="The image tensor to encode" + ) if output is None: output = OutputParam.template("image_latents") @@ -1539,13 +1560,13 @@ class QwenImageVaeEncoderStep(ModularPipelineBlocks): @property def inputs(self) -> List[InputParam]: return [ - self._input, # default is "processed_image" + self._input, # default is "processed_image" InputParam.template("generator"), ] @property def intermediate_outputs(self) -> List[OutputParam]: - return [self._output] # default is "image_latents" + return [self._output] # default is "image_latents" @torch.no_grad() def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -> PipelineState: @@ -1588,9 +1609,8 @@ class QwenImageControlNetVaeEncoderStep(ModularPipelineBlocks): VAE Encoder step that converts `control_image` into latent representations control_image_latents. Components: - vae (`AutoencoderKLQwenImage`) - controlnet (`QwenImageControlNetModel`) - control_image_processor (`VaeImageProcessor`) + vae (`AutoencoderKLQwenImage`) controlnet (`QwenImageControlNetModel`) control_image_processor + (`VaeImageProcessor`) Inputs: control_image (`Image`): @@ -1606,6 +1626,7 @@ class QwenImageControlNetVaeEncoderStep(ModularPipelineBlocks): control_image_latents (`Tensor`): The latents representing the control image """ + model_name = "qwenimage" @property @@ -1720,6 +1741,7 @@ class QwenImageControlNetVaeEncoderStep(ModularPipelineBlocks): # 6. PERMUTE LATENTS # ==================== + # auto_docstring class QwenImageLayeredPermuteLatentsStep(ModularPipelineBlocks): """ @@ -1733,11 +1755,12 @@ class QwenImageLayeredPermuteLatentsStep(ModularPipelineBlocks): image_latents (`Tensor`): The latent representation of the input image. (permuted from [B, C, 1, H, W] to [B, 1, C, H, W]) """ + model_name = "qwenimage-layered" @property def description(self) -> str: - return f"Permute image latents from (B, C, 1, H, W) to (B, 1, C, H, W) for Layered packing." + return "Permute image latents from (B, C, 1, H, W) to (B, 1, C, H, W) for Layered packing." 
@property def inputs(self) -> List[InputParam]: @@ -1760,4 +1783,4 @@ class QwenImageLayeredPermuteLatentsStep(ModularPipelineBlocks): block_state.image_latents = latents.permute(0, 2, 1, 3, 4) self.set_block_state(state, block_state) - return components, state \ No newline at end of file + return components, state diff --git a/src/diffusers/modular_pipelines/qwenimage/inputs.py b/src/diffusers/modular_pipelines/qwenimage/inputs.py index 0e03242e5e..818bbca5ed 100644 --- a/src/diffusers/modular_pipelines/qwenimage/inputs.py +++ b/src/diffusers/modular_pipelines/qwenimage/inputs.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import List, Tuple, Optional +from typing import List, Optional, Tuple import torch @@ -117,7 +117,8 @@ class QwenImageTextInputsStep(ModularPipelineBlocks): 1. Determines `batch_size` and `dtype` based on `prompt_embeds` 2. Ensures all text embeddings have consistent batch sizes (batch_size * num_images_per_prompt) - This block should be placed after all encoder steps to process the text embeddings before they are used in subsequent pipeline steps. + This block should be placed after all encoder steps to process the text embeddings before they are used in + subsequent pipeline steps. Inputs: num_images_per_prompt (`int`, *optional*, defaults to 1): @@ -145,6 +146,7 @@ class QwenImageTextInputsStep(ModularPipelineBlocks): negative_prompt_embeds_mask (`Tensor`): The negative prompt embeddings mask. (batch-expanded) """ + model_name = "qwenimage" @property @@ -271,8 +273,8 @@ class QwenImageAdditionalInputsStep(ModularPipelineBlocks): num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. batch_size (`int`, *optional*, defaults to 1): - Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be - generated in input step. + Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can + be generated in input step. height (`int`, *optional*): The height in pixels of the generated image. 
width (`int`, *optional*): @@ -300,7 +302,7 @@ class QwenImageAdditionalInputsStep(ModularPipelineBlocks): self, image_latent_inputs: Optional[List[InputParam]] = None, additional_batch_inputs: Optional[List[InputParam]] = None, - ): + ): # by default, process `image_latents` if image_latent_inputs is None: image_latent_inputs = [InputParam.template("image_latents")] @@ -319,7 +321,9 @@ class QwenImageAdditionalInputsStep(ModularPipelineBlocks): else: for input_param in additional_batch_inputs: if not isinstance(input_param, InputParam): - raise ValueError(f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}") + raise ValueError( + f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}" + ) self._image_latent_inputs = image_latent_inputs self._additional_batch_inputs = additional_batch_inputs @@ -376,13 +380,17 @@ class QwenImageAdditionalInputsStep(ModularPipelineBlocks): name="image_width", type_hint=int, description="The image width calculated from the image latents dimension", - ) + ), ] # `height`/`width` are not new outputs, but they will be updated if any image latent inputs are provided if len(self._image_latent_inputs) > 0: - outputs.append(OutputParam(name="height", type_hint=int, description="if not provided, updated to image height")) - outputs.append(OutputParam(name="width", type_hint=int, description="if not provided, updated to image width")) + outputs.append( + OutputParam(name="height", type_hint=int, description="if not provided, updated to image height") + ) + outputs.append( + OutputParam(name="width", type_hint=int, description="if not provided, updated to image width") + ) # image latent inputs are modified in place (patchified and batch-expanded) for input_param in self._image_latent_inputs: @@ -479,8 +487,8 @@ class QwenImageEditPlusAdditionalInputsStep(ModularPipelineBlocks): num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. batch_size (`int`, *optional*, defaults to 1): - Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be - generated in input step. + Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can + be generated in input step. height (`int`, *optional*): The height in pixels of the generated image. 
width (`int`, *optional*): @@ -526,7 +534,9 @@ class QwenImageEditPlusAdditionalInputsStep(ModularPipelineBlocks): else: for input_param in additional_batch_inputs: if not isinstance(input_param, InputParam): - raise ValueError(f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}") + raise ValueError( + f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}" + ) self._image_latent_inputs = image_latent_inputs self._additional_batch_inputs = additional_batch_inputs @@ -587,11 +597,15 @@ class QwenImageEditPlusAdditionalInputsStep(ModularPipelineBlocks): description="The image widths calculated from the image latents dimension", ), ] - + # `height`/`width` are updated if any image latent inputs are provided if len(self._image_latent_inputs) > 0: - outputs.append(OutputParam(name="height", type_hint=int, description="if not provided, updated to image height")) - outputs.append(OutputParam(name="width", type_hint=int, description="if not provided, updated to image width")) + outputs.append( + OutputParam(name="height", type_hint=int, description="if not provided, updated to image height") + ) + outputs.append( + OutputParam(name="width", type_hint=int, description="if not provided, updated to image width") + ) # image latent inputs are modified in place (patchified, concatenated, and batch-expanded) for input_param in self._image_latent_inputs: @@ -686,11 +700,13 @@ class QwenImageEditPlusAdditionalInputsStep(ModularPipelineBlocks): # same as QwenImageAdditionalInputsStep, but with layered pachifier. + # auto_docstring class QwenImageLayeredAdditionalInputsStep(ModularPipelineBlocks): """ Input processing step for Layered that: - 1. For image latent inputs: Updates height/width if None, patchifies with layered pachifier, and expands batch size + 1. For image latent inputs: Updates height/width if None, patchifies with layered pachifier, and expands batch + size 2. For additional batch inputs: Expands batch dimensions to match final batch size Configured inputs: @@ -705,8 +721,8 @@ class QwenImageLayeredAdditionalInputsStep(ModularPipelineBlocks): num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. batch_size (`int`, *optional*, defaults to 1): - Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be - generated in input step. + Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can + be generated in input step. image_latents (`Tensor`): image latents used to guide the image generation. Can be generated from vae_encoder step. @@ -720,8 +736,8 @@ class QwenImageLayeredAdditionalInputsStep(ModularPipelineBlocks): width (`int`): if not provided, updated to image width image_latents (`Tensor`): - image latents used to guide the image generation. Can be generated from vae_encoder step. (patchified with layered - pachifier and batch-expanded) + image latents used to guide the image generation. Can be generated from vae_encoder step. 
(patchified + with layered pachifier and batch-expanded) """ model_name = "qwenimage-layered" @@ -748,7 +764,9 @@ class QwenImageLayeredAdditionalInputsStep(ModularPipelineBlocks): else: for input_param in additional_batch_inputs: if not isinstance(input_param, InputParam): - raise ValueError(f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}") + raise ValueError( + f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}" + ) self._image_latent_inputs = image_latent_inputs self._additional_batch_inputs = additional_batch_inputs @@ -808,8 +826,12 @@ class QwenImageLayeredAdditionalInputsStep(ModularPipelineBlocks): ] if len(self._image_latent_inputs) > 0: - outputs.append(OutputParam(name="height", type_hint=int, description="if not provided, updated to image height")) - outputs.append(OutputParam(name="width", type_hint=int, description="if not provided, updated to image width")) + outputs.append( + OutputParam(name="height", type_hint=int, description="if not provided, updated to image height") + ) + outputs.append( + OutputParam(name="width", type_hint=int, description="if not provided, updated to image width") + ) # Add outputs for image latent inputs (patchified with layered pachifier and batch-expanded) for input_param in self._image_latent_inputs: @@ -895,10 +917,11 @@ class QwenImageControlNetInputsStep(ModularPipelineBlocks): Inputs: control_image_latents (`Tensor`): - The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step. + The control image latents to use for the denoising process. Can be generated in controlnet vae encoder + step. batch_size (`int`, *optional*, defaults to 1): - Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be - generated in input step. + Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can + be generated in input step. num_images_per_prompt (`int`, *optional*, defaults to 1): The number of images to generate per prompt. height (`int`, *optional*): @@ -914,6 +937,7 @@ class QwenImageControlNetInputsStep(ModularPipelineBlocks): width (`int`): if not provided, updated to control image width """ + model_name = "qwenimage" @property @@ -923,17 +947,26 @@ class QwenImageControlNetInputsStep(ModularPipelineBlocks): @property def inputs(self) -> List[InputParam]: return [ - InputParam(name="control_image_latents", required=True, type_hint=torch.Tensor, description="The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step."), + InputParam( + name="control_image_latents", + required=True, + type_hint=torch.Tensor, + description="The control image latents to use for the denoising process. 
Can be generated in controlnet vae encoder step.",
+            ),
             InputParam.template("batch_size"),
             InputParam.template("num_images_per_prompt"),
             InputParam.template("height"),
             InputParam.template("width"),
         ]
-    
+
     @property
     def intermediate_outputs(self) -> List[OutputParam]:
         return [
-            OutputParam(name="control_image_latents", type_hint=torch.Tensor, description="The control image latents (patchified and batch-expanded)."),
+            OutputParam(
+                name="control_image_latents",
+                type_hint=torch.Tensor,
+                description="The control image latents (patchified and batch-expanded).",
+            ),
             OutputParam(name="height", type_hint=int, description="if not provided, updated to control image height"),
             OutputParam(name="width", type_hint=int, description="if not provided, updated to control image width"),
         ]
diff --git a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage.py b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage.py
index b50e41bb50..5837799d34 100644
--- a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage.py
+++ b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage.py
@@ -13,9 +13,10 @@
 # limitations under the License.
 
 import torch
+
 from ...utils import logging
 from ..modular_pipeline import AutoPipelineBlocks, ConditionalPipelineBlocks, SequentialPipelineBlocks
-from ..modular_pipeline_utils import InsertableDict, OutputParam, InputParam
+from ..modular_pipeline_utils import InputParam, InsertableDict, OutputParam
 from .before_denoise import (
     QwenImageControlNetBeforeDenoiserStep,
     QwenImageCreateMaskLatentsStep,
@@ -65,9 +66,8 @@ class QwenImageAutoTextEncoderStep(AutoPipelineBlocks):
     Text encoder step that encodes the text prompt into a text embedding. This is an auto pipeline block.
 
     Components:
-        text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use
-        tokenizer (`Qwen2Tokenizer`): The tokenizer to use
-        guider (`ClassifierFreeGuidance`)
+        text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use tokenizer (`Qwen2Tokenizer`):
+        The tokenizer to use guider (`ClassifierFreeGuidance`)
 
     Inputs:
         prompt (`str`, *optional*):
@@ -114,8 +114,7 @@ class QwenImageInpaintVaeEncoderStep(SequentialPipelineBlocks):
     - Creates `image_latents`.
 
     Components:
-        image_mask_processor (`InpaintProcessor`)
-        vae (`AutoencoderKLQwenImage`)
+        image_mask_processor (`InpaintProcessor`) vae (`AutoencoderKLQwenImage`)
 
     Inputs:
         mask_image (`Image`):
@@ -162,8 +161,7 @@ class QwenImageImg2ImgVaeEncoderStep(SequentialPipelineBlocks):
     Vae encoder step that preprocesses and encodes the image inputs into their latent representations.
 
     Components:
-        image_processor (`VaeImageProcessor`)
-        vae (`AutoencoderKLQwenImage`)
+        image_processor (`VaeImageProcessor`) vae (`AutoencoderKLQwenImage`)
 
     Inputs:
         image (`Union[Image, List]`):
@@ -218,9 +216,8 @@ class QwenImageOptionalControlNetVaeEncoderStep(AutoPipelineBlocks):
     - if `control_image` is not provided, step will be skipped.
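The skip behavior described above comes from the auto-block trigger mechanism; a hedged sketch of how such an optional step can be declared (the wrapper class name is hypothetical, and `block_trigger_inputs` is assumed to be the trigger attribute used by the modular framework):

    # Hypothetical optional wrapper (import paths assumed to mirror this PR's
    # modules): the sub-block runs only when its trigger input
    # (`control_image`) is present in the pipeline state; otherwise the whole
    # step is skipped.
    from diffusers.modular_pipelines import AutoPipelineBlocks
    from diffusers.modular_pipelines.qwenimage.encoders import QwenImageControlNetVaeEncoderStep

    class OptionalControlNetVaeEncoder(AutoPipelineBlocks):
        block_classes = [QwenImageControlNetVaeEncoderStep]
        block_names = ["controlnet_vae_encoder"]
        block_trigger_inputs = ["control_image"]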
     Components:
-        vae (`AutoencoderKLQwenImage`)
-        controlnet (`QwenImageControlNetModel`)
-        control_image_processor (`VaeImageProcessor`)
+        vae (`AutoencoderKLQwenImage`) controlnet (`QwenImageControlNetModel`) control_image_processor
+        (`VaeImageProcessor`)
 
     Inputs:
         control_image (`Image`, *optional*):
@@ -380,7 +377,9 @@ class QwenImageInpaintInputStep(SequentialPipelineBlocks):
     block_classes = [
         QwenImageTextInputsStep(),
         QwenImageAdditionalInputsStep(
-            additional_batch_inputs=[InputParam(name="processed_mask_image", type_hint=torch.Tensor, description="The processed mask image")]
+            additional_batch_inputs=[
+                InputParam(name="processed_mask_image", type_hint=torch.Tensor, description="The processed mask image")
+            ]
         ),
     ]
     block_names = ["text_inputs", "additional_inputs"]
@@ -401,15 +400,14 @@ class QwenImageInpaintPrepareLatentsStep(SequentialPipelineBlocks):
     - Create the patchified latents `mask` based on the processed mask image.
 
     Components:
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
-        pachifier (`QwenImagePachifier`)
+        scheduler (`FlowMatchEulerDiscreteScheduler`) pachifier (`QwenImagePachifier`)
 
     Inputs:
         latents (`Tensor`):
             The initial random noise. Can be generated in the prepare latents step.
         image_latents (`Tensor`):
-            image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be generated from
-            vae encoder and updated in input step.)
+            image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be
+            generated from vae encoder and updated in input step.)
         timesteps (`Tensor`):
             The timesteps to use for the denoising process. Can be generated in set_timesteps step.
         processed_mask_image (`Tensor`):
@@ -450,13 +448,12 @@ class QwenImageInpaintPrepareLatentsStep(SequentialPipelineBlocks):
 # auto_docstring
 class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
     """
-    step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs (timesteps, latents, rope inputs etc.).
+    Step that denoises noise into an image for the text2image task. It includes the denoise loop, as well as
+    preparing the inputs (timesteps, latents, rope inputs etc.).
 
     Components:
-        pachifier (`QwenImagePachifier`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
-        guider (`ClassifierFreeGuidance`)
-        transformer (`QwenImageTransformer2DModel`)
+        pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
+        (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
 
     Inputs:
         num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -524,13 +521,12 @@ class QwenImageInpaintCoreDenoiseStep(SequentialPipelineBlocks):
 # auto_docstring
 class QwenImageInpaintCoreDenoiseStep(SequentialPipelineBlocks):
     """
-    Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint task.
+    Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for the
+    inpaint task.
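Strength-based preparation like the inpaint prepare-latents step above boils down to truncating the timestep schedule and noising the clean image latents to the first kept timestep; an illustrative sketch, not the block's exact code (the latent shape is invented):

    import torch
    from diffusers import FlowMatchEulerDiscreteScheduler

    scheduler = FlowMatchEulerDiscreteScheduler()
    num_inference_steps, strength = 50, 0.9
    scheduler.set_timesteps(num_inference_steps)

    # keep only the last `num_inference_steps * strength` timesteps
    t_start = max(num_inference_steps - int(num_inference_steps * strength), 0)
    timesteps = scheduler.timesteps[t_start:]

    image_latents = torch.randn(1, 1024, 64)  # patchified image latents (invented shape)
    noise = torch.randn_like(image_latents)
    # noise the clean latents to the first kept timestep
    latents = scheduler.scale_noise(image_latents, timesteps[:1], noise)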
     Components:
-        pachifier (`QwenImagePachifier`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
-        guider (`ClassifierFreeGuidance`)
-        transformer (`QwenImageTransformer2DModel`)
+        pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
+        (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
 
     Inputs:
         num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -606,13 +602,12 @@ class QwenImageImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
 # auto_docstring
 class QwenImageImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
     """
-    Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img task.
+    Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for the
+    img2img task.
 
     Components:
-        pachifier (`QwenImagePachifier`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
-        guider (`ClassifierFreeGuidance`)
-        transformer (`QwenImageTransformer2DModel`)
+        pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
+        (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
 
     Inputs:
         num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -686,14 +681,12 @@ class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
 # auto_docstring
 class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
     """
-    step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs (timesteps, latents, rope inputs etc.).
+    Step that denoises noise into an image for the text2image task. It includes the denoise loop, as well as
+    preparing the inputs (timesteps, latents, rope inputs etc.).
 
     Components:
-        pachifier (`QwenImagePachifier`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
-        controlnet (`QwenImageControlNetModel`)
-        guider (`ClassifierFreeGuidance`)
-        transformer (`QwenImageTransformer2DModel`)
+        pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) controlnet
+        (`QwenImageControlNetModel`) guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
 
     Inputs:
         num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -707,7 +700,8 @@ class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
         negative_prompt_embeds_mask (`Tensor`, *optional*):
             mask for the negative text embeddings. Can be generated from text_encoder step.
         control_image_latents (`Tensor`):
-            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.
+            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
+            step.
         height (`int`, *optional*):
             The height in pixels of the generated image.
         width (`int`, *optional*):
@@ -773,14 +767,12 @@ class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
 # auto_docstring
 class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
     """
-    Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint task.
+    Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for the
+    inpaint task.
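The ControlNet core denoise steps expose `control_guidance_start`/`control_guidance_end` inputs (see the auto-blocks docstring further below), which bound the fraction of the schedule during which ControlNet is applied; a common way to turn them into a per-step factor looks roughly like this (an illustrative sketch, not the block's exact code):

    # 1.0 while the step's progress falls inside [start, end), else 0.0; the
    # effective per-step scale is controlnet_conditioning_scale * keep[i].
    control_guidance_start, control_guidance_end = 0.0, 1.0
    num_inference_steps = 50

    keep = [
        1.0 if control_guidance_start <= i / num_inference_steps < control_guidance_end else 0.0
        for i in range(num_inference_steps)
    ]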
     Components:
-        pachifier (`QwenImagePachifier`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
-        controlnet (`QwenImageControlNetModel`)
-        guider (`ClassifierFreeGuidance`)
-        transformer (`QwenImageTransformer2DModel`)
+        pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) controlnet
+        (`QwenImageControlNetModel`) guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
 
     Inputs:
         num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -802,7 +794,8 @@ class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
         processed_mask_image (`Tensor`, *optional*):
             The processed mask image
         control_image_latents (`Tensor`):
-            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.
+            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
+            step.
         latents (`Tensor`, *optional*):
             Pre-generated noisy latents for image generation.
         generator (`Generator`, *optional*):
@@ -868,14 +861,12 @@ class QwenImageControlNetImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
 # auto_docstring
 class QwenImageControlNetImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
     """
-    Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img task.
+    Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for the
+    img2img task.
 
     Components:
-        pachifier (`QwenImagePachifier`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
-        controlnet (`QwenImageControlNetModel`)
-        guider (`ClassifierFreeGuidance`)
-        transformer (`QwenImageTransformer2DModel`)
+        pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) controlnet
+        (`QwenImageControlNetModel`) guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
 
     Inputs:
         num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -895,7 +886,8 @@ class QwenImageControlNetImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
         image_latents (`Tensor`):
             image latents used to guide the image generation. Can be generated from vae_encoder step.
         control_image_latents (`Tensor`):
-            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.
+            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
+            step.
         latents (`Tensor`, *optional*):
             Pre-generated noisy latents for image generation.
         generator (`Generator`, *optional*):
@@ -1030,12 +1022,12 @@ class QwenImageDecodeStep(SequentialPipelineBlocks):
     Decode step that decodes the latents to images and postprocesses the generated image.
 
     Components:
-        vae (`AutoencoderKLQwenImage`)
-        image_processor (`VaeImageProcessor`)
+        vae (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`)
 
     Inputs:
         latents (`Tensor`):
-            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.
+            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
+            step.
         output_type (`str`, *optional*, defaults to pil):
             Output format: 'pil', 'np', 'pt'.
 
@@ -1057,19 +1049,21 @@ class QwenImageInpaintDecodeStep(SequentialPipelineBlocks):
 # auto_docstring
 class QwenImageInpaintDecodeStep(SequentialPipelineBlocks):
     """
-    Decode step that decodes the latents to images and postprocess the generated image, optional apply the mask overally to the original image.
+    Decode step that decodes the latents to images and postprocesses the generated image, optionally applying the
+    mask overlay to the original image.
 
     Components:
-        vae (`AutoencoderKLQwenImage`)
-        image_mask_processor (`InpaintProcessor`)
+        vae (`AutoencoderKLQwenImage`) image_mask_processor (`InpaintProcessor`)
 
     Inputs:
         latents (`Tensor`):
-            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.
+            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
+            step.
         output_type (`str`, *optional*, defaults to pil):
             Output format: 'pil', 'np', 'pt'.
         mask_overlay_kwargs (`Dict`, *optional*):
-            The kwargs for the postprocess step to apply the mask overlay. generated in InpaintProcessImagesInputStep.
+            The kwargs for the postprocess step to apply the mask overlay. Generated in
+            InpaintProcessImagesInputStep.
 
     Outputs:
         images (`List`):
@@ -1125,17 +1119,11 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
     - for text-to-image generation, all you need to provide is `prompt`
 
     Components:
-        text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use
-        tokenizer (`Qwen2Tokenizer`): The tokenizer to use
-        guider (`ClassifierFreeGuidance`)
-        image_mask_processor (`InpaintProcessor`)
-        vae (`AutoencoderKLQwenImage`)
-        image_processor (`VaeImageProcessor`)
-        controlnet (`QwenImageControlNetModel`)
-        control_image_processor (`VaeImageProcessor`)
-        pachifier (`QwenImagePachifier`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
-        transformer (`QwenImageTransformer2DModel`)
+        text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use tokenizer (`Qwen2Tokenizer`):
+        The tokenizer to use guider (`ClassifierFreeGuidance`) image_mask_processor (`InpaintProcessor`) vae
+        (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`) controlnet (`QwenImageControlNetModel`)
+        control_image_processor (`VaeImageProcessor`) pachifier (`QwenImagePachifier`) scheduler
+        (`FlowMatchEulerDiscreteScheduler`) transformer (`QwenImageTransformer2DModel`)
 
     Inputs:
         prompt (`str`, *optional*):
@@ -1185,7 +1173,8 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
         strength (`float`, *optional*, defaults to 0.9):
             Strength for img2img/inpainting.
         control_image_latents (`Tensor`, *optional*):
-            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.
+            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
+            step.
         control_guidance_start (`float`, *optional*, defaults to 0.0):
             When to start applying ControlNet.
         control_guidance_end (`float`, *optional*, defaults to 1.0):
@@ -1195,7 +1184,8 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
         output_type (`str`, *optional*, defaults to pil):
             Output format: 'pil', 'np', 'pt'.
         mask_overlay_kwargs (`Dict`, *optional*):
-            The kwargs for the postprocess step to apply the mask overlay. generated in InpaintProcessImagesInputStep.
+            The kwargs for the postprocess step to apply the mask overlay. Generated in
+            InpaintProcessImagesInputStep.
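Since the auto blocks select among the text2image, img2img, inpaint, and ControlNet paths based on which of the inputs above are provided, end-to-end usage stays small; a hedged sketch (the repo id is a placeholder, and the loading/calling pattern follows the modular-pipelines API as we understand it):

    import torch

    from diffusers.modular_pipelines.qwenimage.modular_blocks_qwenimage import QwenImageAutoBlocks

    blocks = QwenImageAutoBlocks()
    pipe = blocks.init_pipeline("<modular-repo-id>")  # placeholder repo id
    pipe.load_components(torch_dtype=torch.bfloat16)
    pipe.to("cuda")

    # text-to-image: only `prompt` is needed; passing `image` (and optionally
    # `mask_image` or `control_image`) would select the other paths instead.
    images = pipe(prompt="a cat wearing sunglasses", output="images")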
     Outputs:
         images (`List`):
diff --git a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit.py b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit.py
index 0c1fa00842..e1e5c43354 100644
--- a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit.py
+++ b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit.py
@@ -13,11 +13,12 @@
 # limitations under the License.
 
 from typing import Optional
+
 import torch
 
 from ...utils import logging
 from ..modular_pipeline import AutoPipelineBlocks, ConditionalPipelineBlocks, SequentialPipelineBlocks
-from ..modular_pipeline_utils import InsertableDict, OutputParam, InputParam
+from ..modular_pipeline_utils import InputParam, InsertableDict, OutputParam
 from .before_denoise import (
     QwenImageCreateMaskLatentsStep,
     QwenImageEditRoPEInputsStep,
@@ -63,10 +64,8 @@ class QwenImageEditVLEncoderStep(SequentialPipelineBlocks):
     QwenImage-Edit VL encoder step that encodes the image and text prompts together.
 
     Components:
-        image_resize_processor (`VaeImageProcessor`)
-        text_encoder (`Qwen2_5_VLForConditionalGeneration`)
-        processor (`Qwen2VLProcessor`)
-        guider (`ClassifierFreeGuidance`)
+        image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor
+        (`Qwen2VLProcessor`) guider (`ClassifierFreeGuidance`)
 
     Inputs:
         image (`Union[Image, List]`):
@@ -113,9 +112,8 @@ class QwenImageEditVaeEncoderStep(SequentialPipelineBlocks):
     Vae encoder step that encodes the image inputs into their latent representations.
 
     Components:
-        image_resize_processor (`VaeImageProcessor`)
-        image_processor (`VaeImageProcessor`)
-        vae (`AutoencoderKLQwenImage`)
+        image_resize_processor (`VaeImageProcessor`) image_processor (`VaeImageProcessor`) vae
+        (`AutoencoderKLQwenImage`)
 
     Inputs:
         image (`Union[Image, List]`):
@@ -155,9 +153,8 @@ class QwenImageEditInpaintVaeEncoderStep(SequentialPipelineBlocks):
     - create image latents.
 
     Components:
-        image_resize_processor (`VaeImageProcessor`)
-        image_mask_processor (`InpaintProcessor`)
-        vae (`AutoencoderKLQwenImage`)
+        image_resize_processor (`VaeImageProcessor`) image_mask_processor (`InpaintProcessor`) vae
+        (`AutoencoderKLQwenImage`)
 
     Inputs:
         image (`Union[Image, List]`):
@@ -354,7 +351,10 @@ class QwenImageEditInpaintInputStep(SequentialPipelineBlocks):
     model_name = "qwenimage-edit"
     block_classes = [
         QwenImageTextInputsStep(),
-        QwenImageAdditionalInputsStep(additional_batch_inputs=[InputParam(name="processed_mask_image", type_hint=torch.Tensor, description="The processed mask image")]
+        QwenImageAdditionalInputsStep(
+            additional_batch_inputs=[
+                InputParam(name="processed_mask_image", type_hint=torch.Tensor, description="The processed mask image")
+            ]
         ),
     ]
     block_names = ["text_inputs", "additional_inputs"]
@@ -377,15 +377,14 @@ class QwenImageEditInpaintPrepareLatentsStep(SequentialPipelineBlocks):
     - Create the patchified latents `mask` based on the processed mask image.
 
     Components:
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
-        pachifier (`QwenImagePachifier`)
+        scheduler (`FlowMatchEulerDiscreteScheduler`) pachifier (`QwenImagePachifier`)
 
     Inputs:
         latents (`Tensor`):
             The initial random noise. Can be generated in the prepare latents step.
         image_latents (`Tensor`):
-            image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be generated from
-            vae encoder and updated in input step.)
+            image latents used to guide the image generation. Can be generated from vae_encoder step.
(Can be + generated from vae encoder and updated in input step.) timesteps (`Tensor`): The timesteps to use for the denoising process. Can be generated in set_timesteps step. processed_mask_image (`Tensor`): @@ -426,10 +425,8 @@ class QwenImageEditCoreDenoiseStep(SequentialPipelineBlocks): Core denoising workflow for QwenImage-Edit edit (img2img) task. Components: - pachifier (`QwenImagePachifier`) - scheduler (`FlowMatchEulerDiscreteScheduler`) - guider (`ClassifierFreeGuidance`) - transformer (`QwenImageTransformer2DModel`) + pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider + (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`) Inputs: num_images_per_prompt (`int`, *optional*, defaults to 1): @@ -502,10 +499,8 @@ class QwenImageEditInpaintCoreDenoiseStep(SequentialPipelineBlocks): Core denoising workflow for QwenImage-Edit edit inpaint task. Components: - pachifier (`QwenImagePachifier`) - scheduler (`FlowMatchEulerDiscreteScheduler`) - guider (`ClassifierFreeGuidance`) - transformer (`QwenImageTransformer2DModel`) + pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider + (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`) Inputs: num_images_per_prompt (`int`, *optional*, defaults to 1): @@ -623,12 +618,12 @@ class QwenImageEditDecodeStep(SequentialPipelineBlocks): Decode step that decodes the latents to images and postprocess the generated image. Components: - vae (`AutoencoderKLQwenImage`) - image_processor (`VaeImageProcessor`) + vae (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`) Inputs: latents (`Tensor`): - The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step. + The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise + step. output_type (`str`, *optional*, defaults to pil): Output format: 'pil', 'np', 'pt'. @@ -650,19 +645,21 @@ class QwenImageEditDecodeStep(SequentialPipelineBlocks): # auto_docstring class QwenImageEditInpaintDecodeStep(SequentialPipelineBlocks): """ - Decode step that decodes the latents to images and postprocess the generated image, optionally apply the mask overlay to the original image. + Decode step that decodes the latents to images and postprocess the generated image, optionally apply the mask + overlay to the original image. Components: - vae (`AutoencoderKLQwenImage`) - image_mask_processor (`InpaintProcessor`) + vae (`AutoencoderKLQwenImage`) image_mask_processor (`InpaintProcessor`) Inputs: latents (`Tensor`): - The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step. + The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise + step. output_type (`str`, *optional*, defaults to pil): Output format: 'pil', 'np', 'pt'. mask_overlay_kwargs (`Dict`, *optional*): - The kwargs for the postprocess step to apply the mask overlay. generated in InpaintProcessImagesInputStep. + The kwargs for the postprocess step to apply the mask overlay. generated in + InpaintProcessImagesInputStep. Outputs: images (`List`): @@ -719,19 +716,14 @@ class QwenImageEditAutoBlocks(SequentialPipelineBlocks): """ Auto Modular pipeline for edit (img2img) and edit inpaint tasks using QwenImage-Edit. 
- for edit (img2img) generation, you need to provide `image` - - for edit inpainting, you need to provide `mask_image` and `image`, optionally you can provide `padding_mask_crop` + - for edit inpainting, you need to provide `mask_image` and `image`, optionally you can provide + `padding_mask_crop` Components: - image_resize_processor (`VaeImageProcessor`) - text_encoder (`Qwen2_5_VLForConditionalGeneration`) - processor (`Qwen2VLProcessor`) - guider (`ClassifierFreeGuidance`) - image_mask_processor (`InpaintProcessor`) - vae (`AutoencoderKLQwenImage`) - image_processor (`VaeImageProcessor`) - pachifier (`QwenImagePachifier`) - scheduler (`FlowMatchEulerDiscreteScheduler`) - transformer (`QwenImageTransformer2DModel`) + image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor + (`Qwen2VLProcessor`) guider (`ClassifierFreeGuidance`) image_mask_processor (`InpaintProcessor`) vae + (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`) pachifier (`QwenImagePachifier`) scheduler + (`FlowMatchEulerDiscreteScheduler`) transformer (`QwenImageTransformer2DModel`) Inputs: image (`Union[Image, List]`): @@ -771,7 +763,8 @@ class QwenImageEditAutoBlocks(SequentialPipelineBlocks): output_type (`str`, *optional*, defaults to pil): Output format: 'pil', 'np', 'pt'. mask_overlay_kwargs (`Dict`, *optional*): - The kwargs for the postprocess step to apply the mask overlay. generated in InpaintProcessImagesInputStep. + The kwargs for the postprocess step to apply the mask overlay. generated in + InpaintProcessImagesInputStep. Outputs: images (`List`): diff --git a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit_plus.py b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit_plus.py index 726c000f4b..37656cef5d 100644 --- a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit_plus.py +++ b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit_plus.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import torch from ...utils import logging from ..modular_pipeline import SequentialPipelineBlocks -from ..modular_pipeline_utils import InsertableDict, OutputParam, InputParam +from ..modular_pipeline_utils import InsertableDict, OutputParam from .before_denoise import ( QwenImageEditPlusRoPEInputsStep, QwenImagePrepareLatentsStep, @@ -55,10 +54,8 @@ class QwenImageEditPlusVLEncoderStep(SequentialPipelineBlocks): QwenImage-Edit Plus VL encoder step that encodes the image and text prompts together. Components: - image_resize_processor (`VaeImageProcessor`) - text_encoder (`Qwen2_5_VLForConditionalGeneration`) - processor (`Qwen2VLProcessor`) - guider (`ClassifierFreeGuidance`) + image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor + (`Qwen2VLProcessor`) guider (`ClassifierFreeGuidance`) Inputs: image (`Union[Image, List]`): @@ -107,9 +104,8 @@ class QwenImageEditPlusVaeEncoderStep(SequentialPipelineBlocks): Each image is resized independently based on its own aspect ratio to 1024x1024 target area. 
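Resizing to a fixed target area while keeping the aspect ratio amounts to scaling both sides by sqrt(target_area / (width * height)); a small illustration (the helper name and the rounding multiple are our own, not the library's):

    import math

    def fit_to_area(width, height, target_area=1024 * 1024, multiple=32):
        scale = math.sqrt(target_area / (width * height))
        return (
            max(multiple, int(width * scale / multiple) * multiple),
            max(multiple, int(height * scale / multiple) * multiple),
        )

    print(fit_to_area(1920, 1080))  # -> (1344, 768)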
     Components:
-        image_resize_processor (`VaeImageProcessor`)
-        image_processor (`VaeImageProcessor`)
-        vae (`AutoencoderKLQwenImage`)
+        image_resize_processor (`VaeImageProcessor`) image_processor (`VaeImageProcessor`) vae
+        (`AutoencoderKLQwenImage`)
 
     Inputs:
         image (`Union[Image, List]`):
@@ -231,10 +227,8 @@ class QwenImageEditPlusCoreDenoiseStep(SequentialPipelineBlocks):
     Core denoising workflow for QwenImage-Edit Plus edit (img2img) task.
 
     Components:
-        pachifier (`QwenImagePachifier`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
-        guider (`ClassifierFreeGuidance`)
-        transformer (`QwenImageTransformer2DModel`)
+        pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
+        (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
 
     Inputs:
         num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -311,12 +305,12 @@ class QwenImageEditPlusDecodeStep(SequentialPipelineBlocks):
     Decode step that decodes the latents to images and postprocesses the generated image.
 
     Components:
-        vae (`AutoencoderKLQwenImage`)
-        image_processor (`VaeImageProcessor`)
+        vae (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`)
 
     Inputs:
         latents (`Tensor`):
-            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.
+            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
+            step.
         output_type (`str`, *optional*, defaults to pil):
             Output format: 'pil', 'np', 'pt'.
 
@@ -357,14 +351,9 @@ class QwenImageEditPlusAutoBlocks(SequentialPipelineBlocks):
     - VL encoder uses 384x384 target area, VAE encoder uses 1024x1024 target area.
 
     Components:
-        image_resize_processor (`VaeImageProcessor`)
-        text_encoder (`Qwen2_5_VLForConditionalGeneration`)
-        processor (`Qwen2VLProcessor`)
-        guider (`ClassifierFreeGuidance`)
-        image_processor (`VaeImageProcessor`)
-        vae (`AutoencoderKLQwenImage`)
-        pachifier (`QwenImagePachifier`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
+        image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor
+        (`Qwen2VLProcessor`) guider (`ClassifierFreeGuidance`) image_processor (`VaeImageProcessor`) vae
+        (`AutoencoderKLQwenImage`) pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`)
         transformer (`QwenImageTransformer2DModel`)
 
     Inputs:
diff --git a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_layered.py b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_layered.py
index 37a06e9af2..fdfeab0488 100644
--- a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_layered.py
+++ b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_layered.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import torch
 from ...utils import logging
 from ..modular_pipeline import SequentialPipelineBlocks
 from ..modular_pipeline_utils import InsertableDict, OutputParam
@@ -53,14 +52,12 @@ class QwenImageLayeredTextEncoderStep(SequentialPipelineBlocks):
 # auto_docstring
 class QwenImageLayeredTextEncoderStep(SequentialPipelineBlocks):
     """
-    QwenImage-Layered Text encoder step that encode the text prompt, will generate a prompt based on image if not provided.
+    QwenImage-Layered Text encoder step that encodes the text prompt; it will generate a prompt based on the image if
+    not provided.
     Components:
-        image_resize_processor (`VaeImageProcessor`)
-        text_encoder (`Qwen2_5_VLForConditionalGeneration`)
-        processor (`Qwen2VLProcessor`)
-        tokenizer (`Qwen2Tokenizer`): The tokenizer to use
-        guider (`ClassifierFreeGuidance`)
+        image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor
+        (`Qwen2VLProcessor`) tokenizer (`Qwen2Tokenizer`): The tokenizer to use guider (`ClassifierFreeGuidance`)
 
     Inputs:
         image (`Union[Image, List]`):
@@ -116,9 +113,8 @@ class QwenImageLayeredVaeEncoderStep(SequentialPipelineBlocks):
     Vae encoder step that encodes the image inputs into their latent representations.
 
     Components:
-        image_resize_processor (`VaeImageProcessor`)
-        image_processor (`VaeImageProcessor`)
-        vae (`AutoencoderKLQwenImage`)
+        image_resize_processor (`VaeImageProcessor`) image_processor (`VaeImageProcessor`) vae
+        (`AutoencoderKLQwenImage`)
 
     Inputs:
         image (`Union[Image, List]`):
@@ -203,8 +199,8 @@ class QwenImageLayeredInputStep(SequentialPipelineBlocks):
         width (`int`):
             if not provided, updated to image width
         image_latents (`Tensor`):
-            image latents used to guide the image generation. Can be generated from vae_encoder step. (patchified with layered
-            pachifier and batch-expanded)
+            image latents used to guide the image generation. Can be generated from vae_encoder step. (patchified
+            with layered pachifier and batch-expanded)
     """
 
     model_name = "qwenimage-layered"
@@ -230,10 +226,8 @@ class QwenImageLayeredCoreDenoiseStep(SequentialPipelineBlocks):
     Core denoising workflow for QwenImage-Layered img2img task.
 
     Components:
-        pachifier (`QwenImageLayeredPachifier`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
-        guider (`ClassifierFreeGuidance`)
-        transformer (`QwenImageTransformer2DModel`)
+        pachifier (`QwenImageLayeredPachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
+        (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)
 
     Inputs:
         num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -317,16 +311,10 @@ class QwenImageLayeredAutoBlocks(SequentialPipelineBlocks):
     Auto Modular pipeline for layered denoising tasks using QwenImage-Layered.
 
     Components:
-        image_resize_processor (`VaeImageProcessor`)
-        text_encoder (`Qwen2_5_VLForConditionalGeneration`)
-        processor (`Qwen2VLProcessor`)
-        tokenizer (`Qwen2Tokenizer`): The tokenizer to use
-        guider (`ClassifierFreeGuidance`)
-        image_processor (`VaeImageProcessor`)
-        vae (`AutoencoderKLQwenImage`)
-        pachifier (`QwenImageLayeredPachifier`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
-        transformer (`QwenImageTransformer2DModel`)
+        image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor
+        (`Qwen2VLProcessor`) tokenizer (`Qwen2Tokenizer`): The tokenizer to use guider (`ClassifierFreeGuidance`)
+        image_processor (`VaeImageProcessor`) vae (`AutoencoderKLQwenImage`) pachifier (`QwenImageLayeredPachifier`)
+        scheduler (`FlowMatchEulerDiscreteScheduler`) transformer (`QwenImageTransformer2DModel`)
 
     Inputs:
         image (`Union[Image, List]`):