From 2a81f2ec5417efdc7773937dd7db2f675a46b66a Mon Sep 17 00:00:00 2001
From: yiyixuxu <yixu310@gmail.com>
Date: Sat, 10 Jan 2026 12:15:36 +0100
Subject: [PATCH] style

---
 .../qwenimage/modular_blocks_qwenimage.py     | 86 ++++++++++++-------
 .../modular_blocks_qwenimage_edit.py          | 46 ++++++----
 .../modular_blocks_qwenimage_edit_plus.py     | 26 +++---
 .../modular_blocks_qwenimage_layered.py       | 47 +++++-----
 4 files changed, 116 insertions(+), 89 deletions(-)

diff --git a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage.py b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage.py
index 19feffe77e..d54dca5f5a 100644
--- a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage.py
+++ b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage.py
@@ -58,7 +58,8 @@ logger = logging.get_logger(__name__)
 # 1. TEXT ENCODER
 # ====================
 
-#auto_docstring
+
+# auto_docstring
 class QwenImageAutoTextEncoderStep(AutoPipelineBlocks):
     """
     class QwenImageAutoTextEncoderStep
@@ -76,11 +77,8 @@ class QwenImageAutoTextEncoderStep(AutoPipelineBlocks):
       Configs:
 
           prompt_template_encode (default: <|im_start|>system
-    Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>
-    <|im_start|>user
-    {}<|im_end|>
-    <|im_start|>assistant
-    )
+    Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the
+    objects and background:<|im_end|> <|im_start|>user {}<|im_end|> <|im_start|>assistant )
 
           prompt_template_encode_start_idx (default: 34)
 
@@ -111,6 +109,7 @@ class QwenImageAutoTextEncoderStep(AutoPipelineBlocks):
           negative_prompt_embeds_mask (`Tensor`):
               The negative prompt embeddings mask
     """
+
     model_name = "qwenimage"
     block_classes = [QwenImageTextEncoderStep()]
     block_names = ["text_encoder"]
@@ -127,7 +126,8 @@ class QwenImageAutoTextEncoderStep(AutoPipelineBlocks):
 # 2. VAE ENCODER
 # ====================
 
-#auto_docstring
+
+# auto_docstring
 class QwenImageInpaintVaeEncoderStep(SequentialPipelineBlocks):
     """
     class QwenImageInpaintVaeEncoderStep
@@ -175,6 +175,7 @@ class QwenImageInpaintVaeEncoderStep(SequentialPipelineBlocks):
           image_latents (`Tensor`):
               The latents representing the reference image(s). Single tensor or list depending on input.
     """
+
     model_name = "qwenimage"
     block_classes = [QwenImageInpaintProcessImagesInputStep(), QwenImageVaeEncoderStep()]
     block_names = ["preprocess", "encode"]
@@ -189,7 +190,7 @@ class QwenImageInpaintVaeEncoderStep(SequentialPipelineBlocks):
         )
 
 
-#auto_docstring
+# auto_docstring
 class QwenImageImg2ImgVaeEncoderStep(SequentialPipelineBlocks):
     """
     class QwenImageImg2ImgVaeEncoderStep
@@ -223,6 +224,7 @@ class QwenImageImg2ImgVaeEncoderStep(SequentialPipelineBlocks):
           image_latents (`Tensor`):
               The latents representing the reference image(s). Single tensor or list depending on input.
     """
+
     model_name = "qwenimage"
 
     block_classes = [QwenImageProcessImagesInputStep(), QwenImageVaeEncoderStep()]
@@ -250,13 +252,12 @@ class QwenImageAutoVaeEncoderStep(AutoPipelineBlocks):
 
 
 # optional controlnet vae encoder
-#auto_docstring
+# auto_docstring
 class QwenImageOptionalControlNetVaeEncoderStep(AutoPipelineBlocks):
     """
     class QwenImageOptionalControlNetVaeEncoderStep
 
-      Vae encoder step that encode the image inputs into their latent representations.
-      This is an auto pipeline block.
+      Vae encoder step that encode the image inputs into their latent representations. This is an auto pipeline block.
        - `QwenImageControlNetVaeEncoderStep` (controlnet) is used when `control_image` is provided.
        - if `control_image` is not provided, step will be skipped.
 
@@ -287,6 +288,7 @@ class QwenImageOptionalControlNetVaeEncoderStep(AutoPipelineBlocks):
           control_image_latents (`Tensor`):
               The latents representing the control image
     """
+
     block_classes = [QwenImageControlNetVaeEncoderStep]
     block_names = ["controlnet"]
     block_trigger_inputs = ["control_image"]
@@ -307,7 +309,7 @@ class QwenImageOptionalControlNetVaeEncoderStep(AutoPipelineBlocks):
 
 
 # assemble input steps
-#auto_docstring
+# auto_docstring
 class QwenImageImg2ImgInputStep(SequentialPipelineBlocks):
     """
     class QwenImageImg2ImgInputStep
@@ -353,6 +355,7 @@ class QwenImageImg2ImgInputStep(SequentialPipelineBlocks):
           image_width (`int`):
               The image width calculated from the image latents dimension
     """
+
     model_name = "qwenimage"
     block_classes = [QwenImageTextInputsStep(), QwenImageAdditionalInputsStep(image_latent_inputs=["image_latents"])]
     block_names = ["text_inputs", "additional_inputs"]
@@ -364,7 +367,7 @@ class QwenImageImg2ImgInputStep(SequentialPipelineBlocks):
         " - update height/width based `image_latents`, patchify `image_latents`."
 
 
-#auto_docstring
+# auto_docstring
 class QwenImageInpaintInputStep(SequentialPipelineBlocks):
     """
     class QwenImageInpaintInputStep
@@ -412,6 +415,7 @@ class QwenImageInpaintInputStep(SequentialPipelineBlocks):
           image_width (`int`):
               The image width calculated from the image latents dimension
     """
+
     model_name = "qwenimage"
     block_classes = [
         QwenImageTextInputsStep(),
@@ -429,7 +433,7 @@ class QwenImageInpaintInputStep(SequentialPipelineBlocks):
 
 
 # assemble prepare latents steps
-#auto_docstring
+# auto_docstring
 class QwenImageInpaintPrepareLatentsStep(SequentialPipelineBlocks):
     """
     class QwenImageInpaintPrepareLatentsStep
@@ -450,7 +454,8 @@ class QwenImageInpaintPrepareLatentsStep(SequentialPipelineBlocks):
               The initial random noised, can be generated in prepare latent step.
 
           image_latents (`Tensor`):
-              The image latents to use for the denoising process. Can be generated in vae encoder and packed in input step.
+              The image latents to use for the denoising process. Can be generated in vae encoder and packed in input
+              step.
 
           timesteps (`Tensor`):
               The timesteps to use for the denoising process. Can be generated in set_timesteps step.
@@ -472,6 +477,7 @@ class QwenImageInpaintPrepareLatentsStep(SequentialPipelineBlocks):
           mask (`Tensor`):
               The mask to use for the inpainting process.
     """
+
     model_name = "qwenimage"
     block_classes = [QwenImagePrepareLatentsWithStrengthStep(), QwenImageCreateMaskLatentsStep()]
     block_names = ["add_noise_to_latents", "create_mask_latents"]
@@ -489,12 +495,13 @@ class QwenImageInpaintPrepareLatentsStep(SequentialPipelineBlocks):
 
 
 # Qwen Image (text2image)
-#auto_docstring
+# auto_docstring
 class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
     """
     class QwenImageCoreDenoiseStep
 
-      step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs (timesteps, latents, rope inputs etc.).
+      step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the
+      inputs (timesteps, latents, rope inputs etc.).
 
       Components:
 
@@ -570,20 +577,22 @@ class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
     @property
     def description(self):
         return "step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs (timesteps, latents, rope inputs etc.)."
-    
+
     @property
     def outputs(self):
         return [
             OutputParam.latents(),
         ]
 
+
 # Qwen Image (inpainting)
-#auto_docstring
+# auto_docstring
 class QwenImageInpaintCoreDenoiseStep(SequentialPipelineBlocks):
     """
     class QwenImageInpaintCoreDenoiseStep
 
-      Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint task.
+      Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for
+      inpaint task.
 
       Components:
 
@@ -675,13 +684,15 @@ class QwenImageInpaintCoreDenoiseStep(SequentialPipelineBlocks):
             OutputParam.latents(),
         ]
 
+
 # Qwen Image (image2image)
-#auto_docstring
+# auto_docstring
 class QwenImageImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
     """
     class QwenImageImg2ImgCoreDenoiseStep
 
-      Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img task.
+      Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for
+      img2img task.
 
       Components:
 
@@ -771,13 +782,15 @@ class QwenImageImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
             OutputParam.latents(),
         ]
 
+
 # Qwen Image (text2image) with controlnet
-#auto_docstring
+# auto_docstring
 class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
     """
     class QwenImageControlNetCoreDenoiseStep
 
-      step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs (timesteps, latents, rope inputs etc.).
+      step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the
+      inputs (timesteps, latents, rope inputs etc.).
 
       Components:
 
@@ -871,20 +884,22 @@ class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
     @property
     def description(self):
         return "step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs (timesteps, latents, rope inputs etc.)."
- 
+
     @property
     def outputs(self):
         return [
             OutputParam.latents(),
         ]
 
+
 # Qwen Image (inpainting) with controlnet
-#auto_docstring
+# auto_docstring
 class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
     """
     class QwenImageControlNetInpaintCoreDenoiseStep
 
-      Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint task.
+      Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for
+      inpaint task.
 
       Components:
 
@@ -996,12 +1011,13 @@ class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
 
 
 # Qwen Image (image2image) with controlnet
-#auto_docstring
+# auto_docstring
 class QwenImageControlNetImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
     """
     class QwenImageControlNetImg2ImgCoreDenoiseStep
 
-      Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img task.
+      Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for
+      img2img task.
 
       Components:
 
@@ -1102,13 +1118,14 @@ class QwenImageControlNetImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
     @property
     def description(self):
         return "Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img task."
-    
+
     @property
     def outputs(self):
         return [
             OutputParam.latents(),
         ]
 
+
 # Auto denoise step for QwenImage
 class QwenImageAutoCoreDenoiseStep(ConditionalPipelineBlocks):
     block_classes = [
@@ -1176,7 +1193,7 @@ class QwenImageAutoCoreDenoiseStep(ConditionalPipelineBlocks):
 
 
 # standard decode step works for most tasks except for inpaint
-#auto_docstring
+# auto_docstring
 class QwenImageDecodeStep(SequentialPipelineBlocks):
     """
     class QwenImageDecodeStep
@@ -1202,6 +1219,7 @@ class QwenImageDecodeStep(SequentialPipelineBlocks):
           images (`List`):
               Generated images.
     """
+
     model_name = "qwenimage"
     block_classes = [QwenImageDecoderStep(), QwenImageProcessImagesOutputStep()]
     block_names = ["decode", "postprocess"]
@@ -1212,12 +1230,13 @@ class QwenImageDecodeStep(SequentialPipelineBlocks):
 
 
 # Inpaint decode step
-#auto_docstring
+# auto_docstring
 class QwenImageInpaintDecodeStep(SequentialPipelineBlocks):
     """
     class QwenImageInpaintDecodeStep
 
-      Decode step that decodes the latents to images and postprocess the generated image, optional apply the mask overally to the original image.
+      Decode step that decodes the latents to images and postprocess the generated image, optionally apply the mask
+      overlay to the original image.
 
       Components:
 
@@ -1240,6 +1259,7 @@ class QwenImageInpaintDecodeStep(SequentialPipelineBlocks):
           images (`List`):
               Generated images.
     """
+
     model_name = "qwenimage"
     block_classes = [QwenImageDecoderStep(), QwenImageInpaintProcessImagesOutputStep()]
     block_names = ["decode", "postprocess"]
diff --git a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit.py b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit.py
index cae6236eb5..37a438ea1f 100644
--- a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit.py
+++ b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit.py
@@ -55,7 +55,8 @@ logger = logging.get_logger(__name__)
 # 1. TEXT ENCODER
 # ====================
 
-#auto_docstring
+
+# auto_docstring
 class QwenImageEditVLEncoderStep(SequentialPipelineBlocks):
     """
     class QwenImageEditVLEncoderStep
@@ -75,11 +76,10 @@ class QwenImageEditVLEncoderStep(SequentialPipelineBlocks):
       Configs:
 
           prompt_template_encode (default: <|im_start|>system
-    Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>
-    <|im_start|>user
-    <|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>
-    <|im_start|>assistant
-    )
+    Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how
+    the user's text instruction should alter or modify the image. Generate a new image that meets the user's
+    requirements while maintaining consistency with the original input where appropriate.<|im_end|> <|im_start|>user
+    <|vision_start|><|image_pad|><|vision_end|>{}<|im_end|> <|im_start|>assistant )
 
           prompt_template_encode_start_idx (default: 64)
 
@@ -130,7 +130,7 @@ class QwenImageEditVLEncoderStep(SequentialPipelineBlocks):
 
 
 # Edit VAE encoder
-#auto_docstring
+# auto_docstring
 class QwenImageEditVaeEncoderStep(SequentialPipelineBlocks):
     """
     class QwenImageEditVaeEncoderStep
@@ -163,6 +163,7 @@ class QwenImageEditVaeEncoderStep(SequentialPipelineBlocks):
           image_latents (`Tensor`):
               The latents representing the reference image(s). Single tensor or list depending on input.
     """
+
     model_name = "qwenimage-edit"
     block_classes = [
         QwenImageEditResizeStep(),
@@ -177,7 +178,7 @@ class QwenImageEditVaeEncoderStep(SequentialPipelineBlocks):
 
 
 # Edit Inpaint VAE encoder
-#auto_docstring
+# auto_docstring
 class QwenImageEditInpaintVaeEncoderStep(SequentialPipelineBlocks):
     """
     class QwenImageEditInpaintVaeEncoderStep
@@ -224,6 +225,7 @@ class QwenImageEditInpaintVaeEncoderStep(SequentialPipelineBlocks):
           image_latents (`Tensor`):
               The latents representing the reference image(s). Single tensor or list depending on input.
     """
+
     model_name = "qwenimage-edit"
     block_classes = [
         QwenImageEditResizeStep(),
@@ -265,7 +267,7 @@ class QwenImageEditAutoVaeEncoderStep(AutoPipelineBlocks):
 
 
 # assemble input steps
-#auto_docstring
+# auto_docstring
 class QwenImageEditInputStep(SequentialPipelineBlocks):
     """
     class QwenImageEditInputStep
@@ -313,6 +315,7 @@ class QwenImageEditInputStep(SequentialPipelineBlocks):
           image_width (`int`):
               The image width calculated from the image latents dimension
     """
+
     model_name = "qwenimage-edit"
     block_classes = [
         QwenImageTextInputsStep(),
@@ -329,7 +332,7 @@ class QwenImageEditInputStep(SequentialPipelineBlocks):
         )
 
 
-#auto_docstring
+# auto_docstring
 class QwenImageEditInpaintInputStep(SequentialPipelineBlocks):
     """
     class QwenImageEditInpaintInputStep
@@ -379,6 +382,7 @@ class QwenImageEditInpaintInputStep(SequentialPipelineBlocks):
           image_width (`int`):
               The image width calculated from the image latents dimension
     """
+
     model_name = "qwenimage-edit"
     block_classes = [
         QwenImageTextInputsStep(),
@@ -398,7 +402,7 @@ class QwenImageEditInpaintInputStep(SequentialPipelineBlocks):
 
 
 # assemble prepare latents steps
-#auto_docstring
+# auto_docstring
 class QwenImageEditInpaintPrepareLatentsStep(SequentialPipelineBlocks):
     """
     class QwenImageEditInpaintPrepareLatentsStep
@@ -419,7 +423,8 @@ class QwenImageEditInpaintPrepareLatentsStep(SequentialPipelineBlocks):
               The initial random noised, can be generated in prepare latent step.
 
           image_latents (`Tensor`):
-              The image latents to use for the denoising process. Can be generated in vae encoder and packed in input step.
+              The image latents to use for the denoising process. Can be generated in vae encoder and packed in input
+              step.
 
           timesteps (`Tensor`):
               The timesteps to use for the denoising process. Can be generated in set_timesteps step.
@@ -441,6 +446,7 @@ class QwenImageEditInpaintPrepareLatentsStep(SequentialPipelineBlocks):
           mask (`Tensor`):
               The mask to use for the inpainting process.
     """
+
     model_name = "qwenimage-edit"
     block_classes = [QwenImagePrepareLatentsWithStrengthStep(), QwenImageCreateMaskLatentsStep()]
     block_names = ["add_noise_to_latents", "create_mask_latents"]
@@ -455,7 +461,7 @@ class QwenImageEditInpaintPrepareLatentsStep(SequentialPipelineBlocks):
 
 
 # Qwen Image Edit (image2image) core denoise step
-#auto_docstring
+# auto_docstring
 class QwenImageEditCoreDenoiseStep(SequentialPipelineBlocks):
     """
     class QwenImageEditCoreDenoiseStep
@@ -547,7 +553,7 @@ class QwenImageEditCoreDenoiseStep(SequentialPipelineBlocks):
 
 
 # Qwen Image Edit (inpainting) core denoise step
-#auto_docstring
+# auto_docstring
 class QwenImageEditInpaintCoreDenoiseStep(SequentialPipelineBlocks):
     """
     class QwenImageEditInpaintCoreDenoiseStep
@@ -671,20 +677,21 @@ class QwenImageEditAutoCoreDenoiseStep(ConditionalPipelineBlocks):
             " - `QwenImageEditCoreDenoiseStep` when `image_latents` is provided\n"
             "Supports edit (img2img) and edit inpainting tasks for QwenImage-Edit."
         )
-    
+
     @property
     def outputs(self):
         return [
             OutputParam.latents(),
         ]
 
+
 # ====================
 # 4. DECODE
 # ====================
 
 
 # Decode step (standard)
-#auto_docstring
+# auto_docstring
 class QwenImageEditDecodeStep(SequentialPipelineBlocks):
     """
     class QwenImageEditDecodeStep
@@ -710,6 +717,7 @@ class QwenImageEditDecodeStep(SequentialPipelineBlocks):
           images (`List`):
               Generated images.
     """
+
     model_name = "qwenimage-edit"
     block_classes = [QwenImageDecoderStep(), QwenImageProcessImagesOutputStep()]
     block_names = ["decode", "postprocess"]
@@ -720,12 +728,13 @@ class QwenImageEditDecodeStep(SequentialPipelineBlocks):
 
 
 # Inpaint decode step
-#auto_docstring
+# auto_docstring
 class QwenImageEditInpaintDecodeStep(SequentialPipelineBlocks):
     """
     class QwenImageEditInpaintDecodeStep
 
-      Decode step that decodes the latents to images and postprocess the generated image, optionally apply the mask overlay to the original image.
+      Decode step that decodes the latents to images and postprocess the generated image, optionally apply the mask
+      overlay to the original image.
 
       Components:
 
@@ -748,6 +757,7 @@ class QwenImageEditInpaintDecodeStep(SequentialPipelineBlocks):
           images (`List`):
               Generated images.
     """
+
     model_name = "qwenimage-edit"
     block_classes = [QwenImageDecoderStep(), QwenImageInpaintProcessImagesOutputStep()]
     block_names = ["decode", "postprocess"]
diff --git a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit_plus.py b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit_plus.py
index 2fcd633f0d..851b69f232 100644
--- a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit_plus.py
+++ b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit_plus.py
@@ -49,7 +49,7 @@ logger = logging.get_logger(__name__)
 # ====================
 
 
-#auto_docstring
+# auto_docstring
 class QwenImageEditPlusVLEncoderStep(SequentialPipelineBlocks):
     """
     class QwenImageEditPlusVLEncoderStep
@@ -69,11 +69,10 @@ class QwenImageEditPlusVLEncoderStep(SequentialPipelineBlocks):
       Configs:
 
           prompt_template_encode (default: <|im_start|>system
-    Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>
-    <|im_start|>user
-    {}<|im_end|>
-    <|im_start|>assistant
-    )
+    Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how
+    the user's text instruction should alter or modify the image. Generate a new image that meets the user's
+    requirements while maintaining consistency with the original input where appropriate.<|im_end|> <|im_start|>user
+    {}<|im_end|> <|im_start|>assistant )
 
           img_template_encode (default: Picture {}: <|vision_start|><|image_pad|><|vision_end|>)
 
@@ -125,13 +124,13 @@ class QwenImageEditPlusVLEncoderStep(SequentialPipelineBlocks):
 # ====================
 
 
-#auto_docstring
+# auto_docstring
 class QwenImageEditPlusVaeEncoderStep(SequentialPipelineBlocks):
     """
     class QwenImageEditPlusVaeEncoderStep
 
-      VAE encoder step that encodes image inputs into latent representations.
-      Each image is resized independently based on its own aspect ratio to 1024x1024 target area.
+      VAE encoder step that encodes image inputs into latent representations. Each image is resized independently based
+      on its own aspect ratio to 1024x1024 target area.
 
       Components:
 
@@ -182,7 +181,7 @@ class QwenImageEditPlusVaeEncoderStep(SequentialPipelineBlocks):
 
 
 # assemble input steps
-#auto_docstring
+# auto_docstring
 class QwenImageEditPlusInputStep(SequentialPipelineBlocks):
     """
     class QwenImageEditPlusInputStep
@@ -232,6 +231,7 @@ class QwenImageEditPlusInputStep(SequentialPipelineBlocks):
           image_width (`List`):
               The image widths calculated from the image latents dimension
     """
+
     model_name = "qwenimage-edit-plus"
     block_classes = [
         QwenImageTextInputsStep(),
@@ -251,7 +251,7 @@ class QwenImageEditPlusInputStep(SequentialPipelineBlocks):
 
 
 # Qwen Image Edit Plus (image2image) core denoise step
-#auto_docstring
+# auto_docstring
 class QwenImageEditPlusCoreDenoiseStep(SequentialPipelineBlocks):
     """
     class QwenImageEditPlusCoreDenoiseStep
@@ -312,6 +312,7 @@ class QwenImageEditPlusCoreDenoiseStep(SequentialPipelineBlocks):
           latents (`Tensor`):
               Denoised latents.
     """
+
     model_name = "qwenimage-edit-plus"
     block_classes = [
         QwenImageEditPlusInputStep(),
@@ -346,7 +347,7 @@ class QwenImageEditPlusCoreDenoiseStep(SequentialPipelineBlocks):
 # ====================
 
 
-#auto_docstring
+# auto_docstring
 class QwenImageEditPlusDecodeStep(SequentialPipelineBlocks):
     """
     class QwenImageEditPlusDecodeStep
@@ -372,6 +373,7 @@ class QwenImageEditPlusDecodeStep(SequentialPipelineBlocks):
           images (`List`):
               Generated images.
     """
+
     model_name = "qwenimage-edit-plus"
     block_classes = [QwenImageDecoderStep(), QwenImageProcessImagesOutputStep()]
     block_names = ["decode", "postprocess"]
diff --git a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_layered.py b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_layered.py
index f647f16868..56fa1345a5 100644
--- a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_layered.py
+++ b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_layered.py
@@ -49,12 +49,14 @@ logger = logging.get_logger(__name__)
 # 1. TEXT ENCODER
 # ====================
 
-#auto_docstring
+
+# auto_docstring
 class QwenImageLayeredTextEncoderStep(SequentialPipelineBlocks):
     """
     class QwenImageLayeredTextEncoderStep
 
-      QwenImage-Layered Text encoder step that encode the text prompt, will generate a prompt based on image if not provided.
+      QwenImage-Layered Text encoder step that encode the text prompt, will generate a prompt based on image if not
+      provided.
 
       Components:
 
@@ -71,28 +73,23 @@ class QwenImageLayeredTextEncoderStep(SequentialPipelineBlocks):
       Configs:
 
           image_caption_prompt_en (default: <|im_start|>system
-    You are a helpful assistant.<|im_end|>
-    <|im_start|>user
-    # Image Annotator
-    You are a professional image annotator. Please write an image caption based on the input image:
+    You are a helpful assistant.<|im_end|> <|im_start|>user # Image Annotator You are a professional image annotator.
+    Please write an image caption based on the input image:
     1. Write the caption using natural, descriptive language without structured formats or rich text.
     2. Enrich caption details by including:
      - Object attributes, such as quantity, color, shape, size, material, state, position, actions, and so on
-     - Vision Relations between objects, such as spatial relations, functional relations, possessive relations, attachment relations, action relations, comparative relations, causal relations, and so on
+     - Vision Relations between objects, such as spatial relations, functional relations, possessive relations,
+       attachment relations, action relations, comparative relations, causal relations, and so on
      - Environmental details, such as weather, lighting, colors, textures, atmosphere, and so on
-     - Identify the text clearly visible in the image, without translation or explanation, and highlight it in the caption with quotation marks
+     - Identify the text clearly visible in the image, without translation or explanation, and highlight it in the
+       caption with quotation marks
     3. Maintain authenticity and accuracy:
      - Avoid generalizations
      - Describe all visible information in the image, while do not add information not explicitly shown in the image
-    <|vision_start|><|image_pad|><|vision_end|><|im_end|>
-    <|im_start|>assistant
-    )
+    <|vision_start|><|image_pad|><|vision_end|><|im_end|> <|im_start|>assistant )
 
           image_caption_prompt_cn (default: <|im_start|>system
-    You are a helpful assistant.<|im_end|>
-    <|im_start|>user
-    # 图像标注器
-    你是一个专业的图像标注器。请基于输入图像，撰写图注:
+    You are a helpful assistant.<|im_end|> <|im_start|>user # 图像标注器 你是一个专业的图像标注器。请基于输入图像，撰写图注:
     1. 使用自然、描述性的语言撰写图注，不要使用结构化形式或富文本形式。
     2. 通过加入以下内容，丰富图注细节：
      - 对象的属性：如数量、颜色、形状、大小、位置、材质、状态、动作等
@@ -102,16 +99,11 @@ class QwenImageLayeredTextEncoderStep(SequentialPipelineBlocks):
     3. 保持真实性与准确性：
      - 不要使用笼统的描述
      - 描述图像中所有可见的信息，但不要加入没有在图像中出现的内容
-    <|vision_start|><|image_pad|><|vision_end|><|im_end|>
-    <|im_start|>assistant
-    )
+    <|vision_start|><|image_pad|><|vision_end|><|im_end|> <|im_start|>assistant )
 
           prompt_template_encode (default: <|im_start|>system
-    Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>
-    <|im_start|>user
-    {}<|im_end|>
-    <|im_start|>assistant
-    )
+    Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the
+    objects and background:<|im_end|> <|im_start|>user {}<|im_end|> <|im_start|>assistant )
 
           prompt_template_encode_start_idx (default: 34)
 
@@ -174,7 +166,7 @@ class QwenImageLayeredTextEncoderStep(SequentialPipelineBlocks):
 
 
 # Edit VAE encoder
-#auto_docstring
+# auto_docstring
 class QwenImageLayeredVaeEncoderStep(SequentialPipelineBlocks):
     """
     class QwenImageLayeredVaeEncoderStep
@@ -210,6 +202,7 @@ class QwenImageLayeredVaeEncoderStep(SequentialPipelineBlocks):
           image_latents (`Tensor`):
               The latents representing the reference image(s). Single tensor or list depending on input.
     """
+
     model_name = "qwenimage-layered"
     block_classes = [
         QwenImageLayeredResizeStep(),
@@ -230,7 +223,7 @@ class QwenImageLayeredVaeEncoderStep(SequentialPipelineBlocks):
 
 
 # assemble input steps
-#auto_docstring
+# auto_docstring
 class QwenImageLayeredInputStep(SequentialPipelineBlocks):
     """
     class QwenImageLayeredInputStep
@@ -278,6 +271,7 @@ class QwenImageLayeredInputStep(SequentialPipelineBlocks):
           width (`int`):
               The width of the image output
     """
+
     model_name = "qwenimage-layered"
     block_classes = [
         QwenImageTextInputsStep(),
@@ -295,7 +289,7 @@ class QwenImageLayeredInputStep(SequentialPipelineBlocks):
 
 
 # Qwen Image Layered (image2image) core denoise step
-#auto_docstring
+# auto_docstring
 class QwenImageLayeredCoreDenoiseStep(SequentialPipelineBlocks):
     """
     class QwenImageLayeredCoreDenoiseStep
@@ -353,6 +347,7 @@ class QwenImageLayeredCoreDenoiseStep(SequentialPipelineBlocks):
           latents (`Tensor`):
               Denoised latents.
     """
+
     model_name = "qwenimage-layered"
     block_classes = [
         QwenImageLayeredInputStep(),