1
0
mirror of https://github.com/huggingface/diffusers.git synced 2026-01-27 17:22:53 +03:00

add modular_auto_docstring!

This commit is contained in:
yiyixuxu
2026-01-10 11:55:03 +01:00
parent 34a743e2dc
commit ff09bf1a63
2 changed files with 1104 additions and 6 deletions

View File

@@ -58,8 +58,59 @@ logger = logging.get_logger(__name__)
# 1. TEXT ENCODER
# ====================
#auto_docstring
class QwenImageAutoTextEncoderStep(AutoPipelineBlocks):
"""
class QwenImageAutoTextEncoderStep
Text encoder step that encodes the text prompt into a text embedding. This is an auto pipeline block.
Components:
text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use [subfolder=]
tokenizer (`Qwen2Tokenizer`): The tokenizer to use [subfolder=]
guider (`ClassifierFreeGuidance`) [subfolder=]
Configs:
prompt_template_encode (default: <|im_start|>system
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>
<|im_start|>user
{}<|im_end|>
<|im_start|>assistant
)
prompt_template_encode_start_idx (default: 34)
tokenizer_max_length (default: 1024)
Inputs:
prompt (`str`, *optional*):
The prompt or prompts to guide image generation.
negative_prompt (`str`, *optional*):
The prompt or prompts not to guide the image generation.
max_sequence_length (`int`, *optional*, defaults to 1024):
Maximum sequence length for prompt encoding.
Outputs:
prompt_embeds (`Tensor`):
The prompt embeddings
prompt_embeds_mask (`Tensor`):
The encoder attention mask
negative_prompt_embeds (`Tensor`):
The negative prompt embeddings
negative_prompt_embeds_mask (`Tensor`):
The negative prompt embeddings mask
"""
model_name = "qwenimage"
block_classes = [QwenImageTextEncoderStep()]
block_names = ["text_encoder"]
@@ -76,8 +127,54 @@ class QwenImageAutoTextEncoderStep(AutoPipelineBlocks):
# 2. VAE ENCODER
# ====================
#auto_docstring
class QwenImageInpaintVaeEncoderStep(SequentialPipelineBlocks):
"""
class QwenImageInpaintVaeEncoderStep
This step is used for processing image and mask inputs for inpainting tasks. It:
- Resizes the image to the target size, based on `height` and `width`.
- Processes and updates `image` and `mask_image`.
- Creates `image_latents`.
Components:
image_mask_processor (`InpaintProcessor`) [subfolder=]
vae (`AutoencoderKLQwenImage`) [subfolder=]
Inputs:
mask_image (`Image`):
Mask image for inpainting.
image (`Image`):
Input image for img2img, editing, or conditioning.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
padding_mask_crop (`int`, *optional*):
Padding for mask cropping in inpainting.
generator (`Generator`, *optional*):
Torch generator for deterministic generation.
Outputs:
processed_image (`None`):
processed_mask_image (`None`):
mask_overlay_kwargs (`Dict`):
The kwargs for the postprocess step to apply the mask overlay
image_latents (`Tensor`):
The latents representing the reference image(s). Single tensor or list depending on input.
"""
model_name = "qwenimage"
block_classes = [QwenImageInpaintProcessImagesInputStep(), QwenImageVaeEncoderStep()]
block_names = ["preprocess", "encode"]
@@ -92,7 +189,40 @@ class QwenImageInpaintVaeEncoderStep(SequentialPipelineBlocks):
)
#auto_docstring
class QwenImageImg2ImgVaeEncoderStep(SequentialPipelineBlocks):
"""
class QwenImageImg2ImgVaeEncoderStep
Vae encoder step that preprocess andencode the image inputs into their latent representations.
Components:
image_processor (`VaeImageProcessor`) [subfolder=]
vae (`AutoencoderKLQwenImage`) [subfolder=]
Inputs:
image (`Image`):
Input image for img2img, editing, or conditioning.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
generator (`Generator`, *optional*):
Torch generator for deterministic generation.
Outputs:
processed_image (`None`):
image_latents (`Tensor`):
The latents representing the reference image(s). Single tensor or list depending on input.
"""
model_name = "qwenimage"
block_classes = [QwenImageProcessImagesInputStep(), QwenImageVaeEncoderStep()]
@@ -103,7 +233,6 @@ class QwenImageImg2ImgVaeEncoderStep(SequentialPipelineBlocks):
return "Vae encoder step that preprocess andencode the image inputs into their latent representations."
# Auto VAE encoder
class QwenImageAutoVaeEncoderStep(AutoPipelineBlocks):
block_classes = [QwenImageInpaintVaeEncoderStep, QwenImageImg2ImgVaeEncoderStep]
block_names = ["inpaint", "img2img"]
@@ -121,7 +250,43 @@ class QwenImageAutoVaeEncoderStep(AutoPipelineBlocks):
# optional controlnet vae encoder
#auto_docstring
class QwenImageOptionalControlNetVaeEncoderStep(AutoPipelineBlocks):
"""
class QwenImageOptionalControlNetVaeEncoderStep
Vae encoder step that encode the image inputs into their latent representations.
This is an auto pipeline block.
- `QwenImageControlNetVaeEncoderStep` (controlnet) is used when `control_image` is provided.
- if `control_image` is not provided, step will be skipped.
Components:
vae (`AutoencoderKLQwenImage`) [subfolder=]
controlnet (`QwenImageControlNetModel`) [subfolder=]
control_image_processor (`VaeImageProcessor`) [subfolder=]
Inputs:
control_image (`Image`, *optional*):
Control image for ControlNet conditioning.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
generator (`Generator`, *optional*):
Torch generator for deterministic generation.
Outputs:
control_image_latents (`Tensor`):
The latents representing the control image
"""
block_classes = [QwenImageControlNetVaeEncoderStep]
block_names = ["controlnet"]
block_trigger_inputs = ["control_image"]
@@ -142,7 +307,52 @@ class QwenImageOptionalControlNetVaeEncoderStep(AutoPipelineBlocks):
# assemble input steps
#auto_docstring
class QwenImageImg2ImgInputStep(SequentialPipelineBlocks):
"""
class QwenImageImg2ImgInputStep
Input step that prepares the inputs for the img2img denoising step. It:
Components:
pachifier (`QwenImagePachifier`) [subfolder=]
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
prompt_embeds_mask (`None`):
negative_prompt_embeds (`None`, *optional*):
negative_prompt_embeds_mask (`None`, *optional*):
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
Outputs:
batch_size (`int`):
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt
dtype (`dtype`):
Data type of model tensor inputs (determined by `prompt_embeds`)
image_height (`int`):
The image height calculated from the image latents dimension
image_width (`int`):
The image width calculated from the image latents dimension
"""
model_name = "qwenimage"
block_classes = [QwenImageTextInputsStep(), QwenImageAdditionalInputsStep(image_latent_inputs=["image_latents"])]
block_names = ["text_inputs", "additional_inputs"]
@@ -154,7 +364,54 @@ class QwenImageImg2ImgInputStep(SequentialPipelineBlocks):
" - update height/width based `image_latents`, patchify `image_latents`."
#auto_docstring
class QwenImageInpaintInputStep(SequentialPipelineBlocks):
"""
class QwenImageInpaintInputStep
Input step that prepares the inputs for the inpainting denoising step. It:
Components:
pachifier (`QwenImagePachifier`) [subfolder=]
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
prompt_embeds_mask (`None`):
negative_prompt_embeds (`None`, *optional*):
negative_prompt_embeds_mask (`None`, *optional*):
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
processed_mask_image (`None`, *optional*):
Outputs:
batch_size (`int`):
Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt
dtype (`dtype`):
Data type of model tensor inputs (determined by `prompt_embeds`)
image_height (`int`):
The image height calculated from the image latents dimension
image_width (`int`):
The image width calculated from the image latents dimension
"""
model_name = "qwenimage"
block_classes = [
QwenImageTextInputsStep(),
@@ -172,7 +429,49 @@ class QwenImageInpaintInputStep(SequentialPipelineBlocks):
# assemble prepare latents steps
#auto_docstring
class QwenImageInpaintPrepareLatentsStep(SequentialPipelineBlocks):
"""
class QwenImageInpaintPrepareLatentsStep
This step prepares the latents/image_latents and mask inputs for the inpainting denoising step. It:
- Add noise to the image latents to create the latents input for the denoiser.
- Create the pachified latents `mask` based on the processedmask image.
Components:
scheduler (`FlowMatchEulerDiscreteScheduler`) [subfolder=]
pachifier (`QwenImagePachifier`) [subfolder=]
Inputs:
latents (`Tensor`):
The initial random noised, can be generated in prepare latent step.
image_latents (`Tensor`):
The image latents to use for the denoising process. Can be generated in vae encoder and packed in input step.
timesteps (`Tensor`):
The timesteps to use for the denoising process. Can be generated in set_timesteps step.
processed_mask_image (`Tensor`):
The processed mask to use for the inpainting process.
height (`None`):
width (`None`):
dtype (`None`):
Outputs:
initial_noise (`Tensor`):
The initial random noised used for inpainting denoising.
mask (`Tensor`):
The mask to use for the inpainting process.
"""
model_name = "qwenimage"
block_classes = [QwenImagePrepareLatentsWithStrengthStep(), QwenImageCreateMaskLatentsStep()]
block_names = ["add_noise_to_latents", "create_mask_latents"]
@@ -190,7 +489,66 @@ class QwenImageInpaintPrepareLatentsStep(SequentialPipelineBlocks):
# Qwen Image (text2image)
#auto_docstring
class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
"""
class QwenImageCoreDenoiseStep
step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs (timesteps, latents, rope inputs etc.).
Components:
pachifier (`QwenImagePachifier`) [subfolder=]
scheduler (`FlowMatchEulerDiscreteScheduler`) [subfolder=]
guider (`ClassifierFreeGuidance`) [subfolder=]
transformer (`QwenImageTransformer2DModel`) [subfolder=]
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
prompt_embeds_mask (`None`):
negative_prompt_embeds (`None`, *optional*):
negative_prompt_embeds_mask (`None`, *optional*):
latents (`Tensor`, *optional*):
Pre-generated noisy latents for image generation.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
generator (`Generator`, *optional*):
Torch generator for deterministic generation.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps.
sigmas (`List`, *optional*):
Custom sigmas for the denoising process.
attention_kwargs (`Dict`, *optional*):
Additional kwargs for attention processors.
**denoiser_input_fields (`Tensor`, *optional*):
conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.
Outputs:
latents (`Tensor`):
Denoised latents.
"""
model_name = "qwenimage"
block_classes = [
QwenImageTextInputsStep(),
@@ -212,10 +570,81 @@ class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
@property
def description(self):
return "step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs (timesteps, latents, rope inputs etc.)."
@property
def outputs(self):
return [
OutputParam.latents(),
]
# Qwen Image (inpainting)
#auto_docstring
class QwenImageInpaintCoreDenoiseStep(SequentialPipelineBlocks):
"""
class QwenImageInpaintCoreDenoiseStep
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint task.
Components:
pachifier (`QwenImagePachifier`) [subfolder=]
scheduler (`FlowMatchEulerDiscreteScheduler`) [subfolder=]
guider (`ClassifierFreeGuidance`) [subfolder=]
transformer (`QwenImageTransformer2DModel`) [subfolder=]
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
prompt_embeds_mask (`None`):
negative_prompt_embeds (`None`, *optional*):
negative_prompt_embeds_mask (`None`, *optional*):
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
processed_mask_image (`None`, *optional*):
latents (`Tensor`, *optional*):
Pre-generated noisy latents for image generation.
generator (`Generator`, *optional*):
Torch generator for deterministic generation.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps.
sigmas (`List`, *optional*):
Custom sigmas for the denoising process.
strength (`float`, *optional*, defaults to 0.9):
Strength for img2img/inpainting.
attention_kwargs (`Dict`, *optional*):
Additional kwargs for attention processors.
**denoiser_input_fields (`Tensor`, *optional*):
conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.
Outputs:
latents (`Tensor`):
Denoised latents.
"""
model_name = "qwenimage"
block_classes = [
QwenImageInpaintInputStep(),
@@ -240,9 +669,78 @@ class QwenImageInpaintCoreDenoiseStep(SequentialPipelineBlocks):
def description(self):
return "Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint task."
@property
def outputs(self):
return [
OutputParam.latents(),
]
# Qwen Image (image2image)
#auto_docstring
class QwenImageImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
"""
class QwenImageImg2ImgCoreDenoiseStep
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img task.
Components:
pachifier (`QwenImagePachifier`) [subfolder=]
scheduler (`FlowMatchEulerDiscreteScheduler`) [subfolder=]
guider (`ClassifierFreeGuidance`) [subfolder=]
transformer (`QwenImageTransformer2DModel`) [subfolder=]
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
prompt_embeds_mask (`None`):
negative_prompt_embeds (`None`, *optional*):
negative_prompt_embeds_mask (`None`, *optional*):
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
latents (`Tensor`, *optional*):
Pre-generated noisy latents for image generation.
generator (`Generator`, *optional*):
Torch generator for deterministic generation.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps.
sigmas (`List`, *optional*):
Custom sigmas for the denoising process.
strength (`float`, *optional*, defaults to 0.9):
Strength for img2img/inpainting.
attention_kwargs (`Dict`, *optional*):
Additional kwargs for attention processors.
**denoiser_input_fields (`Tensor`, *optional*):
conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.
Outputs:
latents (`Tensor`):
Denoised latents.
"""
model_name = "qwenimage"
block_classes = [
QwenImageImg2ImgInputStep(),
@@ -267,9 +765,87 @@ class QwenImageImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
def description(self):
return "Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img task."
@property
def outputs(self):
return [
OutputParam.latents(),
]
# Qwen Image (text2image) with controlnet
#auto_docstring
class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
"""
class QwenImageControlNetCoreDenoiseStep
step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs (timesteps, latents, rope inputs etc.).
Components:
pachifier (`QwenImagePachifier`) [subfolder=]
scheduler (`FlowMatchEulerDiscreteScheduler`) [subfolder=]
controlnet (`QwenImageControlNetModel`) [subfolder=]
guider (`ClassifierFreeGuidance`) [subfolder=]
transformer (`QwenImageTransformer2DModel`) [subfolder=]
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
prompt_embeds_mask (`None`):
negative_prompt_embeds (`None`, *optional*):
negative_prompt_embeds_mask (`None`, *optional*):
control_image_latents (`None`):
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
latents (`Tensor`, *optional*):
Pre-generated noisy latents for image generation.
generator (`Generator`, *optional*):
Torch generator for deterministic generation.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps.
sigmas (`List`, *optional*):
Custom sigmas for the denoising process.
control_guidance_start (`float`, *optional*, defaults to 0.0):
When to start applying ControlNet.
control_guidance_end (`float`, *optional*, defaults to 1.0):
When to stop applying ControlNet.
controlnet_conditioning_scale (`float`, *optional*, defaults to 1.0):
Scale for ControlNet conditioning.
**denoiser_input_fields (`None`, *optional*):
All conditional model inputs for the denoiser. It should contain prompt_embeds/negative_prompt_embeds,
txt_seq_lens/negative_txt_seq_lens.
attention_kwargs (`Dict`, *optional*):
Additional kwargs for attention processors.
Outputs:
latents (`Tensor`):
Denoised latents.
"""
model_name = "qwenimage"
block_classes = [
QwenImageTextInputsStep(),
@@ -295,10 +871,95 @@ class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
@property
def description(self):
return "step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs (timesteps, latents, rope inputs etc.)."
@property
def outputs(self):
return [
OutputParam.latents(),
]
# Qwen Image (inpainting) with controlnet
#auto_docstring
class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
"""
class QwenImageControlNetInpaintCoreDenoiseStep
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint task.
Components:
pachifier (`QwenImagePachifier`) [subfolder=]
scheduler (`FlowMatchEulerDiscreteScheduler`) [subfolder=]
controlnet (`QwenImageControlNetModel`) [subfolder=]
guider (`ClassifierFreeGuidance`) [subfolder=]
transformer (`QwenImageTransformer2DModel`) [subfolder=]
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
prompt_embeds_mask (`None`):
negative_prompt_embeds (`None`, *optional*):
negative_prompt_embeds_mask (`None`, *optional*):
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
processed_mask_image (`None`, *optional*):
control_image_latents (`None`):
latents (`Tensor`, *optional*):
Pre-generated noisy latents for image generation.
generator (`Generator`, *optional*):
Torch generator for deterministic generation.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps.
sigmas (`List`, *optional*):
Custom sigmas for the denoising process.
strength (`float`, *optional*, defaults to 0.9):
Strength for img2img/inpainting.
control_guidance_start (`float`, *optional*, defaults to 0.0):
When to start applying ControlNet.
control_guidance_end (`float`, *optional*, defaults to 1.0):
When to stop applying ControlNet.
controlnet_conditioning_scale (`float`, *optional*, defaults to 1.0):
Scale for ControlNet conditioning.
**denoiser_input_fields (`None`, *optional*):
All conditional model inputs for the denoiser. It should contain prompt_embeds/negative_prompt_embeds,
txt_seq_lens/negative_txt_seq_lens.
attention_kwargs (`Dict`, *optional*):
Additional kwargs for attention processors.
Outputs:
latents (`Tensor`):
Denoised latents.
"""
model_name = "qwenimage"
block_classes = [
QwenImageInpaintInputStep(),
@@ -327,9 +988,93 @@ class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
def description(self):
return "Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint task."
@property
def outputs(self):
return [
OutputParam.latents(),
]
# Qwen Image (image2image) with controlnet
#auto_docstring
class QwenImageControlNetImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
"""
class QwenImageControlNetImg2ImgCoreDenoiseStep
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img task.
Components:
pachifier (`QwenImagePachifier`) [subfolder=]
scheduler (`FlowMatchEulerDiscreteScheduler`) [subfolder=]
controlnet (`QwenImageControlNetModel`) [subfolder=]
guider (`ClassifierFreeGuidance`) [subfolder=]
transformer (`QwenImageTransformer2DModel`) [subfolder=]
Inputs:
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
prompt_embeds_mask (`None`):
negative_prompt_embeds (`None`, *optional*):
negative_prompt_embeds_mask (`None`, *optional*):
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
control_image_latents (`None`):
latents (`Tensor`, *optional*):
Pre-generated noisy latents for image generation.
generator (`Generator`, *optional*):
Torch generator for deterministic generation.
num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps.
sigmas (`List`, *optional*):
Custom sigmas for the denoising process.
strength (`float`, *optional*, defaults to 0.9):
Strength for img2img/inpainting.
control_guidance_start (`float`, *optional*, defaults to 0.0):
When to start applying ControlNet.
control_guidance_end (`float`, *optional*, defaults to 1.0):
When to stop applying ControlNet.
controlnet_conditioning_scale (`float`, *optional*, defaults to 1.0):
Scale for ControlNet conditioning.
**denoiser_input_fields (`None`, *optional*):
All conditional model inputs for the denoiser. It should contain prompt_embeds/negative_prompt_embeds,
txt_seq_lens/negative_txt_seq_lens.
attention_kwargs (`Dict`, *optional*):
Additional kwargs for attention processors.
Outputs:
latents (`Tensor`):
Denoised latents.
"""
model_name = "qwenimage"
block_classes = [
QwenImageImg2ImgInputStep(),
@@ -357,7 +1102,12 @@ class QwenImageControlNetImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
@property
def description(self):
return "Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img task."
@property
def outputs(self):
return [
OutputParam.latents(),
]
# Auto denoise step for QwenImage
class QwenImageAutoCoreDenoiseStep(ConditionalPipelineBlocks):
@@ -426,7 +1176,32 @@ class QwenImageAutoCoreDenoiseStep(ConditionalPipelineBlocks):
# standard decode step works for most tasks except for inpaint
#auto_docstring
class QwenImageDecodeStep(SequentialPipelineBlocks):
"""
class QwenImageDecodeStep
Decode step that decodes the latents to images and postprocess the generated image.
Components:
vae (`AutoencoderKLQwenImage`) [subfolder=]
image_processor (`VaeImageProcessor`) [subfolder=]
Inputs:
latents (`Tensor`):
The latents to decode, can be generated in the denoise step
output_type (`str`, *optional*, defaults to pil):
Output format: 'pil', 'np', 'pt''.
Outputs:
images (`List`):
Generated images.
"""
model_name = "qwenimage"
block_classes = [QwenImageDecoderStep(), QwenImageProcessImagesOutputStep()]
block_names = ["decode", "postprocess"]
@@ -437,7 +1212,34 @@ class QwenImageDecodeStep(SequentialPipelineBlocks):
# Inpaint decode step
#auto_docstring
class QwenImageInpaintDecodeStep(SequentialPipelineBlocks):
"""
class QwenImageInpaintDecodeStep
Decode step that decodes the latents to images and postprocess the generated image, optional apply the mask overally to the original image.
Components:
vae (`AutoencoderKLQwenImage`) [subfolder=]
image_mask_processor (`InpaintProcessor`) [subfolder=]
Inputs:
latents (`Tensor`):
The latents to decode, can be generated in the denoise step
output_type (`str`, *optional*, defaults to pil):
Output format: 'pil', 'np', 'pt''.
mask_overlay_kwargs (`None`, *optional*):
Outputs:
images (`List`):
Generated images.
"""
model_name = "qwenimage"
block_classes = [QwenImageDecoderStep(), QwenImageInpaintProcessImagesOutputStep()]
block_names = ["decode", "postprocess"]

View File

@@ -0,0 +1,296 @@
# coding=utf-8
# Copyright 2025 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Auto Docstring Generator for Modular Pipeline Blocks
This script scans Python files for classes that have `# auto_docstring` comment above them
and inserts/updates the docstring from the class's `doc` property.
Run from the root of the repo:
python utils/modular_auto_docstring.py [path] [--fix_and_overwrite]
Examples:
# Check for auto_docstring markers (will error if found without proper docstring)
python utils/modular_auto_docstring.py
# Check specific directory
python utils/modular_auto_docstring.py src/diffusers/modular_pipelines/
# Fix and overwrite the docstrings
python utils/modular_auto_docstring.py --fix_and_overwrite
Usage in code:
# auto_docstring
class QwenImageAutoVaeEncoderStep(AutoPipelineBlocks):
# docstring will be automatically inserted here
@property
def doc(self):
return "Your docstring content..."
"""
import argparse
import ast
import glob
import importlib
import os
import re
import sys
# All paths are set with the intent you should run this script from the root of the repo
# Default location scanned when no explicit path is given to `check_auto_docstrings`.
DIFFUSERS_PATH = "src/diffusers"
# Repository root; `setup_diffusers_import` joins it with "src" to build the import path entry.
REPO_PATH = "."
# Pattern to match the auto_docstring comment: a line holding nothing but `#` followed by
# `auto_docstring`, with optional whitespace before the `#`, after it, and at the end of the
# line (so both `# auto_docstring` and `#auto_docstring` are accepted).
AUTO_DOCSTRING_PATTERN = re.compile(r"^\s*#\s*auto_docstring\s*$")
def setup_diffusers_import():
    """Put the repository's `src` directory at the front of `sys.path` (once)."""
    local_src = os.path.join(REPO_PATH, "src")
    if local_src in sys.path:
        # Already registered; inserting again would only create duplicates.
        return
    sys.path.insert(0, local_src)
def get_module_from_filepath(filepath: str) -> str:
    """
    Translate a file path into a dotted module name.

    The path is normalized, a leading `src/` component and a trailing `.py` suffix are dropped,
    and the remaining path separators are turned into dots.
    """
    src_prefix = "src" + os.sep
    trimmed = os.path.normpath(filepath)
    if trimmed.startswith(src_prefix):
        trimmed = trimmed[len(src_prefix) :]
    if trimmed.endswith(".py"):
        trimmed = trimmed[: -len(".py")]
    return ".".join(trimmed.split(os.sep))
def load_module(filepath: str):
    """
    Import the module that lives at `filepath` and return it.

    Any exception raised by the import is reported as a warning on stdout and `None` is
    returned instead, so one broken module does not stop the whole run.
    """
    setup_diffusers_import()
    dotted_name = get_module_from_filepath(filepath)
    try:
        return importlib.import_module(dotted_name)
    except Exception as err:
        print(f"Warning: Could not import module {dotted_name}: {err}")
    return None
def get_doc_from_class(module, class_name: str) -> str:
    """
    Instantiate `class_name` from `module` (no constructor arguments) and return its `doc`.

    Returns `None` when the module is `None`, the class is not found on the module, the
    instance has no `doc` attribute, or instantiation/attribute access raises (in which case
    a warning is printed).
    """
    target_cls = None if module is None else getattr(module, class_name, None)
    if target_cls is None:
        return None

    try:
        block = target_cls()
        if hasattr(block, "doc"):
            return block.doc
    except Exception as err:
        print(f"Warning: Could not instantiate {class_name}: {err}")
    return None
def find_auto_docstring_classes(filepath: str) -> list:
    """
    Find all classes in a file that have a `# auto_docstring` comment above them.

    A marker applies to the first following line that is neither blank nor a comment, and only
    if a class statement really starts on that line according to the parsed AST. Classes are
    looked up by line number rather than by name, so two classes sharing a name (nested or
    redefined) cannot be confused, and marker-like text inside string literals is ignored.

    Args:
        filepath: Path of the Python source file to scan.

    Returns:
        A list of `(class_name, class_line_number, has_existing_docstring, docstring_end_line)`
        tuples in file order. Line numbers are 1-indexed; `docstring_end_line` equals the class
        line when the class has no docstring. An empty list is returned (after printing a
        message) when the file cannot be parsed.
    """
    with open(filepath, "r", encoding="utf-8", newline="\n") as f:
        lines = f.readlines()

    # Parse AST to find class locations and their docstrings
    try:
        tree = ast.parse("".join(lines))
    except SyntaxError as e:
        print(f"Syntax error in {filepath}: {e}")
        return []

    # Map: line of the `class` statement -> (class_name, has_docstring, docstring_end_line)
    class_info = {}
    for node in ast.walk(tree):
        if not isinstance(node, ast.ClassDef):
            continue
        has_docstring = False
        docstring_end_line = node.lineno  # default to class line
        first_stmt = node.body[0] if node.body else None
        if (
            isinstance(first_stmt, ast.Expr)
            and isinstance(first_stmt.value, ast.Constant)
            and isinstance(first_stmt.value.value, str)
        ):
            has_docstring = True
            docstring_end_line = first_stmt.end_lineno or first_stmt.lineno
        class_info[node.lineno] = (node.name, has_docstring, docstring_end_line)

    # Now scan for # auto_docstring comments
    classes_to_update = []
    for i, line in enumerate(lines):
        if not AUTO_DOCSTRING_PATTERN.match(line):
            continue

        # Skip blank lines and further comments between the marker and its target line
        j = i + 1
        while j < len(lines):
            candidate = lines[j].strip()
            if candidate and not candidate.startswith("#"):
                break
            j += 1

        # `j` is a 0-based index while AST line numbers are 1-based
        info = class_info.get(j + 1)
        if info is None:
            continue
        class_name, has_docstring, docstring_end_line = info
        classes_to_update.append((class_name, j + 1, has_docstring, docstring_end_line))

    return classes_to_update
def format_docstring(doc: str, indent: str = "    ") -> str:
    """
    Format a doc string as a properly indented, triple-double-quoted docstring.

    The text is escaped so that the emitted literal is valid Python and evaluates back to the
    content of `doc`: backslashes are doubled, embedded `\"\"\"` sequences are escaped, and a
    one-line doc ending in a double quote gets that quote escaped (otherwise it would merge
    with the closing quotes).

    Args:
        doc: Raw documentation text; surrounding whitespace is stripped.
        indent: Prefix placed before the quotes and before every non-blank content line.

    Returns:
        The docstring source, newline-terminated. A one-line doc stays on a single line; a
        multi-line doc is placed between quote lines, with blank lines emitted without
        trailing indentation.
    """
    text = doc.strip().replace("\\", "\\\\")
    single_line = "\n" not in text

    # Must run before the triple-quote escaping below so the final quote is escaped only once.
    if single_line and text.endswith('"'):
        text = text[:-1] + '\\"'
    text = text.replace('"""', '\\"\\"\\"')

    if single_line:
        return f'{indent}"""{text}"""\n'

    body = [f"{indent}{line}\n" if line.strip() else "\n" for line in text.split("\n")]
    return "".join([f'{indent}"""\n', *body, f'{indent}"""\n'])
def process_file(filepath: str, overwrite: bool = False) -> list:
    """
    Process a file and find/insert docstrings for `# auto_docstring` marked classes.

    Args:
        filepath: Path of the Python source file to process.
        overwrite: When `False` (check mode) the file is only inspected. When `True`, the
            module is imported, each marked class is instantiated, and its `doc` value is
            written into the file as the class docstring.

    Returns:
        A list of `(filepath, class_name, class_line)` tuples. In check mode it contains only
        the marked classes that have no docstring yet; in overwrite mode it contains every
        marked class found in the file.
    """
    classes_to_update = find_auto_docstring_classes(filepath)

    if not classes_to_update:
        return []

    if not overwrite:
        # Check mode: a marked class that already carries a docstring is fine, so only the
        # undocumented ones are reported. Reporting all of them would make the check fail
        # forever once a single marker exists.
        return [
            (filepath, cls_name, line)
            for cls_name, line, has_docstring, _ in classes_to_update
            if not has_docstring
        ]

    # Load the module to get doc properties
    module = load_module(filepath)

    with open(filepath, "r", encoding="utf-8", newline="\n") as f:
        lines = f.readlines()

    # Process in reverse order so earlier line numbers stay valid while later parts change
    updated = False
    for class_name, class_line, has_docstring, docstring_end_line in reversed(classes_to_update):
        doc = get_doc_from_class(module, class_name)

        if doc is None:
            print(f"Warning: Could not get doc for {class_name} in {filepath}")
            continue

        # The docstring sits one level (4 spaces) deeper than the `class` keyword, which also
        # keeps indented/nested classes syntactically valid.
        class_header = lines[class_line - 1]
        class_indent = class_header[: len(class_header) - len(class_header.lstrip())]
        new_docstring = format_docstring(doc, class_indent + "    ")

        # NOTE: the slicing below treats `class_line` as the last line of the class header, so
        # a header wrapped over several lines is not handled.
        # class_line is 1-indexed, hence lines[:class_line] ends with the class line itself.
        # With an existing docstring the old one (up to docstring_end_line) is dropped,
        # otherwise the new docstring is simply inserted right after the class line.
        resume_at = docstring_end_line if has_docstring else class_line
        lines = lines[:class_line] + [new_docstring] + lines[resume_at:]

        updated = True
        print(f"Updated docstring for {class_name} in {filepath}")

    if updated:
        with open(filepath, "w", encoding="utf-8", newline="\n") as f:
            f.writelines(lines)

    return [(filepath, cls_name, line) for cls_name, line, _, _ in classes_to_update]
def check_auto_docstrings(path: str = None, overwrite: bool = False):
    """
    Run `process_file` over one file, or over every `.py` file below a directory.

    `path` defaults to `DIFFUSERS_PATH`. In check mode (`overwrite=False`) a `ValueError`
    listing the reported classes is raised if any file reports entries; in overwrite mode the
    number of reported entries is printed. When nothing is reported a short notice is printed.
    """
    target = DIFFUSERS_PATH if path is None else path

    if os.path.isfile(target):
        candidates = [target]
    else:
        candidates = glob.glob(os.path.join(target, "**/*.py"), recursive=True)

    all_markers = []
    for candidate in candidates:
        all_markers.extend(process_file(candidate, overwrite))

    if len(all_markers) == 0:
        print("No # auto_docstring markers found.")
        return

    if overwrite:
        print(f"\nUpdated {len(all_markers)} docstring(s).")
        return

    message = "\n".join([f"- {f}: {cls} at line {line}" for f, cls, line in all_markers])
    raise ValueError(
        f"Found the following # auto_docstring markers that need docstrings:\n{message}\n\n"
        f"Run `python utils/modular_auto_docstring.py --fix_and_overwrite` to fix them."
    )
if __name__ == "__main__":
    # Command-line entry point: optional positional path plus a flag that enables rewriting.
    cli = argparse.ArgumentParser(description="Check and fix # auto_docstring markers in modular pipeline blocks")
    cli.add_argument("path", nargs="?", default=None, help="File or directory to process (default: src/diffusers)")
    cli.add_argument(
        "--fix_and_overwrite",
        action="store_true",
        help="Whether to fix the docstrings by inserting them from doc property.",
    )
    options = cli.parse_args()

    check_auto_docstrings(options.path, options.fix_and_overwrite)