1
0
mirror of https://github.com/huggingface/diffusers.git synced 2026-01-27 17:22:53 +03:00

add TODO in the description for empty docstring

This commit is contained in:
yiyixuxu
2026-01-17 09:57:56 +01:00
parent aea0d046f6
commit 25c968a38f
7 changed files with 184 additions and 86 deletions

View File

@@ -708,6 +708,8 @@ def format_params(params, header="Args", indent_level=4, max_line_length=115):
desc = re.sub(r"\[(.*?)\]\((https?://[^\s\)]+)\)", r"[\1](\2)", param.description)
wrapped_desc = wrap_text(desc, desc_indent, max_line_length)
param_str += f"\n{desc_indent}{wrapped_desc}"
else:
param_str += f"\n{desc_indent}TODO: Add description."
formatted_params.append(param_str)

View File

@@ -1324,7 +1324,8 @@ class QwenImageVaeEncoderStep(ModularPipelineBlocks):
@property
def inputs(self) -> List[InputParam]:
return [
InputParam.template(self._image_input_name) or InputParam(name=self._image_input_name, required=True),
InputParam.template(self._image_input_name)
or InputParam(name=self._image_input_name, required=True, description="The image tensor to encode"),
InputParam.generator(),
]

View File

@@ -75,11 +75,8 @@ class QwenImageAutoTextEncoderStep(AutoPipelineBlocks):
Configs:
prompt_template_encode (default: <|im_start|>system
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>
<|im_start|>user
{}<|im_end|>
<|im_start|>assistant
)
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the
objects and background:<|im_end|> <|im_start|>user {}<|im_end|> <|im_start|>assistant )
prompt_template_encode_start_idx (default: 34)
@@ -151,7 +148,9 @@ class QwenImageInpaintVaeEncoderStep(SequentialPipelineBlocks):
Outputs:
processed_image (`None`):
TODO: Add description.
processed_mask_image (`None`):
TODO: Add description.
mask_overlay_kwargs (`Dict`):
The kwargs for the postprocess step to apply the mask overlay
image_latents (`Tensor`):
@@ -195,6 +194,7 @@ class QwenImageImg2ImgVaeEncoderStep(SequentialPipelineBlocks):
Outputs:
processed_image (`None`):
TODO: Add description.
image_latents (`Tensor`):
The latents representing the reference image(s). Single tensor or list depending on input.
"""
@@ -290,14 +290,19 @@ class QwenImageImg2ImgInputStep(SequentialPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
TODO: Add description.
prompt_embeds_mask (`None`):
TODO: Add description.
negative_prompt_embeds (`None`, *optional*):
TODO: Add description.
negative_prompt_embeds_mask (`None`, *optional*):
TODO: Add description.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
TODO: Add description.
Outputs:
batch_size (`int`):
@@ -334,15 +339,21 @@ class QwenImageInpaintInputStep(SequentialPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
TODO: Add description.
prompt_embeds_mask (`None`):
TODO: Add description.
negative_prompt_embeds (`None`, *optional*):
TODO: Add description.
negative_prompt_embeds_mask (`None`, *optional*):
TODO: Add description.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
TODO: Add description.
processed_mask_image (`None`, *optional*):
TODO: Add description.
Outputs:
batch_size (`int`):
@@ -389,14 +400,18 @@ class QwenImageInpaintPrepareLatentsStep(SequentialPipelineBlocks):
latents (`Tensor`):
The initial random noise; can be generated in the prepare latents step.
image_latents (`Tensor`):
The image latents to use for the denoising process. Can be generated in vae encoder and packed in input step.
The image latents to use for the denoising process. Can be generated in vae encoder and packed in input
step.
timesteps (`Tensor`):
The timesteps to use for the denoising process. Can be generated in set_timesteps step.
processed_mask_image (`Tensor`):
The processed mask to use for the inpainting process.
height (`None`):
TODO: Add description.
width (`None`):
TODO: Add description.
dtype (`None`):
TODO: Add description.
Outputs:
initial_noise (`Tensor`):
@@ -425,7 +440,8 @@ class QwenImageInpaintPrepareLatentsStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
"""
step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs (timesteps, latents, rope inputs etc.).
Step that denoises noise into an image for the text2image task. It includes the denoise loop, as well as preparing
the inputs (timesteps, latents, rope inputs etc.).
Components:
@@ -441,9 +457,13 @@ class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
TODO: Add description.
prompt_embeds_mask (`None`):
TODO: Add description.
negative_prompt_embeds (`None`, *optional*):
TODO: Add description.
negative_prompt_embeds_mask (`None`, *optional*):
TODO: Add description.
latents (`Tensor`, *optional*):
Pre-generated noisy latents for image generation.
height (`int`, *optional*):
@@ -499,7 +519,8 @@ class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageInpaintCoreDenoiseStep(SequentialPipelineBlocks):
"""
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint task.
Before-denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for the
inpaint task.
Components:
@@ -515,15 +536,21 @@ class QwenImageInpaintCoreDenoiseStep(SequentialPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
TODO: Add description.
prompt_embeds_mask (`None`):
TODO: Add description.
negative_prompt_embeds (`None`, *optional*):
TODO: Add description.
negative_prompt_embeds_mask (`None`, *optional*):
TODO: Add description.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
TODO: Add description.
processed_mask_image (`None`, *optional*):
TODO: Add description.
latents (`Tensor`, *optional*):
Pre-generated noisy latents for image generation.
generator (`Generator`, *optional*):
@@ -579,7 +606,8 @@ class QwenImageInpaintCoreDenoiseStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
"""
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img task.
Before-denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for the
img2img task.
Components:
@@ -595,14 +623,19 @@ class QwenImageImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
TODO: Add description.
prompt_embeds_mask (`None`):
TODO: Add description.
negative_prompt_embeds (`None`, *optional*):
TODO: Add description.
negative_prompt_embeds_mask (`None`, *optional*):
TODO: Add description.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
TODO: Add description.
latents (`Tensor`, *optional*):
Pre-generated noisy latents for image generation.
generator (`Generator`, *optional*):
@@ -658,7 +691,8 @@ class QwenImageImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
"""
step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs (timesteps, latents, rope inputs etc.).
Step that denoises noise into an image for the text2image task. It includes the denoise loop, as well as preparing
the inputs (timesteps, latents, rope inputs etc.).
Components:
@@ -676,10 +710,15 @@ class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
TODO: Add description.
prompt_embeds_mask (`None`):
TODO: Add description.
negative_prompt_embeds (`None`, *optional*):
TODO: Add description.
negative_prompt_embeds_mask (`None`, *optional*):
TODO: Add description.
control_image_latents (`None`):
TODO: Add description.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
@@ -746,7 +785,8 @@ class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
"""
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint task.
Before-denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for the
inpaint task.
Components:
@@ -764,16 +804,23 @@ class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
TODO: Add description.
prompt_embeds_mask (`None`):
TODO: Add description.
negative_prompt_embeds (`None`, *optional*):
TODO: Add description.
negative_prompt_embeds_mask (`None`, *optional*):
TODO: Add description.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
TODO: Add description.
processed_mask_image (`None`, *optional*):
TODO: Add description.
control_image_latents (`None`):
TODO: Add description.
latents (`Tensor`, *optional*):
Pre-generated noisy latents for image generation.
generator (`Generator`, *optional*):
@@ -840,7 +887,8 @@ class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageControlNetImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
"""
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img task.
Before-denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for the
img2img task.
Components:
@@ -858,15 +906,21 @@ class QwenImageControlNetImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
TODO: Add description.
prompt_embeds_mask (`None`):
TODO: Add description.
negative_prompt_embeds (`None`, *optional*):
TODO: Add description.
negative_prompt_embeds_mask (`None`, *optional*):
TODO: Add description.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
TODO: Add description.
control_image_latents (`None`):
TODO: Add description.
latents (`Tensor`, *optional*):
Pre-generated noisy latents for image generation.
generator (`Generator`, *optional*):
@@ -1031,7 +1085,8 @@ class QwenImageDecodeStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageInpaintDecodeStep(SequentialPipelineBlocks):
"""
Decode step that decodes the latents to images and postprocess the generated image, optional apply the mask overally to the original image.
Decode step that decodes the latents to images and postprocesses the generated image, optionally applying the mask
overlay to the original image.
Components:
@@ -1045,6 +1100,7 @@ class QwenImageInpaintDecodeStep(SequentialPipelineBlocks):
output_type (`str`, *optional*, defaults to pil):
Output format: 'pil', 'np', 'pt'.
mask_overlay_kwargs (`None`, *optional*):
TODO: Add description.
Outputs:
images (`List`):
@@ -1126,11 +1182,8 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
Configs:
prompt_template_encode (default: <|im_start|>system
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>
<|im_start|>user
{}<|im_end|>
<|im_start|>assistant
)
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the
objects and background:<|im_end|> <|im_start|>user {}<|im_end|> <|im_start|>assistant )
prompt_template_encode_start_idx (default: 34)
@@ -1160,9 +1213,13 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
TODO: Add description.
prompt_embeds_mask (`None`):
TODO: Add description.
negative_prompt_embeds (`None`, *optional*):
TODO: Add description.
negative_prompt_embeds_mask (`None`, *optional*):
TODO: Add description.
latents (`Tensor`):
Pre-generated noisy latents for image generation.
num_inference_steps (`int`):
@@ -1174,10 +1231,13 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
**denoiser_input_fields (`Tensor`, *optional*):
conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.
image_latents (`None`, *optional*):
TODO: Add description.
processed_mask_image (`None`, *optional*):
TODO: Add description.
strength (`float`, *optional*, defaults to 0.9):
Strength for img2img/inpainting.
control_image_latents (`None`, *optional*):
TODO: Add description.
control_guidance_start (`float`, *optional*, defaults to 0.0):
When to start applying ControlNet.
control_guidance_end (`float`, *optional*, defaults to 1.0):
@@ -1187,6 +1247,7 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
output_type (`str`, *optional*, defaults to pil):
Output format: 'pil', 'np', 'pt'.
mask_overlay_kwargs (`None`, *optional*):
TODO: Add description.
Outputs:
images (`List`):

View File

@@ -74,11 +74,10 @@ class QwenImageEditVLEncoderStep(SequentialPipelineBlocks):
Configs:
prompt_template_encode (default: <|im_start|>system
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>
<|im_start|>user
<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>
<|im_start|>assistant
)
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how
the user's text instruction should alter or modify the image. Generate a new image that meets the user's
requirements while maintaining consistency with the original input where appropriate.<|im_end|> <|im_start|>user
<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|> <|im_start|>assistant )
prompt_template_encode_start_idx (default: 64)
@@ -144,6 +143,7 @@ class QwenImageEditVaeEncoderStep(SequentialPipelineBlocks):
resized_image (`List`):
The resized images
processed_image (`None`):
TODO: Add description.
image_latents (`Tensor`):
The latents representing the reference image(s). Single tensor or list depending on input.
"""
@@ -192,7 +192,9 @@ class QwenImageEditInpaintVaeEncoderStep(SequentialPipelineBlocks):
resized_image (`List`):
The resized images
processed_image (`None`):
TODO: Add description.
processed_mask_image (`None`):
TODO: Add description.
mask_overlay_kwargs (`Dict`):
The kwargs for the postprocess step to apply the mask overlay
image_latents (`Tensor`):
@@ -255,14 +257,19 @@ class QwenImageEditInputStep(SequentialPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
TODO: Add description.
prompt_embeds_mask (`None`):
TODO: Add description.
negative_prompt_embeds (`None`, *optional*):
TODO: Add description.
negative_prompt_embeds_mask (`None`, *optional*):
TODO: Add description.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
TODO: Add description.
Outputs:
batch_size (`int`):
@@ -306,15 +313,21 @@ class QwenImageEditInpaintInputStep(SequentialPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
TODO: Add description.
prompt_embeds_mask (`None`):
TODO: Add description.
negative_prompt_embeds (`None`, *optional*):
TODO: Add description.
negative_prompt_embeds_mask (`None`, *optional*):
TODO: Add description.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
TODO: Add description.
processed_mask_image (`None`, *optional*):
TODO: Add description.
Outputs:
batch_size (`int`):
@@ -363,14 +376,18 @@ class QwenImageEditInpaintPrepareLatentsStep(SequentialPipelineBlocks):
latents (`Tensor`):
The initial random noise; can be generated in the prepare latents step.
image_latents (`Tensor`):
The image latents to use for the denoising process. Can be generated in vae encoder and packed in input step.
The image latents to use for the denoising process. Can be generated in vae encoder and packed in input
step.
timesteps (`Tensor`):
The timesteps to use for the denoising process. Can be generated in set_timesteps step.
processed_mask_image (`Tensor`):
The processed mask to use for the inpainting process.
height (`None`):
TODO: Add description.
width (`None`):
TODO: Add description.
dtype (`None`):
TODO: Add description.
Outputs:
initial_noise (`Tensor`):
@@ -412,14 +429,19 @@ class QwenImageEditCoreDenoiseStep(SequentialPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
TODO: Add description.
prompt_embeds_mask (`None`):
TODO: Add description.
negative_prompt_embeds (`None`, *optional*):
TODO: Add description.
negative_prompt_embeds_mask (`None`, *optional*):
TODO: Add description.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
TODO: Add description.
latents (`Tensor`, *optional*):
Pre-generated noisy latents for image generation.
generator (`Generator`, *optional*):
@@ -487,15 +509,21 @@ class QwenImageEditInpaintCoreDenoiseStep(SequentialPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
TODO: Add description.
prompt_embeds_mask (`None`):
TODO: Add description.
negative_prompt_embeds (`None`, *optional*):
TODO: Add description.
negative_prompt_embeds_mask (`None`, *optional*):
TODO: Add description.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
TODO: Add description.
processed_mask_image (`None`, *optional*):
TODO: Add description.
latents (`Tensor`, *optional*):
Pre-generated noisy latents for image generation.
generator (`Generator`, *optional*):
@@ -622,7 +650,8 @@ class QwenImageEditDecodeStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageEditInpaintDecodeStep(SequentialPipelineBlocks):
"""
Decode step that decodes the latents to images and postprocess the generated image, optionally apply the mask overlay to the original image.
Decode step that decodes the latents to images and postprocess the generated image, optionally apply the mask
overlay to the original image.
Components:
@@ -636,6 +665,7 @@ class QwenImageEditInpaintDecodeStep(SequentialPipelineBlocks):
output_type (`str`, *optional*, defaults to pil):
Output format: 'pil', 'np', 'pt'.
mask_overlay_kwargs (`None`, *optional*):
TODO: Add description.
Outputs:
images (`List`):
@@ -692,7 +722,8 @@ class QwenImageEditAutoBlocks(SequentialPipelineBlocks):
"""
Auto Modular pipeline for edit (img2img) and edit inpaint tasks using QwenImage-Edit.
- for edit (img2img) generation, you need to provide `image`
- for edit inpainting, you need to provide `mask_image` and `image`, optionally you can provide `padding_mask_crop`
- for edit inpainting, you need to provide `mask_image` and `image`, optionally you can provide
`padding_mask_crop`
Components:
@@ -719,11 +750,10 @@ class QwenImageEditAutoBlocks(SequentialPipelineBlocks):
Configs:
prompt_template_encode (default: <|im_start|>system
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>
<|im_start|>user
<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>
<|im_start|>assistant
)
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how
the user's text instruction should alter or modify the image. Generate a new image that meets the user's
requirements while maintaining consistency with the original input where appropriate.<|im_end|> <|im_start|>user
<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|> <|im_start|>assistant )
prompt_template_encode_start_idx (default: 64)
@@ -747,7 +777,9 @@ class QwenImageEditAutoBlocks(SequentialPipelineBlocks):
width (`int`):
The width in pixels of the generated image.
image_latents (`None`):
TODO: Add description.
processed_mask_image (`None`, *optional*):
TODO: Add description.
latents (`Tensor`):
Pre-generated noisy latents for image generation.
num_inference_steps (`int`):
@@ -763,6 +795,7 @@ class QwenImageEditAutoBlocks(SequentialPipelineBlocks):
output_type (`str`, *optional*, defaults to pil):
Output format: 'pil', 'np', 'pt'.
mask_overlay_kwargs (`None`, *optional*):
TODO: Add description.
Outputs:
images (`List`):

View File

@@ -67,11 +67,10 @@ class QwenImageEditPlusVLEncoderStep(SequentialPipelineBlocks):
Configs:
prompt_template_encode (default: <|im_start|>system
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>
<|im_start|>user
{}<|im_end|>
<|im_start|>assistant
)
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how
the user's text instruction should alter or modify the image. Generate a new image that meets the user's
requirements while maintaining consistency with the original input where appropriate.<|im_end|> <|im_start|>user
{}<|im_end|> <|im_start|>assistant )
img_template_encode (default: Picture {}: <|vision_start|><|image_pad|><|vision_end|>)
@@ -139,6 +138,7 @@ class QwenImageEditPlusVaeEncoderStep(SequentialPipelineBlocks):
resized_image (`List`):
The resized images
processed_image (`None`):
TODO: Add description.
image_latents (`Tensor`):
The latents representing the reference image(s). Single tensor or list depending on input.
"""
@@ -182,14 +182,19 @@ class QwenImageEditPlusInputStep(SequentialPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
TODO: Add description.
prompt_embeds_mask (`None`):
TODO: Add description.
negative_prompt_embeds (`None`, *optional*):
TODO: Add description.
negative_prompt_embeds_mask (`None`, *optional*):
TODO: Add description.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
TODO: Add description.
Outputs:
batch_size (`int`):
@@ -240,14 +245,19 @@ class QwenImageEditPlusCoreDenoiseStep(SequentialPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
TODO: Add description.
prompt_embeds_mask (`None`):
TODO: Add description.
negative_prompt_embeds (`None`, *optional*):
TODO: Add description.
negative_prompt_embeds_mask (`None`, *optional*):
TODO: Add description.
height (`int`, *optional*):
The height in pixels of the generated image.
width (`int`, *optional*):
The width in pixels of the generated image.
image_latents (`None`, *optional*):
TODO: Add description.
latents (`Tensor`, *optional*):
Pre-generated noisy latents for image generation.
generator (`Generator`, *optional*):
@@ -376,11 +386,10 @@ class QwenImageEditPlusAutoBlocks(SequentialPipelineBlocks):
Configs:
prompt_template_encode (default: <|im_start|>system
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>
<|im_start|>user
{}<|im_end|>
<|im_start|>assistant
)
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how
the user's text instruction should alter or modify the image. Generate a new image that meets the user's
requirements while maintaining consistency with the original input where appropriate.<|im_end|> <|im_start|>user
{}<|im_end|> <|im_start|>assistant )
img_template_encode (default: Picture {}: <|vision_start|><|image_pad|><|vision_end|>)

View File

@@ -53,7 +53,8 @@ logger = logging.get_logger(__name__)
# auto_docstring
class QwenImageLayeredTextEncoderStep(SequentialPipelineBlocks):
"""
QwenImage-Layered Text encoder step that encode the text prompt, will generate a prompt based on image if not provided.
QwenImage-Layered Text encoder step that encode the text prompt, will generate a prompt based on image if not
provided.
Components:
@@ -70,28 +71,23 @@ class QwenImageLayeredTextEncoderStep(SequentialPipelineBlocks):
Configs:
image_caption_prompt_en (default: <|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
# Image Annotator
You are a professional image annotator. Please write an image caption based on the input image:
You are a helpful assistant.<|im_end|> <|im_start|>user # Image Annotator You are a professional image annotator.
Please write an image caption based on the input image:
1. Write the caption using natural, descriptive language without structured formats or rich text.
2. Enrich caption details by including:
- Object attributes, such as quantity, color, shape, size, material, state, position, actions, and so on
- Vision Relations between objects, such as spatial relations, functional relations, possessive relations, attachment relations, action relations, comparative relations, causal relations, and so on
- Vision Relations between objects, such as spatial relations, functional relations, possessive relations,
attachment relations, action relations, comparative relations, causal relations, and so on
- Environmental details, such as weather, lighting, colors, textures, atmosphere, and so on
- Identify the text clearly visible in the image, without translation or explanation, and highlight it in the caption with quotation marks
- Identify the text clearly visible in the image, without translation or explanation, and highlight it in the
caption with quotation marks
3. Maintain authenticity and accuracy:
- Avoid generalizations
- Describe all visible information in the image, while do not add information not explicitly shown in the image
<|vision_start|><|image_pad|><|vision_end|><|im_end|>
<|im_start|>assistant
)
<|vision_start|><|image_pad|><|vision_end|><|im_end|> <|im_start|>assistant )
image_caption_prompt_cn (default: <|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
# 图像标注器
你是一个专业的图像标注器。请基于输入图像,撰写图注:
You are a helpful assistant.<|im_end|> <|im_start|>user # 图像标注器 你是一个专业的图像标注器。请基于输入图像,撰写图注:
1. 使用自然、描述性的语言撰写图注,不要使用结构化形式或富文本形式。
2. 通过加入以下内容,丰富图注细节:
- 对象的属性:如数量、颜色、形状、大小、位置、材质、状态、动作等
@@ -101,16 +97,11 @@ class QwenImageLayeredTextEncoderStep(SequentialPipelineBlocks):
3. 保持真实性与准确性:
- 不要使用笼统的描述
- 描述图像中所有可见的信息,但不要加入没有在图像中出现的内容
<|vision_start|><|image_pad|><|vision_end|><|im_end|>
<|im_start|>assistant
)
<|vision_start|><|image_pad|><|vision_end|><|im_end|> <|im_start|>assistant )
prompt_template_encode (default: <|im_start|>system
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>
<|im_start|>user
{}<|im_end|>
<|im_start|>assistant
)
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the
objects and background:<|im_end|> <|im_start|>user {}<|im_end|> <|im_start|>assistant )
prompt_template_encode_start_idx (default: 34)
@@ -187,6 +178,7 @@ class QwenImageLayeredVaeEncoderStep(SequentialPipelineBlocks):
resized_image (`List`):
The resized images
processed_image (`None`):
TODO: Add description.
image_latents (`Tensor`):
The latents representing the reference image(s). Single tensor or list depending on input.
"""
@@ -226,10 +218,15 @@ class QwenImageLayeredInputStep(SequentialPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
TODO: Add description.
prompt_embeds_mask (`None`):
TODO: Add description.
negative_prompt_embeds (`None`, *optional*):
TODO: Add description.
negative_prompt_embeds_mask (`None`, *optional*):
TODO: Add description.
image_latents (`None`, *optional*):
TODO: Add description.
Outputs:
batch_size (`int`):
@@ -282,10 +279,15 @@ class QwenImageLayeredCoreDenoiseStep(SequentialPipelineBlocks):
num_images_per_prompt (`int`, *optional*, defaults to 1):
The number of images to generate per prompt.
prompt_embeds (`None`):
TODO: Add description.
prompt_embeds_mask (`None`):
TODO: Add description.
negative_prompt_embeds (`None`, *optional*):
TODO: Add description.
negative_prompt_embeds_mask (`None`, *optional*):
TODO: Add description.
image_latents (`None`, *optional*):
TODO: Add description.
latents (`Tensor`, *optional*):
Pre-generated noisy latents for image generation.
layers (`int`, *optional*, defaults to 4):
@@ -379,28 +381,23 @@ class QwenImageLayeredAutoBlocks(SequentialPipelineBlocks):
Configs:
image_caption_prompt_en (default: <|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
# Image Annotator
You are a professional image annotator. Please write an image caption based on the input image:
You are a helpful assistant.<|im_end|> <|im_start|>user # Image Annotator You are a professional image annotator.
Please write an image caption based on the input image:
1. Write the caption using natural, descriptive language without structured formats or rich text.
2. Enrich caption details by including:
- Object attributes, such as quantity, color, shape, size, material, state, position, actions, and so on
- Vision Relations between objects, such as spatial relations, functional relations, possessive relations, attachment relations, action relations, comparative relations, causal relations, and so on
- Vision Relations between objects, such as spatial relations, functional relations, possessive relations,
attachment relations, action relations, comparative relations, causal relations, and so on
- Environmental details, such as weather, lighting, colors, textures, atmosphere, and so on
- Identify the text clearly visible in the image, without translation or explanation, and highlight it in the caption with quotation marks
- Identify the text clearly visible in the image, without translation or explanation, and highlight it in the
caption with quotation marks
3. Maintain authenticity and accuracy:
- Avoid generalizations
- Describe all visible information in the image, while do not add information not explicitly shown in the image
<|vision_start|><|image_pad|><|vision_end|><|im_end|>
<|im_start|>assistant
)
<|vision_start|><|image_pad|><|vision_end|><|im_end|> <|im_start|>assistant )
image_caption_prompt_cn (default: <|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
# 图像标注器
你是一个专业的图像标注器。请基于输入图像,撰写图注:
You are a helpful assistant.<|im_end|> <|im_start|>user # 图像标注器 你是一个专业的图像标注器。请基于输入图像,撰写图注:
1. 使用自然、描述性的语言撰写图注,不要使用结构化形式或富文本形式。
2. 通过加入以下内容,丰富图注细节:
- 对象的属性:如数量、颜色、形状、大小、位置、材质、状态、动作等
@@ -410,16 +407,11 @@ class QwenImageLayeredAutoBlocks(SequentialPipelineBlocks):
3. 保持真实性与准确性:
- 不要使用笼统的描述
- 描述图像中所有可见的信息,但不要加入没有在图像中出现的内容
<|vision_start|><|image_pad|><|vision_end|><|im_end|>
<|im_start|>assistant
)
<|vision_start|><|image_pad|><|vision_end|><|im_end|> <|im_start|>assistant )
prompt_template_encode (default: <|im_start|>system
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>
<|im_start|>user
{}<|im_end|>
<|im_start|>assistant
)
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the
objects and background:<|im_end|> <|im_start|>user {}<|im_end|> <|im_start|>assistant )
prompt_template_encode_start_idx (default: 34)

View File

@@ -297,4 +297,4 @@ if __name__ == "__main__":
args = parser.parse_args()
check_auto_docstrings(args.path, args.fix_and_overwrite)
check_auto_docstrings(args.path, args.fix_and_overwrite)