mirror of
https://github.com/huggingface/diffusers.git
synced 2026-01-27 17:22:53 +03:00
add TODO in the description for empty docstring
This commit is contained in:
@@ -708,6 +708,8 @@ def format_params(params, header="Args", indent_level=4, max_line_length=115):
|
||||
desc = re.sub(r"\[(.*?)\]\((https?://[^\s\)]+)\)", r"[\1](\2)", param.description)
|
||||
wrapped_desc = wrap_text(desc, desc_indent, max_line_length)
|
||||
param_str += f"\n{desc_indent}{wrapped_desc}"
|
||||
else:
|
||||
param_str += f"\n{desc_indent}TODO: Add description."
|
||||
|
||||
formatted_params.append(param_str)
|
||||
|
||||
|
||||
@@ -1324,7 +1324,8 @@ class QwenImageVaeEncoderStep(ModularPipelineBlocks):
|
||||
@property
|
||||
def inputs(self) -> List[InputParam]:
|
||||
return [
|
||||
InputParam.template(self._image_input_name) or InputParam(name=self._image_input_name, required=True),
|
||||
InputParam.template(self._image_input_name)
|
||||
or InputParam(name=self._image_input_name, required=True, description="The image tensor to encode"),
|
||||
InputParam.generator(),
|
||||
]
|
||||
|
||||
|
||||
@@ -75,11 +75,8 @@ class QwenImageAutoTextEncoderStep(AutoPipelineBlocks):
|
||||
Configs:
|
||||
|
||||
prompt_template_encode (default: <|im_start|>system
|
||||
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>
|
||||
<|im_start|>user
|
||||
{}<|im_end|>
|
||||
<|im_start|>assistant
|
||||
)
|
||||
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the
|
||||
objects and background:<|im_end|> <|im_start|>user {}<|im_end|> <|im_start|>assistant )
|
||||
|
||||
prompt_template_encode_start_idx (default: 34)
|
||||
|
||||
@@ -151,7 +148,9 @@ class QwenImageInpaintVaeEncoderStep(SequentialPipelineBlocks):
|
||||
|
||||
Outputs:
|
||||
processed_image (`None`):
|
||||
TODO: Add description.
|
||||
processed_mask_image (`None`):
|
||||
TODO: Add description.
|
||||
mask_overlay_kwargs (`Dict`):
|
||||
The kwargs for the postprocess step to apply the mask overlay
|
||||
image_latents (`Tensor`):
|
||||
@@ -195,6 +194,7 @@ class QwenImageImg2ImgVaeEncoderStep(SequentialPipelineBlocks):
|
||||
|
||||
Outputs:
|
||||
processed_image (`None`):
|
||||
TODO: Add description.
|
||||
image_latents (`Tensor`):
|
||||
The latents representing the reference image(s). Single tensor or list depending on input.
|
||||
"""
|
||||
@@ -290,14 +290,19 @@ class QwenImageImg2ImgInputStep(SequentialPipelineBlocks):
|
||||
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
||||
The number of images to generate per prompt.
|
||||
prompt_embeds (`None`):
|
||||
TODO: Add description.
|
||||
prompt_embeds_mask (`None`):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds_mask (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
height (`int`, *optional*):
|
||||
The height in pixels of the generated image.
|
||||
width (`int`, *optional*):
|
||||
The width in pixels of the generated image.
|
||||
image_latents (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
|
||||
Outputs:
|
||||
batch_size (`int`):
|
||||
@@ -334,15 +339,21 @@ class QwenImageInpaintInputStep(SequentialPipelineBlocks):
|
||||
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
||||
The number of images to generate per prompt.
|
||||
prompt_embeds (`None`):
|
||||
TODO: Add description.
|
||||
prompt_embeds_mask (`None`):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds_mask (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
height (`int`, *optional*):
|
||||
The height in pixels of the generated image.
|
||||
width (`int`, *optional*):
|
||||
The width in pixels of the generated image.
|
||||
image_latents (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
processed_mask_image (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
|
||||
Outputs:
|
||||
batch_size (`int`):
|
||||
@@ -389,14 +400,18 @@ class QwenImageInpaintPrepareLatentsStep(SequentialPipelineBlocks):
|
||||
latents (`Tensor`):
|
||||
The initial random noise, which can be generated in the prepare latents step.
|
||||
image_latents (`Tensor`):
|
||||
The image latents to use for the denoising process. Can be generated in vae encoder and packed in input step.
|
||||
The image latents to use for the denoising process. Can be generated in vae encoder and packed in input
|
||||
step.
|
||||
timesteps (`Tensor`):
|
||||
The timesteps to use for the denoising process. Can be generated in set_timesteps step.
|
||||
processed_mask_image (`Tensor`):
|
||||
The processed mask to use for the inpainting process.
|
||||
height (`None`):
|
||||
TODO: Add description.
|
||||
width (`None`):
|
||||
TODO: Add description.
|
||||
dtype (`None`):
|
||||
TODO: Add description.
|
||||
|
||||
Outputs:
|
||||
initial_noise (`Tensor`):
|
||||
@@ -425,7 +440,8 @@ class QwenImageInpaintPrepareLatentsStep(SequentialPipelineBlocks):
|
||||
# auto_docstring
|
||||
class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
"""
|
||||
step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs (timesteps, latents, rope inputs etc.).
|
||||
step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs
|
||||
(timesteps, latents, rope inputs etc.).
|
||||
|
||||
Components:
|
||||
|
||||
@@ -441,9 +457,13 @@ class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
||||
The number of images to generate per prompt.
|
||||
prompt_embeds (`None`):
|
||||
TODO: Add description.
|
||||
prompt_embeds_mask (`None`):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds_mask (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
latents (`Tensor`, *optional*):
|
||||
Pre-generated noisy latents for image generation.
|
||||
height (`int`, *optional*):
|
||||
@@ -499,7 +519,8 @@ class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
# auto_docstring
|
||||
class QwenImageInpaintCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
"""
|
||||
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint task.
|
||||
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint
|
||||
task.
|
||||
|
||||
Components:
|
||||
|
||||
@@ -515,15 +536,21 @@ class QwenImageInpaintCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
||||
The number of images to generate per prompt.
|
||||
prompt_embeds (`None`):
|
||||
TODO: Add description.
|
||||
prompt_embeds_mask (`None`):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds_mask (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
height (`int`, *optional*):
|
||||
The height in pixels of the generated image.
|
||||
width (`int`, *optional*):
|
||||
The width in pixels of the generated image.
|
||||
image_latents (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
processed_mask_image (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
latents (`Tensor`, *optional*):
|
||||
Pre-generated noisy latents for image generation.
|
||||
generator (`Generator`, *optional*):
|
||||
@@ -579,7 +606,8 @@ class QwenImageInpaintCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
# auto_docstring
|
||||
class QwenImageImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
"""
|
||||
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img task.
|
||||
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img
|
||||
task.
|
||||
|
||||
Components:
|
||||
|
||||
@@ -595,14 +623,19 @@ class QwenImageImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
||||
The number of images to generate per prompt.
|
||||
prompt_embeds (`None`):
|
||||
TODO: Add description.
|
||||
prompt_embeds_mask (`None`):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds_mask (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
height (`int`, *optional*):
|
||||
The height in pixels of the generated image.
|
||||
width (`int`, *optional*):
|
||||
The width in pixels of the generated image.
|
||||
image_latents (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
latents (`Tensor`, *optional*):
|
||||
Pre-generated noisy latents for image generation.
|
||||
generator (`Generator`, *optional*):
|
||||
@@ -658,7 +691,8 @@ class QwenImageImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
# auto_docstring
|
||||
class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
"""
|
||||
step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs (timesteps, latents, rope inputs etc.).
|
||||
step that denoise noise into image for text2image task. It includes the denoise loop, as well as prepare the inputs
|
||||
(timesteps, latents, rope inputs etc.).
|
||||
|
||||
Components:
|
||||
|
||||
@@ -676,10 +710,15 @@ class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
||||
The number of images to generate per prompt.
|
||||
prompt_embeds (`None`):
|
||||
TODO: Add description.
|
||||
prompt_embeds_mask (`None`):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds_mask (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
control_image_latents (`None`):
|
||||
TODO: Add description.
|
||||
height (`int`, *optional*):
|
||||
The height in pixels of the generated image.
|
||||
width (`int`, *optional*):
|
||||
@@ -746,7 +785,8 @@ class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
# auto_docstring
|
||||
class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
"""
|
||||
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint task.
|
||||
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint
|
||||
task.
|
||||
|
||||
Components:
|
||||
|
||||
@@ -764,16 +804,23 @@ class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
||||
The number of images to generate per prompt.
|
||||
prompt_embeds (`None`):
|
||||
TODO: Add description.
|
||||
prompt_embeds_mask (`None`):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds_mask (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
height (`int`, *optional*):
|
||||
The height in pixels of the generated image.
|
||||
width (`int`, *optional*):
|
||||
The width in pixels of the generated image.
|
||||
image_latents (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
processed_mask_image (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
control_image_latents (`None`):
|
||||
TODO: Add description.
|
||||
latents (`Tensor`, *optional*):
|
||||
Pre-generated noisy latents for image generation.
|
||||
generator (`Generator`, *optional*):
|
||||
@@ -840,7 +887,8 @@ class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
# auto_docstring
|
||||
class QwenImageControlNetImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
"""
|
||||
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img task.
|
||||
Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img
|
||||
task.
|
||||
|
||||
Components:
|
||||
|
||||
@@ -858,15 +906,21 @@ class QwenImageControlNetImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
||||
The number of images to generate per prompt.
|
||||
prompt_embeds (`None`):
|
||||
TODO: Add description.
|
||||
prompt_embeds_mask (`None`):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds_mask (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
height (`int`, *optional*):
|
||||
The height in pixels of the generated image.
|
||||
width (`int`, *optional*):
|
||||
The width in pixels of the generated image.
|
||||
image_latents (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
control_image_latents (`None`):
|
||||
TODO: Add description.
|
||||
latents (`Tensor`, *optional*):
|
||||
Pre-generated noisy latents for image generation.
|
||||
generator (`Generator`, *optional*):
|
||||
@@ -1031,7 +1085,8 @@ class QwenImageDecodeStep(SequentialPipelineBlocks):
|
||||
# auto_docstring
|
||||
class QwenImageInpaintDecodeStep(SequentialPipelineBlocks):
|
||||
"""
|
||||
Decode step that decodes the latents to images and postprocess the generated image, optionally apply the mask overlay to the original image.
|
||||
Decode step that decodes the latents to images and postprocess the generated image, optionally apply the mask
|
||||
overlay to the original image.
|
||||
|
||||
Components:
|
||||
|
||||
@@ -1045,6 +1100,7 @@ class QwenImageInpaintDecodeStep(SequentialPipelineBlocks):
|
||||
output_type (`str`, *optional*, defaults to pil):
|
||||
Output format: 'pil', 'np', 'pt'.
|
||||
mask_overlay_kwargs (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
|
||||
Outputs:
|
||||
images (`List`):
|
||||
@@ -1126,11 +1182,8 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
|
||||
Configs:
|
||||
|
||||
prompt_template_encode (default: <|im_start|>system
|
||||
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>
|
||||
<|im_start|>user
|
||||
{}<|im_end|>
|
||||
<|im_start|>assistant
|
||||
)
|
||||
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the
|
||||
objects and background:<|im_end|> <|im_start|>user {}<|im_end|> <|im_start|>assistant )
|
||||
|
||||
prompt_template_encode_start_idx (default: 34)
|
||||
|
||||
@@ -1160,9 +1213,13 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
|
||||
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
||||
The number of images to generate per prompt.
|
||||
prompt_embeds (`None`):
|
||||
TODO: Add description.
|
||||
prompt_embeds_mask (`None`):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds_mask (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
latents (`Tensor`):
|
||||
Pre-generated noisy latents for image generation.
|
||||
num_inference_steps (`int`):
|
||||
@@ -1174,10 +1231,13 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
|
||||
**denoiser_input_fields (`Tensor`, *optional*):
|
||||
conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.
|
||||
image_latents (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
processed_mask_image (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
strength (`float`, *optional*, defaults to 0.9):
|
||||
Strength for img2img/inpainting.
|
||||
control_image_latents (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
control_guidance_start (`float`, *optional*, defaults to 0.0):
|
||||
When to start applying ControlNet.
|
||||
control_guidance_end (`float`, *optional*, defaults to 1.0):
|
||||
@@ -1187,6 +1247,7 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
|
||||
output_type (`str`, *optional*, defaults to pil):
|
||||
Output format: 'pil', 'np', 'pt'.
|
||||
mask_overlay_kwargs (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
|
||||
Outputs:
|
||||
images (`List`):
|
||||
|
||||
@@ -74,11 +74,10 @@ class QwenImageEditVLEncoderStep(SequentialPipelineBlocks):
|
||||
Configs:
|
||||
|
||||
prompt_template_encode (default: <|im_start|>system
|
||||
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>
|
||||
<|im_start|>user
|
||||
<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>
|
||||
<|im_start|>assistant
|
||||
)
|
||||
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how
|
||||
the user's text instruction should alter or modify the image. Generate a new image that meets the user's
|
||||
requirements while maintaining consistency with the original input where appropriate.<|im_end|> <|im_start|>user
|
||||
<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|> <|im_start|>assistant )
|
||||
|
||||
prompt_template_encode_start_idx (default: 64)
|
||||
|
||||
@@ -144,6 +143,7 @@ class QwenImageEditVaeEncoderStep(SequentialPipelineBlocks):
|
||||
resized_image (`List`):
|
||||
The resized images
|
||||
processed_image (`None`):
|
||||
TODO: Add description.
|
||||
image_latents (`Tensor`):
|
||||
The latents representing the reference image(s). Single tensor or list depending on input.
|
||||
"""
|
||||
@@ -192,7 +192,9 @@ class QwenImageEditInpaintVaeEncoderStep(SequentialPipelineBlocks):
|
||||
resized_image (`List`):
|
||||
The resized images
|
||||
processed_image (`None`):
|
||||
TODO: Add description.
|
||||
processed_mask_image (`None`):
|
||||
TODO: Add description.
|
||||
mask_overlay_kwargs (`Dict`):
|
||||
The kwargs for the postprocess step to apply the mask overlay
|
||||
image_latents (`Tensor`):
|
||||
@@ -255,14 +257,19 @@ class QwenImageEditInputStep(SequentialPipelineBlocks):
|
||||
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
||||
The number of images to generate per prompt.
|
||||
prompt_embeds (`None`):
|
||||
TODO: Add description.
|
||||
prompt_embeds_mask (`None`):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds_mask (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
height (`int`, *optional*):
|
||||
The height in pixels of the generated image.
|
||||
width (`int`, *optional*):
|
||||
The width in pixels of the generated image.
|
||||
image_latents (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
|
||||
Outputs:
|
||||
batch_size (`int`):
|
||||
@@ -306,15 +313,21 @@ class QwenImageEditInpaintInputStep(SequentialPipelineBlocks):
|
||||
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
||||
The number of images to generate per prompt.
|
||||
prompt_embeds (`None`):
|
||||
TODO: Add description.
|
||||
prompt_embeds_mask (`None`):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds_mask (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
height (`int`, *optional*):
|
||||
The height in pixels of the generated image.
|
||||
width (`int`, *optional*):
|
||||
The width in pixels of the generated image.
|
||||
image_latents (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
processed_mask_image (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
|
||||
Outputs:
|
||||
batch_size (`int`):
|
||||
@@ -363,14 +376,18 @@ class QwenImageEditInpaintPrepareLatentsStep(SequentialPipelineBlocks):
|
||||
latents (`Tensor`):
|
||||
The initial random noise, which can be generated in the prepare latents step.
|
||||
image_latents (`Tensor`):
|
||||
The image latents to use for the denoising process. Can be generated in vae encoder and packed in input step.
|
||||
The image latents to use for the denoising process. Can be generated in vae encoder and packed in input
|
||||
step.
|
||||
timesteps (`Tensor`):
|
||||
The timesteps to use for the denoising process. Can be generated in set_timesteps step.
|
||||
processed_mask_image (`Tensor`):
|
||||
The processed mask to use for the inpainting process.
|
||||
height (`None`):
|
||||
TODO: Add description.
|
||||
width (`None`):
|
||||
TODO: Add description.
|
||||
dtype (`None`):
|
||||
TODO: Add description.
|
||||
|
||||
Outputs:
|
||||
initial_noise (`Tensor`):
|
||||
@@ -412,14 +429,19 @@ class QwenImageEditCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
||||
The number of images to generate per prompt.
|
||||
prompt_embeds (`None`):
|
||||
TODO: Add description.
|
||||
prompt_embeds_mask (`None`):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds_mask (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
height (`int`, *optional*):
|
||||
The height in pixels of the generated image.
|
||||
width (`int`, *optional*):
|
||||
The width in pixels of the generated image.
|
||||
image_latents (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
latents (`Tensor`, *optional*):
|
||||
Pre-generated noisy latents for image generation.
|
||||
generator (`Generator`, *optional*):
|
||||
@@ -487,15 +509,21 @@ class QwenImageEditInpaintCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
||||
The number of images to generate per prompt.
|
||||
prompt_embeds (`None`):
|
||||
TODO: Add description.
|
||||
prompt_embeds_mask (`None`):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds_mask (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
height (`int`, *optional*):
|
||||
The height in pixels of the generated image.
|
||||
width (`int`, *optional*):
|
||||
The width in pixels of the generated image.
|
||||
image_latents (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
processed_mask_image (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
latents (`Tensor`, *optional*):
|
||||
Pre-generated noisy latents for image generation.
|
||||
generator (`Generator`, *optional*):
|
||||
@@ -622,7 +650,8 @@ class QwenImageEditDecodeStep(SequentialPipelineBlocks):
|
||||
# auto_docstring
|
||||
class QwenImageEditInpaintDecodeStep(SequentialPipelineBlocks):
|
||||
"""
|
||||
Decode step that decodes the latents to images and postprocess the generated image, optionally apply the mask overlay to the original image.
|
||||
Decode step that decodes the latents to images and postprocess the generated image, optionally apply the mask
|
||||
overlay to the original image.
|
||||
|
||||
Components:
|
||||
|
||||
@@ -636,6 +665,7 @@ class QwenImageEditInpaintDecodeStep(SequentialPipelineBlocks):
|
||||
output_type (`str`, *optional*, defaults to pil):
|
||||
Output format: 'pil', 'np', 'pt'.
|
||||
mask_overlay_kwargs (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
|
||||
Outputs:
|
||||
images (`List`):
|
||||
@@ -692,7 +722,8 @@ class QwenImageEditAutoBlocks(SequentialPipelineBlocks):
|
||||
"""
|
||||
Auto Modular pipeline for edit (img2img) and edit inpaint tasks using QwenImage-Edit.
|
||||
- for edit (img2img) generation, you need to provide `image`
|
||||
- for edit inpainting, you need to provide `mask_image` and `image`, optionally you can provide `padding_mask_crop`
|
||||
- for edit inpainting, you need to provide `mask_image` and `image`, optionally you can provide
|
||||
`padding_mask_crop`
|
||||
|
||||
Components:
|
||||
|
||||
@@ -719,11 +750,10 @@ class QwenImageEditAutoBlocks(SequentialPipelineBlocks):
|
||||
Configs:
|
||||
|
||||
prompt_template_encode (default: <|im_start|>system
|
||||
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>
|
||||
<|im_start|>user
|
||||
<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>
|
||||
<|im_start|>assistant
|
||||
)
|
||||
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how
|
||||
the user's text instruction should alter or modify the image. Generate a new image that meets the user's
|
||||
requirements while maintaining consistency with the original input where appropriate.<|im_end|> <|im_start|>user
|
||||
<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|> <|im_start|>assistant )
|
||||
|
||||
prompt_template_encode_start_idx (default: 64)
|
||||
|
||||
@@ -747,7 +777,9 @@ class QwenImageEditAutoBlocks(SequentialPipelineBlocks):
|
||||
width (`int`):
|
||||
The width in pixels of the generated image.
|
||||
image_latents (`None`):
|
||||
TODO: Add description.
|
||||
processed_mask_image (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
latents (`Tensor`):
|
||||
Pre-generated noisy latents for image generation.
|
||||
num_inference_steps (`int`):
|
||||
@@ -763,6 +795,7 @@ class QwenImageEditAutoBlocks(SequentialPipelineBlocks):
|
||||
output_type (`str`, *optional*, defaults to pil):
|
||||
Output format: 'pil', 'np', 'pt'.
|
||||
mask_overlay_kwargs (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
|
||||
Outputs:
|
||||
images (`List`):
|
||||
|
||||
@@ -67,11 +67,10 @@ class QwenImageEditPlusVLEncoderStep(SequentialPipelineBlocks):
|
||||
Configs:
|
||||
|
||||
prompt_template_encode (default: <|im_start|>system
|
||||
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>
|
||||
<|im_start|>user
|
||||
{}<|im_end|>
|
||||
<|im_start|>assistant
|
||||
)
|
||||
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how
|
||||
the user's text instruction should alter or modify the image. Generate a new image that meets the user's
|
||||
requirements while maintaining consistency with the original input where appropriate.<|im_end|> <|im_start|>user
|
||||
{}<|im_end|> <|im_start|>assistant )
|
||||
|
||||
img_template_encode (default: Picture {}: <|vision_start|><|image_pad|><|vision_end|>)
|
||||
|
||||
@@ -139,6 +138,7 @@ class QwenImageEditPlusVaeEncoderStep(SequentialPipelineBlocks):
|
||||
resized_image (`List`):
|
||||
The resized images
|
||||
processed_image (`None`):
|
||||
TODO: Add description.
|
||||
image_latents (`Tensor`):
|
||||
The latents representing the reference image(s). Single tensor or list depending on input.
|
||||
"""
|
||||
@@ -182,14 +182,19 @@ class QwenImageEditPlusInputStep(SequentialPipelineBlocks):
|
||||
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
||||
The number of images to generate per prompt.
|
||||
prompt_embeds (`None`):
|
||||
TODO: Add description.
|
||||
prompt_embeds_mask (`None`):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds_mask (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
height (`int`, *optional*):
|
||||
The height in pixels of the generated image.
|
||||
width (`int`, *optional*):
|
||||
The width in pixels of the generated image.
|
||||
image_latents (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
|
||||
Outputs:
|
||||
batch_size (`int`):
|
||||
@@ -240,14 +245,19 @@ class QwenImageEditPlusCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
||||
The number of images to generate per prompt.
|
||||
prompt_embeds (`None`):
|
||||
TODO: Add description.
|
||||
prompt_embeds_mask (`None`):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds_mask (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
height (`int`, *optional*):
|
||||
The height in pixels of the generated image.
|
||||
width (`int`, *optional*):
|
||||
The width in pixels of the generated image.
|
||||
image_latents (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
latents (`Tensor`, *optional*):
|
||||
Pre-generated noisy latents for image generation.
|
||||
generator (`Generator`, *optional*):
|
||||
@@ -376,11 +386,10 @@ class QwenImageEditPlusAutoBlocks(SequentialPipelineBlocks):
|
||||
Configs:
|
||||
|
||||
prompt_template_encode (default: <|im_start|>system
|
||||
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>
|
||||
<|im_start|>user
|
||||
{}<|im_end|>
|
||||
<|im_start|>assistant
|
||||
)
|
||||
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how
|
||||
the user's text instruction should alter or modify the image. Generate a new image that meets the user's
|
||||
requirements while maintaining consistency with the original input where appropriate.<|im_end|> <|im_start|>user
|
||||
{}<|im_end|> <|im_start|>assistant )
|
||||
|
||||
img_template_encode (default: Picture {}: <|vision_start|><|image_pad|><|vision_end|>)
|
||||
|
||||
|
||||
@@ -53,7 +53,8 @@ logger = logging.get_logger(__name__)
|
||||
# auto_docstring
|
||||
class QwenImageLayeredTextEncoderStep(SequentialPipelineBlocks):
|
||||
"""
|
||||
QwenImage-Layered Text encoder step that encode the text prompt, will generate a prompt based on image if not provided.
|
||||
QwenImage-Layered Text encoder step that encode the text prompt, will generate a prompt based on image if not
|
||||
provided.
|
||||
|
||||
Components:
|
||||
|
||||
@@ -70,28 +71,23 @@ class QwenImageLayeredTextEncoderStep(SequentialPipelineBlocks):
|
||||
Configs:
|
||||
|
||||
image_caption_prompt_en (default: <|im_start|>system
|
||||
You are a helpful assistant.<|im_end|>
|
||||
<|im_start|>user
|
||||
# Image Annotator
|
||||
You are a professional image annotator. Please write an image caption based on the input image:
|
||||
You are a helpful assistant.<|im_end|> <|im_start|>user # Image Annotator You are a professional image annotator.
|
||||
Please write an image caption based on the input image:
|
||||
1. Write the caption using natural, descriptive language without structured formats or rich text.
|
||||
2. Enrich caption details by including:
|
||||
- Object attributes, such as quantity, color, shape, size, material, state, position, actions, and so on
|
||||
- Vision Relations between objects, such as spatial relations, functional relations, possessive relations, attachment relations, action relations, comparative relations, causal relations, and so on
|
||||
- Vision Relations between objects, such as spatial relations, functional relations, possessive relations,
|
||||
attachment relations, action relations, comparative relations, causal relations, and so on
|
||||
- Environmental details, such as weather, lighting, colors, textures, atmosphere, and so on
|
||||
- Identify the text clearly visible in the image, without translation or explanation, and highlight it in the caption with quotation marks
|
||||
- Identify the text clearly visible in the image, without translation or explanation, and highlight it in the
|
||||
caption with quotation marks
|
||||
3. Maintain authenticity and accuracy:
|
||||
- Avoid generalizations
|
||||
- Describe all visible information in the image, while do not add information not explicitly shown in the image
|
||||
<|vision_start|><|image_pad|><|vision_end|><|im_end|>
|
||||
<|im_start|>assistant
|
||||
)
|
||||
<|vision_start|><|image_pad|><|vision_end|><|im_end|> <|im_start|>assistant )
|
||||
|
||||
image_caption_prompt_cn (default: <|im_start|>system
|
||||
You are a helpful assistant.<|im_end|>
|
||||
<|im_start|>user
|
||||
# 图像标注器
|
||||
你是一个专业的图像标注器。请基于输入图像,撰写图注:
|
||||
You are a helpful assistant.<|im_end|> <|im_start|>user # 图像标注器 你是一个专业的图像标注器。请基于输入图像,撰写图注:
|
||||
1. 使用自然、描述性的语言撰写图注,不要使用结构化形式或富文本形式。
|
||||
2. 通过加入以下内容,丰富图注细节:
|
||||
- 对象的属性:如数量、颜色、形状、大小、位置、材质、状态、动作等
|
||||
@@ -101,16 +97,11 @@ class QwenImageLayeredTextEncoderStep(SequentialPipelineBlocks):
|
||||
3. 保持真实性与准确性:
|
||||
- 不要使用笼统的描述
|
||||
- 描述图像中所有可见的信息,但不要加入没有在图像中出现的内容
|
||||
<|vision_start|><|image_pad|><|vision_end|><|im_end|>
|
||||
<|im_start|>assistant
|
||||
)
|
||||
<|vision_start|><|image_pad|><|vision_end|><|im_end|> <|im_start|>assistant )
|
||||
|
||||
prompt_template_encode (default: <|im_start|>system
|
||||
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>
|
||||
<|im_start|>user
|
||||
{}<|im_end|>
|
||||
<|im_start|>assistant
|
||||
)
|
||||
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the
|
||||
objects and background:<|im_end|> <|im_start|>user {}<|im_end|> <|im_start|>assistant )
|
||||
|
||||
prompt_template_encode_start_idx (default: 34)
|
||||
|
||||
@@ -187,6 +178,7 @@ class QwenImageLayeredVaeEncoderStep(SequentialPipelineBlocks):
|
||||
resized_image (`List`):
|
||||
The resized images
|
||||
processed_image (`None`):
|
||||
TODO: Add description.
|
||||
image_latents (`Tensor`):
|
||||
The latents representing the reference image(s). Single tensor or list depending on input.
|
||||
"""
|
||||
@@ -226,10 +218,15 @@ class QwenImageLayeredInputStep(SequentialPipelineBlocks):
|
||||
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
||||
The number of images to generate per prompt.
|
||||
prompt_embeds (`None`):
|
||||
TODO: Add description.
|
||||
prompt_embeds_mask (`None`):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds_mask (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
image_latents (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
|
||||
Outputs:
|
||||
batch_size (`int`):
|
||||
@@ -282,10 +279,15 @@ class QwenImageLayeredCoreDenoiseStep(SequentialPipelineBlocks):
|
||||
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
||||
The number of images to generate per prompt.
|
||||
prompt_embeds (`None`):
|
||||
TODO: Add description.
|
||||
prompt_embeds_mask (`None`):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
negative_prompt_embeds_mask (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
image_latents (`None`, *optional*):
|
||||
TODO: Add description.
|
||||
latents (`Tensor`, *optional*):
|
||||
Pre-generated noisy latents for image generation.
|
||||
layers (`int`, *optional*, defaults to 4):
|
||||
@@ -379,28 +381,23 @@ class QwenImageLayeredAutoBlocks(SequentialPipelineBlocks):
|
||||
Configs:
|
||||
|
||||
image_caption_prompt_en (default: <|im_start|>system
|
||||
You are a helpful assistant.<|im_end|>
|
||||
<|im_start|>user
|
||||
# Image Annotator
|
||||
You are a professional image annotator. Please write an image caption based on the input image:
|
||||
You are a helpful assistant.<|im_end|> <|im_start|>user # Image Annotator You are a professional image annotator.
|
||||
Please write an image caption based on the input image:
|
||||
1. Write the caption using natural, descriptive language without structured formats or rich text.
|
||||
2. Enrich caption details by including:
|
||||
- Object attributes, such as quantity, color, shape, size, material, state, position, actions, and so on
|
||||
- Vision Relations between objects, such as spatial relations, functional relations, possessive relations, attachment relations, action relations, comparative relations, causal relations, and so on
|
||||
- Vision Relations between objects, such as spatial relations, functional relations, possessive relations,
|
||||
attachment relations, action relations, comparative relations, causal relations, and so on
|
||||
- Environmental details, such as weather, lighting, colors, textures, atmosphere, and so on
|
||||
- Identify the text clearly visible in the image, without translation or explanation, and highlight it in the caption with quotation marks
|
||||
- Identify the text clearly visible in the image, without translation or explanation, and highlight it in the
|
||||
caption with quotation marks
|
||||
3. Maintain authenticity and accuracy:
|
||||
- Avoid generalizations
|
||||
- Describe all visible information in the image, while do not add information not explicitly shown in the image
|
||||
<|vision_start|><|image_pad|><|vision_end|><|im_end|>
|
||||
<|im_start|>assistant
|
||||
)
|
||||
<|vision_start|><|image_pad|><|vision_end|><|im_end|> <|im_start|>assistant )
|
||||
|
||||
image_caption_prompt_cn (default: <|im_start|>system
|
||||
You are a helpful assistant.<|im_end|>
|
||||
<|im_start|>user
|
||||
# 图像标注器
|
||||
你是一个专业的图像标注器。请基于输入图像,撰写图注:
|
||||
You are a helpful assistant.<|im_end|> <|im_start|>user # 图像标注器 你是一个专业的图像标注器。请基于输入图像,撰写图注:
|
||||
1. 使用自然、描述性的语言撰写图注,不要使用结构化形式或富文本形式。
|
||||
2. 通过加入以下内容,丰富图注细节:
|
||||
- 对象的属性:如数量、颜色、形状、大小、位置、材质、状态、动作等
|
||||
@@ -410,16 +407,11 @@ class QwenImageLayeredAutoBlocks(SequentialPipelineBlocks):
|
||||
3. 保持真实性与准确性:
|
||||
- 不要使用笼统的描述
|
||||
- 描述图像中所有可见的信息,但不要加入没有在图像中出现的内容
|
||||
<|vision_start|><|image_pad|><|vision_end|><|im_end|>
|
||||
<|im_start|>assistant
|
||||
)
|
||||
<|vision_start|><|image_pad|><|vision_end|><|im_end|> <|im_start|>assistant )
|
||||
|
||||
prompt_template_encode (default: <|im_start|>system
|
||||
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>
|
||||
<|im_start|>user
|
||||
{}<|im_end|>
|
||||
<|im_start|>assistant
|
||||
)
|
||||
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the
|
||||
objects and background:<|im_end|> <|im_start|>user {}<|im_end|> <|im_start|>assistant )
|
||||
|
||||
prompt_template_encode_start_idx (default: 34)
|
||||
|
||||
|
||||
@@ -297,4 +297,4 @@ if __name__ == "__main__":
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
check_auto_docstrings(args.path, args.fix_and_overwrite)
|
||||
check_auto_docstrings(args.path, args.fix_and_overwrite)
|
||||
|
||||
Reference in New Issue
Block a user