mirror of https://github.com/huggingface/diffusers.git
synced 2026-01-27 17:22:53 +03:00

make style
@@ -438,7 +438,7 @@ INPUT_PARAM_TEMPLATES = {
        "description": "Number of layers to extract from the image",
    },
    # common intermediate inputs
-    "prompt_embeds":{
+    "prompt_embeds": {
        "type_hint": torch.Tensor,
        "required": True,
        "description": "text embeddings used to guide the image generation. Can be generated from text_encoder step.",
@@ -531,16 +531,16 @@ class InputParam:
            raise ValueError(f"InputParam template for {template_name} not found")

        template_kwargs = INPUT_PARAM_TEMPLATES[template_name].copy()

        # Determine the actual param name:
        # 1. From overrides if provided
        # 2. From template if present
        # 3. Fall back to template_name
        name = overrides.pop("name", template_kwargs.pop("name", template_name))

        if note and "description" in template_kwargs:
            template_kwargs["description"] = f"{template_kwargs['description']} ({note})"

        template_kwargs.update(overrides)
        return cls(name=name, **template_kwargs)
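The resolution order this hunk formats — an explicit `name` in the overrides, then a `name` key in the template, then the template key itself, with an optional `note` appended to the description — can be exercised standalone. A minimal sketch, assuming only the template dict shape shown in the first hunk (the `resolve_template` helper below is a hypothetical stand-in for `InputParam.template`):

```python
# Hypothetical stand-in for InputParam.template(); mirrors the resolution
# order above: overrides["name"] > template["name"] > template_name.
INPUT_PARAM_TEMPLATES = {
    "prompt_embeds": {
        "type_hint": "torch.Tensor",
        "required": True,
        "description": "text embeddings used to guide the image generation. Can be generated from text_encoder step.",
    },
}


def resolve_template(template_name, note=None, **overrides):
    template_kwargs = INPUT_PARAM_TEMPLATES[template_name].copy()
    name = overrides.pop("name", template_kwargs.pop("name", template_name))
    if note and "description" in template_kwargs:
        template_kwargs["description"] = f"{template_kwargs['description']} ({note})"
    template_kwargs.update(overrides)
    return name, template_kwargs


# Falls back to the template key as the param name and appends the note:
name, kwargs = resolve_template("prompt_embeds", note="updated in input step.")
assert name == "prompt_embeds"
assert kwargs["description"].endswith("(updated in input step.)")
```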
@@ -564,18 +564,18 @@ class OutputParam:
        """Get template for name if exists, otherwise raise ValueError."""
        if template_name not in OUTPUT_PARAM_TEMPLATES:
            raise ValueError(f"OutputParam template for {template_name} not found")

        template_kwargs = OUTPUT_PARAM_TEMPLATES[template_name].copy()

        # Determine the actual param name:
        # 1. From overrides if provided
        # 2. From template if present
        # 3. Fall back to template_name
        name = overrides.pop("name", template_kwargs.pop("name", template_name))

        if note and "description" in template_kwargs:
            template_kwargs["description"] = f"{template_kwargs['description']} ({note})"

        template_kwargs.update(overrides)
        return cls(name=name, **template_kwargs)

@@ -913,4 +913,4 @@ def make_doc_string(
    output += "\n\n"
    output += format_output_params(outputs, indent_level=2)

-    return output
+    return output
@@ -117,6 +117,7 @@ def get_timesteps(scheduler, num_inference_steps, strength):
# 1. PREPARE LATENTS
# ====================

+
# auto_docstring
class QwenImagePrepareLatentsStep(ModularPipelineBlocks):
    """

@@ -137,8 +138,8 @@ class QwenImagePrepareLatentsStep(ModularPipelineBlocks):
        generator (`Generator`, *optional*):
            Torch generator for deterministic generation.
        batch_size (`int`, *optional*, defaults to 1):
-            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
-            generated in input step.
+            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
+            be generated in input step.
        dtype (`dtype`, *optional*, defaults to torch.float32):
            The dtype of the model inputs, can be generated in input step.

@@ -150,6 +151,7 @@ class QwenImagePrepareLatentsStep(ModularPipelineBlocks):
        latents (`Tensor`):
            The initial latents to use for the denoising process
    """
+
    model_name = "qwenimage"

    @property
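The `batch_size * num_images_per_prompt` rule this docstring describes is plain duplication of the per-prompt inputs; a quick sanity check with illustrative sizes only (the sequence length and embedding dim below are made up):

```python
import torch

batch_size = 2               # number of prompts
num_images_per_prompt = 3    # variations requested per prompt
prompt_embeds = torch.randn(batch_size, 77, 3584)  # hypothetical shape

# repeat each prompt's embeddings so the model batch covers every variation
model_inputs = prompt_embeds.repeat_interleave(num_images_per_prompt, dim=0)
assert model_inputs.shape[0] == batch_size * num_images_per_prompt  # 6
```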
@@ -254,8 +256,8 @@ class QwenImageLayeredPrepareLatentsStep(ModularPipelineBlocks):
        generator (`Generator`, *optional*):
            Torch generator for deterministic generation.
        batch_size (`int`, *optional*, defaults to 1):
-            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
-            generated in input step.
+            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
+            be generated in input step.
        dtype (`dtype`, *optional*, defaults to torch.float32):
            The dtype of the model inputs, can be generated in input step.

@@ -267,6 +269,7 @@ class QwenImageLayeredPrepareLatentsStep(ModularPipelineBlocks):
        latents (`Tensor`):
            The initial latents to use for the denoising process
    """
+
    model_name = "qwenimage-layered"

    @property

@@ -353,7 +356,8 @@ class QwenImageLayeredPrepareLatentsStep(ModularPipelineBlocks):
# auto_docstring
class QwenImagePrepareLatentsWithStrengthStep(ModularPipelineBlocks):
    """
-    Step that adds noise to image latents for image-to-image/inpainting. Should be run after set_timesteps, prepare_latents. Both noise and image latents should already be patchified.
+    Step that adds noise to image latents for image-to-image/inpainting. Should be run after set_timesteps,
+    prepare_latents. Both noise and image latents should already be patchified.

    Components:
        scheduler (`FlowMatchEulerDiscreteScheduler`)

@@ -362,8 +366,8 @@ class QwenImagePrepareLatentsWithStrengthStep(ModularPipelineBlocks):
        latents (`Tensor`):
            The initial random noise, can be generated in prepare latent step.
        image_latents (`Tensor`):
-            image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be generated from
-            vae encoder and updated in input step.)
+            image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be
+            generated from vae encoder and updated in input step.)
        timesteps (`Tensor`):
            The timesteps to use for the denoising process. Can be generated in set_timesteps step.

@@ -373,6 +377,7 @@ class QwenImagePrepareLatentsWithStrengthStep(ModularPipelineBlocks):
        latents (`Tensor`):
            The scaled noisy latents to use for inpainting/image-to-image denoising.
    """
+
    model_name = "qwenimage"

    @property

@@ -396,10 +401,10 @@ class QwenImagePrepareLatentsWithStrengthStep(ModularPipelineBlocks):
            ),
            InputParam.template("image_latents", note="Can be generated from vae encoder and updated in input step."),
            InputParam(
-                name="timesteps",
-                required=True,
-                type_hint=torch.Tensor,
-                description="The timesteps to use for the denoising process. Can be generated in set_timesteps step."
+                name="timesteps",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The timesteps to use for the denoising process. Can be generated in set_timesteps step.",
            ),
        ]
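What "adds noise to image latents" amounts to under flow matching is a linear interpolation toward noise at the first retained timestep. A sketch of the idea, not the block's exact code — `FlowMatchEulerDiscreteScheduler` exposes this as `scale_noise`, and the sigma here is just an illustrative scalar:

```python
import torch


def scale_noise(image_latents: torch.Tensor, noise: torch.Tensor, sigma: float) -> torch.Tensor:
    # Flow-matching forward process: linear interpolation between the clean
    # image latents (sigma=0) and pure noise (sigma=1).
    return sigma * noise + (1.0 - sigma) * image_latents


image_latents = torch.randn(1, 4096, 64)  # hypothetical packed latent shape
noise = torch.randn_like(image_latents)
# strength controls how far toward noise the image latents start
latents = scale_noise(image_latents, noise, sigma=0.9)
```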
@@ -475,6 +480,7 @@ class QwenImageCreateMaskLatentsStep(ModularPipelineBlocks):
        mask (`Tensor`):
            The mask to use for the inpainting process.
    """
+
    model_name = "qwenimage"

    @property

@@ -541,10 +547,12 @@ class QwenImageCreateMaskLatentsStep(ModularPipelineBlocks):
# 2. SET TIMESTEPS
# ====================

+
# auto_docstring
class QwenImageSetTimestepsStep(ModularPipelineBlocks):
    """
-    Step that sets the scheduler's timesteps for text-to-image generation. Should be run after prepare latents step.
+    Step that sets the scheduler's timesteps for text-to-image generation. Should be run after prepare latents
+    step.

    Components:
        scheduler (`FlowMatchEulerDiscreteScheduler`)

@@ -561,6 +569,7 @@ class QwenImageSetTimestepsStep(ModularPipelineBlocks):
        timesteps (`Tensor`):
            The timesteps to use for the denoising process
    """
+
    model_name = "qwenimage"

    @property

@@ -579,10 +588,10 @@ class QwenImageSetTimestepsStep(ModularPipelineBlocks):
            InputParam.template("num_inference_steps"),
            InputParam.template("sigmas"),
            InputParam(
-                name="latents",
+                name="latents",
                required=True,
                type_hint=torch.Tensor,
-                description="The initial random noised latents for the denoising process. Can be generated in prepare latents step."
+                description="The initial random noised latents for the denoising process. Can be generated in prepare latents step.",
            ),
        ]

@@ -640,13 +649,14 @@ class QwenImageLayeredSetTimestepsStep(ModularPipelineBlocks):
        sigmas (`List`, *optional*):
            Custom sigmas for the denoising process.
        image_latents (`Tensor`):
-            image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be generated from
-            vae encoder and packed in input step.)
+            image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be
+            generated from vae encoder and packed in input step.)

    Outputs:
        timesteps (`Tensor`):
            The timesteps to use for the denoising process.
    """
+
    model_name = "qwenimage-layered"

    @property

@@ -671,9 +681,7 @@ class QwenImageLayeredSetTimestepsStep(ModularPipelineBlocks):
    def intermediate_outputs(self) -> List[OutputParam]:
        return [
            OutputParam(
-                name="timesteps",
-                type_hint=torch.Tensor,
-                description="The timesteps to use for the denoising process."
+                name="timesteps", type_hint=torch.Tensor, description="The timesteps to use for the denoising process."
            ),
        ]

@@ -711,7 +719,8 @@ class QwenImageLayeredSetTimestepsStep(ModularPipelineBlocks):
# auto_docstring
class QwenImageSetTimestepsWithStrengthStep(ModularPipelineBlocks):
    """
-    Step that sets the scheduler's timesteps for image-to-image generation, and inpainting. Should be run after prepare latents step.
+    Step that sets the scheduler's timesteps for image-to-image generation, and inpainting. Should be run after
+    prepare latents step.

    Components:
        scheduler (`FlowMatchEulerDiscreteScheduler`)

@@ -732,6 +741,7 @@ class QwenImageSetTimestepsWithStrengthStep(ModularPipelineBlocks):
        num_inference_steps (`int`):
            The number of denoising steps to perform at inference time. Updated based on strength.
    """
+
    model_name = "qwenimage"

    @property

@@ -750,10 +760,10 @@ class QwenImageSetTimestepsWithStrengthStep(ModularPipelineBlocks):
            InputParam.template("num_inference_steps"),
            InputParam.template("sigmas"),
            InputParam(
-                "latents",
-                required=True,
+                "latents",
+                required=True,
                type_hint=torch.Tensor,
-                description="The latents to use for the denoising process. Can be generated in prepare latents step."
+                description="The latents to use for the denoising process. Can be generated in prepare latents step.",
            ),
            InputParam.template("strength", default=0.9),
        ]
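The `strength` input (defaulting to 0.9 above) controls how much of the schedule survives the truncation that "Updated based on strength" refers to. A hypothetical re-implementation of the usual img2img truncation, standing in for the file's `get_timesteps` helper (the real one takes the scheduler rather than a list):

```python
def get_timesteps(timesteps, num_inference_steps: int, strength: float):
    # Keep only the tail of the schedule: with strength=0.9 and 50 steps,
    # denoising starts 90% of the way into the noise schedule (45 steps run).
    init_timestep = min(num_inference_steps * strength, num_inference_steps)
    t_start = int(max(num_inference_steps - init_timestep, 0))
    return timesteps[t_start:], num_inference_steps - t_start


timesteps, num_inference_steps = get_timesteps(list(range(50))[::-1], 50, strength=0.9)
assert num_inference_steps == 45
```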
@@ -815,6 +825,7 @@ class QwenImageSetTimestepsWithStrengthStep(ModularPipelineBlocks):

## RoPE inputs for denoiser

+
# auto_docstring
class QwenImageRoPEInputsStep(ModularPipelineBlocks):
    """

@@ -822,8 +833,8 @@ class QwenImageRoPEInputsStep(ModularPipelineBlocks):

    Inputs:
        batch_size (`int`, *optional*, defaults to 1):
-            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
-            generated in input step.
+            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
+            be generated in input step.
        height (`int`):
            The height in pixels of the generated image.
        width (`int`):

@@ -841,6 +852,7 @@ class QwenImageRoPEInputsStep(ModularPipelineBlocks):
        negative_txt_seq_lens (`List`):
            The sequence lengths of the negative prompt embeds, used for RoPE calculation
    """
+
    model_name = "qwenimage"

    @property

@@ -911,12 +923,13 @@ class QwenImageRoPEInputsStep(ModularPipelineBlocks):
# auto_docstring
class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
    """
-    Step that prepares the RoPE inputs for the denoising process. This is used in QwenImage Edit. Should be placed after prepare_latents step
+    Step that prepares the RoPE inputs for the denoising process. This is used in QwenImage Edit. Should be placed after
+    prepare_latents step

    Inputs:
        batch_size (`int`, *optional*, defaults to 1):
-            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
-            generated in input step.
+            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
+            be generated in input step.
        image_height (`int`):
            The height of the reference image. Can be generated in input step.
        image_width (`int`):

@@ -938,6 +951,7 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
        negative_txt_seq_lens (`List`):
            The sequence lengths of the negative prompt embeds, used for RoPE calculation
    """
+
    model_name = "qwenimage"

    @property

@@ -948,8 +962,18 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
    def inputs(self) -> List[InputParam]:
        return [
            InputParam.template("batch_size"),
-            InputParam(name="image_height", required=True, type_hint=int, description="The height of the reference image. Can be generated in input step."),
-            InputParam(name="image_width", required=True, type_hint=int, description="The width of the reference image. Can be generated in input step."),
+            InputParam(
+                name="image_height",
+                required=True,
+                type_hint=int,
+                description="The height of the reference image. Can be generated in input step.",
+            ),
+            InputParam(
+                name="image_width",
+                required=True,
+                type_hint=int,
+                description="The width of the reference image. Can be generated in input step.",
+            ),
            InputParam.template("height", required=True),
            InputParam.template("width", required=True),
            InputParam.template("prompt_embeds_mask"),
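The `txt_seq_lens` values these RoPE steps emit are simply the number of valid tokens per prompt, which is why the steps take `prompt_embeds_mask` as input. A plausible derivation from the mask (illustrative, not the block's verbatim code):

```python
import torch

# hypothetical mask for a batch of 2 prompts, padded to 6 tokens
prompt_embeds_mask = torch.tensor([
    [1, 1, 1, 1, 0, 0],   # 4 valid tokens
    [1, 1, 1, 1, 1, 1],   # 6 valid tokens
])

# per-prompt sequence lengths used to build RoPE position ids
txt_seq_lens = prompt_embeds_mask.sum(dim=1).tolist()
assert txt_seq_lens == [4, 6]
```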
@@ -1016,13 +1040,13 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
class QwenImageEditPlusRoPEInputsStep(ModularPipelineBlocks):
    """
    Step that prepares the RoPE inputs for the denoising process. This is used in QwenImage Edit Plus.
-    Unlike Edit, Edit Plus handles lists of image_height/image_width for multiple reference images.
-    Should be placed after prepare_latents step.
+    Unlike Edit, Edit Plus handles lists of image_height/image_width for multiple reference images. Should be placed
+    after prepare_latents step.

    Inputs:
        batch_size (`int`, *optional*, defaults to 1):
-            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
-            generated in input step.
+            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
+            be generated in input step.
        image_height (`List`):
            The heights of the reference images. Can be generated in input step.
        image_width (`List`):

@@ -1044,6 +1068,7 @@ class QwenImageEditPlusRoPEInputsStep(ModularPipelineBlocks):
        negative_txt_seq_lens (`List`):
            The sequence lengths of the negative prompt embeds, used for RoPE calculation
    """
+
    model_name = "qwenimage-edit-plus"

    @property

@@ -1058,8 +1083,18 @@ class QwenImageEditPlusRoPEInputsStep(ModularPipelineBlocks):
    def inputs(self) -> List[InputParam]:
        return [
            InputParam.template("batch_size"),
-            InputParam(name="image_height", required=True, type_hint=List[int], description="The heights of the reference images. Can be generated in input step."),
-            InputParam(name="image_width", required=True, type_hint=List[int], description="The widths of the reference images. Can be generated in input step."),
+            InputParam(
+                name="image_height",
+                required=True,
+                type_hint=List[int],
+                description="The heights of the reference images. Can be generated in input step.",
+            ),
+            InputParam(
+                name="image_width",
+                required=True,
+                type_hint=List[int],
+                description="The widths of the reference images. Can be generated in input step.",
+            ),
            InputParam.template("height", required=True),
            InputParam.template("width", required=True),
            InputParam.template("prompt_embeds_mask"),

@@ -1126,8 +1161,8 @@ class QwenImageLayeredRoPEInputsStep(ModularPipelineBlocks):

    Inputs:
        batch_size (`int`, *optional*, defaults to 1):
-            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
-            generated in input step.
+            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
+            be generated in input step.
        layers (`int`, *optional*, defaults to 4):
            Number of layers to extract from the image
        height (`int`):

@@ -1149,6 +1184,7 @@ class QwenImageLayeredRoPEInputsStep(ModularPipelineBlocks):
        additional_t_cond (`Tensor`):
            The additional t cond, used for RoPE calculation
    """
+
    model_name = "qwenimage-layered"

    @property

@@ -1231,6 +1267,7 @@ class QwenImageLayeredRoPEInputsStep(ModularPipelineBlocks):

## ControlNet inputs for denoiser

+
# auto_docstring
class QwenImageControlNetBeforeDenoiserStep(ModularPipelineBlocks):
    """

@@ -1247,7 +1284,8 @@ class QwenImageControlNetBeforeDenoiserStep(ModularPipelineBlocks):
        controlnet_conditioning_scale (`float`, *optional*, defaults to 1.0):
            Scale for ControlNet conditioning.
        control_image_latents (`Tensor`):
-            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.
+            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
+            step.
        timesteps (`Tensor`):
            The timesteps to use for the denoising process. Can be generated in set_timesteps step.

@@ -1255,6 +1293,7 @@ class QwenImageControlNetBeforeDenoiserStep(ModularPipelineBlocks):
        controlnet_keep (`List`):
            The controlnet keep values
    """
+
    model_name = "qwenimage"

    @property

@@ -1274,16 +1313,16 @@ class QwenImageControlNetBeforeDenoiserStep(ModularPipelineBlocks):
            InputParam.template("control_guidance_end"),
            InputParam.template("controlnet_conditioning_scale"),
            InputParam(
-                name="control_image_latents",
-                required=True,
-                type_hint=torch.Tensor,
-                description="The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step."
+                name="control_image_latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.",
            ),
            InputParam(
-                name="timesteps",
-                required=True,
-                type_hint=torch.Tensor,
-                description="The timesteps to use for the denoising process. Can be generated in set_timesteps step."
+                name="timesteps",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The timesteps to use for the denoising process. Can be generated in set_timesteps step.",
            ),
        ]
@@ -30,10 +30,12 @@ logger = logging.get_logger(__name__)

# after denoising loop (unpack latents)

-#auto_docstring
+
+# auto_docstring
class QwenImageAfterDenoiseStep(ModularPipelineBlocks):
    """
-    Step that unpacks the latents from a 3D tensor (batch_size, sequence_length, channels) into a 5D tensor (batch_size, channels, 1, height, width)
+    Step that unpacks the latents from a 3D tensor (batch_size, sequence_length, channels) into a 5D tensor (batch_size,
+    channels, 1, height, width)

    Components:
        pachifier (`QwenImagePachifier`)

@@ -50,6 +52,7 @@ class QwenImageAfterDenoiseStep(ModularPipelineBlocks):
        latents (`Tensor`):
            The denoised latents unpacked to B, C, 1, H, W
    """
+
    model_name = "qwenimage"

    @property

@@ -70,10 +73,10 @@ class QwenImageAfterDenoiseStep(ModularPipelineBlocks):
            InputParam.template("height", required=True),
            InputParam.template("width", required=True),
            InputParam(
-                name="latents",
-                required=True,
-                type_hint=torch.Tensor,
-                description="The latents to decode, can be generated in the denoise step."
+                name="latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The latents to decode, can be generated in the denoise step.",
            ),
        ]

@@ -81,9 +84,7 @@ class QwenImageAfterDenoiseStep(ModularPipelineBlocks):
    def intermediate_outputs(self) -> List[OutputParam]:
        return [
            OutputParam(
-                name="latents",
-                type_hint=torch.Tensor,
-                description="The denoised latents unpacked to B, C, 1, H, W"
+                name="latents", type_hint=torch.Tensor, description="The denoised latents unpacked to B, C, 1, H, W"
            ),
        ]
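For intuition, the unpacking done by the `QwenImagePachifier` component is the inverse of a 2x2 patch packing. A hypothetical re-implementation with made-up sizes (the real component handles VAE scale factors and lives in the qwenimage modular pipeline):

```python
import torch


def unpack_latents(latents: torch.Tensor, height: int, width: int) -> torch.Tensor:
    # (B, (H/2)*(W/2), C*4) -> (B, C, 1, H, W): undo the 2x2 patch packing
    batch_size, _, packed_channels = latents.shape
    channels = packed_channels // 4
    latents = latents.view(batch_size, height // 2, width // 2, channels, 2, 2)
    latents = latents.permute(0, 3, 1, 4, 2, 5)  # B, C, H/2, 2, W/2, 2
    return latents.reshape(batch_size, channels, 1, height, width)


packed = torch.randn(1, (64 // 2) * (64 // 2), 16 * 4)  # hypothetical sizes
assert unpack_latents(packed, 64, 64).shape == (1, 16, 1, 64, 64)
```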
@@ -100,7 +101,7 @@ class QwenImageAfterDenoiseStep(ModularPipelineBlocks):
        return components, state


-#auto_docstring
+# auto_docstring
class QwenImageLayeredAfterDenoiseStep(ModularPipelineBlocks):
    """
    Unpack latents from (B, seq, C*4) to (B, C, layers+1, H, W) after denoising.

@@ -122,6 +123,7 @@ class QwenImageLayeredAfterDenoiseStep(ModularPipelineBlocks):
        latents (`Tensor`):
            Denoised latents. (unpacked to B, C, layers+1, H, W)
    """
+
    model_name = "qwenimage-layered"

    @property

@@ -138,10 +140,10 @@ class QwenImageLayeredAfterDenoiseStep(ModularPipelineBlocks):
    def inputs(self) -> List[InputParam]:
        return [
            InputParam(
-                name="latents",
-                required=True,
-                type_hint=torch.Tensor,
-                description="The denoised latents to decode, can be generated in the denoise step."
+                name="latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The denoised latents to decode, can be generated in the denoise step.",
            ),
            InputParam.template("height", required=True),
            InputParam.template("width", required=True),

@@ -173,7 +175,8 @@ class QwenImageLayeredAfterDenoiseStep(ModularPipelineBlocks):

# decode step

-#auto_docstring
+
+# auto_docstring
class QwenImageDecoderStep(ModularPipelineBlocks):
    """
    Step that decodes the latents to images

@@ -183,12 +186,14 @@ class QwenImageDecoderStep(ModularPipelineBlocks):

    Inputs:
        latents (`Tensor`):
-            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.
+            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
+            step.

    Outputs:
        images (`List`):
            Generated images. (tensor output of the vae decoder.)
    """
+
    model_name = "qwenimage"

    @property

@@ -207,10 +212,10 @@ class QwenImageDecoderStep(ModularPipelineBlocks):
    def inputs(self) -> List[InputParam]:
        return [
            InputParam(
-                name="latents",
-                required=True,
-                type_hint=torch.Tensor,
-                description="The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step."
+                name="latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.",
            ),
        ]
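Decoding itself follows the usual VAE pattern: denormalize the latents, then run the decoder. A minimal sketch with illustrative config attribute names (the real step resolves the model through its `vae` component spec, and the Qwen VAE's normalization details may differ):

```python
import torch


@torch.no_grad()
def decode_latents(vae, latents: torch.Tensor) -> torch.Tensor:
    # undo the normalization applied when the latents were encoded
    # (attribute names here are illustrative, not guaranteed by the API)
    latents = latents / vae.config.scaling_factor + vae.config.shift_factor
    # vae.decode returns an output object whose .sample holds the image tensor
    return vae.decode(latents).sample
```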
@@ -246,18 +251,18 @@ class QwenImageDecoderStep(ModularPipelineBlocks):
        return components, state


-#auto_docstring
+# auto_docstring
class QwenImageLayeredDecoderStep(ModularPipelineBlocks):
    """
    Decode unpacked latents (B, C, layers+1, H, W) into layer images.

    Components:
-        vae (`AutoencoderKLQwenImage`)
-        image_processor (`VaeImageProcessor`)
+        vae (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`)

    Inputs:
        latents (`Tensor`):
-            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.
+            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
+            step.
        output_type (`str`, *optional*, defaults to pil):
            Output format: 'pil', 'np', 'pt'.

@@ -265,6 +270,7 @@ class QwenImageLayeredDecoderStep(ModularPipelineBlocks):
        images (`List`):
            Generated images.
    """
+
    model_name = "qwenimage-layered"

    @property

@@ -287,10 +293,10 @@ class QwenImageLayeredDecoderStep(ModularPipelineBlocks):
    def inputs(self) -> List[InputParam]:
        return [
            InputParam(
-                name="latents",
-                required=True,
-                type_hint=torch.Tensor,
-                description="The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step."
+                name="latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.",
            ),
            InputParam.template("output_type"),
        ]

@@ -345,7 +351,8 @@ class QwenImageLayeredDecoderStep(ModularPipelineBlocks):

# postprocess the decoded images

-#auto_docstring
+
+# auto_docstring
class QwenImageProcessImagesOutputStep(ModularPipelineBlocks):
    """
    postprocess the generated image

@@ -363,6 +370,7 @@ class QwenImageProcessImagesOutputStep(ModularPipelineBlocks):
        images (`List`):
            Generated images.
    """
+
    model_name = "qwenimage"

    @property

@@ -384,10 +392,10 @@ class QwenImageProcessImagesOutputStep(ModularPipelineBlocks):
    def inputs(self) -> List[InputParam]:
        return [
            InputParam(
-                name="images",
-                required=True,
-                type_hint=torch.Tensor,
-                description="the generated image tensor from decoders step"
+                name="images",
+                required=True,
+                type_hint=torch.Tensor,
+                description="the generated image tensor from decoders step",
            ),
            InputParam.template("output_type"),
        ]

@@ -416,7 +424,7 @@ class QwenImageProcessImagesOutputStep(ModularPipelineBlocks):
        return components, state


-#auto_docstring
+# auto_docstring
class QwenImageInpaintProcessImagesOutputStep(ModularPipelineBlocks):
    """
    postprocess the generated image, optionally applying the mask overlay to the original image.

@@ -430,12 +438,14 @@ class QwenImageInpaintProcessImagesOutputStep(ModularPipelineBlocks):
        output_type (`str`, *optional*, defaults to pil):
            Output format: 'pil', 'np', 'pt'.
        mask_overlay_kwargs (`Dict`, *optional*):
-            The kwargs for the postprocess step to apply the mask overlay. generated in InpaintProcessImagesInputStep.
+            The kwargs for the postprocess step to apply the mask overlay. generated in
+            InpaintProcessImagesInputStep.

    Outputs:
        images (`List`):
            Generated images.
    """
+
    model_name = "qwenimage"

    @property

@@ -457,16 +467,17 @@ class QwenImageInpaintProcessImagesOutputStep(ModularPipelineBlocks):
    def inputs(self) -> List[InputParam]:
        return [
            InputParam(
-                name="images",
-                required=True,
-                type_hint=torch.Tensor,
-                description="the generated image tensor from decoders step"
+                name="images",
+                required=True,
+                type_hint=torch.Tensor,
+                description="the generated image tensor from decoders step",
            ),
            InputParam.template("output_type"),
            InputParam(
-                name="mask_overlay_kwargs",
+                name="mask_overlay_kwargs",
                type_hint=Dict[str, Any],
-                description="The kwargs for the postprocess step to apply the mask overlay. generated in InpaintProcessImagesInputStep."),
+                description="The kwargs for the postprocess step to apply the mask overlay. generated in InpaintProcessImagesInputStep.",
+            ),
        ]
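The overlay this step applies is the standard inpainting composite: outside the mask the original pixels are restored, inside it the generated pixels are kept. A sketch of the idea, not the `InpaintProcessor` API:

```python
import torch


def apply_mask_overlay(original: torch.Tensor, generated: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
    # mask == 1 marks the inpainted region; elsewhere keep the original image
    return original * (1.0 - mask) + generated * mask


original = torch.rand(1, 3, 64, 64)
generated = torch.rand(1, 3, 64, 64)
mask = torch.zeros(1, 1, 64, 64)
mask[..., 16:48, 16:48] = 1.0  # hypothetical square hole
composited = apply_mask_overlay(original, generated, mask)
```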
    @property

@@ -50,10 +50,10 @@ class QwenImageLoopBeforeDenoiser(ModularPipelineBlocks):
    def inputs(self) -> List[InputParam]:
        return [
            InputParam(
-                name="latents",
-                required=True,
-                type_hint=torch.Tensor,
-                description="The initial latents to use for the denoising process. Can be generated in prepare_latent step."
+                name="latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The initial latents to use for the denoising process. Can be generated in prepare_latent step.",
            ),
        ]

@@ -80,10 +80,10 @@ class QwenImageEditLoopBeforeDenoiser(ModularPipelineBlocks):
    def inputs(self) -> List[InputParam]:
        return [
            InputParam(
-                name="latents",
-                required=True,
-                type_hint=torch.Tensor,
-                description="The initial latents to use for the denoising process. Can be generated in prepare_latent step."
+                name="latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The initial latents to use for the denoising process. Can be generated in prepare_latent step.",
            ),
            InputParam.template("image_latents"),
        ]

@@ -131,10 +131,10 @@ class QwenImageLoopBeforeDenoiserControlNet(ModularPipelineBlocks):
            ),
            InputParam.template("controlnet_conditioning_scale", note="updated in prepare_controlnet_inputs step."),
            InputParam(
-                name="controlnet_keep",
-                required=True,
-                type_hint=List[float],
-                description="The controlnet keep values. Can be generated in prepare_controlnet_inputs step."
+                name="controlnet_keep",
+                required=True,
+                type_hint=List[float],
+                description="The controlnet keep values. Can be generated in prepare_controlnet_inputs step.",
            ),
        ]

@@ -467,10 +467,10 @@ class QwenImageDenoiseLoopWrapper(LoopSequentialPipelineBlocks):
    def loop_inputs(self) -> List[InputParam]:
        return [
            InputParam(
-                name="timesteps",
-                required=True,
-                type_hint=torch.Tensor,
-                description="The timesteps to use for the denoising process. Can be generated in set_timesteps step."
+                name="timesteps",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The timesteps to use for the denoising process. Can be generated in set_timesteps step.",
            ),
            InputParam.template("num_inference_steps", required=True),
        ]
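`LoopSequentialPipelineBlocks` subclasses such as this wrapper run their `sub_blocks` once per timestep. A deliberately simplified sketch of that contract — the dict-based state and the block signature below are stand-ins, not the real `PipelineState`:

```python
from typing import Callable, Dict, List, Tuple

State = Dict[str, object]
Block = Callable[[object, State], Tuple[object, State]]


def denoise_loop(components: object, state: State, sub_blocks: List[Block]) -> Tuple[object, State]:
    for t in state["timesteps"]:              # one outer iteration per timestep
        state["t"] = t
        for block in sub_blocks:              # before-denoiser -> denoiser -> after-denoiser
            components, state = block(components, state)
    return components, state


def record_block(components, state):
    state.setdefault("trace", []).append(state["t"])
    return components, state


_, final_state = denoise_loop(None, {"timesteps": [3, 2, 1]}, [record_block])
assert final_state["trace"] == [3, 2, 1]
```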
@@ -505,21 +505,21 @@ class QwenImageDenoiseLoopWrapper(LoopSequentialPipelineBlocks):

# Qwen Image (text2image, image2image)

+
# auto_docstring
class QwenImageDenoiseStep(QwenImageDenoiseLoopWrapper):
    """
-    Denoise step that iteratively denoises the latents.
-    Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method.
-    At each iteration, it runs blocks defined in `sub_blocks` sequentially:
+    Denoise step that iteratively denoises the latents.
+    Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method. At each iteration, it runs blocks
+    defined in `sub_blocks` sequentially:
      - `QwenImageLoopBeforeDenoiser`
      - `QwenImageLoopDenoiser`
      - `QwenImageLoopAfterDenoiser`
    This block supports text2image and image2image tasks for QwenImage.

    Components:
-        guider (`ClassifierFreeGuidance`)
-        transformer (`QwenImageTransformer2DModel`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
+        guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`) scheduler
+        (`FlowMatchEulerDiscreteScheduler`)

    Inputs:
        timesteps (`Tensor`):

@@ -539,6 +539,7 @@ class QwenImageDenoiseStep(QwenImageDenoiseLoopWrapper):
        latents (`Tensor`):
            Denoised latents.
    """
+
    model_name = "qwenimage"

    block_classes = [

@@ -551,8 +552,8 @@ class QwenImageDenoiseStep(QwenImageDenoiseLoopWrapper):
    @property
    def description(self) -> str:
        return (
-            "Denoise step that iteratively denoises the latents. \n"
-            "Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method \n"
+            "Denoise step that iteratively denoises the latents.\n"
+            "Its loop logic is defined in `QwenImageDenoiseLoopWrapper.__call__` method\n"
            "At each iteration, it runs blocks defined in `sub_blocks` sequentially:\n"
            " - `QwenImageLoopBeforeDenoiser`\n"
            " - `QwenImageLoopDenoiser`\n"

@@ -565,9 +566,9 @@ class QwenImageDenoiseStep(QwenImageDenoiseLoopWrapper):
# auto_docstring
class QwenImageInpaintDenoiseStep(QwenImageDenoiseLoopWrapper):
    """
-    Denoise step that iteratively denoises the latents.
-    Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method.
-    At each iteration, it runs blocks defined in `sub_blocks` sequentially:
+    Denoise step that iteratively denoises the latents.
+    Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method. At each iteration, it runs blocks
+    defined in `sub_blocks` sequentially:
      - `QwenImageLoopBeforeDenoiser`
      - `QwenImageLoopDenoiser`
      - `QwenImageLoopAfterDenoiser`

@@ -575,9 +576,8 @@ class QwenImageInpaintDenoiseStep(QwenImageDenoiseLoopWrapper):
    This block supports inpainting tasks for QwenImage.

    Components:
-        guider (`ClassifierFreeGuidance`)
-        transformer (`QwenImageTransformer2DModel`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
+        guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`) scheduler
+        (`FlowMatchEulerDiscreteScheduler`)

    Inputs:
        timesteps (`Tensor`):

@@ -603,6 +603,7 @@ class QwenImageInpaintDenoiseStep(QwenImageDenoiseLoopWrapper):
        latents (`Tensor`):
            Denoised latents.
    """
+
    model_name = "qwenimage"
    block_classes = [
        QwenImageLoopBeforeDenoiser,

@@ -630,9 +631,9 @@ class QwenImageInpaintDenoiseStep(QwenImageDenoiseLoopWrapper):
# auto_docstring
class QwenImageControlNetDenoiseStep(QwenImageDenoiseLoopWrapper):
    """
-    Denoise step that iteratively denoises the latents.
-    Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method.
-    At each iteration, it runs blocks defined in `sub_blocks` sequentially:
+    Denoise step that iteratively denoises the latents.
+    Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method. At each iteration, it runs blocks
+    defined in `sub_blocks` sequentially:
      - `QwenImageLoopBeforeDenoiser`
      - `QwenImageLoopBeforeDenoiserControlNet`
      - `QwenImageLoopDenoiser`

@@ -640,10 +641,8 @@ class QwenImageControlNetDenoiseStep(QwenImageDenoiseLoopWrapper):
    This block supports text2img/img2img tasks with controlnet for QwenImage.

    Components:
-        guider (`ClassifierFreeGuidance`)
-        controlnet (`QwenImageControlNetModel`)
-        transformer (`QwenImageTransformer2DModel`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
+        guider (`ClassifierFreeGuidance`) controlnet (`QwenImageControlNetModel`) transformer
+        (`QwenImageTransformer2DModel`) scheduler (`FlowMatchEulerDiscreteScheduler`)

    Inputs:
        timesteps (`Tensor`):

@@ -669,6 +668,7 @@ class QwenImageControlNetDenoiseStep(QwenImageDenoiseLoopWrapper):
        latents (`Tensor`):
            Denoised latents.
    """
+
    model_name = "qwenimage"
    block_classes = [
        QwenImageLoopBeforeDenoiser,

@@ -696,9 +696,9 @@ class QwenImageControlNetDenoiseStep(QwenImageDenoiseLoopWrapper):
# auto_docstring
class QwenImageInpaintControlNetDenoiseStep(QwenImageDenoiseLoopWrapper):
    """
-    Denoise step that iteratively denoises the latents.
-    Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method.
-    At each iteration, it runs blocks defined in `sub_blocks` sequentially:
+    Denoise step that iteratively denoises the latents.
+    Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method. At each iteration, it runs blocks
+    defined in `sub_blocks` sequentially:
      - `QwenImageLoopBeforeDenoiser`
      - `QwenImageLoopBeforeDenoiserControlNet`
      - `QwenImageLoopDenoiser`

@@ -707,10 +707,8 @@ class QwenImageInpaintControlNetDenoiseStep(QwenImageDenoiseLoopWrapper):
    This block supports inpainting tasks with controlnet for QwenImage.

    Components:
-        guider (`ClassifierFreeGuidance`)
-        controlnet (`QwenImageControlNetModel`)
-        transformer (`QwenImageTransformer2DModel`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
+        guider (`ClassifierFreeGuidance`) controlnet (`QwenImageControlNetModel`) transformer
+        (`QwenImageTransformer2DModel`) scheduler (`FlowMatchEulerDiscreteScheduler`)

    Inputs:
        timesteps (`Tensor`):

@@ -742,6 +740,7 @@ class QwenImageInpaintControlNetDenoiseStep(QwenImageDenoiseLoopWrapper):
        latents (`Tensor`):
            Denoised latents.
    """
+
    model_name = "qwenimage"
    block_classes = [
        QwenImageLoopBeforeDenoiser,

@@ -777,18 +776,17 @@ class QwenImageInpaintControlNetDenoiseStep(QwenImageDenoiseLoopWrapper):
# auto_docstring
class QwenImageEditDenoiseStep(QwenImageDenoiseLoopWrapper):
    """
-    Denoise step that iteratively denoises the latents.
-    Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method.
-    At each iteration, it runs blocks defined in `sub_blocks` sequentially:
+    Denoise step that iteratively denoises the latents.
+    Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method. At each iteration, it runs blocks
+    defined in `sub_blocks` sequentially:
      - `QwenImageEditLoopBeforeDenoiser`
      - `QwenImageEditLoopDenoiser`
      - `QwenImageLoopAfterDenoiser`
    This block supports QwenImage Edit.

    Components:
-        guider (`ClassifierFreeGuidance`)
-        transformer (`QwenImageTransformer2DModel`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
+        guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`) scheduler
+        (`FlowMatchEulerDiscreteScheduler`)

    Inputs:
        timesteps (`Tensor`):

@@ -810,6 +808,7 @@ class QwenImageEditDenoiseStep(QwenImageDenoiseLoopWrapper):
        latents (`Tensor`):
            Denoised latents.
    """
+
    model_name = "qwenimage-edit"
    block_classes = [
        QwenImageEditLoopBeforeDenoiser,

@@ -835,9 +834,9 @@ class QwenImageEditDenoiseStep(QwenImageDenoiseLoopWrapper):
# auto_docstring
class QwenImageEditInpaintDenoiseStep(QwenImageDenoiseLoopWrapper):
    """
-    Denoise step that iteratively denoises the latents.
-    Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method.
-    At each iteration, it runs blocks defined in `sub_blocks` sequentially:
+    Denoise step that iteratively denoises the latents.
+    Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method. At each iteration, it runs blocks
+    defined in `sub_blocks` sequentially:
      - `QwenImageEditLoopBeforeDenoiser`
      - `QwenImageEditLoopDenoiser`
      - `QwenImageLoopAfterDenoiser`

@@ -845,9 +844,8 @@ class QwenImageEditInpaintDenoiseStep(QwenImageDenoiseLoopWrapper):
    This block supports inpainting tasks for QwenImage Edit.

    Components:
-        guider (`ClassifierFreeGuidance`)
-        transformer (`QwenImageTransformer2DModel`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
+        guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`) scheduler
+        (`FlowMatchEulerDiscreteScheduler`)

    Inputs:
        timesteps (`Tensor`):

@@ -873,6 +871,7 @@ class QwenImageEditInpaintDenoiseStep(QwenImageDenoiseLoopWrapper):
        latents (`Tensor`):
            Denoised latents.
    """
+
    model_name = "qwenimage-edit"
    block_classes = [
        QwenImageEditLoopBeforeDenoiser,

@@ -900,18 +899,17 @@ class QwenImageEditInpaintDenoiseStep(QwenImageDenoiseLoopWrapper):
# auto_docstring
class QwenImageLayeredDenoiseStep(QwenImageDenoiseLoopWrapper):
    """
-    Denoise step that iteratively denoises the latents.
-    Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method.
-    At each iteration, it runs blocks defined in `sub_blocks` sequentially:
+    Denoise step that iteratively denoises the latents.
+    Its loop logic is defined in the `QwenImageDenoiseLoopWrapper.__call__` method. At each iteration, it runs blocks
+    defined in `sub_blocks` sequentially:
      - `QwenImageEditLoopBeforeDenoiser`
      - `QwenImageEditLoopDenoiser`
      - `QwenImageLoopAfterDenoiser`
    This block supports QwenImage Layered.

    Components:
-        guider (`ClassifierFreeGuidance`)
-        transformer (`QwenImageTransformer2DModel`)
-        scheduler (`FlowMatchEulerDiscreteScheduler`)
+        guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`) scheduler
+        (`FlowMatchEulerDiscreteScheduler`)

    Inputs:
        timesteps (`Tensor`):

@@ -933,6 +931,7 @@ class QwenImageLayeredDenoiseStep(QwenImageDenoiseLoopWrapper):
        latents (`Tensor`):
            Denoised latents.
    """
+
    model_name = "qwenimage-layered"
    block_classes = [
        QwenImageEditLoopBeforeDenoiser,
@@ -30,7 +30,7 @@ from ...pipelines.qwenimage.pipeline_qwenimage_edit import calculate_dimensions
from ...utils import logging
from ...utils.torch_utils import unwrap_module
from ..modular_pipeline import ModularPipelineBlocks, PipelineState
-from ..modular_pipeline_utils import ComponentSpec, ConfigSpec, InputParam, OutputParam
+from ..modular_pipeline_utils import ComponentSpec, InputParam, OutputParam
from .modular_pipeline import QwenImageModularPipeline
from .prompt_templates import (
    QWENIMAGE_EDIT_PLUS_IMG_TEMPLATE,

@@ -277,6 +277,7 @@ def encode_vae_image(
# In most of our other pipelines, resizing is done as part of the image preprocessing step.
# ====================

+
# auto_docstring
class QwenImageEditResizeStep(ModularPipelineBlocks):
    """

@@ -293,8 +294,8 @@ class QwenImageEditResizeStep(ModularPipelineBlocks):
        resized_image (`List`):
            The resized images
    """
-    model_name = "qwenimage-edit"
+
+    model_name = "qwenimage-edit"

    @property
    def description(self) -> str:

@@ -319,8 +320,8 @@ class QwenImageEditResizeStep(ModularPipelineBlocks):
    def intermediate_outputs(self) -> List[OutputParam]:
        return [
            OutputParam(
-                name="resized_image",
-                type_hint=List[PIL.Image.Image],
+                name="resized_image",
+                type_hint=List[PIL.Image.Image],
                description="The resized images",
            ),
        ]

@@ -353,7 +354,8 @@ class QwenImageEditResizeStep(ModularPipelineBlocks):
# auto_docstring
class QwenImageLayeredResizeStep(ModularPipelineBlocks):
    """
-    Image Resize step that resizes the image to a target area (defined by the resolution parameter from the user) while maintaining the aspect ratio.
+    Image Resize step that resizes the image to a target area (defined by the resolution parameter from the user) while
+    maintaining the aspect ratio.

    Components:
        image_resize_processor (`VaeImageProcessor`)

@@ -368,11 +370,12 @@ class QwenImageLayeredResizeStep(ModularPipelineBlocks):
        resized_image (`List`):
            The resized images
    """
+
    model_name = "qwenimage-layered"

    @property
    def description(self) -> str:
-        return f"Image Resize step that resizes the image to a target area (defined by the resolution parameter from the user) while maintaining the aspect ratio."
+        return "Image Resize step that resizes the image to a target area (defined by the resolution parameter from the user) while maintaining the aspect ratio."

    @property
    def expected_components(self) -> List[ComponentSpec]:

@@ -399,11 +402,13 @@ class QwenImageLayeredResizeStep(ModularPipelineBlocks):

    @property
    def intermediate_outputs(self) -> List[OutputParam]:
-        return [OutputParam(
-            name="resized_image",
-            type_hint=List[PIL.Image.Image],
-            description="The resized images",
-        )]
+        return [
+            OutputParam(
+                name="resized_image",
+                type_hint=List[PIL.Image.Image],
+                description="The resized images",
+            )
+        ]

    @staticmethod
    def check_inputs(resolution: int):

@@ -442,8 +447,8 @@ class QwenImageLayeredResizeStep(ModularPipelineBlocks):
class QwenImageEditPlusResizeStep(ModularPipelineBlocks):
    """
    Resize images for QwenImage Edit Plus pipeline.
-    Produces two outputs: resized_image (1024x1024) for VAE encoding, resized_cond_image (384x384) for VL text encoding.
-    Each image is resized independently based on its own aspect ratio.
+    Produces two outputs: resized_image (1024x1024) for VAE encoding, resized_cond_image (384x384) for VL text
+    encoding. Each image is resized independently based on its own aspect ratio.

    Components:
        image_resize_processor (`VaeImageProcessor`)

@@ -484,7 +489,7 @@ class QwenImageEditPlusResizeStep(ModularPipelineBlocks):
    @property
    def inputs(self) -> List[InputParam]:
-        # image
-        return [InputParam.template("image")]
+        return [InputParam.template("image")]

    @property
    def intermediate_outputs(self) -> List[OutputParam]:

@@ -518,13 +523,11 @@ class QwenImageEditPlusResizeStep(ModularPipelineBlocks):
        resized_cond_images = []
        for image in images:
            image_width, image_height = image.size

            # For VAE encoder (1024x1024 target area)
            vae_width, vae_height, _ = calculate_dimensions(1024 * 1024, image_width / image_height)
-            resized_images.append(
-                components.image_resize_processor.resize(image, height=vae_height, width=vae_width)
-            )
+            resized_images.append(components.image_resize_processor.resize(image, height=vae_height, width=vae_width))

            # For VL text encoder (384x384 target area)
            vl_width, vl_height, _ = calculate_dimensions(384 * 384, image_width / image_height)
            resized_cond_images.append(
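`calculate_dimensions` fits a width/height pair to a target pixel area at a given aspect ratio, which is how one source image yields both a 1024x1024-budget VAE input and a 384x384-budget VL input above. A hypothetical re-implementation for intuition — the real helper is imported from `pipeline_qwenimage_edit` and its rounding multiple and return values may differ:

```python
import math


def calculate_dimensions(target_area: int, aspect_ratio: float, multiple: int = 32):
    # width/height with width/height == aspect_ratio and width*height ~= target_area
    width = math.sqrt(target_area * aspect_ratio)
    height = width / aspect_ratio
    width = round(width / multiple) * multiple
    height = round(height / multiple) * multiple
    return width, height, aspect_ratio


# a 3:2 source image mapped onto the 1024x1024 (VAE) and 384x384 (VL) budgets
print(calculate_dimensions(1024 * 1024, 3 / 2))  # (1248, 832, 1.5)
print(calculate_dimensions(384 * 384, 3 / 2))    # (480, 320, 1.5)
```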
@@ -541,16 +544,16 @@ class QwenImageEditPlusResizeStep(ModularPipelineBlocks):
# 2. GET IMAGE PROMPT
# ====================

+
# auto_docstring
class QwenImageLayeredGetImagePromptStep(ModularPipelineBlocks):
    """
    Auto-caption step that generates a text prompt from the input image if none is provided.
-    Uses the VL model (text_encoder) to generate a description of the image.
-    If prompt is already provided, this step passes through unchanged.
+    Uses the VL model (text_encoder) to generate a description of the image. If prompt is already provided, this step
+    passes through unchanged.

    Components:
-        text_encoder (`Qwen2_5_VLForConditionalGeneration`)
-        processor (`Qwen2VLProcessor`)
+        text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor (`Qwen2VLProcessor`)

    Inputs:
        prompt (`str`, *optional*):

@@ -590,7 +593,9 @@ class QwenImageLayeredGetImagePromptStep(ModularPipelineBlocks):
    @property
    def inputs(self) -> List[InputParam]:
        return [
-            InputParam.template("prompt", required=False),  # it is not required for qwenimage-layered, unlike other pipelines
+            InputParam.template(
+                "prompt", required=False
+            ),  # it is not required for qwenimage-layered, unlike other pipelines
            InputParam(
                name="resized_image",
                required=True,

@@ -653,15 +658,15 @@ class QwenImageLayeredGetImagePromptStep(ModularPipelineBlocks):
# 3. TEXT ENCODER
# ====================

+
# auto_docstring
class QwenImageTextEncoderStep(ModularPipelineBlocks):
    """
    Text Encoder step that generates text embeddings to guide the image generation.

    Components:
-        text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use
-        tokenizer (`Qwen2Tokenizer`): The tokenizer to use
-        guider (`ClassifierFreeGuidance`)
+        text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use tokenizer (`Qwen2Tokenizer`):
+        The tokenizer to use guider (`ClassifierFreeGuidance`)

    Inputs:
        prompt (`str`):

@@ -681,6 +686,7 @@ class QwenImageTextEncoderStep(ModularPipelineBlocks):
        negative_prompt_embeds_mask (`Tensor`):
            The negative prompt embeddings mask.
    """
+
    model_name = "qwenimage"

    def __init__(self):

@@ -706,7 +712,6 @@ class QwenImageTextEncoderStep(ModularPipelineBlocks):
            ),
        ]

-
    @property
    def inputs(self) -> List[InputParam]:
        return [

@@ -786,12 +791,12 @@ class QwenImageTextEncoderStep(ModularPipelineBlocks):
# auto_docstring
class QwenImageEditTextEncoderStep(ModularPipelineBlocks):
    """
-    Text Encoder step that processes both prompt and image together to generate text embeddings for guiding image generation.
+    Text Encoder step that processes both prompt and image together to generate text embeddings for guiding image
+    generation.

    Components:
-        text_encoder (`Qwen2_5_VLForConditionalGeneration`)
-        processor (`Qwen2VLProcessor`)
-        guider (`ClassifierFreeGuidance`)
+        text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor (`Qwen2VLProcessor`) guider
+        (`ClassifierFreeGuidance`)

    Inputs:
        prompt (`str`):

@@ -811,6 +816,7 @@ class QwenImageEditTextEncoderStep(ModularPipelineBlocks):
        negative_prompt_embeds_mask (`Tensor`):
            The negative prompt embeddings mask.
    """
+
    model_name = "qwenimage"

    def __init__(self):

@@ -835,7 +841,6 @@ class QwenImageEditTextEncoderStep(ModularPipelineBlocks):
            ),
        ]

-
    @property
    def inputs(self) -> List[InputParam]:
        return [

@@ -909,12 +914,12 @@ class QwenImageEditTextEncoderStep(ModularPipelineBlocks):
# auto_docstring
class QwenImageEditPlusTextEncoderStep(ModularPipelineBlocks):
    """
-    Text Encoder step for QwenImage Edit Plus that processes prompt and multiple images together to generate text embeddings for guiding image generation.
+    Text Encoder step for QwenImage Edit Plus that processes prompt and multiple images together to generate text
+    embeddings for guiding image generation.

    Components:
-        text_encoder (`Qwen2_5_VLForConditionalGeneration`)
-        processor (`Qwen2VLProcessor`)
-        guider (`ClassifierFreeGuidance`)
+        text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor (`Qwen2VLProcessor`) guider
+        (`ClassifierFreeGuidance`)

    Inputs:
        prompt (`str`):

@@ -922,7 +927,8 @@ class QwenImageEditPlusTextEncoderStep(ModularPipelineBlocks):
        negative_prompt (`str`, *optional*):
            The prompt or prompts not to guide the image generation.
        resized_cond_image (`Tensor`):
-            The image(s) to encode, can be a single image or list of images, should be resized to 384x384 using resize step
+            The image(s) to encode, can be a single image or list of images, should be resized to 384x384 using
+            resize step

    Outputs:
        prompt_embeds (`Tensor`):

@@ -963,7 +969,6 @@ class QwenImageEditPlusTextEncoderStep(ModularPipelineBlocks):
            ),
        ]

-
    @property
    def inputs(self) -> List[InputParam]:
        return [

@@ -1042,10 +1047,12 @@ class QwenImageEditPlusTextEncoderStep(ModularPipelineBlocks):
# 4. IMAGE PREPROCESS
# ====================

+
# auto_docstring
class QwenImageInpaintProcessImagesInputStep(ModularPipelineBlocks):
    """
-    Image Preprocess step for inpainting task. This processes the image and mask inputs together. Images will be resized to the given height and width.
+    Image Preprocess step for inpainting task. This processes the image and mask inputs together. Images will be
+    resized to the given height and width.

    Components:
        image_mask_processor (`InpaintProcessor`)

@@ -1070,6 +1077,7 @@ class QwenImageInpaintProcessImagesInputStep(ModularPipelineBlocks):
        mask_overlay_kwargs (`Dict`):
            The kwargs for the postprocess step to apply the mask overlay
    """
+
    model_name = "qwenimage"

    @property

@@ -1152,7 +1160,8 @@ class QwenImageInpaintProcessImagesInputStep(ModularPipelineBlocks):
# auto_docstring
class QwenImageEditInpaintProcessImagesInputStep(ModularPipelineBlocks):
    """
-    Image Preprocess step for inpainting task. This processes the image and mask inputs together. Images should be resized first.
+    Image Preprocess step for inpainting task. This processes the image and mask inputs together. Images should be
+    resized first.

    Components:
        image_mask_processor (`InpaintProcessor`)

@@ -1173,6 +1182,7 @@ class QwenImageEditInpaintProcessImagesInputStep(ModularPipelineBlocks):
        mask_overlay_kwargs (`Dict`):
            The kwargs for the postprocess step to apply the mask overlay
    """
+
    model_name = "qwenimage-edit"

    @property

@@ -1206,11 +1216,7 @@ class QwenImageEditInpaintProcessImagesInputStep(ModularPipelineBlocks):
    @property
    def intermediate_outputs(self) -> List[OutputParam]:
        return [
-            OutputParam(
-                name="processed_image",
-                type_hint=torch.Tensor,
-                description="The processed image"
-            ),
+            OutputParam(name="processed_image", type_hint=torch.Tensor, description="The processed image"),
            OutputParam(
                name="processed_mask_image",
                type_hint=torch.Tensor,

@@ -1263,6 +1269,7 @@ class QwenImageProcessImagesInputStep(ModularPipelineBlocks):
        processed_image (`Tensor`):
            The processed image
    """
+
    model_name = "qwenimage"

    @property

@@ -1290,11 +1297,13 @@ class QwenImageProcessImagesInputStep(ModularPipelineBlocks):

    @property
    def intermediate_outputs(self) -> List[OutputParam]:
-        return [OutputParam(
-            name="processed_image",
-            type_hint=torch.Tensor,
-            description="The processed image",
-        )]
+        return [
+            OutputParam(
+                name="processed_image",
+                type_hint=torch.Tensor,
+                description="The processed image",
+            )
+        ]

    @staticmethod
    def check_inputs(height, width, vae_scale_factor):

@@ -1340,6 +1349,7 @@ class QwenImageEditProcessImagesInputStep(ModularPipelineBlocks):
        processed_image (`Tensor`):
            The processed image
    """
+
    model_name = "qwenimage-edit"

    @property

@@ -1361,7 +1371,7 @@ class QwenImageEditProcessImagesInputStep(ModularPipelineBlocks):
    def inputs(self) -> List[InputParam]:
        return [
            InputParam(
-                name="resized_image",
+                name="resized_image",
                required=True,
                type_hint=List[PIL.Image.Image],
                description="The resized image. should be generated using a resize step",

@@ -1370,11 +1380,13 @@ class QwenImageEditProcessImagesInputStep(ModularPipelineBlocks):

    @property
    def intermediate_outputs(self) -> List[OutputParam]:
-        return [OutputParam(
-            name="processed_image",
-            type_hint=torch.Tensor,
-            description="The processed image",
-        )]
+        return [
+            OutputParam(
+                name="processed_image",
+                type_hint=torch.Tensor,
+                description="The processed image",
+            )
+        ]

    @torch.no_grad()
    def __call__(self, components: QwenImageModularPipeline, state: PipelineState):

@@ -1395,7 +1407,8 @@ class QwenImageEditProcessImagesInputStep(ModularPipelineBlocks):
# auto_docstring
class QwenImageEditPlusProcessImagesInputStep(ModularPipelineBlocks):
    """
-    Image Preprocess step. Images can be resized first. If a list of images is provided, will return a list of processed images.
+    Image Preprocess step. Images can be resized first. If a list of images is provided, will return a list of
+    processed images.

    Components:
        image_processor (`VaeImageProcessor`)

@@ -1408,6 +1421,7 @@ class QwenImageEditPlusProcessImagesInputStep(ModularPipelineBlocks):
        processed_image (`Tensor`):
            The processed image
    """
+
    model_name = "qwenimage-edit-plus"

    @property

@@ -1427,20 +1441,24 @@ class QwenImageEditPlusProcessImagesInputStep(ModularPipelineBlocks):

    @property
    def inputs(self) -> List[InputParam]:
-        return [InputParam(
-            name="resized_image",
-            required=True,
-            type_hint=List[PIL.Image.Image],
-            description="The resized image. should be generated using a resize step",
-        )]
+        return [
+            InputParam(
+                name="resized_image",
+                required=True,
+                type_hint=List[PIL.Image.Image],
+                description="The resized image. should be generated using a resize step",
+            )
+        ]

    @property
    def intermediate_outputs(self) -> List[OutputParam]:
-        return [OutputParam(
-            name="processed_image",
-            type_hint=torch.Tensor,
-            description="The processed image",
-        )]
+        return [
+            OutputParam(
+                name="processed_image",
+                type_hint=torch.Tensor,
+                description="The processed image",
+            )
+        ]

    @torch.no_grad()
    def __call__(self, components: QwenImageModularPipeline, state: PipelineState):

@@ -1472,6 +1490,7 @@ class QwenImageEditPlusProcessImagesInputStep(ModularPipelineBlocks):
# 5. VAE ENCODER
# ====================

+
# auto_docstring
class QwenImageVaeEncoderStep(ModularPipelineBlocks):
    """

@@ -1509,7 +1528,9 @@ class QwenImageVaeEncoderStep(ModularPipelineBlocks):
            output (OutputParam, optional): Output parameter for the image latents. Defaults to "image_latents".
        """
        if input is None:
-            input = InputParam(name="processed_image", required=True, type_hint=torch.Tensor, description="The image tensor to encode")
+            input = InputParam(
+                name="processed_image", required=True, type_hint=torch.Tensor, description="The image tensor to encode"
+            )
|
||||
|
||||
if output is None:
|
||||
output = OutputParam.template("image_latents")
|
||||
@@ -1539,13 +1560,13 @@ class QwenImageVaeEncoderStep(ModularPipelineBlocks):
|
||||
@property
|
||||
def inputs(self) -> List[InputParam]:
|
||||
return [
|
||||
self._input, # default is "processed_image"
|
||||
self._input, # default is "processed_image"
|
||||
InputParam.template("generator"),
|
||||
]
|
||||
|
||||
@property
|
||||
def intermediate_outputs(self) -> List[OutputParam]:
|
||||
return [self._output] # default is "image_latents"
|
||||
return [self._output] # default is "image_latents"
|
||||
|
||||
@torch.no_grad()
|
||||
def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -> PipelineState:
|
||||
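Since `QwenImageVaeEncoderStep` takes its input and output specs as constructor arguments (defaulting to `processed_image` and `image_latents`), the same block can be rewired without subclassing. A hedged sketch; the parameter names below are illustrative, not taken from this diff:

control_encoder = QwenImageVaeEncoderStep(
    input=InputParam(
        name="processed_control_image",
        required=True,
        type_hint=torch.Tensor,
        description="The control image tensor to encode",
    ),
    output=OutputParam(
        name="control_image_latents",
        type_hint=torch.Tensor,
        description="The latents representing the control image",
    ),
)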
@@ -1588,9 +1609,8 @@ class QwenImageControlNetVaeEncoderStep(ModularPipelineBlocks):
    VAE Encoder step that converts `control_image` into latent representations control_image_latents.

    Components:
        vae (`AutoencoderKLQwenImage`)
        controlnet (`QwenImageControlNetModel`)
        control_image_processor (`VaeImageProcessor`)
        vae (`AutoencoderKLQwenImage`) controlnet (`QwenImageControlNetModel`) control_image_processor
        (`VaeImageProcessor`)

    Inputs:
        control_image (`Image`):
@@ -1606,6 +1626,7 @@ class QwenImageControlNetVaeEncoderStep(ModularPipelineBlocks):
        control_image_latents (`Tensor`):
            The latents representing the control image
    """

    model_name = "qwenimage"

    @property
@@ -1720,6 +1741,7 @@ class QwenImageControlNetVaeEncoderStep(ModularPipelineBlocks):
# 6. PERMUTE LATENTS
# ====================


# auto_docstring
class QwenImageLayeredPermuteLatentsStep(ModularPipelineBlocks):
    """
@@ -1733,11 +1755,12 @@ class QwenImageLayeredPermuteLatentsStep(ModularPipelineBlocks):
        image_latents (`Tensor`):
            The latent representation of the input image. (permuted from [B, C, 1, H, W] to [B, 1, C, H, W])
    """

    model_name = "qwenimage-layered"

    @property
    def description(self) -> str:
        return f"Permute image latents from (B, C, 1, H, W) to (B, 1, C, H, W) for Layered packing."
        return "Permute image latents from (B, C, 1, H, W) to (B, 1, C, H, W) for Layered packing."

    @property
    def inputs(self) -> List[InputParam]:
@@ -1760,4 +1783,4 @@ class QwenImageLayeredPermuteLatentsStep(ModularPipelineBlocks):
        block_state.image_latents = latents.permute(0, 2, 1, 3, 4)

        self.set_block_state(state, block_state)
        return components, state
        return components, state

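The `(0, 2, 1, 3, 4)` permutation above only swaps the channel and frame axes, taking `[B, C, 1, H, W]` to `[B, 1, C, H, W]` exactly as the docstring states. A quick standalone check:

import torch

latents = torch.randn(2, 16, 1, 64, 64)      # [B, C, 1, H, W]
permuted = latents.permute(0, 2, 1, 3, 4)    # [B, 1, C, H, W]
assert permuted.shape == (2, 1, 16, 64, 64)  # permute only rearranges strides, no data is copied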
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Tuple, Optional
from typing import List, Optional, Tuple

import torch

@@ -117,7 +118,8 @@ class QwenImageTextInputsStep(ModularPipelineBlocks):
    1. Determines `batch_size` and `dtype` based on `prompt_embeds`
    2. Ensures all text embeddings have consistent batch sizes (batch_size * num_images_per_prompt)

    This block should be placed after all encoder steps to process the text embeddings before they are used in subsequent pipeline steps.
    This block should be placed after all encoder steps to process the text embeddings before they are used in
    subsequent pipeline steps.

    Inputs:
        num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -145,6 +146,7 @@ class QwenImageTextInputsStep(ModularPipelineBlocks):
        negative_prompt_embeds_mask (`Tensor`):
            The negative prompt embeddings mask. (batch-expanded)
    """

    model_name = "qwenimage"

    @property
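The batch bookkeeping this docstring describes reduces to two steps: read `batch_size` off `prompt_embeds`, then tile every embedding to `batch_size * num_images_per_prompt`. A minimal sketch of that expansion (the helper is illustrative, not the block's actual code):

import torch

def expand_batch(prompt_embeds: torch.Tensor, num_images_per_prompt: int) -> torch.Tensor:
    # batch_size is inferred from the embeddings themselves
    batch_size = prompt_embeds.shape[0]
    # repeat each prompt's row so the final batch is batch_size * num_images_per_prompt
    return prompt_embeds.repeat_interleave(num_images_per_prompt, dim=0)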
@@ -271,8 +273,8 @@ class QwenImageAdditionalInputsStep(ModularPipelineBlocks):
        num_images_per_prompt (`int`, *optional*, defaults to 1):
            The number of images to generate per prompt.
        batch_size (`int`, *optional*, defaults to 1):
            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
            generated in input step.
            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
            be generated in input step.
        height (`int`, *optional*):
            The height in pixels of the generated image.
        width (`int`, *optional*):
@@ -300,7 +302,7 @@ class QwenImageAdditionalInputsStep(ModularPipelineBlocks):
        self,
        image_latent_inputs: Optional[List[InputParam]] = None,
        additional_batch_inputs: Optional[List[InputParam]] = None,
    ):
    ):
        # by default, process `image_latents`
        if image_latent_inputs is None:
            image_latent_inputs = [InputParam.template("image_latents")]
@@ -319,7 +321,9 @@ class QwenImageAdditionalInputsStep(ModularPipelineBlocks):
        else:
            for input_param in additional_batch_inputs:
                if not isinstance(input_param, InputParam):
                    raise ValueError(f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}")
                    raise ValueError(
                        f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}"
                    )

        self._image_latent_inputs = image_latent_inputs
        self._additional_batch_inputs = additional_batch_inputs
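The validation above means callers either rely on the `image_latents` template default or pass explicit `InputParam` objects; anything else raises. For example, mirroring the inpaint input step defined later in this commit:

# default: only `image_latents` is processed
step = QwenImageAdditionalInputsStep()

# inpainting variant: also batch-expand the processed mask
step = QwenImageAdditionalInputsStep(
    additional_batch_inputs=[
        InputParam(name="processed_mask_image", type_hint=torch.Tensor, description="The processed mask image")
    ]
)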
@@ -376,13 +380,17 @@ class QwenImageAdditionalInputsStep(ModularPipelineBlocks):
                name="image_width",
                type_hint=int,
                description="The image width calculated from the image latents dimension",
            )
            ),
        ]

        # `height`/`width` are not new outputs, but they will be updated if any image latent inputs are provided
        if len(self._image_latent_inputs) > 0:
            outputs.append(OutputParam(name="height", type_hint=int, description="if not provided, updated to image height"))
            outputs.append(OutputParam(name="width", type_hint=int, description="if not provided, updated to image width"))
            outputs.append(
                OutputParam(name="height", type_hint=int, description="if not provided, updated to image height")
            )
            outputs.append(
                OutputParam(name="width", type_hint=int, description="if not provided, updated to image width")
            )

        # image latent inputs are modified in place (patchified and batch-expanded)
        for input_param in self._image_latent_inputs:
@@ -479,8 +487,8 @@ class QwenImageEditPlusAdditionalInputsStep(ModularPipelineBlocks):
        num_images_per_prompt (`int`, *optional*, defaults to 1):
            The number of images to generate per prompt.
        batch_size (`int`, *optional*, defaults to 1):
            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
            generated in input step.
            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
            be generated in input step.
        height (`int`, *optional*):
            The height in pixels of the generated image.
        width (`int`, *optional*):
@@ -526,7 +534,9 @@ class QwenImageEditPlusAdditionalInputsStep(ModularPipelineBlocks):
        else:
            for input_param in additional_batch_inputs:
                if not isinstance(input_param, InputParam):
                    raise ValueError(f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}")
                    raise ValueError(
                        f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}"
                    )

        self._image_latent_inputs = image_latent_inputs
        self._additional_batch_inputs = additional_batch_inputs
@@ -587,11 +597,15 @@ class QwenImageEditPlusAdditionalInputsStep(ModularPipelineBlocks):
                description="The image widths calculated from the image latents dimension",
            ),
        ]


        # `height`/`width` are updated if any image latent inputs are provided
        if len(self._image_latent_inputs) > 0:
            outputs.append(OutputParam(name="height", type_hint=int, description="if not provided, updated to image height"))
            outputs.append(OutputParam(name="width", type_hint=int, description="if not provided, updated to image width"))
            outputs.append(
                OutputParam(name="height", type_hint=int, description="if not provided, updated to image height")
            )
            outputs.append(
                OutputParam(name="width", type_hint=int, description="if not provided, updated to image width")
            )

        # image latent inputs are modified in place (patchified, concatenated, and batch-expanded)
        for input_param in self._image_latent_inputs:
@@ -686,11 +700,13 @@ class QwenImageEditPlusAdditionalInputsStep(ModularPipelineBlocks):

# same as QwenImageAdditionalInputsStep, but with layered pachifier.


# auto_docstring
class QwenImageLayeredAdditionalInputsStep(ModularPipelineBlocks):
    """
    Input processing step for Layered that:
    1. For image latent inputs: Updates height/width if None, patchifies with layered pachifier, and expands batch size
    1. For image latent inputs: Updates height/width if None, patchifies with layered pachifier, and expands batch
    size
    2. For additional batch inputs: Expands batch dimensions to match final batch size

    Configured inputs:
@@ -705,8 +721,8 @@ class QwenImageLayeredAdditionalInputsStep(ModularPipelineBlocks):
        num_images_per_prompt (`int`, *optional*, defaults to 1):
            The number of images to generate per prompt.
        batch_size (`int`, *optional*, defaults to 1):
            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
            generated in input step.
            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
            be generated in input step.
        image_latents (`Tensor`):
            image latents used to guide the image generation. Can be generated from vae_encoder step.

@@ -720,8 +736,8 @@ class QwenImageLayeredAdditionalInputsStep(ModularPipelineBlocks):
        width (`int`):
            if not provided, updated to image width
        image_latents (`Tensor`):
            image latents used to guide the image generation. Can be generated from vae_encoder step. (patchified with layered
            pachifier and batch-expanded)
            image latents used to guide the image generation. Can be generated from vae_encoder step. (patchified
            with layered pachifier and batch-expanded)
    """

    model_name = "qwenimage-layered"
@@ -748,7 +764,9 @@ class QwenImageLayeredAdditionalInputsStep(ModularPipelineBlocks):
        else:
            for input_param in additional_batch_inputs:
                if not isinstance(input_param, InputParam):
                    raise ValueError(f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}")
                    raise ValueError(
                        f"additional_batch_inputs must be a list of InputParam, but got {type(input_param)}"
                    )

        self._image_latent_inputs = image_latent_inputs
        self._additional_batch_inputs = additional_batch_inputs
@@ -808,8 +826,12 @@ class QwenImageLayeredAdditionalInputsStep(ModularPipelineBlocks):
        ]

        if len(self._image_latent_inputs) > 0:
            outputs.append(OutputParam(name="height", type_hint=int, description="if not provided, updated to image height"))
            outputs.append(OutputParam(name="width", type_hint=int, description="if not provided, updated to image width"))
            outputs.append(
                OutputParam(name="height", type_hint=int, description="if not provided, updated to image height")
            )
            outputs.append(
                OutputParam(name="width", type_hint=int, description="if not provided, updated to image width")
            )

        # Add outputs for image latent inputs (patchified with layered pachifier and batch-expanded)
        for input_param in self._image_latent_inputs:
@@ -895,10 +917,11 @@ class QwenImageControlNetInputsStep(ModularPipelineBlocks):

    Inputs:
        control_image_latents (`Tensor`):
            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.
            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
            step.
        batch_size (`int`, *optional*, defaults to 1):
            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can be
            generated in input step.
            Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt. Can
            be generated in input step.
        num_images_per_prompt (`int`, *optional*, defaults to 1):
            The number of images to generate per prompt.
        height (`int`, *optional*):
@@ -914,6 +937,7 @@ class QwenImageControlNetInputsStep(ModularPipelineBlocks):
        width (`int`):
            if not provided, updated to control image width
    """

    model_name = "qwenimage"

    @property
@@ -923,17 +947,26 @@ class QwenImageControlNetInputsStep(ModularPipelineBlocks):
    @property
    def inputs(self) -> List[InputParam]:
        return [
            InputParam(name="control_image_latents", required=True, type_hint=torch.Tensor, description="The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step."),
            InputParam(
                name="control_image_latents",
                required=True,
                type_hint=torch.Tensor,
                description="The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.",
            ),
            InputParam.template("batch_size"),
            InputParam.template("num_images_per_prompt"),
            InputParam.template("height"),
            InputParam.template("width"),
        ]


    @property
    def intermediate_outputs(self) -> List[OutputParam]:
        return [
            OutputParam(name="control_image_latents", type_hint=torch.Tensor, description="The control image latents (patchified and batch-expanded)."),
            OutputParam(
                name="control_image_latents",
                type_hint=torch.Tensor,
                description="The control image latents (patchified and batch-expanded).",
            ),
            OutputParam(name="height", type_hint=int, description="if not provided, updated to control image height"),
            OutputParam(name="width", type_hint=int, description="if not provided, updated to control image width"),
        ]

@@ -13,9 +13,10 @@
# limitations under the License.

import torch

from ...utils import logging
from ..modular_pipeline import AutoPipelineBlocks, ConditionalPipelineBlocks, SequentialPipelineBlocks
from ..modular_pipeline_utils import InsertableDict, OutputParam, InputParam
from ..modular_pipeline_utils import InputParam, InsertableDict, OutputParam
from .before_denoise import (
    QwenImageControlNetBeforeDenoiserStep,
    QwenImageCreateMaskLatentsStep,
@@ -65,9 +66,8 @@ class QwenImageAutoTextEncoderStep(AutoPipelineBlocks):
    Text encoder step that encodes the text prompt into a text embedding. This is an auto pipeline block.

    Components:
        text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use
        tokenizer (`Qwen2Tokenizer`): The tokenizer to use
        guider (`ClassifierFreeGuidance`)
        text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use tokenizer (`Qwen2Tokenizer`):
        The tokenizer to use guider (`ClassifierFreeGuidance`)

    Inputs:
        prompt (`str`, *optional*):
@@ -114,8 +114,7 @@ class QwenImageInpaintVaeEncoderStep(SequentialPipelineBlocks):
    - Creates `image_latents`.

    Components:
        image_mask_processor (`InpaintProcessor`)
        vae (`AutoencoderKLQwenImage`)
        image_mask_processor (`InpaintProcessor`) vae (`AutoencoderKLQwenImage`)

    Inputs:
        mask_image (`Image`):
@@ -162,8 +161,7 @@ class QwenImageImg2ImgVaeEncoderStep(SequentialPipelineBlocks):
    Vae encoder step that preprocesses and encodes the image inputs into their latent representations.

    Components:
        image_processor (`VaeImageProcessor`)
        vae (`AutoencoderKLQwenImage`)
        image_processor (`VaeImageProcessor`) vae (`AutoencoderKLQwenImage`)

    Inputs:
        image (`Union[Image, List]`):
@@ -218,9 +216,8 @@ class QwenImageOptionalControlNetVaeEncoderStep(AutoPipelineBlocks):
    - if `control_image` is not provided, step will be skipped.

    Components:
        vae (`AutoencoderKLQwenImage`)
        controlnet (`QwenImageControlNetModel`)
        control_image_processor (`VaeImageProcessor`)
        vae (`AutoencoderKLQwenImage`) controlnet (`QwenImageControlNetModel`) control_image_processor
        (`VaeImageProcessor`)

    Inputs:
        control_image (`Image`, *optional*):
@@ -380,7 +377,9 @@ class QwenImageInpaintInputStep(SequentialPipelineBlocks):
    block_classes = [
        QwenImageTextInputsStep(),
        QwenImageAdditionalInputsStep(
            additional_batch_inputs=[InputParam(name="processed_mask_image", type_hint=torch.Tensor, description="The processed mask image")]
            additional_batch_inputs=[
                InputParam(name="processed_mask_image", type_hint=torch.Tensor, description="The processed mask image")
            ]
        ),
    ]
    block_names = ["text_inputs", "additional_inputs"]
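Blocks composed this way are declarative: `block_classes` and `block_names` pair each sub-block with a key, and the sequential container feeds one block's outputs into the next block's inputs. A hedged sketch of how such a class is typically turned into something runnable (the repo id and helper method are assumptions about the modular-pipeline API, not shown in this diff):

blocks = QwenImageInpaintInputStep()
pipe = blocks.init_pipeline("Qwen/Qwen-Image")  # assumed checkpoint id and conversion helper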
@@ -401,15 +400,14 @@ class QwenImageInpaintPrepareLatentsStep(SequentialPipelineBlocks):
    - Create the patchified latents `mask` based on the processed mask image.

    Components:
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        pachifier (`QwenImagePachifier`)
        scheduler (`FlowMatchEulerDiscreteScheduler`) pachifier (`QwenImagePachifier`)

    Inputs:
        latents (`Tensor`):
            The initial random noise, can be generated in prepare latent step.
        image_latents (`Tensor`):
            image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be generated from
            vae encoder and updated in input step.)
            image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be
            generated from vae encoder and updated in input step.)
        timesteps (`Tensor`):
            The timesteps to use for the denoising process. Can be generated in set_timesteps step.
        processed_mask_image (`Tensor`):
@@ -450,13 +448,12 @@ class QwenImageInpaintPrepareLatentsStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
    """
    Step that denoises noise into image for the text2image task. It includes the denoise loop, as well as preparing the inputs (timesteps, latents, rope inputs etc.).
    Step that denoises noise into image for the text2image task. It includes the denoise loop, as well as preparing
    the inputs (timesteps, latents, rope inputs etc.).

    Components:
        pachifier (`QwenImagePachifier`)
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        guider (`ClassifierFreeGuidance`)
        transformer (`QwenImageTransformer2DModel`)
        pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
        (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)

    Inputs:
        num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -524,13 +521,12 @@ class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageInpaintCoreDenoiseStep(SequentialPipelineBlocks):
    """
    Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint task.
    Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for
    inpaint task.

    Components:
        pachifier (`QwenImagePachifier`)
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        guider (`ClassifierFreeGuidance`)
        transformer (`QwenImageTransformer2DModel`)
        pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
        (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)

    Inputs:
        num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -606,13 +602,12 @@ class QwenImageInpaintCoreDenoiseStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
    """
    Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img task.
    Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for
    img2img task.

    Components:
        pachifier (`QwenImagePachifier`)
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        guider (`ClassifierFreeGuidance`)
        transformer (`QwenImageTransformer2DModel`)
        pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
        (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)

    Inputs:
        num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -686,14 +681,12 @@ class QwenImageImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
    """
    Step that denoises noise into image for the text2image task. It includes the denoise loop, as well as preparing the inputs (timesteps, latents, rope inputs etc.).
    Step that denoises noise into image for the text2image task. It includes the denoise loop, as well as preparing
    the inputs (timesteps, latents, rope inputs etc.).

    Components:
        pachifier (`QwenImagePachifier`)
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        controlnet (`QwenImageControlNetModel`)
        guider (`ClassifierFreeGuidance`)
        transformer (`QwenImageTransformer2DModel`)
        pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) controlnet
        (`QwenImageControlNetModel`) guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)

    Inputs:
        num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -707,7 +700,8 @@ class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
        negative_prompt_embeds_mask (`Tensor`, *optional*):
            mask for the negative text embeddings. Can be generated from text_encoder step.
        control_image_latents (`Tensor`):
            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.
            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
            step.
        height (`int`, *optional*):
            The height in pixels of the generated image.
        width (`int`, *optional*):
@@ -773,14 +767,12 @@ class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
    """
    Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for inpaint task.
    Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for
    inpaint task.

    Components:
        pachifier (`QwenImagePachifier`)
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        controlnet (`QwenImageControlNetModel`)
        guider (`ClassifierFreeGuidance`)
        transformer (`QwenImageTransformer2DModel`)
        pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) controlnet
        (`QwenImageControlNetModel`) guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)

    Inputs:
        num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -802,7 +794,8 @@ class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
        processed_mask_image (`Tensor`, *optional*):
            The processed mask image
        control_image_latents (`Tensor`):
            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.
            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
            step.
        latents (`Tensor`, *optional*):
            Pre-generated noisy latents for image generation.
        generator (`Generator`, *optional*):
@@ -868,14 +861,12 @@ class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageControlNetImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
    """
    Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for img2img task.
    Before denoise step that prepares the inputs (timesteps, latents, rope inputs etc.) for the denoise step for
    img2img task.

    Components:
        pachifier (`QwenImagePachifier`)
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        controlnet (`QwenImageControlNetModel`)
        guider (`ClassifierFreeGuidance`)
        transformer (`QwenImageTransformer2DModel`)
        pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) controlnet
        (`QwenImageControlNetModel`) guider (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)

    Inputs:
        num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -895,7 +886,8 @@ class QwenImageControlNetImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
        image_latents (`Tensor`):
            image latents used to guide the image generation. Can be generated from vae_encoder step.
        control_image_latents (`Tensor`):
            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.
            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
            step.
        latents (`Tensor`, *optional*):
            Pre-generated noisy latents for image generation.
        generator (`Generator`, *optional*):
@@ -1030,12 +1022,12 @@ class QwenImageDecodeStep(SequentialPipelineBlocks):
    Decode step that decodes the latents to images and postprocesses the generated image.

    Components:
        vae (`AutoencoderKLQwenImage`)
        image_processor (`VaeImageProcessor`)
        vae (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`)

    Inputs:
        latents (`Tensor`):
            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.
            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
            step.
        output_type (`str`, *optional*, defaults to pil):
            Output format: 'pil', 'np', 'pt'.

@@ -1057,19 +1049,21 @@ class QwenImageDecodeStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageInpaintDecodeStep(SequentialPipelineBlocks):
    """
    Decode step that decodes the latents to images and postprocesses the generated image, optionally applying the mask overlay to the original image.
    Decode step that decodes the latents to images and postprocesses the generated image, optionally applying the
    mask overlay to the original image.

    Components:
        vae (`AutoencoderKLQwenImage`)
        image_mask_processor (`InpaintProcessor`)
        vae (`AutoencoderKLQwenImage`) image_mask_processor (`InpaintProcessor`)

    Inputs:
        latents (`Tensor`):
            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.
            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
            step.
        output_type (`str`, *optional*, defaults to pil):
            Output format: 'pil', 'np', 'pt'.
        mask_overlay_kwargs (`Dict`, *optional*):
            The kwargs for the postprocess step to apply the mask overlay. Generated in InpaintProcessImagesInputStep.
            The kwargs for the postprocess step to apply the mask overlay. Generated in
            InpaintProcessImagesInputStep.

    Outputs:
        images (`List`):
@@ -1125,17 +1119,11 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
    - for text-to-image generation, all you need to provide is `prompt`

    Components:
        text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use
        tokenizer (`Qwen2Tokenizer`): The tokenizer to use
        guider (`ClassifierFreeGuidance`)
        image_mask_processor (`InpaintProcessor`)
        vae (`AutoencoderKLQwenImage`)
        image_processor (`VaeImageProcessor`)
        controlnet (`QwenImageControlNetModel`)
        control_image_processor (`VaeImageProcessor`)
        pachifier (`QwenImagePachifier`)
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        transformer (`QwenImageTransformer2DModel`)
        text_encoder (`Qwen2_5_VLForConditionalGeneration`): The text encoder to use tokenizer (`Qwen2Tokenizer`):
        The tokenizer to use guider (`ClassifierFreeGuidance`) image_mask_processor (`InpaintProcessor`) vae
        (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`) controlnet (`QwenImageControlNetModel`)
        control_image_processor (`VaeImageProcessor`) pachifier (`QwenImagePachifier`) scheduler
        (`FlowMatchEulerDiscreteScheduler`) transformer (`QwenImageTransformer2DModel`)

    Inputs:
        prompt (`str`, *optional*):
@@ -1185,7 +1173,8 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
        strength (`float`, *optional*, defaults to 0.9):
            Strength for img2img/inpainting.
        control_image_latents (`Tensor`, *optional*):
            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step.
            The control image latents to use for the denoising process. Can be generated in controlnet vae encoder
            step.
        control_guidance_start (`float`, *optional*, defaults to 0.0):
            When to start applying ControlNet.
        control_guidance_end (`float`, *optional*, defaults to 1.0):
@@ -1195,7 +1184,8 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
        output_type (`str`, *optional*, defaults to pil):
            Output format: 'pil', 'np', 'pt'.
        mask_overlay_kwargs (`Dict`, *optional*):
            The kwargs for the postprocess step to apply the mask overlay. Generated in InpaintProcessImagesInputStep.
            The kwargs for the postprocess step to apply the mask overlay. Generated in
            InpaintProcessImagesInputStep.

    Outputs:
        images (`List`):

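Per the docstring above, `QwenImageAutoBlocks` dispatches on which inputs are present: `prompt` alone selects text-to-image, adding `image` selects img2img, and adding `mask_image` selects inpainting. A hedged usage sketch (the checkpoint id, loading helper, and output selector are assumptions):

import torch

blocks = QwenImageAutoBlocks()
pipe = blocks.init_pipeline("Qwen/Qwen-Image")    # assumed checkpoint id
pipe.load_components(torch_dtype=torch.bfloat16)  # assumed loading helper
images = pipe(prompt="a cat wearing sunglasses", output="images")  # assumed output selector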
@@ -13,11 +13,12 @@
# limitations under the License.

from typing import Optional

import torch

from ...utils import logging
from ..modular_pipeline import AutoPipelineBlocks, ConditionalPipelineBlocks, SequentialPipelineBlocks
from ..modular_pipeline_utils import InsertableDict, OutputParam, InputParam
from ..modular_pipeline_utils import InputParam, InsertableDict, OutputParam
from .before_denoise import (
    QwenImageCreateMaskLatentsStep,
    QwenImageEditRoPEInputsStep,
@@ -63,10 +64,8 @@ class QwenImageEditVLEncoderStep(SequentialPipelineBlocks):
    QwenImage-Edit VL encoder step that encodes the image and text prompts together.

    Components:
        image_resize_processor (`VaeImageProcessor`)
        text_encoder (`Qwen2_5_VLForConditionalGeneration`)
        processor (`Qwen2VLProcessor`)
        guider (`ClassifierFreeGuidance`)
        image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor
        (`Qwen2VLProcessor`) guider (`ClassifierFreeGuidance`)

    Inputs:
        image (`Union[Image, List]`):
@@ -113,9 +112,8 @@ class QwenImageEditVaeEncoderStep(SequentialPipelineBlocks):
    Vae encoder step that encodes the image inputs into their latent representations.

    Components:
        image_resize_processor (`VaeImageProcessor`)
        image_processor (`VaeImageProcessor`)
        vae (`AutoencoderKLQwenImage`)
        image_resize_processor (`VaeImageProcessor`) image_processor (`VaeImageProcessor`) vae
        (`AutoencoderKLQwenImage`)

    Inputs:
        image (`Union[Image, List]`):
@@ -155,9 +153,8 @@ class QwenImageEditInpaintVaeEncoderStep(SequentialPipelineBlocks):
    - create image latents.

    Components:
        image_resize_processor (`VaeImageProcessor`)
        image_mask_processor (`InpaintProcessor`)
        vae (`AutoencoderKLQwenImage`)
        image_resize_processor (`VaeImageProcessor`) image_mask_processor (`InpaintProcessor`) vae
        (`AutoencoderKLQwenImage`)

    Inputs:
        image (`Union[Image, List]`):
@@ -354,7 +351,10 @@ class QwenImageEditInpaintInputStep(SequentialPipelineBlocks):
    model_name = "qwenimage-edit"
    block_classes = [
        QwenImageTextInputsStep(),
        QwenImageAdditionalInputsStep(additional_batch_inputs=[InputParam(name="processed_mask_image", type_hint=torch.Tensor, description="The processed mask image")]
        QwenImageAdditionalInputsStep(
            additional_batch_inputs=[
                InputParam(name="processed_mask_image", type_hint=torch.Tensor, description="The processed mask image")
            ]
        ),
    ]
    block_names = ["text_inputs", "additional_inputs"]
@@ -377,15 +377,14 @@ class QwenImageEditInpaintPrepareLatentsStep(SequentialPipelineBlocks):
    - Create the patchified latents `mask` based on the processed mask image.

    Components:
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        pachifier (`QwenImagePachifier`)
        scheduler (`FlowMatchEulerDiscreteScheduler`) pachifier (`QwenImagePachifier`)

    Inputs:
        latents (`Tensor`):
            The initial random noise, can be generated in prepare latent step.
        image_latents (`Tensor`):
            image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be generated from
            vae encoder and updated in input step.)
            image latents used to guide the image generation. Can be generated from vae_encoder step. (Can be
            generated from vae encoder and updated in input step.)
        timesteps (`Tensor`):
            The timesteps to use for the denoising process. Can be generated in set_timesteps step.
        processed_mask_image (`Tensor`):
@@ -426,10 +425,8 @@ class QwenImageEditCoreDenoiseStep(SequentialPipelineBlocks):
    Core denoising workflow for QwenImage-Edit edit (img2img) task.

    Components:
        pachifier (`QwenImagePachifier`)
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        guider (`ClassifierFreeGuidance`)
        transformer (`QwenImageTransformer2DModel`)
        pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
        (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)

    Inputs:
        num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -502,10 +499,8 @@ class QwenImageEditInpaintCoreDenoiseStep(SequentialPipelineBlocks):
    Core denoising workflow for QwenImage-Edit edit inpaint task.

    Components:
        pachifier (`QwenImagePachifier`)
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        guider (`ClassifierFreeGuidance`)
        transformer (`QwenImageTransformer2DModel`)
        pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
        (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)

    Inputs:
        num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -623,12 +618,12 @@ class QwenImageEditDecodeStep(SequentialPipelineBlocks):
    Decode step that decodes the latents to images and postprocesses the generated image.

    Components:
        vae (`AutoencoderKLQwenImage`)
        image_processor (`VaeImageProcessor`)
        vae (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`)

    Inputs:
        latents (`Tensor`):
            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.
            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
            step.
        output_type (`str`, *optional*, defaults to pil):
            Output format: 'pil', 'np', 'pt'.

@@ -650,19 +645,21 @@ class QwenImageEditDecodeStep(SequentialPipelineBlocks):
# auto_docstring
class QwenImageEditInpaintDecodeStep(SequentialPipelineBlocks):
    """
    Decode step that decodes the latents to images and postprocesses the generated image, optionally applying the mask overlay to the original image.
    Decode step that decodes the latents to images and postprocesses the generated image, optionally applying the
    mask overlay to the original image.

    Components:
        vae (`AutoencoderKLQwenImage`)
        image_mask_processor (`InpaintProcessor`)
        vae (`AutoencoderKLQwenImage`) image_mask_processor (`InpaintProcessor`)

    Inputs:
        latents (`Tensor`):
            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.
            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
            step.
        output_type (`str`, *optional*, defaults to pil):
            Output format: 'pil', 'np', 'pt'.
        mask_overlay_kwargs (`Dict`, *optional*):
            The kwargs for the postprocess step to apply the mask overlay. Generated in InpaintProcessImagesInputStep.
            The kwargs for the postprocess step to apply the mask overlay. Generated in
            InpaintProcessImagesInputStep.

    Outputs:
        images (`List`):
@@ -719,19 +716,14 @@ class QwenImageEditAutoBlocks(SequentialPipelineBlocks):
    """
    Auto Modular pipeline for edit (img2img) and edit inpaint tasks using QwenImage-Edit.
    - for edit (img2img) generation, you need to provide `image`
    - for edit inpainting, you need to provide `mask_image` and `image`, optionally you can provide `padding_mask_crop`
    - for edit inpainting, you need to provide `mask_image` and `image`, optionally you can provide
    `padding_mask_crop`

    Components:
        image_resize_processor (`VaeImageProcessor`)
        text_encoder (`Qwen2_5_VLForConditionalGeneration`)
        processor (`Qwen2VLProcessor`)
        guider (`ClassifierFreeGuidance`)
        image_mask_processor (`InpaintProcessor`)
        vae (`AutoencoderKLQwenImage`)
        image_processor (`VaeImageProcessor`)
        pachifier (`QwenImagePachifier`)
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        transformer (`QwenImageTransformer2DModel`)
        image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor
        (`Qwen2VLProcessor`) guider (`ClassifierFreeGuidance`) image_mask_processor (`InpaintProcessor`) vae
        (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`) pachifier (`QwenImagePachifier`) scheduler
        (`FlowMatchEulerDiscreteScheduler`) transformer (`QwenImageTransformer2DModel`)

    Inputs:
        image (`Union[Image, List]`):
@@ -771,7 +763,8 @@ class QwenImageEditAutoBlocks(SequentialPipelineBlocks):
        output_type (`str`, *optional*, defaults to pil):
            Output format: 'pil', 'np', 'pt'.
        mask_overlay_kwargs (`Dict`, *optional*):
            The kwargs for the postprocess step to apply the mask overlay. Generated in InpaintProcessImagesInputStep.
            The kwargs for the postprocess step to apply the mask overlay. Generated in
            InpaintProcessImagesInputStep.

    Outputs:
        images (`List`):

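`QwenImageEditAutoBlocks` follows the same dispatch convention: `image` alone selects the edit (img2img) path, and `image` plus `mask_image` selects edit inpainting, per the docstring above. A hedged sketch with the same assumed helpers:

blocks = QwenImageEditAutoBlocks()
pipe = blocks.init_pipeline("Qwen/Qwen-Image-Edit")  # assumed checkpoint id
images = pipe(image=init_image, prompt="replace the sky with an aurora", output="images")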
@@ -12,10 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
from ...utils import logging
from ..modular_pipeline import SequentialPipelineBlocks
from ..modular_pipeline_utils import InsertableDict, OutputParam, InputParam
from ..modular_pipeline_utils import InsertableDict, OutputParam
from .before_denoise import (
    QwenImageEditPlusRoPEInputsStep,
    QwenImagePrepareLatentsStep,
@@ -55,10 +54,8 @@ class QwenImageEditPlusVLEncoderStep(SequentialPipelineBlocks):
    QwenImage-Edit Plus VL encoder step that encodes the image and text prompts together.

    Components:
        image_resize_processor (`VaeImageProcessor`)
        text_encoder (`Qwen2_5_VLForConditionalGeneration`)
        processor (`Qwen2VLProcessor`)
        guider (`ClassifierFreeGuidance`)
        image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor
        (`Qwen2VLProcessor`) guider (`ClassifierFreeGuidance`)

    Inputs:
        image (`Union[Image, List]`):
@@ -107,9 +104,8 @@ class QwenImageEditPlusVaeEncoderStep(SequentialPipelineBlocks):
    Each image is resized independently based on its own aspect ratio to 1024x1024 target area.

    Components:
        image_resize_processor (`VaeImageProcessor`)
        image_processor (`VaeImageProcessor`)
        vae (`AutoencoderKLQwenImage`)
        image_resize_processor (`VaeImageProcessor`) image_processor (`VaeImageProcessor`) vae
        (`AutoencoderKLQwenImage`)

    Inputs:
        image (`Union[Image, List]`):
@@ -231,10 +227,8 @@ class QwenImageEditPlusCoreDenoiseStep(SequentialPipelineBlocks):
    Core denoising workflow for QwenImage-Edit Plus edit (img2img) task.

    Components:
        pachifier (`QwenImagePachifier`)
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        guider (`ClassifierFreeGuidance`)
        transformer (`QwenImageTransformer2DModel`)
        pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
        (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)

    Inputs:
        num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -311,12 +305,12 @@ class QwenImageEditPlusDecodeStep(SequentialPipelineBlocks):
    Decode step that decodes the latents to images and postprocesses the generated image.

    Components:
        vae (`AutoencoderKLQwenImage`)
        image_processor (`VaeImageProcessor`)
        vae (`AutoencoderKLQwenImage`) image_processor (`VaeImageProcessor`)

    Inputs:
        latents (`Tensor`):
            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step.
            The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise
            step.
        output_type (`str`, *optional*, defaults to pil):
            Output format: 'pil', 'np', 'pt'.

@@ -357,14 +351,9 @@ class QwenImageEditPlusAutoBlocks(SequentialPipelineBlocks):
    - VL encoder uses 384x384 target area, VAE encoder uses 1024x1024 target area.

    Components:
        image_resize_processor (`VaeImageProcessor`)
        text_encoder (`Qwen2_5_VLForConditionalGeneration`)
        processor (`Qwen2VLProcessor`)
        guider (`ClassifierFreeGuidance`)
        image_processor (`VaeImageProcessor`)
        vae (`AutoencoderKLQwenImage`)
        pachifier (`QwenImagePachifier`)
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor
        (`Qwen2VLProcessor`) guider (`ClassifierFreeGuidance`) image_processor (`VaeImageProcessor`) vae
        (`AutoencoderKLQwenImage`) pachifier (`QwenImagePachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`)
        transformer (`QwenImageTransformer2DModel`)

    Inputs:

@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
from ...utils import logging
from ..modular_pipeline import SequentialPipelineBlocks
from ..modular_pipeline_utils import InsertableDict, OutputParam
@@ -53,14 +52,12 @@ logger = logging.get_logger(__name__)
# auto_docstring
class QwenImageLayeredTextEncoderStep(SequentialPipelineBlocks):
    """
    QwenImage-Layered Text encoder step that encodes the text prompt; will generate a prompt based on image if not provided.
    QwenImage-Layered Text encoder step that encodes the text prompt; will generate a prompt based on image if not
    provided.

    Components:
        image_resize_processor (`VaeImageProcessor`)
        text_encoder (`Qwen2_5_VLForConditionalGeneration`)
        processor (`Qwen2VLProcessor`)
        tokenizer (`Qwen2Tokenizer`): The tokenizer to use
        guider (`ClassifierFreeGuidance`)
        image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor
        (`Qwen2VLProcessor`) tokenizer (`Qwen2Tokenizer`): The tokenizer to use guider (`ClassifierFreeGuidance`)

    Inputs:
        image (`Union[Image, List]`):
@@ -116,9 +113,8 @@ class QwenImageLayeredVaeEncoderStep(SequentialPipelineBlocks):
    Vae encoder step that encodes the image inputs into their latent representations.

    Components:
        image_resize_processor (`VaeImageProcessor`)
        image_processor (`VaeImageProcessor`)
        vae (`AutoencoderKLQwenImage`)
        image_resize_processor (`VaeImageProcessor`) image_processor (`VaeImageProcessor`) vae
        (`AutoencoderKLQwenImage`)

    Inputs:
        image (`Union[Image, List]`):
@@ -203,8 +199,8 @@ class QwenImageLayeredInputStep(SequentialPipelineBlocks):
        width (`int`):
            if not provided, updated to image width
        image_latents (`Tensor`):
            image latents used to guide the image generation. Can be generated from vae_encoder step. (patchified with layered
            pachifier and batch-expanded)
            image latents used to guide the image generation. Can be generated from vae_encoder step. (patchified
            with layered pachifier and batch-expanded)
    """

    model_name = "qwenimage-layered"
@@ -230,10 +226,8 @@ class QwenImageLayeredCoreDenoiseStep(SequentialPipelineBlocks):
    Core denoising workflow for QwenImage-Layered img2img task.

    Components:
        pachifier (`QwenImageLayeredPachifier`)
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        guider (`ClassifierFreeGuidance`)
        transformer (`QwenImageTransformer2DModel`)
        pachifier (`QwenImageLayeredPachifier`) scheduler (`FlowMatchEulerDiscreteScheduler`) guider
        (`ClassifierFreeGuidance`) transformer (`QwenImageTransformer2DModel`)

    Inputs:
        num_images_per_prompt (`int`, *optional*, defaults to 1):
@@ -317,16 +311,10 @@ class QwenImageLayeredAutoBlocks(SequentialPipelineBlocks):
    Auto Modular pipeline for layered denoising tasks using QwenImage-Layered.

    Components:
        image_resize_processor (`VaeImageProcessor`)
        text_encoder (`Qwen2_5_VLForConditionalGeneration`)
        processor (`Qwen2VLProcessor`)
        tokenizer (`Qwen2Tokenizer`): The tokenizer to use
        guider (`ClassifierFreeGuidance`)
        image_processor (`VaeImageProcessor`)
        vae (`AutoencoderKLQwenImage`)
        pachifier (`QwenImageLayeredPachifier`)
        scheduler (`FlowMatchEulerDiscreteScheduler`)
        transformer (`QwenImageTransformer2DModel`)
        image_resize_processor (`VaeImageProcessor`) text_encoder (`Qwen2_5_VLForConditionalGeneration`) processor
        (`Qwen2VLProcessor`) tokenizer (`Qwen2Tokenizer`): The tokenizer to use guider (`ClassifierFreeGuidance`)
        image_processor (`VaeImageProcessor`) vae (`AutoencoderKLQwenImage`) pachifier (`QwenImageLayeredPachifier`)
        scheduler (`FlowMatchEulerDiscreteScheduler`) transformer (`QwenImageTransformer2DModel`)

    Inputs:
        image (`Union[Image, List]`):

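The Layered variant rounds out the family with the same auto-blocks pattern; per its text-encoder docstring it can even caption the input image itself when `prompt` is omitted. A hedged sketch (checkpoint id and helpers assumed as before):

blocks = QwenImageLayeredAutoBlocks()
pipe = blocks.init_pipeline("Qwen/Qwen-Image-Layered")  # assumed checkpoint id
layers = pipe(image=input_image, output="images")       # prompt optional: derived from the image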