[modular]some small fix (#12307)
* fix
* add mellon node registry
* style
* update docstring to include more info!
* support custom node mellon
* HTTPError -> HfHubHTTPError
* up
* Update src/diffusers/modular_pipelines/qwenimage/node_utils.py
@@ -32,6 +32,8 @@ class FluxModularPipeline(ModularPipeline, FluxLoraLoaderMixin, TextualInversion
    </Tip>
    """

+    default_blocks_name = "FluxAutoBlocks"

    @property
    def default_height(self):
        return self.default_sample_size * self.vae_scale_factor
src/diffusers/modular_pipelines/mellon_node_utils.py (new file, 763 lines)
@@ -0,0 +1,763 @@
import json
import logging
import os

# Simple typed wrapper for parameter overrides
from dataclasses import asdict, dataclass
from typing import Any, Dict, List, Optional, Tuple, Union

from huggingface_hub import create_repo, hf_hub_download
from huggingface_hub.utils import (
    EntryNotFoundError,
    HfHubHTTPError,
    RepositoryNotFoundError,
    RevisionNotFoundError,
    validate_hf_hub_args,
)

from ..utils import HUGGINGFACE_CO_RESOLVE_ENDPOINT, PushToHubMixin, extract_commit_hash
from .modular_pipeline import ModularPipelineBlocks


logger = logging.getLogger(__name__)


SUPPORTED_NODE_TYPES = {"controlnet", "vae_encoder", "denoise", "text_encoder", "decoder"}


# Mellon Input Parameters (runtime parameters, not models)
MELLON_INPUT_PARAMS = {
    # controlnet
    "control_image": {
        "label": "Control Image",
        "type": "image",
        "display": "input",
    },
    "controlnet_conditioning_scale": {
        "label": "Scale",
        "type": "float",
        "default": 0.5,
        "min": 0,
        "max": 1,
    },
    "control_guidance_end": {
        "label": "End",
        "type": "float",
        "default": 1.0,
        "min": 0,
        "max": 1,
    },
    "control_guidance_start": {
        "label": "Start",
        "type": "float",
        "default": 0.0,
        "min": 0,
        "max": 1,
    },
    "controlnet": {
        "label": "Controlnet",
        "type": "custom_controlnet",
        "display": "input",
    },
    "embeddings": {
        "label": "Text Embeddings",
        "display": "input",
        "type": "embeddings",
    },
    "image": {
        "label": "Image",
        "type": "image",
        "display": "input",
    },
    "negative_prompt": {
        "label": "Negative Prompt",
        "type": "string",
        "default": "",
        "display": "textarea",
    },
    "prompt": {
        "label": "Prompt",
        "type": "string",
        "default": "",
        "display": "textarea",
    },
    "guidance_scale": {
        "label": "Guidance Scale",
        "type": "float",
        "display": "slider",
        "default": 5,
        "min": 1.0,
        "max": 30.0,
        "step": 0.1,
    },
    "height": {
        "label": "Height",
        "type": "int",
        "default": 1024,
        "min": 64,
        "step": 8,
    },
    "image_latents": {
        "label": "Image Latents",
        "type": "latents",
        "display": "input",
        "onChange": {False: ["height", "width"], True: ["strength"]},
    },
    "latents": {
        "label": "Latents",
        "type": "latents",
        "display": "input",
    },
    "num_inference_steps": {
        "label": "Steps",
        "type": "int",
        "display": "slider",
        "default": 25,
        "min": 1,
        "max": 100,
    },
    "seed": {
        "label": "Seed",
        "type": "int",
        "display": "random",
        "default": 0,
        "min": 0,
        "max": 4294967295,
    },
    "strength": {
        "label": "Strength",
        "type": "float",
        "default": 0.5,
        "min": 0.0,
        "max": 1.0,
        "step": 0.01,
    },
    "width": {
        "label": "Width",
        "type": "int",
        "default": 1024,
        "min": 64,
        "step": 8,
    },
    "ip_adapter": {
        "label": "IP Adapter",
        "type": "custom_ip_adapter",
        "display": "input",
    },
}

# Mellon Model Parameters (diffusers_auto_model types)
MELLON_MODEL_PARAMS = {
    "scheduler": {
        "label": "Scheduler",
        "display": "input",
        "type": "diffusers_auto_model",
    },
    "text_encoders": {
        "label": "Text Encoders",
        "type": "diffusers_auto_models",
        "display": "input",
    },
    "unet": {
        "label": "Unet",
        "display": "input",
        "type": "diffusers_auto_model",
        "onSignal": {
            "action": "signal",
            "target": "guider",
        },
    },
    "guider": {
        "label": "Guider",
        "display": "input",
        "type": "custom_guider",
        "onChange": {False: ["guidance_scale"], True: []},
    },
    "vae": {
        "label": "VAE",
        "display": "input",
        "type": "diffusers_auto_model",
    },
    "controlnet": {
        "label": "Controlnet Model",
        "type": "diffusers_auto_model",
        "display": "input",
    },
}

# Mellon Output Parameters (display = "output")
MELLON_OUTPUT_PARAMS = {
    "embeddings": {
        "label": "Text Embeddings",
        "display": "output",
        "type": "embeddings",
    },
    "images": {
        "label": "Images",
        "type": "image",
        "display": "output",
    },
    "image_latents": {
        "label": "Image Latents",
        "type": "latents",
        "display": "output",
    },
    "latents": {
        "label": "Latents",
        "type": "latents",
        "display": "output",
    },
    "latents_preview": {
        "label": "Latents Preview",
        "display": "output",
        "type": "latent",
    },
    "controlnet_out": {
        "label": "Controlnet",
        "display": "output",
        "type": "controlnet",
    },
}


# Default param selections per supported node_type
# from MELLON_INPUT_PARAMS / MELLON_MODEL_PARAMS / MELLON_OUTPUT_PARAMS.
NODE_TYPE_PARAMS_MAP = {
    "controlnet": {
        "inputs": [
            "control_image",
            "controlnet_conditioning_scale",
            "control_guidance_start",
            "control_guidance_end",
            "height",
            "width",
        ],
        "model_inputs": [
            "controlnet",
            "vae",
        ],
        "outputs": [
            "controlnet",
        ],
        "block_names": ["controlnet_vae_encoder"],
    },
    "denoise": {
        "inputs": [
            "embeddings",
            "width",
            "height",
            "seed",
            "num_inference_steps",
            "guidance_scale",
            "image_latents",
            "strength",
            # custom adapters coming in as inputs
            "controlnet",
            # ip_adapter is optional and custom; include if available
            "ip_adapter",
        ],
        "model_inputs": [
            "unet",
            "guider",
            "scheduler",
        ],
        "outputs": [
            "latents",
            "latents_preview",
        ],
        "block_names": ["denoise"],
    },
    "vae_encoder": {
        "inputs": [
            "image",
            "width",
            "height",
        ],
        "model_inputs": [
            "vae",
        ],
        "outputs": [
            "image_latents",
        ],
        "block_names": ["vae_encoder"],
    },
    "text_encoder": {
        "inputs": [
            "prompt",
            "negative_prompt",
            # optional image prompt input supported in embeddings node
            "image",
        ],
        "model_inputs": [
            "text_encoders",
        ],
        "outputs": [
            "embeddings",
        ],
        "block_names": ["text_encoder"],
    },
    "decoder": {
        "inputs": [
            "latents",
        ],
        "model_inputs": [
            "vae",
        ],
        "outputs": [
            "images",
        ],
        "block_names": ["decode"],
    },
}

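# Illustrative sketch (editorial, not part of the original file): the "decoder"
# preset above is resolved by MellonNodeConfig.__post_init__ (defined below) into
# full param dicts by looking each string up in the default maps, e.g.:
#   inputs       -> {"latents": {"label": "Latents", "type": "latents", "display": "input"}}
#   model_inputs -> {"vae": {"label": "VAE", "display": "input", "type": "diffusers_auto_model"}}
#   outputs      -> {"images": {"label": "Images", "type": "image", "display": "output"}}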

@dataclass(frozen=True)
class MellonParam:
    name: str
    label: str
    type: str
    display: Optional[str] = None
    default: Any = None
    min: Optional[float] = None
    max: Optional[float] = None
    step: Optional[float] = None
    options: Any = None
    value: Any = None
    fieldOptions: Optional[Dict[str, Any]] = None
    onChange: Any = None
    onSignal: Any = None
    _map_to_input: Any = None  # the block input name this parameter maps to

    def to_dict(self) -> Dict[str, Any]:
        data = asdict(self)
        return {k: v for k, v in data.items() if not k.startswith("_") and v is not None}

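# Illustrative sketch (editorial, not part of the original file): a custom
# override and its serialized form, assuming the defaults above:
#   MellonParam(name="cfg", label="CFG", type="float", default=7.0).to_dict()
#   -> {"name": "cfg", "label": "CFG", "type": "float", "default": 7.0}
# None-valued and underscore-prefixed fields are dropped; "name" is kept here
# and popped later by _resolve_params_list.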

@dataclass
class MellonNodeConfig(PushToHubMixin):
    """
    A MellonNodeConfig is a base class for building Mellon node UIs with modular diffusers.

    <Tip warning={true}>

    This is an experimental feature and is likely to change in the future.

    </Tip>
    """

    inputs: List[Union[str, MellonParam]]
    model_inputs: List[Union[str, MellonParam]]
    outputs: List[Union[str, MellonParam]]
    blocks_names: List[str]
    node_type: str
    config_name = "mellon_config.json"

    def __post_init__(self):
        if isinstance(self.inputs, list):
            self.inputs = self._resolve_params_list(self.inputs, MELLON_INPUT_PARAMS)
        if isinstance(self.model_inputs, list):
            self.model_inputs = self._resolve_params_list(self.model_inputs, MELLON_MODEL_PARAMS)
        if isinstance(self.outputs, list):
            self.outputs = self._resolve_params_list(self.outputs, MELLON_OUTPUT_PARAMS)

    @staticmethod
    def _resolve_params_list(
        params: List[Union[str, MellonParam]], default_map: Dict[str, Dict[str, Any]]
    ) -> Dict[str, Dict[str, Any]]:
        def _resolve_param(
            param: Union[str, MellonParam], default_params_map: Dict[str, Dict[str, Any]]
        ) -> Tuple[str, Dict[str, Any]]:
            if isinstance(param, str):
                if param not in default_params_map:
                    raise ValueError(f"Unknown param '{param}', please define a `MellonParam` object instead")
                return param, default_params_map[param].copy()
            elif isinstance(param, MellonParam):
                param_dict = param.to_dict()
                param_name = param_dict.pop("name")
                return param_name, param_dict
            else:
                raise ValueError(
                    f"Unknown param type '{type(param)}', please use a string or a `MellonParam` object instead"
                )

        resolved = {}
        for p in params:
            logger.info(f" Resolving param: {p}")
            name, cfg = _resolve_param(p, default_map)
            if name in resolved:
                raise ValueError(f"Duplicate param '{name}'")
            resolved[name] = cfg
        return resolved

    @classmethod
    @validate_hf_hub_args
    def load_mellon_config(
        cls,
        pretrained_model_name_or_path: Union[str, os.PathLike],
        return_unused_kwargs=False,
        return_commit_hash=False,
        **kwargs,
    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        r"""
        Load a Mellon node configuration.

        Parameters:
            pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*):
                Can be either:

                - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on
                  the Hub.
                - A path to a *directory* (for example `./my_model_directory`) containing model weights saved with
                  [`~ConfigMixin.save_config`].

            cache_dir (`Union[str, os.PathLike]`, *optional*):
                Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
                is not used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                cached versions if they exist.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            output_loading_info (`bool`, *optional*, defaults to `False`):
                Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages.
            local_files_only (`bool`, *optional*, defaults to `False`):
                Whether to only load local model weights and configuration files or not. If set to `True`, the model
                won't be downloaded from the Hub.
            token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
                `diffusers-cli login` (stored in `~/.huggingface`) is used.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
                allowed by Git.
            subfolder (`str`, *optional*, defaults to `""`):
                The subfolder location of a model file within a larger model repository on the Hub or locally.
            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                Whether unused keyword arguments of the config are returned.
            return_commit_hash (`bool`, *optional*, defaults to `False`):
                Whether the `commit_hash` of the loaded configuration is returned.

        Returns:
            `dict`:
                A dictionary of all the parameters stored in a JSON configuration file.
        """
        cache_dir = kwargs.pop("cache_dir", None)
        local_dir = kwargs.pop("local_dir", None)
        local_dir_use_symlinks = kwargs.pop("local_dir_use_symlinks", "auto")
        force_download = kwargs.pop("force_download", False)
        proxies = kwargs.pop("proxies", None)
        token = kwargs.pop("token", None)
        local_files_only = kwargs.pop("local_files_only", False)
        revision = kwargs.pop("revision", None)

        pretrained_model_name_or_path = str(pretrained_model_name_or_path)

        if cls.config_name is None:
            raise ValueError(
                "`self.config_name` is not defined. Note that one should not load a config from "
                "`ConfigMixin`. Please make sure to define `config_name` in a class inheriting from `ConfigMixin`"
            )
        if os.path.isfile(pretrained_model_name_or_path):
            config_file = pretrained_model_name_or_path
        elif os.path.isdir(pretrained_model_name_or_path):
            if os.path.isfile(os.path.join(pretrained_model_name_or_path, cls.config_name)):
                config_file = os.path.join(pretrained_model_name_or_path, cls.config_name)
            else:
                raise EnvironmentError(
                    f"Error no file named {cls.config_name} found in directory {pretrained_model_name_or_path}."
                )
        else:
            try:
                # Load from URL or cache if already cached
                config_file = hf_hub_download(
                    pretrained_model_name_or_path,
                    filename=cls.config_name,
                    cache_dir=cache_dir,
                    force_download=force_download,
                    proxies=proxies,
                    local_files_only=local_files_only,
                    token=token,
                    revision=revision,
                    local_dir=local_dir,
                    local_dir_use_symlinks=local_dir_use_symlinks,
                )
            except RepositoryNotFoundError:
                raise EnvironmentError(
                    f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier"
                    " listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a"
                    " token having permission to this repo with `token` or log in with `hf auth login`."
                )
            except RevisionNotFoundError:
                raise EnvironmentError(
                    f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for"
                    " this model name. Check the model page at"
                    f" 'https://huggingface.co/{pretrained_model_name_or_path}' for available revisions."
                )
            except EntryNotFoundError:
                raise EnvironmentError(
                    f"{pretrained_model_name_or_path} does not appear to have a file named {cls.config_name}."
                )
            except HfHubHTTPError as err:
                raise EnvironmentError(
                    "There was a specific connection error when trying to load"
                    f" {pretrained_model_name_or_path}:\n{err}"
                )
            except ValueError:
                raise EnvironmentError(
                    f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it"
                    f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a"
                    f" directory containing a {cls.config_name} file.\nCheckout your internet connection or see how to"
                    " run the library in offline mode at"
                    " 'https://huggingface.co/docs/diffusers/installation#offline-mode'."
                )
            except EnvironmentError:
                raise EnvironmentError(
                    f"Can't load config for '{pretrained_model_name_or_path}'. If you were trying to load it from "
                    "'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
                    f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
                    f"containing a {cls.config_name} file"
                )
        try:
            with open(config_file, "r", encoding="utf-8") as reader:
                text = reader.read()
            config_dict = json.loads(text)

            commit_hash = extract_commit_hash(config_file)
        except (json.JSONDecodeError, UnicodeDecodeError):
            raise EnvironmentError(f"It looks like the config file at '{config_file}' is not a valid JSON file.")

        if not (return_unused_kwargs or return_commit_hash):
            return config_dict

        outputs = (config_dict,)

        if return_unused_kwargs:
            outputs += (kwargs,)

        if return_commit_hash:
            outputs += (commit_hash,)

        return outputs

    def save_mellon_config(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs):
        """
        Save the Mellon node definition to a JSON file.

        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the configuration JSON file is saved (will be created if it does not exist).
            push_to_hub (`bool`, *optional*, defaults to `False`):
                Whether or not to push your model to the Hugging Face Hub after saving it. You can specify the
                repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
                namespace).
            kwargs (`Dict[str, Any]`, *optional*):
                Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
        """
        if os.path.isfile(save_directory):
            raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")

        os.makedirs(save_directory, exist_ok=True)

        # If we save using the predefined names, we can load using `from_config`
        output_config_file = os.path.join(save_directory, self.config_name)

        self.to_json_file(output_config_file)
        logger.info(f"Mellon node definition saved in {output_config_file}")

        if push_to_hub:
            commit_message = kwargs.pop("commit_message", None)
            private = kwargs.pop("private", None)
            create_pr = kwargs.pop("create_pr", False)
            token = kwargs.pop("token", None)
            repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
            repo_id = create_repo(repo_id, exist_ok=True, private=private, token=token).repo_id
            subfolder = kwargs.pop("subfolder", None)

            self._upload_folder(
                save_directory,
                repo_id,
                token=token,
                commit_message=commit_message,
                create_pr=create_pr,
                subfolder=subfolder,
            )

    def to_json_file(self, json_file_path: Union[str, os.PathLike]):
        """
        Save the Mellon schema dictionary to a JSON file.

        Args:
            json_file_path (`str` or `os.PathLike`):
                Path to the JSON file to save a configuration instance's parameters.
        """
        with open(json_file_path, "w", encoding="utf-8") as writer:
            writer.write(self.to_json_string())

    def to_json_string(self) -> str:
        """
        Serializes this instance to a JSON string of the Mellon schema dict.

        Returns:
            `str`: String containing all the attributes that make up this configuration instance in JSON format.
        """
        mellon_dict = self.to_mellon_dict()
        return json.dumps(mellon_dict, indent=2, sort_keys=True) + "\n"

    def to_mellon_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict focusing on the Mellon schema fields only.

        params is a single flat dict composed as: {**inputs, **model_inputs, **outputs}.
        """
        # inputs/model_inputs/outputs are already normalized dicts
        merged_params = {}
        merged_params.update(self.inputs or {})
        merged_params.update(self.model_inputs or {})
        merged_params.update(self.outputs or {})

        return {
            "node_type": self.node_type,
            "blocks_names": self.blocks_names,
            "params": merged_params,
        }

    @classmethod
    def from_mellon_dict(cls, mellon_dict: Dict[str, Any]) -> "MellonNodeConfig":
        """Create a config from a Mellon schema dict produced by to_mellon_dict().

        Splits the flat params dict back into inputs/model_inputs/outputs using the known key spaces from
        MELLON_INPUT_PARAMS / MELLON_MODEL_PARAMS / MELLON_OUTPUT_PARAMS. Unknown keys are treated as inputs by
        default.
        """
        flat_params = mellon_dict.get("params", {})

        inputs: Dict[str, Any] = {}
        model_inputs: Dict[str, Any] = {}
        outputs: Dict[str, Any] = {}

        for param_name, param_dict in flat_params.items():
            if param_dict.get("display", "") == "output":
                outputs[param_name] = param_dict
            elif param_dict.get("type", "") in ("diffusers_auto_model", "diffusers_auto_models"):
                model_inputs[param_name] = param_dict
            else:
                inputs[param_name] = param_dict

        return cls(
            inputs=inputs,
            model_inputs=model_inputs,
            outputs=outputs,
            blocks_names=mellon_dict.get("blocks_names", []),
            node_type=mellon_dict.get("node_type"),
        )

    # YiYi Notes: not used yet
    @classmethod
    def from_blocks(cls, blocks: ModularPipelineBlocks, node_type: str) -> "MellonNodeConfig":
        """
        Create an instance from a ModularPipelineBlocks object. If a preset exists in NODE_TYPE_PARAMS_MAP for the
        node_type, use it; otherwise fall back to deriving lists from the pipeline's expected
        inputs/components/outputs.
        """
        if node_type not in NODE_TYPE_PARAMS_MAP:
            raise ValueError(f"Node type {node_type} not supported")

        blocks_names = list(blocks.sub_blocks.keys())

        default_node_config = NODE_TYPE_PARAMS_MAP[node_type]
        inputs_list: List[Union[str, MellonParam]] = default_node_config.get("inputs", [])
        model_inputs_list: List[Union[str, MellonParam]] = default_node_config.get("model_inputs", [])
        outputs_list: List[Union[str, MellonParam]] = default_node_config.get("outputs", [])

        for required_input_name in blocks.required_inputs:
            if required_input_name not in inputs_list:
                inputs_list.append(
                    MellonParam(
                        name=required_input_name, label=required_input_name, type=required_input_name, display="input"
                    )
                )

        for component_spec in blocks.expected_components:
            if component_spec.name not in model_inputs_list:
                model_inputs_list.append(
                    MellonParam(
                        name=component_spec.name,
                        label=component_spec.name,
                        type="diffusers_auto_model",
                        display="input",
                    )
                )

        return cls(
            inputs=inputs_list,
            model_inputs=model_inputs_list,
            outputs=outputs_list,
            blocks_names=blocks_names,
            node_type=node_type,
        )


# Minimal modular registry for Mellon node configs
class ModularMellonNodeRegistry:
    """Registry mapping a pipeline class to its per-node-type MellonNodeConfig presets."""

    def __init__(self):
        self._registry = {}
        self._initialized = False

    def register(self, pipeline_cls: type, node_params: Dict[str, MellonNodeConfig]):
        if not self._initialized:
            _initialize_registry(self)
        self._registry[pipeline_cls] = node_params

    def get(self, pipeline_cls: type) -> Dict[str, MellonNodeConfig]:
        if not self._initialized:
            _initialize_registry(self)
        return self._registry.get(pipeline_cls, None)

    def get_all(self) -> Dict[type, Dict[str, MellonNodeConfig]]:
        if not self._initialized:
            _initialize_registry(self)
        return self._registry


def _register_preset_node_types(
    pipeline_cls, params_map: Dict[str, Dict[str, Any]], registry: ModularMellonNodeRegistry
):
    """Register all node-type presets for a given pipeline class from a params map."""
    node_configs = {}
    for node_type, spec in params_map.items():
        node_config = MellonNodeConfig(
            inputs=spec.get("inputs", []),
            model_inputs=spec.get("model_inputs", []),
            outputs=spec.get("outputs", []),
            blocks_names=spec.get("block_names", []),
            node_type=node_type,
        )
        node_configs[node_type] = node_config
    registry.register(pipeline_cls, node_configs)


def _initialize_registry(registry: ModularMellonNodeRegistry):
    """Initialize the registry and register all available pipeline configs."""
    print("Initializing registry")

    registry._initialized = True

    try:
        from .qwenimage.modular_pipeline import QwenImageModularPipeline
        from .qwenimage.node_utils import QwenImage_NODE_TYPES_PARAMS_MAP

        _register_preset_node_types(QwenImageModularPipeline, QwenImage_NODE_TYPES_PARAMS_MAP, registry)
    except Exception:
        raise Exception("Failed to register QwenImageModularPipeline")

    try:
        from .stable_diffusion_xl.modular_pipeline import StableDiffusionXLModularPipeline
        from .stable_diffusion_xl.node_utils import SDXL_NODE_TYPES_PARAMS_MAP

        _register_preset_node_types(StableDiffusionXLModularPipeline, SDXL_NODE_TYPES_PARAMS_MAP, registry)
    except Exception:
        raise Exception("Failed to register StableDiffusionXLModularPipeline")
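A minimal usage sketch of the new module (editorial, not part of the diff; the import path follows the file location above and is an assumption):

```python
from diffusers.modular_pipelines.mellon_node_utils import (
    MellonNodeConfig,
    MellonParam,
    NODE_TYPE_PARAMS_MAP,
)

# Build a "decoder" node config from the preset map; __post_init__ resolves the
# string entries against MELLON_INPUT_PARAMS / MELLON_MODEL_PARAMS / MELLON_OUTPUT_PARAMS.
spec = NODE_TYPE_PARAMS_MAP["decoder"]
config = MellonNodeConfig(
    inputs=spec["inputs"],
    model_inputs=spec["model_inputs"],
    outputs=spec["outputs"],
    blocks_names=spec["block_names"],
    node_type="decoder",
)

# The same node with a custom override passed as a MellonParam instead of a preset string.
custom = MellonNodeConfig(
    inputs=[MellonParam(name="latents", label="Latents", type="latents", display="input")],
    model_inputs=["vae"],
    outputs=["images"],
    blocks_names=["decode"],
    node_type="decoder",
)

config.save_mellon_config("./decoder_node")  # writes ./decoder_node/mellon_config.json
```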
@@ -51,6 +51,7 @@ if is_accelerate_available():

logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


+# map regular pipeline to modular pipeline class name
MODULAR_PIPELINE_MAPPING = OrderedDict(
    [
        ("stable-diffusion-xl", "StableDiffusionXLModularPipeline"),
@@ -61,16 +62,6 @@ MODULAR_PIPELINE_MAPPING = OrderedDict(
    ]
)

-MODULAR_PIPELINE_BLOCKS_MAPPING = OrderedDict(
-    [
-        ("StableDiffusionXLModularPipeline", "StableDiffusionXLAutoBlocks"),
-        ("WanModularPipeline", "WanAutoBlocks"),
-        ("FluxModularPipeline", "FluxAutoBlocks"),
-        ("QwenImageModularPipeline", "QwenImageAutoBlocks"),
-        ("QwenImageEditModularPipeline", "QwenImageEditAutoBlocks"),
-    ]
-)
-

@dataclass
class PipelineState:
@@ -423,7 +414,7 @@ class ModularPipelineBlocks(ConfigMixin, PushToHubMixin):
                state.set(input_param.name, param, input_param.kwargs_type)

            elif input_param.kwargs_type:
-                # if it is a kwargs type, e.g. "guider_input_fields", it is likely to be a list of parameters
+                # if it is a kwargs type, e.g. "denoiser_input_fields", it is likely to be a list of parameters
                # we need to first find out which inputs are and loop through them.
                intermediate_kwargs = state.get_by_kwargs(input_param.kwargs_type)
                for param_name, current_value in intermediate_kwargs.items():
@@ -1454,6 +1445,7 @@ class ModularPipeline(ConfigMixin, PushToHubMixin):

    config_name = "modular_model_index.json"
    hf_device_map = None
+    default_blocks_name = None

    # YiYi TODO: add warning for passing multiple ComponentSpec/ConfigSpec with the same name
    def __init__(
@@ -1514,7 +1506,7 @@ class ModularPipeline(ConfigMixin, PushToHubMixin):
        `_blocks_class_name` in the config dict
        """
        if blocks is None:
-            blocks_class_name = MODULAR_PIPELINE_BLOCKS_MAPPING.get(self.__class__.__name__)
+            blocks_class_name = self.default_blocks_name
            if blocks_class_name is not None:
                diffusers_module = importlib.import_module("diffusers")
                blocks_class = getattr(diffusers_module, blocks_class_name)
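In effect, each pipeline subclass now declares its own default blocks class instead of relying on the central `MODULAR_PIPELINE_BLOCKS_MAPPING`. A hedged sketch of the new lookup path (hypothetical driver code, not from the diff; class names as in the hunks above):

```python
import importlib

import diffusers


def resolve_default_blocks(pipeline_cls):
    # Each ModularPipeline subclass carries its own `default_blocks_name`
    # (e.g. "FluxAutoBlocks"); None means no default blocks are available.
    blocks_class_name = getattr(pipeline_cls, "default_blocks_name", None)
    if blocks_class_name is None:
        return None
    return getattr(importlib.import_module("diffusers"), blocks_class_name)


blocks_cls = resolve_default_blocks(diffusers.FluxModularPipeline)  # -> FluxAutoBlocks (assumed export)
```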
@@ -1,665 +0,0 @@
import json
import logging
import os
from pathlib import Path
from typing import List, Optional, Tuple, Union

import numpy as np
import PIL
import torch

from ..configuration_utils import ConfigMixin
from ..image_processor import PipelineImageInput
from .modular_pipeline import ModularPipelineBlocks, SequentialPipelineBlocks
from .modular_pipeline_utils import InputParam


logger = logging.getLogger(__name__)

# YiYi Notes: this is actually for SDXL, put it here for now
SDXL_INPUTS_SCHEMA = {
    "prompt": InputParam(
        "prompt", type_hint=Union[str, List[str]], description="The prompt or prompts to guide the image generation"
    ),
    "prompt_2": InputParam(
        "prompt_2",
        type_hint=Union[str, List[str]],
        description="The prompt or prompts to be sent to the tokenizer_2 and text_encoder_2",
    ),
    "negative_prompt": InputParam(
        "negative_prompt",
        type_hint=Union[str, List[str]],
        description="The prompt or prompts not to guide the image generation",
    ),
    "negative_prompt_2": InputParam(
        "negative_prompt_2",
        type_hint=Union[str, List[str]],
        description="The negative prompt or prompts for text_encoder_2",
    ),
    "cross_attention_kwargs": InputParam(
        "cross_attention_kwargs",
        type_hint=Optional[dict],
        description="Kwargs dictionary passed to the AttentionProcessor",
    ),
    "clip_skip": InputParam(
        "clip_skip", type_hint=Optional[int], description="Number of layers to skip in CLIP text encoder"
    ),
    "image": InputParam(
        "image",
        type_hint=PipelineImageInput,
        required=True,
        description="The image(s) to modify for img2img or inpainting",
    ),
    "mask_image": InputParam(
        "mask_image",
        type_hint=PipelineImageInput,
        required=True,
        description="Mask image for inpainting, white pixels will be repainted",
    ),
    "generator": InputParam(
        "generator",
        type_hint=Optional[Union[torch.Generator, List[torch.Generator]]],
        description="Generator(s) for deterministic generation",
    ),
    "height": InputParam("height", type_hint=Optional[int], description="Height in pixels of the generated image"),
    "width": InputParam("width", type_hint=Optional[int], description="Width in pixels of the generated image"),
    "num_images_per_prompt": InputParam(
        "num_images_per_prompt", type_hint=int, default=1, description="Number of images to generate per prompt"
    ),
    "num_inference_steps": InputParam(
        "num_inference_steps", type_hint=int, default=50, description="Number of denoising steps"
    ),
    "timesteps": InputParam(
        "timesteps", type_hint=Optional[torch.Tensor], description="Custom timesteps for the denoising process"
    ),
    "sigmas": InputParam(
        "sigmas", type_hint=Optional[torch.Tensor], description="Custom sigmas for the denoising process"
    ),
    "denoising_end": InputParam(
        "denoising_end",
        type_hint=Optional[float],
        description="Fraction of denoising process to complete before termination",
    ),
    # YiYi Notes: img2img defaults to 0.3, inpainting defaults to 0.9999
    "strength": InputParam(
        "strength", type_hint=float, default=0.3, description="How much to transform the reference image"
    ),
    "denoising_start": InputParam(
        "denoising_start", type_hint=Optional[float], description="Starting point of the denoising process"
    ),
    "latents": InputParam(
        "latents", type_hint=Optional[torch.Tensor], description="Pre-generated noisy latents for image generation"
    ),
    "padding_mask_crop": InputParam(
        "padding_mask_crop",
        type_hint=Optional[Tuple[int, int]],
        description="Size of margin in crop for image and mask",
    ),
    "original_size": InputParam(
        "original_size",
        type_hint=Optional[Tuple[int, int]],
        description="Original size of the image for SDXL's micro-conditioning",
    ),
    "target_size": InputParam(
        "target_size", type_hint=Optional[Tuple[int, int]], description="Target size for SDXL's micro-conditioning"
    ),
    "negative_original_size": InputParam(
        "negative_original_size",
        type_hint=Optional[Tuple[int, int]],
        description="Negative conditioning based on image resolution",
    ),
    "negative_target_size": InputParam(
        "negative_target_size",
        type_hint=Optional[Tuple[int, int]],
        description="Negative conditioning based on target resolution",
    ),
    "crops_coords_top_left": InputParam(
        "crops_coords_top_left",
        type_hint=Tuple[int, int],
        default=(0, 0),
        description="Top-left coordinates for SDXL's micro-conditioning",
    ),
    "negative_crops_coords_top_left": InputParam(
        "negative_crops_coords_top_left",
        type_hint=Tuple[int, int],
        default=(0, 0),
        description="Negative conditioning crop coordinates",
    ),
    "aesthetic_score": InputParam(
        "aesthetic_score", type_hint=float, default=6.0, description="Simulates aesthetic score of generated image"
    ),
    "negative_aesthetic_score": InputParam(
        "negative_aesthetic_score", type_hint=float, default=2.0, description="Simulates negative aesthetic score"
    ),
    "eta": InputParam("eta", type_hint=float, default=0.0, description="Parameter η in the DDIM paper"),
    "output_type": InputParam(
        "output_type", type_hint=str, default="pil", description="Output format (pil/tensor/np.array)"
    ),
    "ip_adapter_image": InputParam(
        "ip_adapter_image",
        type_hint=PipelineImageInput,
        required=True,
        description="Image(s) to be used as IP adapter",
    ),
    "control_image": InputParam(
        "control_image", type_hint=PipelineImageInput, required=True, description="ControlNet input condition"
    ),
    "control_guidance_start": InputParam(
        "control_guidance_start",
        type_hint=Union[float, List[float]],
        default=0.0,
        description="When ControlNet starts applying",
    ),
    "control_guidance_end": InputParam(
        "control_guidance_end",
        type_hint=Union[float, List[float]],
        default=1.0,
        description="When ControlNet stops applying",
    ),
    "controlnet_conditioning_scale": InputParam(
        "controlnet_conditioning_scale",
        type_hint=Union[float, List[float]],
        default=1.0,
        description="Scale factor for ControlNet outputs",
    ),
    "guess_mode": InputParam(
        "guess_mode",
        type_hint=bool,
        default=False,
        description="Enables ControlNet encoder to recognize input without prompts",
    ),
    "control_mode": InputParam(
        "control_mode", type_hint=List[int], required=True, description="Control mode for union controlnet"
    ),
}

SDXL_INTERMEDIATE_INPUTS_SCHEMA = {
    "prompt_embeds": InputParam(
        "prompt_embeds",
        type_hint=torch.Tensor,
        required=True,
        description="Text embeddings used to guide image generation",
    ),
    "negative_prompt_embeds": InputParam(
        "negative_prompt_embeds", type_hint=torch.Tensor, description="Negative text embeddings"
    ),
    "pooled_prompt_embeds": InputParam(
        "pooled_prompt_embeds", type_hint=torch.Tensor, required=True, description="Pooled text embeddings"
    ),
    "negative_pooled_prompt_embeds": InputParam(
        "negative_pooled_prompt_embeds", type_hint=torch.Tensor, description="Negative pooled text embeddings"
    ),
    "batch_size": InputParam("batch_size", type_hint=int, required=True, description="Number of prompts"),
    "dtype": InputParam("dtype", type_hint=torch.dtype, description="Data type of model tensor inputs"),
    "preprocess_kwargs": InputParam(
        "preprocess_kwargs", type_hint=Optional[dict], description="Kwargs for ImageProcessor"
    ),
    "latents": InputParam(
        "latents", type_hint=torch.Tensor, required=True, description="Initial latents for denoising process"
    ),
    "timesteps": InputParam("timesteps", type_hint=torch.Tensor, required=True, description="Timesteps for inference"),
    "num_inference_steps": InputParam(
        "num_inference_steps", type_hint=int, required=True, description="Number of denoising steps"
    ),
    "latent_timestep": InputParam(
        "latent_timestep", type_hint=torch.Tensor, required=True, description="Initial noise level timestep"
    ),
    "image_latents": InputParam(
        "image_latents", type_hint=torch.Tensor, required=True, description="Latents representing reference image"
    ),
    "mask": InputParam("mask", type_hint=torch.Tensor, required=True, description="Mask for inpainting"),
    "masked_image_latents": InputParam(
        "masked_image_latents", type_hint=torch.Tensor, description="Masked image latents for inpainting"
    ),
    "add_time_ids": InputParam(
        "add_time_ids", type_hint=torch.Tensor, required=True, description="Time ids for conditioning"
    ),
    "negative_add_time_ids": InputParam(
        "negative_add_time_ids", type_hint=torch.Tensor, description="Negative time ids"
    ),
    "timestep_cond": InputParam("timestep_cond", type_hint=torch.Tensor, description="Timestep conditioning for LCM"),
    "noise": InputParam("noise", type_hint=torch.Tensor, description="Noise added to image latents"),
    "crops_coords": InputParam("crops_coords", type_hint=Optional[Tuple[int]], description="Crop coordinates"),
    "ip_adapter_embeds": InputParam(
        "ip_adapter_embeds", type_hint=List[torch.Tensor], description="Image embeddings for IP-Adapter"
    ),
    "negative_ip_adapter_embeds": InputParam(
        "negative_ip_adapter_embeds",
        type_hint=List[torch.Tensor],
        description="Negative image embeddings for IP-Adapter",
    ),
    "images": InputParam(
        "images",
        type_hint=Union[List[PIL.Image.Image], List[torch.Tensor], List[np.array]],
        required=True,
        description="Generated images",
    ),
}

SDXL_PARAM_SCHEMA = {**SDXL_INPUTS_SCHEMA, **SDXL_INTERMEDIATE_INPUTS_SCHEMA}


DEFAULT_PARAM_MAPS = {
    "prompt": {
        "label": "Prompt",
        "type": "string",
        "default": "a bear sitting in a chair drinking a milkshake",
        "display": "textarea",
    },
    "negative_prompt": {
        "label": "Negative Prompt",
        "type": "string",
        "default": "deformed, ugly, wrong proportion, low res, bad anatomy, worst quality, low quality",
        "display": "textarea",
    },
    "num_inference_steps": {
        "label": "Steps",
        "type": "int",
        "default": 25,
        "min": 1,
        "max": 1000,
    },
    "seed": {
        "label": "Seed",
        "type": "int",
        "default": 0,
        "min": 0,
        "display": "random",
    },
    "width": {
        "label": "Width",
        "type": "int",
        "display": "text",
        "default": 1024,
        "min": 8,
        "max": 8192,
        "step": 8,
        "group": "dimensions",
    },
    "height": {
        "label": "Height",
        "type": "int",
        "display": "text",
        "default": 1024,
        "min": 8,
        "max": 8192,
        "step": 8,
        "group": "dimensions",
    },
    "images": {
        "label": "Images",
        "type": "image",
        "display": "output",
    },
    "image": {
        "label": "Image",
        "type": "image",
        "display": "input",
    },
}

DEFAULT_TYPE_MAPS = {
    "int": {
        "type": "int",
        "default": 0,
        "min": 0,
    },
    "float": {
        "type": "float",
        "default": 0.0,
        "min": 0.0,
    },
    "str": {
        "type": "string",
        "default": "",
    },
    "bool": {
        "type": "boolean",
        "default": False,
    },
    "image": {
        "type": "image",
    },
}

DEFAULT_MODEL_KEYS = ["unet", "vae", "text_encoder", "tokenizer", "controlnet", "transformer", "image_encoder"]
DEFAULT_CATEGORY = "Modular Diffusers"
DEFAULT_EXCLUDE_MODEL_KEYS = ["processor", "feature_extractor", "safety_checker"]
DEFAULT_PARAMS_GROUPS_KEYS = {
    "text_encoders": ["text_encoder", "tokenizer"],
    "ip_adapter_embeds": ["ip_adapter_embeds"],
    "prompt_embeddings": ["prompt_embeds"],
}


def get_group_name(name, group_params_keys=DEFAULT_PARAMS_GROUPS_KEYS):
    """
    Get the group name for a given parameter name; if not part of a group, return None. E.g. "prompt_embeds" ->
    "prompt_embeddings", "text_encoder" -> "text_encoders", "prompt" -> None
    """
    if name is None:
        return None
    for group_name, group_keys in group_params_keys.items():
        for group_key in group_keys:
            if group_key in name:
                return group_name
    return None

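# Illustrative sketch (editorial, not part of the original file): get_group_name
# does substring matching against the group keys above, e.g.:
#   get_group_name("negative_prompt_embeds")  -> "prompt_embeddings"
#   get_group_name("text_encoder_2")          -> "text_encoders"
#   get_group_name("prompt")                  -> None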
class ModularNode(ConfigMixin):
    """
    A ModularNode is a base class to build UI nodes using diffusers. Currently only supports Mellon. It is a wrapper
    around a ModularPipelineBlocks object.

    <Tip warning={true}>

    This is an experimental feature and is likely to change in the future.

    </Tip>
    """

    config_name = "node_config.json"

    @classmethod
    def from_pretrained(
        cls,
        pretrained_model_name_or_path: str,
        trust_remote_code: Optional[bool] = None,
        **kwargs,
    ):
        blocks = ModularPipelineBlocks.from_pretrained(
            pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
        )
        return cls(blocks, **kwargs)

    def __init__(self, blocks, category=DEFAULT_CATEGORY, label=None, **kwargs):
        self.blocks = blocks

        if label is None:
            label = self.blocks.__class__.__name__
        # blocks param name -> mellon param name
        self.name_mapping = {}

        input_params = {}
        # Pass or create a default param dict for each input, e.g. for prompt:
        # prompt = {
        #     "name": "text_input",  # the name of the input in the node definition; can differ from the diffusers input name
        #     "label": "Prompt",
        #     "type": "string",
        #     "default": "a bear sitting in a chair drinking a milkshake",
        #     "display": "textarea",
        # }
        # If type is not specified, it'll be a "custom" param of its own type, e.g. you can pass
        # ModularNode(scheduler={"name": "scheduler"}) and it will get this spec in the node definition:
        # {"scheduler": {"label": "Scheduler", "type": "scheduler", "display": "input"}}
        # name can also be a dict; in that case it is part of a "dict" input in Mellon nodes, e.g.
        # text_encoder={"name": {"text_encoders": "text_encoder"}}
        inputs = self.blocks.inputs + self.blocks.intermediate_inputs
        for inp in inputs:
            param = kwargs.pop(inp.name, None)
            if param:
                # user can pass a param dict for any input, e.g. ModularNode(prompt={...})
                input_params[inp.name] = param
                mellon_name = param.pop("name", inp.name)
                if mellon_name != inp.name:
                    self.name_mapping[inp.name] = mellon_name
                continue

            if inp.name not in DEFAULT_PARAM_MAPS and not inp.required and not get_group_name(inp.name):
                continue

            if inp.name in DEFAULT_PARAM_MAPS:
                # first check if it's in the default param map; if so, directly use that
                param = DEFAULT_PARAM_MAPS[inp.name].copy()
            elif get_group_name(inp.name):
                param = get_group_name(inp.name)
                if inp.name not in self.name_mapping:
                    self.name_mapping[inp.name] = param
            else:
                # If not, use the type hint (falling back to the SDXL input schema) to determine the type,
                # then use the default param dict for that type,
                # e.g. if "steps" is an "int" type: {"steps": {"type": "int", "default": 0, "min": 0}}
                if inp.type_hint is not None:
                    type_str = str(inp.type_hint).lower()
                else:
                    inp_spec = SDXL_PARAM_SCHEMA.get(inp.name, None)
                    type_str = str(inp_spec.type_hint).lower() if inp_spec else ""
                for type_key, type_param in DEFAULT_TYPE_MAPS.items():
                    if type_key in type_str:
                        param = type_param.copy()
                        param["label"] = inp.name
                        param["display"] = "input"
                        break
                else:
                    param = inp.name
            # add the param dict to the input_params dict
            input_params[inp.name] = param

        component_params = {}
        for comp in self.blocks.expected_components:
            param = kwargs.pop(comp.name, None)
            if param:
                component_params[comp.name] = param
                mellon_name = param.pop("name", comp.name)
                if mellon_name != comp.name:
                    self.name_mapping[comp.name] = mellon_name
                continue

            to_exclude = False
            for exclude_key in DEFAULT_EXCLUDE_MODEL_KEYS:
                if exclude_key in comp.name:
                    to_exclude = True
                    break
            if to_exclude:
                continue

            if get_group_name(comp.name):
                param = get_group_name(comp.name)
                if comp.name not in self.name_mapping:
                    self.name_mapping[comp.name] = param
            elif comp.name in DEFAULT_MODEL_KEYS:
                param = {"label": comp.name, "type": "diffusers_auto_model", "display": "input"}
            else:
                param = comp.name
            # add the param dict to the component_params dict
            component_params[comp.name] = param

        output_params = {}
        if isinstance(self.blocks, SequentialPipelineBlocks):
            last_block_name = list(self.blocks.sub_blocks.keys())[-1]
            outputs = self.blocks.sub_blocks[last_block_name].intermediate_outputs
        else:
            outputs = self.blocks.intermediate_outputs

        for out in outputs:
            param = kwargs.pop(out.name, None)
            if param:
                output_params[out.name] = param
                mellon_name = param.pop("name", out.name)
                if mellon_name != out.name:
                    self.name_mapping[out.name] = mellon_name
                continue

            if out.name in DEFAULT_PARAM_MAPS:
                param = DEFAULT_PARAM_MAPS[out.name].copy()
                param["display"] = "output"
            else:
                group_name = get_group_name(out.name)
                if group_name:
                    param = group_name
                    if out.name not in self.name_mapping:
                        self.name_mapping[out.name] = param
                else:
                    param = out.name
            # add the param dict to the output_params dict
            output_params[out.name] = param

        if len(kwargs) > 0:
            logger.warning(f"Unused kwargs: {kwargs}")

        register_dict = {
            "category": category,
            "label": label,
            "input_params": input_params,
            "component_params": component_params,
            "output_params": output_params,
            "name_mapping": self.name_mapping,
        }
        self.register_to_config(**register_dict)

    def setup(self, components_manager, collection=None):
        self.pipeline = self.blocks.init_pipeline(components_manager=components_manager, collection=collection)
        self._components_manager = components_manager

    @property
    def mellon_config(self):
        return self._convert_to_mellon_config()

    def _convert_to_mellon_config(self):
        node = {}
        node["label"] = self.config.label
        node["category"] = self.config.category

        node_param = {}
        for inp_name, inp_param in self.config.input_params.items():
            if inp_name in self.name_mapping:
                mellon_name = self.name_mapping[inp_name]
            else:
                mellon_name = inp_name
            if isinstance(inp_param, str):
                param = {
                    "label": inp_param,
                    "type": inp_param,
                    "display": "input",
                }
            else:
                param = inp_param

            if mellon_name not in node_param:
                node_param[mellon_name] = param
            else:
                logger.debug(f"Input param {mellon_name} already exists in node_param, skipping {inp_name}")

        for comp_name, comp_param in self.config.component_params.items():
            if comp_name in self.name_mapping:
                mellon_name = self.name_mapping[comp_name]
            else:
                mellon_name = comp_name
            if isinstance(comp_param, str):
                param = {
                    "label": comp_param,
                    "type": comp_param,
                    "display": "input",
                }
            else:
                param = comp_param

            if mellon_name not in node_param:
                node_param[mellon_name] = param
            else:
                logger.debug(f"Component param {comp_param} already exists in node_param, skipping {comp_name}")

        for out_name, out_param in self.config.output_params.items():
            if out_name in self.name_mapping:
                mellon_name = self.name_mapping[out_name]
            else:
                mellon_name = out_name
            if isinstance(out_param, str):
                param = {
                    "label": out_param,
                    "type": out_param,
                    "display": "output",
                }
            else:
                param = out_param

            if mellon_name not in node_param:
                node_param[mellon_name] = param
            else:
                logger.debug(f"Output param {out_param} already exists in node_param, skipping {out_name}")
        node["params"] = node_param
        return node

    def save_mellon_config(self, file_path):
        """
        Save the Mellon configuration to a JSON file.

        Args:
            file_path (str or Path): Path where the JSON file will be saved

        Returns:
            Path: Path to the saved config file
        """
        file_path = Path(file_path)

        # Create directory if it doesn't exist
        os.makedirs(file_path.parent, exist_ok=True)

        # Create a combined dictionary with module definition and name mapping
        config = {"module": self.mellon_config, "name_mapping": self.name_mapping}

        # Save the config to file
        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=2)

        logger.info(f"Mellon config and name mapping saved to {file_path}")

        return file_path

    @classmethod
    def load_mellon_config(cls, file_path):
        """
        Load a Mellon configuration from a JSON file.

        Args:
            file_path (str or Path): Path to the JSON file containing Mellon config

        Returns:
            dict: The loaded combined configuration containing 'module' and 'name_mapping'
        """
        file_path = Path(file_path)

        if not file_path.exists():
            raise FileNotFoundError(f"Config file not found: {file_path}")

        with open(file_path, "r", encoding="utf-8") as f:
            config = json.load(f)

        logger.info(f"Mellon config loaded from {file_path}")

        return config

    def process_inputs(self, **kwargs):
        params_components = {}
        for comp_name, comp_param in self.config.component_params.items():
            logger.debug(f"component: {comp_name}")
            mellon_comp_name = self.name_mapping.get(comp_name, comp_name)
            if mellon_comp_name in kwargs:
                if isinstance(kwargs[mellon_comp_name], dict) and comp_name in kwargs[mellon_comp_name]:
                    comp = kwargs[mellon_comp_name].pop(comp_name)
                else:
                    comp = kwargs.pop(mellon_comp_name)
                if comp:
                    params_components[comp_name] = self._components_manager.get_one(comp["model_id"])

        params_run = {}
        for inp_name, inp_param in self.config.input_params.items():
            logger.debug(f"input: {inp_name}")
            mellon_inp_name = self.name_mapping.get(inp_name, inp_name)
            if mellon_inp_name in kwargs:
                if isinstance(kwargs[mellon_inp_name], dict) and inp_name in kwargs[mellon_inp_name]:
                    inp = kwargs[mellon_inp_name].pop(inp_name)
                else:
                    inp = kwargs.pop(mellon_inp_name)
                if inp is not None:
                    params_run[inp_name] = inp

        return_output_names = list(self.config.output_params.keys())

        return params_components, params_run, return_output_names

    def execute(self, **kwargs):
        params_components, params_run, return_output_names = self.process_inputs(**kwargs)

        self.pipeline.update_components(**params_components)
        output = self.pipeline(**params_run, output=return_output_names)
        return output
@@ -577,9 +577,8 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
    def inputs(self) -> List[InputParam]:
        return [
            InputParam(name="batch_size", required=True),
-            InputParam(
-                name="resized_image", required=True, type_hint=torch.Tensor, description="The resized image input"
-            ),
+            InputParam(name="image_height", required=True),
+            InputParam(name="image_width", required=True),
            InputParam(name="height", required=True),
            InputParam(name="width", required=True),
            InputParam(name="prompt_embeds_mask"),
@@ -612,10 +611,6 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
        block_state = self.get_block_state(state)

-        # for edit, image size can be different from the target size (height/width)
-        image = (
-            block_state.resized_image[0] if isinstance(block_state.resized_image, list) else block_state.resized_image
-        )
-        image_width, image_height = image.size

        block_state.img_shapes = [
            [
@@ -624,7 +619,11 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
                    block_state.height // components.vae_scale_factor // 2,
                    block_state.width // components.vae_scale_factor // 2,
                ),
-                (1, image_height // components.vae_scale_factor // 2, image_width // components.vae_scale_factor // 2),
+                (
+                    1,
+                    block_state.image_height // components.vae_scale_factor // 2,
+                    block_state.image_width // components.vae_scale_factor // 2,
+                ),
            ]
        ] * block_state.batch_size
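For intuition, a quick arithmetic sketch of the `img_shapes` entries (editorial; assumes `vae_scale_factor = 8`, a 1024×1024 target, and a 768×768 source image resolved upstream):

```python
vae_scale_factor = 8  # assumed; read from the pipeline's VAE config in practice
height, width = 1024, 1024            # target latents
image_height, image_width = 768, 768  # source image size, now taken from block_state

target_shape = (1, height // vae_scale_factor // 2, width // vae_scale_factor // 2)
image_shape = (1, image_height // vae_scale_factor // 2, image_width // vae_scale_factor // 2)
print(target_shape, image_shape)  # (1, 64, 64) (1, 48, 48)
```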
@@ -496,7 +496,7 @@ class QwenImageEditTextEncoderStep(ModularPipelineBlocks):
        )

        if components.requires_unconditional_embeds:
-            negative_prompt = block_state.negative_prompt or ""
+            negative_prompt = block_state.negative_prompt or " "
            block_state.negative_prompt_embeds, block_state.negative_prompt_embeds_mask = get_qwen_prompt_embeds_edit(
                components.text_encoder,
                components.processor,
@@ -307,6 +307,13 @@ class QwenImageInputsDynamicStep(ModularPipelineBlocks):

        return inputs

+    @property
+    def intermediate_outputs(self) -> List[OutputParam]:
+        return [
+            OutputParam(name="image_height", type_hint=int, description="The height of the image latents"),
+            OutputParam(name="image_width", type_hint=int, description="The width of the image latents"),
+        ]
+
    @property
    def expected_components(self) -> List[ComponentSpec]:
        return [
@@ -327,6 +334,11 @@ class QwenImageInputsDynamicStep(ModularPipelineBlocks):
        block_state.height = block_state.height or height
        block_state.width = block_state.width or width

+        if not hasattr(block_state, "image_height"):
+            block_state.image_height = height
+        if not hasattr(block_state, "image_width"):
+            block_state.image_width = width

        # 2. Patchify the image latent tensor
        image_latent_tensor = components.pachifier.pack_latents(image_latent_tensor)
@@ -511,17 +511,42 @@ class QwenImageAutoDecodeStep(AutoPipelineBlocks):
    )


class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
    model_name = "qwenimage"
    block_classes = [
        QwenImageAutoInputStep,
        QwenImageOptionalControlNetInputStep,
        QwenImageAutoBeforeDenoiseStep,
        QwenImageOptionalControlNetBeforeDenoiseStep,
        QwenImageAutoDenoiseStep,
    ]
    block_names = ["input", "controlnet_input", "before_denoise", "controlnet_before_denoise", "denoise"]

    @property
    def description(self):
        return (
            "Core step that performs the denoising process. \n"
            + " - `QwenImageAutoInputStep` (input) standardizes the inputs for the denoising step.\n"
            + " - `QwenImageOptionalControlNetInputStep` (controlnet_input) prepares the controlnet input.\n"
            + " - `QwenImageAutoBeforeDenoiseStep` (before_denoise) prepares the inputs for the denoising step.\n"
            + " - `QwenImageOptionalControlNetBeforeDenoiseStep` (controlnet_before_denoise) prepares the controlnet input for the denoising step.\n"
            + " - `QwenImageAutoDenoiseStep` (denoise) iteratively denoises the latents.\n\n"
            + "This step supports text-to-image, image-to-image, inpainting, and controlnet tasks for QwenImage:\n"
            + " - for image-to-image generation, you need to provide `image_latents`\n"
            + " - for inpainting, you need to provide `processed_mask_image` and `image_latents`\n"
            + " - to run the controlnet workflow, you need to provide `control_image_latents`\n"
            + " - for text-to-image generation, all you need to provide is prompt embeddings"
        )


## 1.10 QwenImage/auto block & presets
AUTO_BLOCKS = InsertableDict(
    [
        ("text_encoder", QwenImageTextEncoderStep()),
        ("vae_encoder", QwenImageAutoVaeEncoderStep()),
        ("controlnet_vae_encoder", QwenImageOptionalControlNetVaeEncoderStep()),
        ("input", QwenImageAutoInputStep()),
        ("controlnet_input", QwenImageOptionalControlNetInputStep()),
        ("before_denoise", QwenImageAutoBeforeDenoiseStep()),
        ("controlnet_before_denoise", QwenImageOptionalControlNetBeforeDenoiseStep()),
        ("denoise", QwenImageAutoDenoiseStep()),
        ("denoise", QwenImageCoreDenoiseStep()),
        ("decode", QwenImageAutoDecodeStep()),
    ]
)
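For context, a sketch of how a preset like this is assembled into a runnable pipeline; the repo id is a placeholder and the loading details should be checked against the modular-pipelines docs:

# Sketch only: assembling the preset into a pipeline (repo id is a placeholder).
from diffusers.modular_pipelines import SequentialPipelineBlocks

blocks = SequentialPipelineBlocks.from_blocks_dict(AUTO_BLOCKS)
pipe = blocks.init_pipeline("Qwen/Qwen-Image")  # component specs come from the repo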
@@ -699,7 +724,7 @@ class QwenImageEditAutoVaeEncoderStep(AutoPipelineBlocks):
class QwenImageEditAutoInputStep(AutoPipelineBlocks):
    block_classes = [QwenImageInpaintInputStep, QwenImageEditInputStep]
    block_names = ["edit_inpaint", "edit"]
    block_trigger_inputs = ["processed_mask_image", "image"]
    block_trigger_inputs = ["processed_mask_image", "image_latents"]

    @property
    def description(self):
@@ -800,13 +825,34 @@ class QwenImageEditAutoDenoiseStep(AutoPipelineBlocks):

## 2.7 QwenImage-Edit/auto blocks & presets


class QwenImageEditCoreDenoiseStep(SequentialPipelineBlocks):
    model_name = "qwenimage-edit"
    block_classes = [
        QwenImageEditAutoInputStep,
        QwenImageEditAutoBeforeDenoiseStep,
        QwenImageEditAutoDenoiseStep,
    ]
    block_names = ["input", "before_denoise", "denoise"]

    @property
    def description(self):
        return (
            "Core step that performs the denoising process. \n"
            + " - `QwenImageEditAutoInputStep` (input) standardizes the inputs for the denoising step.\n"
            + " - `QwenImageEditAutoBeforeDenoiseStep` (before_denoise) prepares the inputs for the denoising step.\n"
            + " - `QwenImageEditAutoDenoiseStep` (denoise) iteratively denoises the latents.\n\n"
            + "This step supports the edit (img2img) and edit inpainting workflows for QwenImage Edit:\n"
            + " - When `processed_mask_image` is provided, it will be used for the edit inpainting task.\n"
            + " - When `image_latents` is provided, it will be used for the edit (img2img) task.\n"
        )


EDIT_AUTO_BLOCKS = InsertableDict(
    [
        ("text_encoder", QwenImageEditVLEncoderStep()),
        ("vae_encoder", QwenImageEditAutoVaeEncoderStep()),
        ("input", QwenImageEditAutoInputStep()),
        ("before_denoise", QwenImageEditAutoBeforeDenoiseStep()),
        ("denoise", QwenImageEditAutoDenoiseStep()),
        ("denoise", QwenImageEditCoreDenoiseStep()),
        ("decode", QwenImageAutoDecodeStep()),
    ]
)
@@ -104,6 +104,8 @@ class QwenImageModularPipeline(ModularPipeline, QwenImageLoraLoaderMixin):
    </Tip>
    """

    default_blocks_name = "QwenImageAutoBlocks"

    @property
    def default_height(self):
        return self.default_sample_size * self.vae_scale_factor
@@ -158,6 +160,8 @@ class QwenImageEditModularPipeline(ModularPipeline, QwenImageLoraLoaderMixin):
    </Tip>
    """

    default_blocks_name = "QwenImageEditAutoBlocks"

    # YiYi TODO: qwen edit should not provide default height/width, should be derived from the resized input image (after adjustment) produced by the resize step.
    @property
    def default_height(self):
95
src/diffusers/modular_pipelines/qwenimage/node_utils.py
Normal file
@@ -0,0 +1,95 @@
# Copyright 2025 Qwen-Image Team and The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# mellon nodes
QwenImage_NODE_TYPES_PARAMS_MAP = {
    "controlnet": {
        "inputs": [
            "control_image",
            "controlnet_conditioning_scale",
            "control_guidance_start",
            "control_guidance_end",
            "height",
            "width",
        ],
        "model_inputs": [
            "controlnet",
            "vae",
        ],
        "outputs": [
            "controlnet_out",
        ],
        "block_names": ["controlnet_vae_encoder"],
    },
    "denoise": {
        "inputs": [
            "embeddings",
            "width",
            "height",
            "seed",
            "num_inference_steps",
            "guidance_scale",
            "image_latents",
            "strength",
            "controlnet",
        ],
        "model_inputs": [
            "unet",
            "guider",
            "scheduler",
        ],
        "outputs": [
            "latents",
            "latents_preview",
        ],
        "block_names": ["denoise"],
    },
    "vae_encoder": {
        "inputs": [
            "image",
            "width",
            "height",
        ],
        "model_inputs": [
            "vae",
        ],
        "outputs": [
            "image_latents",
        ],
    },
    "text_encoder": {
        "inputs": [
            "prompt",
            "negative_prompt",
        ],
        "model_inputs": [
            "text_encoders",
        ],
        "outputs": [
            "embeddings",
        ],
    },
    "decoder": {
        "inputs": [
            "latents",
        ],
        "model_inputs": [
            "vae",
        ],
        "outputs": [
            "images",
        ],
    },
}
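For context, a rough sketch of how a node-type registry like this can be resolved against the shared MELLON_INPUT_PARAMS table from mellon_node_utils.py; the helper below is hypothetical, not part of the diff:

# Hypothetical helper (not part of the diff): expand one node type into a spec dict.
def build_node_spec(node_type: str) -> dict:
    params = QwenImage_NODE_TYPES_PARAMS_MAP[node_type]
    return {
        "inputs": {name: MELLON_INPUT_PARAMS.get(name, {"label": name}) for name in params["inputs"]},
        "model_inputs": list(params["model_inputs"]),
        "outputs": list(params["outputs"]),
        "block_names": params.get("block_names", []),
    }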
@@ -262,37 +262,37 @@ class StableDiffusionXLInputStep(ModularPipelineBlocks):
            OutputParam(
                "prompt_embeds",
                type_hint=torch.Tensor,
                kwargs_type="guider_input_fields",  # already in intermediates state but declare here again for guider_input_fields
                kwargs_type="denoiser_input_fields",  # already in intermediates state but declare here again for denoiser_input_fields
                description="text embeddings used to guide the image generation",
            ),
            OutputParam(
                "negative_prompt_embeds",
                type_hint=torch.Tensor,
                kwargs_type="guider_input_fields",  # already in intermediates state but declare here again for guider_input_fields
                kwargs_type="denoiser_input_fields",  # already in intermediates state but declare here again for denoiser_input_fields
                description="negative text embeddings used to guide the image generation",
            ),
            OutputParam(
                "pooled_prompt_embeds",
                type_hint=torch.Tensor,
                kwargs_type="guider_input_fields",  # already in intermediates state but declare here again for guider_input_fields
                kwargs_type="denoiser_input_fields",  # already in intermediates state but declare here again for denoiser_input_fields
                description="pooled text embeddings used to guide the image generation",
            ),
            OutputParam(
                "negative_pooled_prompt_embeds",
                type_hint=torch.Tensor,
                kwargs_type="guider_input_fields",  # already in intermediates state but declare here again for guider_input_fields
                kwargs_type="denoiser_input_fields",  # already in intermediates state but declare here again for denoiser_input_fields
                description="negative pooled text embeddings used to guide the image generation",
            ),
            OutputParam(
                "ip_adapter_embeds",
                type_hint=List[torch.Tensor],
                kwargs_type="guider_input_fields",  # already in intermediates state but declare here again for guider_input_fields
                kwargs_type="denoiser_input_fields",  # already in intermediates state but declare here again for denoiser_input_fields
                description="image embeddings for IP-Adapter",
            ),
            OutputParam(
                "negative_ip_adapter_embeds",
                type_hint=List[torch.Tensor],
                kwargs_type="guider_input_fields",  # already in intermediates state but declare here again for guider_input_fields
                kwargs_type="denoiser_input_fields",  # already in intermediates state but declare here again for denoiser_input_fields
                description="negative image embeddings for IP-Adapter",
            ),
        ]
@@ -1120,13 +1120,13 @@ class StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep(ModularPipelineBlocks):
            OutputParam(
                "add_time_ids",
                type_hint=torch.Tensor,
                kwargs_type="guider_input_fields",
                kwargs_type="denoiser_input_fields",
                description="The time ids to condition the denoising process",
            ),
            OutputParam(
                "negative_add_time_ids",
                type_hint=torch.Tensor,
                kwargs_type="guider_input_fields",
                kwargs_type="denoiser_input_fields",
                description="The negative time ids to condition the denoising process",
            ),
            OutputParam("timestep_cond", type_hint=torch.Tensor, description="The timestep cond to use for LCM"),
@@ -1331,13 +1331,13 @@ class StableDiffusionXLPrepareAdditionalConditioningStep(ModularPipelineBlocks):
            OutputParam(
                "add_time_ids",
                type_hint=torch.Tensor,
                kwargs_type="guider_input_fields",
                kwargs_type="denoiser_input_fields",
                description="The time ids to condition the denoising process",
            ),
            OutputParam(
                "negative_add_time_ids",
                type_hint=torch.Tensor,
                kwargs_type="guider_input_fields",
                kwargs_type="denoiser_input_fields",
                description="The negative time ids to condition the denoising process",
            ),
            OutputParam("timestep_cond", type_hint=torch.Tensor, description="The timestep cond to use for LCM"),
@@ -183,14 +183,14 @@ class StableDiffusionXLLoopDenoiser(ModularPipelineBlocks):
                description="The guidance scale embedding to use for Latent Consistency Models(LCMs). Can be generated in prepare_additional_conditioning step.",
            ),
            InputParam(
                kwargs_type="guider_input_fields",
                kwargs_type="denoiser_input_fields",
                description=(
                    "All conditional model inputs that need to be prepared with guider. "
                    "It should contain prompt_embeds/negative_prompt_embeds, "
                    "add_time_ids/negative_add_time_ids, "
                    "pooled_prompt_embeds/negative_pooled_prompt_embeds, "
                    "and ip_adapter_embeds/negative_ip_adapter_embeds (optional). "
                    "Please add `kwargs_type=guider_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
                    "Please add `kwargs_type=denoiser_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
                ),
            ),
        ]
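The rename is mechanical, but the convention it touches is worth spelling out: producers tag each conditioning field with a shared kwargs_type, and a nameless InputParam with the same kwargs_type asks for the whole group at once. A hedged illustration, inferred from the surrounding declarations rather than verified against the state implementation:

# Illustration of the kwargs_type grouping convention (hedged reading of the spec):
# producer side: OutputParam("prompt_embeds", kwargs_type="denoiser_input_fields")
# consumer side: InputParam(kwargs_type="denoiser_input_fields")
# The loop denoiser then receives every tagged field as one group (prompt_embeds,
# negative_prompt_embeds, add_time_ids, ...) instead of declaring each one by name.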
@@ -307,14 +307,14 @@ class StableDiffusionXLControlNetLoopDenoiser(ModularPipelineBlocks):
                description="The number of inference steps to use for the denoising process. Can be generated in set_timesteps step.",
            ),
            InputParam(
                kwargs_type="guider_input_fields",
                kwargs_type="denoiser_input_fields",
                description=(
                    "All conditional model inputs that need to be prepared with guider. "
                    "It should contain prompt_embeds/negative_prompt_embeds, "
                    "add_time_ids/negative_add_time_ids, "
                    "pooled_prompt_embeds/negative_pooled_prompt_embeds, "
                    "and ip_adapter_embeds/negative_ip_adapter_embeds (optional). "
                    "Please add `kwargs_type=guider_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
                    "Please add `kwargs_type=denoiser_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
                ),
            ),
            InputParam(
@@ -258,25 +258,25 @@ class StableDiffusionXLTextEncoderStep(ModularPipelineBlocks):
            OutputParam(
                "prompt_embeds",
                type_hint=torch.Tensor,
                kwargs_type="guider_input_fields",
                kwargs_type="denoiser_input_fields",
                description="text embeddings used to guide the image generation",
            ),
            OutputParam(
                "negative_prompt_embeds",
                type_hint=torch.Tensor,
                kwargs_type="guider_input_fields",
                kwargs_type="denoiser_input_fields",
                description="negative text embeddings used to guide the image generation",
            ),
            OutputParam(
                "pooled_prompt_embeds",
                type_hint=torch.Tensor,
                kwargs_type="guider_input_fields",
                kwargs_type="denoiser_input_fields",
                description="pooled text embeddings used to guide the image generation",
            ),
            OutputParam(
                "negative_pooled_prompt_embeds",
                type_hint=torch.Tensor,
                kwargs_type="guider_input_fields",
                kwargs_type="denoiser_input_fields",
                description="negative pooled text embeddings used to guide the image generation",
            ),
        ]
@@ -82,19 +82,17 @@ class StableDiffusionXLAutoIPAdapterStep(AutoPipelineBlocks):
|
||||
# before_denoise: text2img
|
||||
class StableDiffusionXLBeforeDenoiseStep(SequentialPipelineBlocks):
|
||||
block_classes = [
|
||||
StableDiffusionXLInputStep,
|
||||
StableDiffusionXLSetTimestepsStep,
|
||||
StableDiffusionXLPrepareLatentsStep,
|
||||
StableDiffusionXLPrepareAdditionalConditioningStep,
|
||||
]
|
||||
block_names = ["input", "set_timesteps", "prepare_latents", "prepare_add_cond"]
|
||||
block_names = ["set_timesteps", "prepare_latents", "prepare_add_cond"]
|
||||
|
||||
@property
|
||||
def description(self):
|
||||
return (
|
||||
"Before denoise step that prepare the inputs for the denoise step.\n"
|
||||
+ "This is a sequential pipeline blocks:\n"
|
||||
+ " - `StableDiffusionXLInputStep` is used to adjust the batch size of the model inputs\n"
|
||||
+ " - `StableDiffusionXLSetTimestepsStep` is used to set the timesteps\n"
|
||||
+ " - `StableDiffusionXLPrepareLatentsStep` is used to prepare the latents\n"
|
||||
+ " - `StableDiffusionXLPrepareAdditionalConditioningStep` is used to prepare the additional conditioning\n"
|
||||
@@ -104,19 +102,17 @@ class StableDiffusionXLBeforeDenoiseStep(SequentialPipelineBlocks):
# before_denoise: img2img
class StableDiffusionXLImg2ImgBeforeDenoiseStep(SequentialPipelineBlocks):
    block_classes = [
        StableDiffusionXLInputStep,
        StableDiffusionXLImg2ImgSetTimestepsStep,
        StableDiffusionXLImg2ImgPrepareLatentsStep,
        StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep,
    ]
    block_names = ["input", "set_timesteps", "prepare_latents", "prepare_add_cond"]
    block_names = ["set_timesteps", "prepare_latents", "prepare_add_cond"]

    @property
    def description(self):
        return (
            "Before denoise step that prepares the inputs for the denoise step for the img2img task.\n"
            + "This is a sequential pipeline of blocks:\n"
            + " - `StableDiffusionXLInputStep` is used to adjust the batch size of the model inputs\n"
            + " - `StableDiffusionXLImg2ImgSetTimestepsStep` is used to set the timesteps\n"
            + " - `StableDiffusionXLImg2ImgPrepareLatentsStep` is used to prepare the latents\n"
            + " - `StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep` is used to prepare the additional conditioning\n"
@@ -126,19 +122,17 @@ class StableDiffusionXLImg2ImgBeforeDenoiseStep(SequentialPipelineBlocks):
# before_denoise: inpainting
class StableDiffusionXLInpaintBeforeDenoiseStep(SequentialPipelineBlocks):
    block_classes = [
        StableDiffusionXLInputStep,
        StableDiffusionXLImg2ImgSetTimestepsStep,
        StableDiffusionXLInpaintPrepareLatentsStep,
        StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep,
    ]
    block_names = ["input", "set_timesteps", "prepare_latents", "prepare_add_cond"]
    block_names = ["set_timesteps", "prepare_latents", "prepare_add_cond"]

    @property
    def description(self):
        return (
            "Before denoise step that prepares the inputs for the denoise step for the inpainting task.\n"
            + "This is a sequential pipeline of blocks:\n"
            + " - `StableDiffusionXLInputStep` is used to adjust the batch size of the model inputs\n"
            + " - `StableDiffusionXLImg2ImgSetTimestepsStep` is used to set the timesteps\n"
            + " - `StableDiffusionXLInpaintPrepareLatentsStep` is used to prepare the latents\n"
            + " - `StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep` is used to prepare the additional conditioning\n"
@@ -255,25 +249,48 @@ class StableDiffusionXLAutoDecodeStep(AutoPipelineBlocks):
    )


class StableDiffusionXLCoreDenoiseStep(SequentialPipelineBlocks):
    block_classes = [
        StableDiffusionXLInputStep,
        StableDiffusionXLAutoBeforeDenoiseStep,
        StableDiffusionXLAutoControlNetInputStep,
        StableDiffusionXLAutoDenoiseStep,
    ]
    block_names = ["input", "before_denoise", "controlnet_input", "denoise"]

    @property
    def description(self):
        return (
            "Core step that performs the denoising process. \n"
            + " - `StableDiffusionXLInputStep` (input) standardizes the inputs for the denoising step.\n"
            + " - `StableDiffusionXLAutoBeforeDenoiseStep` (before_denoise) prepares the inputs for the denoising step.\n"
            + " - `StableDiffusionXLAutoControlNetInputStep` (controlnet_input) prepares the controlnet input.\n"
            + " - `StableDiffusionXLAutoDenoiseStep` (denoise) iteratively denoises the latents.\n\n"
            + "This step supports text-to-image, image-to-image, and inpainting, with or without controlnet/controlnet_union/ip_adapter, for Stable Diffusion XL:\n"
            + "- for image-to-image generation, you need to provide `image_latents`\n"
            + "- for inpainting, you need to provide `mask_image` and `image_latents`\n"
            + "- to run the controlnet workflow, you need to provide `control_image`\n"
            + "- to run the controlnet_union workflow, you need to provide `control_image` and `control_mode`\n"
            + "- to run the ip_adapter workflow, you need to load ip_adapter into your unet and provide `ip_adapter_embeds`\n"
            + "- for text-to-image generation, all you need to provide is prompt embeddings\n"
        )
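As a usage illustration of the dispatch described above (the pipeline object and its inputs are placeholders; the workflow is selected purely by which inputs are passed):

# Illustrative only: the same core denoise step picks its workflow from the inputs.
# text-to-image:  pipe(prompt_embeds=embeds, height=1024, width=1024, output="latents")
# image-to-image: pipe(prompt_embeds=embeds, image_latents=img_latents, strength=0.6, output="latents")
# inpainting:     pipe(prompt_embeds=embeds, image_latents=img_latents, mask_image=mask, output="latents")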


# ip-adapter, controlnet, text2img, img2img, inpainting
class StableDiffusionXLAutoBlocks(SequentialPipelineBlocks):
    block_classes = [
        StableDiffusionXLTextEncoderStep,
        StableDiffusionXLAutoIPAdapterStep,
        StableDiffusionXLAutoVaeEncoderStep,
        StableDiffusionXLAutoBeforeDenoiseStep,
        StableDiffusionXLAutoControlNetInputStep,
        StableDiffusionXLAutoDenoiseStep,
        StableDiffusionXLCoreDenoiseStep,
        StableDiffusionXLAutoDecodeStep,
    ]
    block_names = [
        "text_encoder",
        "ip_adapter",
        "image_encoder",
        "before_denoise",
        "controlnet_input",
        "vae_encoder",
        "denoise",
        "decoder",
        "decode",
    ]

    @property
@@ -321,7 +338,7 @@ TEXT2IMAGE_BLOCKS = InsertableDict(
IMAGE2IMAGE_BLOCKS = InsertableDict(
    [
        ("text_encoder", StableDiffusionXLTextEncoderStep),
        ("image_encoder", StableDiffusionXLVaeEncoderStep),
        ("vae_encoder", StableDiffusionXLVaeEncoderStep),
        ("input", StableDiffusionXLInputStep),
        ("set_timesteps", StableDiffusionXLImg2ImgSetTimestepsStep),
        ("prepare_latents", StableDiffusionXLImg2ImgPrepareLatentsStep),
@@ -334,7 +351,7 @@ IMAGE2IMAGE_BLOCKS = InsertableDict(
INPAINT_BLOCKS = InsertableDict(
    [
        ("text_encoder", StableDiffusionXLTextEncoderStep),
        ("image_encoder", StableDiffusionXLInpaintVaeEncoderStep),
        ("vae_encoder", StableDiffusionXLInpaintVaeEncoderStep),
        ("input", StableDiffusionXLInputStep),
        ("set_timesteps", StableDiffusionXLImg2ImgSetTimestepsStep),
        ("prepare_latents", StableDiffusionXLInpaintPrepareLatentsStep),
@@ -361,10 +378,8 @@ AUTO_BLOCKS = InsertableDict(
    [
        ("text_encoder", StableDiffusionXLTextEncoderStep),
        ("ip_adapter", StableDiffusionXLAutoIPAdapterStep),
        ("image_encoder", StableDiffusionXLAutoVaeEncoderStep),
        ("before_denoise", StableDiffusionXLAutoBeforeDenoiseStep),
        ("controlnet_input", StableDiffusionXLAutoControlNetInputStep),
        ("denoise", StableDiffusionXLAutoDenoiseStep),
        ("vae_encoder", StableDiffusionXLAutoVaeEncoderStep),
        ("denoise", StableDiffusionXLCoreDenoiseStep),
        ("decode", StableDiffusionXLAutoDecodeStep),
    ]
)
@@ -54,6 +54,8 @@ class StableDiffusionXLModularPipeline(
    </Tip>
    """

    default_blocks_name = "StableDiffusionXLAutoBlocks"

    @property
    def default_height(self):
        return self.default_sample_size * self.vae_scale_factor
@@ -0,0 +1,99 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


SDXL_NODE_TYPES_PARAMS_MAP = {
    "controlnet": {
        "inputs": [
            "control_image",
            "controlnet_conditioning_scale",
            "control_guidance_start",
            "control_guidance_end",
            "height",
            "width",
        ],
        "model_inputs": [
            "controlnet",
        ],
        "outputs": [
            "controlnet_out",
        ],
        "block_names": [None],
    },
    "denoise": {
        "inputs": [
            "embeddings",
            "width",
            "height",
            "seed",
            "num_inference_steps",
            "guidance_scale",
            "image_latents",
            "strength",
            # custom adapters coming in as inputs
            "controlnet",
            # ip_adapter is optional and custom; include if available
            "ip_adapter",
        ],
        "model_inputs": [
            "unet",
            "guider",
            "scheduler",
        ],
        "outputs": [
            "latents",
            "latents_preview",
        ],
        "block_names": ["denoise"],
    },
    "vae_encoder": {
        "inputs": [
            "image",
            "width",
            "height",
        ],
        "model_inputs": [
            "vae",
        ],
        "outputs": [
            "image_latents",
        ],
        "block_names": ["vae_encoder"],
    },
    "text_encoder": {
        "inputs": [
            "prompt",
            "negative_prompt",
        ],
        "model_inputs": [
            "text_encoders",
        ],
        "outputs": [
            "embeddings",
        ],
        "block_names": ["text_encoder"],
    },
    "decoder": {
        "inputs": [
            "latents",
        ],
        "model_inputs": [
            "vae",
        ],
        "outputs": [
            "images",
        ],
        "block_names": ["decode"],
    },
}
@@ -146,13 +146,13 @@ class WanInputStep(ModularPipelineBlocks):
            OutputParam(
                "prompt_embeds",
                type_hint=torch.Tensor,
                kwargs_type="guider_input_fields",  # already in intermediates state but declare here again for guider_input_fields
                kwargs_type="denoiser_input_fields",  # already in intermediates state but declare here again for denoiser_input_fields
                description="text embeddings used to guide the image generation",
            ),
            OutputParam(
                "negative_prompt_embeds",
                type_hint=torch.Tensor,
                kwargs_type="guider_input_fields",  # already in intermediates state but declare here again for guider_input_fields
                kwargs_type="denoiser_input_fields",  # already in intermediates state but declare here again for denoiser_input_fields
                description="negative text embeddings used to guide the image generation",
            ),
        ]
@@ -79,11 +79,11 @@ class WanLoopDenoiser(ModularPipelineBlocks):
                description="The number of inference steps to use for the denoising process. Can be generated in set_timesteps step.",
            ),
            InputParam(
                kwargs_type="guider_input_fields",
                kwargs_type="denoiser_input_fields",
                description=(
                    "All conditional model inputs that need to be prepared with guider. "
                    "It should contain prompt_embeds/negative_prompt_embeds. "
                    "Please add `kwargs_type=guider_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
                    "Please add `kwargs_type=denoiser_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
                ),
            ),
        ]
@@ -89,13 +89,13 @@ class WanTextEncoderStep(ModularPipelineBlocks):
            OutputParam(
                "prompt_embeds",
                type_hint=torch.Tensor,
                kwargs_type="guider_input_fields",
                kwargs_type="denoiser_input_fields",
                description="text embeddings used to guide the image generation",
            ),
            OutputParam(
                "negative_prompt_embeds",
                type_hint=torch.Tensor,
                kwargs_type="guider_input_fields",
                kwargs_type="denoiser_input_fields",
                description="negative text embeddings used to guide the image generation",
            ),
        ]
@@ -37,6 +37,8 @@ class WanModularPipeline(
    </Tip>
    """

    default_blocks_name = "WanAutoBlocks"

    @property
    def default_height(self):
        return self.default_sample_height * self.vae_scale_factor_spatial