
[modular] some small fix (#12307)

* fix

* add mellon node registry

* style

* update docstring to include more info!

* support custom node mellon

* HTTPError -> HfHubHTTPError

* up

* Update src/diffusers/modular_pipelines/qwenimage/node_utils.py

Author: YiYi Xu
Date: 2025-09-29 11:42:34 -10:00
Committed by: GitHub
Parent: c07fcf780a
Commit: 76d4e416bc
20 changed files with 1107 additions and 741 deletions

View File

@@ -32,6 +32,8 @@ class FluxModularPipeline(ModularPipeline, FluxLoraLoaderMixin, TextualInversion
</Tip>
"""
default_blocks_name = "FluxAutoBlocks"
@property
def default_height(self):
return self.default_sample_size * self.vae_scale_factor
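# Illustrative (values assumed): with default_sample_size=128 and vae_scale_factor=8,
# default_height resolves to 128 * 8 = 1024 pixels.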

View File

@@ -0,0 +1,763 @@
import json
import logging
import os
# Simple typed wrapper for parameter overrides
from dataclasses import asdict, dataclass
from typing import Any, Dict, List, Optional, Tuple, Union
from huggingface_hub import create_repo, hf_hub_download
from huggingface_hub.utils import (
EntryNotFoundError,
HfHubHTTPError,
RepositoryNotFoundError,
RevisionNotFoundError,
validate_hf_hub_args,
)
from ..utils import HUGGINGFACE_CO_RESOLVE_ENDPOINT, PushToHubMixin, extract_commit_hash
from .modular_pipeline import ModularPipelineBlocks
logger = logging.getLogger(__name__)
SUPPORTED_NODE_TYPES = {"controlnet", "vae_encoder", "denoise", "text_encoder", "decoder"}
# Mellon Input Parameters (runtime parameters, not models)
MELLON_INPUT_PARAMS = {
# controlnet
"control_image": {
"label": "Control Image",
"type": "image",
"display": "input",
},
"controlnet_conditioning_scale": {
"label": "Scale",
"type": "float",
"default": 0.5,
"min": 0,
"max": 1,
},
"control_guidance_end": {
"label": "End",
"type": "float",
"default": 1.0,
"min": 0,
"max": 1,
},
"control_guidance_start": {
"label": "Start",
"type": "float",
"default": 0.0,
"min": 0,
"max": 1,
},
"controlnet": {
"label": "Controlnet",
"type": "custom_controlnet",
"display": "input",
},
"embeddings": {
"label": "Text Embeddings",
"display": "input",
"type": "embeddings",
},
"image": {
"label": "Image",
"type": "image",
"display": "input",
},
"negative_prompt": {
"label": "Negative Prompt",
"type": "string",
"default": "",
"display": "textarea",
},
"prompt": {
"label": "Prompt",
"type": "string",
"default": "",
"display": "textarea",
},
"guidance_scale": {
"label": "Guidance Scale",
"type": "float",
"display": "slider",
"default": 5,
"min": 1.0,
"max": 30.0,
"step": 0.1,
},
"height": {
"label": "Height",
"type": "int",
"default": 1024,
"min": 64,
"step": 8,
},
"image_latents": {
"label": "Image Latents",
"type": "latents",
"display": "input",
"onChange": {False: ["height", "width"], True: ["strength"]},
},
"latents": {
"label": "Latents",
"type": "latents",
"display": "input",
},
"num_inference_steps": {
"label": "Steps",
"type": "int",
"display": "slider",
"default": 25,
"min": 1,
"max": 100,
},
"seed": {
"label": "Seed",
"type": "int",
"display": "random",
"default": 0,
"min": 0,
"max": 4294967295,
},
"strength": {
"label": "Strength",
"type": "float",
"default": 0.5,
"min": 0.0,
"max": 1.0,
"step": 0.01,
},
"width": {
"label": "Width",
"type": "int",
"default": 1024,
"min": 64,
"step": 8,
},
"ip_adapter": {
"label": "IP Adapter",
"type": "custom_ip_adapter",
"display": "input",
},
}
# Mellon Model Parameters (diffusers_auto_model types)
MELLON_MODEL_PARAMS = {
"scheduler": {
"label": "Scheduler",
"display": "input",
"type": "diffusers_auto_model",
},
"text_encoders": {
"label": "Text Encoders",
"type": "diffusers_auto_models",
"display": "input",
},
"unet": {
"label": "Unet",
"display": "input",
"type": "diffusers_auto_model",
"onSignal": {
"action": "signal",
"target": "guider",
},
},
"guider": {
"label": "Guider",
"display": "input",
"type": "custom_guider",
"onChange": {False: ["guidance_scale"], True: []},
},
"vae": {
"label": "VAE",
"display": "input",
"type": "diffusers_auto_model",
},
"controlnet": {
"label": "Controlnet Model",
"type": "diffusers_auto_model",
"display": "input",
},
}
# Mellon Output Parameters (display = "output")
MELLON_OUTPUT_PARAMS = {
"embeddings": {
"label": "Text Embeddings",
"display": "output",
"type": "embeddings",
},
"images": {
"label": "Images",
"type": "image",
"display": "output",
},
"image_latents": {
"label": "Image Latents",
"type": "latents",
"display": "output",
},
"latents": {
"label": "Latents",
"type": "latents",
"display": "output",
},
"latents_preview": {
"label": "Latents Preview",
"display": "output",
"type": "latent",
},
"controlnet_out": {
"label": "Controlnet",
"display": "output",
"type": "controlnet",
},
}
# Default param selections per supported node_type
# from MELLON_INPUT_PARAMS / MELLON_MODEL_PARAMS / MELLON_OUTPUT_PARAMS.
NODE_TYPE_PARAMS_MAP = {
"controlnet": {
"inputs": [
"control_image",
"controlnet_conditioning_scale",
"control_guidance_start",
"control_guidance_end",
"height",
"width",
],
"model_inputs": [
"controlnet",
"vae",
],
"outputs": [
"controlnet",
],
"block_names": ["controlnet_vae_encoder"],
},
"denoise": {
"inputs": [
"embeddings",
"width",
"height",
"seed",
"num_inference_steps",
"guidance_scale",
"image_latents",
"strength",
# custom adapters coming in as inputs
"controlnet",
# ip_adapter is optional and custom; include if available
"ip_adapter",
],
"model_inputs": [
"unet",
"guider",
"scheduler",
],
"outputs": [
"latents",
"latents_preview",
],
"block_names": ["denoise"],
},
"vae_encoder": {
"inputs": [
"image",
"width",
"height",
],
"model_inputs": [
"vae",
],
"outputs": [
"image_latents",
],
"block_names": ["vae_encoder"],
},
"text_encoder": {
"inputs": [
"prompt",
"negative_prompt",
# optional image prompt input supported in embeddings node
"image",
],
"model_inputs": [
"text_encoders",
],
"outputs": [
"embeddings",
],
"block_names": ["text_encoder"],
},
"decoder": {
"inputs": [
"latents",
],
"model_inputs": [
"vae",
],
"outputs": [
"images",
],
"block_names": ["decode"],
},
}
@dataclass(frozen=True)
class MellonParam:
name: str
label: str
type: str
display: Optional[str] = None
default: Any = None
min: Optional[float] = None
max: Optional[float] = None
step: Optional[float] = None
options: Any = None
value: Any = None
fieldOptions: Optional[Dict[str, Any]] = None
onChange: Any = None
onSignal: Any = None
_map_to_input: Any = None # the block input name this parameter maps to
def to_dict(self) -> Dict[str, Any]:
data = asdict(self)
return {k: v for k, v in data.items() if not k.startswith("_") and v is not None}
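# Illustrative: MellonParam(name="strength", label="Strength", type="float", default=0.5).to_dict()
# returns {"name": "strength", "label": "Strength", "type": "float", "default": 0.5};
# None-valued fields and underscore-prefixed fields such as _map_to_input are dropped.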
@dataclass
class MellonNodeConfig(PushToHubMixin):
"""
A MellonNodeConfig is a base class for building Mellon node UIs with Modular Diffusers.
<Tip warning={true}>
This is an experimental feature and is likely to change in the future.
</Tip>
"""
inputs: List[Union[str, MellonParam]]
model_inputs: List[Union[str, MellonParam]]
outputs: List[Union[str, MellonParam]]
blocks_names: list[str]
node_type: str
config_name = "mellon_config.json"
def __post_init__(self):
if isinstance(self.inputs, list):
self.inputs = self._resolve_params_list(self.inputs, MELLON_INPUT_PARAMS)
if isinstance(self.model_inputs, list):
self.model_inputs = self._resolve_params_list(self.model_inputs, MELLON_MODEL_PARAMS)
if isinstance(self.outputs, list):
self.outputs = self._resolve_params_list(self.outputs, MELLON_OUTPUT_PARAMS)
@staticmethod
def _resolve_params_list(
params: List[Union[str, MellonParam]], default_map: Dict[str, Dict[str, Any]]
) -> Dict[str, Dict[str, Any]]:
def _resolve_param(
param: Union[str, MellonParam], default_params_map: Dict[str, Dict[str, Any]]
) -> Tuple[str, Dict[str, Any]]:
if isinstance(param, str):
if param not in default_params_map:
raise ValueError(f"Unknown param '{param}', please define a `MellonParam` object instead")
return param, default_params_map[param].copy()
elif isinstance(param, MellonParam):
param_dict = param.to_dict()
param_name = param_dict.pop("name")
return param_name, param_dict
else:
raise ValueError(
f"Unknown param type '{type(param)}', please use a string or a `MellonParam` object instead"
)
resolved = {}
for p in params:
logger.info(f" Resolving param: {p}")
name, cfg = _resolve_param(p, default_map)
if name in resolved:
raise ValueError(f"Duplicate param '{name}'")
resolved[name] = cfg
return resolved
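# Illustrative: _resolve_params_list(["prompt"], MELLON_INPUT_PARAMS) returns
# {"prompt": {"label": "Prompt", "type": "string", "default": "", "display": "textarea"}};
# a MellonParam entry instead contributes {param.name: param.to_dict() without "name"}.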
@classmethod
@validate_hf_hub_args
def load_mellon_config(
cls,
pretrained_model_name_or_path: Union[str, os.PathLike],
return_unused_kwargs=False,
return_commit_hash=False,
**kwargs,
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
r"""
Load a Mellon node configuration.
Parameters:
pretrained_model_name_or_path (`str` or `os.PathLike`):
Can be either:
- A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on
the Hub.
- A path to a *directory* (for example `./my_model_directory`) containing a Mellon node config saved
with [`~MellonNodeConfig.save_mellon_config`].
cache_dir (`Union[str, os.PathLike]`, *optional*):
Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
is not used.
force_download (`bool`, *optional*, defaults to `False`):
Whether or not to force the (re-)download of the model weights and configuration files, overriding the
cached versions if they exist.
proxies (`Dict[str, str]`, *optional*):
A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
local_files_only (`bool`, *optional*, defaults to `False`):
Whether to only load local model weights and configuration files or not. If set to `True`, the model
won't be downloaded from the Hub.
token (`str` or *bool*, *optional*):
The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
`diffusers-cli login` (stored in `~/.huggingface`) is used.
revision (`str`, *optional*, defaults to `"main"`):
The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
allowed by Git.
subfolder (`str`, *optional*, defaults to `""`):
The subfolder location of a model file within a larger model repository on the Hub or locally.
return_unused_kwargs (`bool`, *optional*, defaults to `False`):
Whether unused keyword arguments of the config are returned.
return_commit_hash (`bool`, *optional*, defaults to `False`):
Whether the `commit_hash` of the loaded configuration is returned.
Returns:
`dict`:
A dictionary of all the parameters stored in a JSON configuration file.
"""
cache_dir = kwargs.pop("cache_dir", None)
local_dir = kwargs.pop("local_dir", None)
local_dir_use_symlinks = kwargs.pop("local_dir_use_symlinks", "auto")
force_download = kwargs.pop("force_download", False)
proxies = kwargs.pop("proxies", None)
token = kwargs.pop("token", None)
local_files_only = kwargs.pop("local_files_only", False)
revision = kwargs.pop("revision", None)
pretrained_model_name_or_path = str(pretrained_model_name_or_path)
if cls.config_name is None:
raise ValueError(
"`self.config_name` is not defined. Note that one should not load a config from "
"`ConfigMixin`. Please make sure to define `config_name` in a class inheriting from `ConfigMixin`"
)
if os.path.isfile(pretrained_model_name_or_path):
config_file = pretrained_model_name_or_path
elif os.path.isdir(pretrained_model_name_or_path):
if os.path.isfile(os.path.join(pretrained_model_name_or_path, cls.config_name)):
# Load from a local directory
config_file = os.path.join(pretrained_model_name_or_path, cls.config_name)
else:
raise EnvironmentError(
f"Error no file named {cls.config_name} found in directory {pretrained_model_name_or_path}."
)
else:
try:
# Load from URL or cache if already cached
config_file = hf_hub_download(
pretrained_model_name_or_path,
filename=cls.config_name,
cache_dir=cache_dir,
force_download=force_download,
proxies=proxies,
local_files_only=local_files_only,
token=token,
revision=revision,
local_dir=local_dir,
local_dir_use_symlinks=local_dir_use_symlinks,
)
except RepositoryNotFoundError:
raise EnvironmentError(
f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier"
" listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a"
" token having permission to this repo with `token` or log in with `hf auth login`."
)
except RevisionNotFoundError:
raise EnvironmentError(
f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for"
" this model name. Check the model page at"
f" 'https://huggingface.co/{pretrained_model_name_or_path}' for available revisions."
)
except EntryNotFoundError:
raise EnvironmentError(
f"{pretrained_model_name_or_path} does not appear to have a file named {cls.config_name}."
)
except HfHubHTTPError as err:
raise EnvironmentError(
"There was a specific connection error when trying to load"
f" {pretrained_model_name_or_path}:\n{err}"
)
except ValueError:
raise EnvironmentError(
f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it"
f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a"
f" directory containing a {cls.config_name} file.\nCheckout your internet connection or see how to"
" run the library in offline mode at"
" 'https://huggingface.co/docs/diffusers/installation#offline-mode'."
)
except EnvironmentError:
raise EnvironmentError(
f"Can't load config for '{pretrained_model_name_or_path}'. If you were trying to load it from "
"'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
f"containing a {cls.config_name} file"
)
try:
with open(config_file, "r", encoding="utf-8") as reader:
text = reader.read()
config_dict = json.loads(text)
commit_hash = extract_commit_hash(config_file)
except (json.JSONDecodeError, UnicodeDecodeError):
raise EnvironmentError(f"It looks like the config file at '{config_file}' is not a valid JSON file.")
if not (return_unused_kwargs or return_commit_hash):
return config_dict
outputs = (config_dict,)
if return_unused_kwargs:
outputs += (kwargs,)
if return_commit_hash:
outputs += (commit_hash,)
return outputs
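# Illustrative usage (repo id assumed):
# config_dict = MellonNodeConfig.load_mellon_config("some-user/my-mellon-node")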
def save_mellon_config(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs):
"""
Save the Mellon node definition to a JSON file.
Args:
save_directory (`str` or `os.PathLike`):
Directory where the configuration JSON file is saved (will be created if it does not exist).
push_to_hub (`bool`, *optional*, defaults to `False`):
Whether or not to push your model to the Hugging Face Hub after saving it. You can specify the
repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
namespace).
kwargs (`Dict[str, Any]`, *optional*):
Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
"""
if os.path.isfile(save_directory):
raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")
os.makedirs(save_directory, exist_ok=True)
# If we save using the predefined names, we can load using `load_mellon_config`
output_config_file = os.path.join(save_directory, self.config_name)
self.to_json_file(output_config_file)
logger.info(f"Mellon node definition saved in {output_config_file}")
if push_to_hub:
commit_message = kwargs.pop("commit_message", None)
private = kwargs.pop("private", None)
create_pr = kwargs.pop("create_pr", False)
token = kwargs.pop("token", None)
repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
repo_id = create_repo(repo_id, exist_ok=True, private=private, token=token).repo_id
subfolder = kwargs.pop("subfolder", None)
self._upload_folder(
save_directory,
repo_id,
token=token,
commit_message=commit_message,
create_pr=create_pr,
subfolder=subfolder,
)
def to_json_file(self, json_file_path: Union[str, os.PathLike]):
"""
Save the Mellon schema dictionary to a JSON file.
Args:
json_file_path (`str` or `os.PathLike`):
Path to the JSON file to save a configuration instance's parameters.
"""
with open(json_file_path, "w", encoding="utf-8") as writer:
writer.write(self.to_json_string())
def to_json_string(self) -> str:
"""
Serializes this instance to a JSON string of the Mellon schema dict.
Returns:
`str`: String containing all the attributes that make up this configuration instance in JSON format.
"""
mellon_dict = self.to_mellon_dict()
return json.dumps(mellon_dict, indent=2, sort_keys=True) + "\n"
def to_mellon_dict(self) -> Dict[str, Any]:
"""Return a JSON-serializable dict focusing on the Mellon schema fields only.
params is a single flat dict composed as: {**inputs, **model_inputs, **outputs}.
"""
# inputs/model_inputs/outputs are already normalized dicts
merged_params = {}
merged_params.update(self.inputs or {})
merged_params.update(self.model_inputs or {})
merged_params.update(self.outputs or {})
return {
"node_type": self.node_type,
"blocks_names": self.blocks_names,
"params": merged_params,
}
@classmethod
def from_mellon_dict(cls, mellon_dict: Dict[str, Any]) -> "MellonNodeConfig":
"""Create a config from a Mellon schema dict produced by to_mellon_dict().
Splits the flat params dict back into inputs/model_inputs/outputs using the known key spaces from
MELLON_INPUT_PARAMS / MELLON_MODEL_PARAMS / MELLON_OUTPUT_PARAMS. Unknown keys are treated as inputs by
default.
"""
flat_params = mellon_dict.get("params", {})
inputs: Dict[str, Any] = {}
model_inputs: Dict[str, Any] = {}
outputs: Dict[str, Any] = {}
for param_name, param_dict in flat_params.items():
if param_dict.get("display", "") == "output":
outputs[param_name] = param_dict
elif param_dict.get("type", "") in ("diffusers_auto_model", "diffusers_auto_models"):
model_inputs[param_name] = param_dict
else:
inputs[param_name] = param_dict
return cls(
inputs=inputs,
model_inputs=model_inputs,
outputs=outputs,
blocks_names=mellon_dict.get("blocks_names", []),
node_type=mellon_dict.get("node_type"),
)
# YiYi Notes: not used yet
@classmethod
def from_blocks(cls, blocks: ModularPipelineBlocks, node_type: str) -> "MellonNodeConfig":
"""
Create an instance from a ModularPipelineBlocks object. If a preset exists in NODE_TYPE_PARAMS_MAP for the
node_type, use it; otherwise fall back to deriving the lists from the blocks' expected inputs/components/outputs.
"""
if node_type not in NODE_TYPE_PARAMS_MAP:
raise ValueError(f"Node type {node_type} not supported")
blocks_names = list(blocks.sub_blocks.keys())
default_node_config = NODE_TYPE_PARAMS_MAP[node_type]
inputs_list: List[Union[str, MellonParam]] = default_node_config.get("inputs", [])
model_inputs_list: List[Union[str, MellonParam]] = default_node_config.get("model_inputs", [])
outputs_list: List[Union[str, MellonParam]] = default_node_config.get("outputs", [])
for required_input_name in blocks.required_inputs:
if required_input_name not in inputs_list:
inputs_list.append(
MellonParam(
name=required_input_name, label=required_input_name, type=required_input_name, display="input"
)
)
for component_spec in blocks.expected_components:
if component_spec.name not in model_inputs_list:
model_inputs_list.append(
MellonParam(
name=component_spec.name,
label=component_spec.name,
type="diffusers_auto_model",
display="input",
)
)
return cls(
inputs=inputs_list,
model_inputs=model_inputs_list,
outputs=outputs_list,
blocks_names=blocks_names,
node_type=node_type,
)
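# Illustrative (hypothetical `blocks` object): MellonNodeConfig.from_blocks(blocks, node_type="denoise")
# starts from the "denoise" preset above, then appends any remaining required inputs and expected components.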
# Minimal modular registry for Mellon node configs
class ModularMellonNodeRegistry:
"""Registry mapping (pipeline class, blocks_name) -> list of MellonNodeConfig."""
def __init__(self):
self._registry = {}
self._initialized = False
def register(self, pipeline_cls: type, node_params: Dict[str, MellonNodeConfig]):
if not self._initialized:
_initialize_registry(self)
self._registry[pipeline_cls] = node_params
def get(self, pipeline_cls: type) -> Dict[str, MellonNodeConfig]:
if not self._initialized:
_initialize_registry(self)
return self._registry.get(pipeline_cls, None)
def get_all(self) -> Dict[type, Dict[str, MellonNodeConfig]]:
if not self._initialized:
_initialize_registry(self)
return self._registry
def _register_preset_node_types(
pipeline_cls, params_map: Dict[str, Dict[str, Any]], registry: ModularMellonNodeRegistry
):
"""Register all node-type presets for a given pipeline class from a params map."""
node_configs = {}
for node_type, spec in params_map.items():
node_config = MellonNodeConfig(
inputs=spec.get("inputs", []),
model_inputs=spec.get("model_inputs", []),
outputs=spec.get("outputs", []),
blocks_names=spec.get("block_names", []),
node_type=node_type,
)
node_configs[node_type] = node_config
registry.register(pipeline_cls, node_configs)
def _initialize_registry(registry: ModularMellonNodeRegistry):
"""Initialize the registry and register all available pipeline configs."""
print("Initializing registry")
registry._initialized = True
try:
from .qwenimage.modular_pipeline import QwenImageModularPipeline
from .qwenimage.node_utils import QwenImage_NODE_TYPES_PARAMS_MAP
_register_preset_node_types(QwenImageModularPipeline, QwenImage_NODE_TYPES_PARAMS_MAP, registry)
except Exception as e:
raise Exception("Failed to register QwenImageModularPipeline") from e
try:
from .stable_diffusion_xl.modular_pipeline import StableDiffusionXLModularPipeline
from .stable_diffusion_xl.node_utils import SDXL_NODE_TYPES_PARAMS_MAP
_register_preset_node_types(StableDiffusionXLModularPipeline, SDXL_NODE_TYPES_PARAMS_MAP, registry)
except Exception as e:
raise Exception("Failed to register StableDiffusionXLModularPipeline") from e

View File

@@ -51,6 +51,7 @@ if is_accelerate_available():
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
# map regular pipeline to modular pipeline class name
MODULAR_PIPELINE_MAPPING = OrderedDict(
[
("stable-diffusion-xl", "StableDiffusionXLModularPipeline"),
@@ -61,16 +62,6 @@ MODULAR_PIPELINE_MAPPING = OrderedDict(
]
)
MODULAR_PIPELINE_BLOCKS_MAPPING = OrderedDict(
[
("StableDiffusionXLModularPipeline", "StableDiffusionXLAutoBlocks"),
("WanModularPipeline", "WanAutoBlocks"),
("FluxModularPipeline", "FluxAutoBlocks"),
("QwenImageModularPipeline", "QwenImageAutoBlocks"),
("QwenImageEditModularPipeline", "QwenImageEditAutoBlocks"),
]
)
@dataclass
class PipelineState:
@@ -423,7 +414,7 @@ class ModularPipelineBlocks(ConfigMixin, PushToHubMixin):
state.set(input_param.name, param, input_param.kwargs_type)
elif input_param.kwargs_type:
# if it is a kwargs type, e.g. "guider_input_fields", it is likely to be a list of parameters
# if it is a kwargs type, e.g. "denoiser_input_fields", it is likely to be a list of parameters
# we need to first find out which inputs are and loop through them.
intermediate_kwargs = state.get_by_kwargs(input_param.kwargs_type)
for param_name, current_value in intermediate_kwargs.items():
@@ -1454,6 +1445,7 @@ class ModularPipeline(ConfigMixin, PushToHubMixin):
config_name = "modular_model_index.json"
hf_device_map = None
default_blocks_name = None
# YiYi TODO: add warning for passing multiple ComponentSpec/ConfigSpec with the same name
def __init__(
@@ -1514,7 +1506,7 @@ class ModularPipeline(ConfigMixin, PushToHubMixin):
`_blocks_class_name` in the config dict
"""
if blocks is None:
blocks_class_name = MODULAR_PIPELINE_BLOCKS_MAPPING.get(self.__class__.__name__)
blocks_class_name = self.default_blocks_name
if blocks_class_name is not None:
diffusers_module = importlib.import_module("diffusers")
blocks_class = getattr(diffusers_module, blocks_class_name)

View File

@@ -1,665 +0,0 @@
import json
import logging
import os
from pathlib import Path
from typing import List, Optional, Tuple, Union
import numpy as np
import PIL
import torch
from ..configuration_utils import ConfigMixin
from ..image_processor import PipelineImageInput
from .modular_pipeline import ModularPipelineBlocks, SequentialPipelineBlocks
from .modular_pipeline_utils import InputParam
logger = logging.getLogger(__name__)
# YiYi Notes: this is actually for SDXL, put it here for now
SDXL_INPUTS_SCHEMA = {
"prompt": InputParam(
"prompt", type_hint=Union[str, List[str]], description="The prompt or prompts to guide the image generation"
),
"prompt_2": InputParam(
"prompt_2",
type_hint=Union[str, List[str]],
description="The prompt or prompts to be sent to the tokenizer_2 and text_encoder_2",
),
"negative_prompt": InputParam(
"negative_prompt",
type_hint=Union[str, List[str]],
description="The prompt or prompts not to guide the image generation",
),
"negative_prompt_2": InputParam(
"negative_prompt_2",
type_hint=Union[str, List[str]],
description="The negative prompt or prompts for text_encoder_2",
),
"cross_attention_kwargs": InputParam(
"cross_attention_kwargs",
type_hint=Optional[dict],
description="Kwargs dictionary passed to the AttentionProcessor",
),
"clip_skip": InputParam(
"clip_skip", type_hint=Optional[int], description="Number of layers to skip in CLIP text encoder"
),
"image": InputParam(
"image",
type_hint=PipelineImageInput,
required=True,
description="The image(s) to modify for img2img or inpainting",
),
"mask_image": InputParam(
"mask_image",
type_hint=PipelineImageInput,
required=True,
description="Mask image for inpainting, white pixels will be repainted",
),
"generator": InputParam(
"generator",
type_hint=Optional[Union[torch.Generator, List[torch.Generator]]],
description="Generator(s) for deterministic generation",
),
"height": InputParam("height", type_hint=Optional[int], description="Height in pixels of the generated image"),
"width": InputParam("width", type_hint=Optional[int], description="Width in pixels of the generated image"),
"num_images_per_prompt": InputParam(
"num_images_per_prompt", type_hint=int, default=1, description="Number of images to generate per prompt"
),
"num_inference_steps": InputParam(
"num_inference_steps", type_hint=int, default=50, description="Number of denoising steps"
),
"timesteps": InputParam(
"timesteps", type_hint=Optional[torch.Tensor], description="Custom timesteps for the denoising process"
),
"sigmas": InputParam(
"sigmas", type_hint=Optional[torch.Tensor], description="Custom sigmas for the denoising process"
),
"denoising_end": InputParam(
"denoising_end",
type_hint=Optional[float],
description="Fraction of denoising process to complete before termination",
),
# YiYi Notes: img2img defaults to 0.3, inpainting defaults to 0.9999
"strength": InputParam(
"strength", type_hint=float, default=0.3, description="How much to transform the reference image"
),
"denoising_start": InputParam(
"denoising_start", type_hint=Optional[float], description="Starting point of the denoising process"
),
"latents": InputParam(
"latents", type_hint=Optional[torch.Tensor], description="Pre-generated noisy latents for image generation"
),
"padding_mask_crop": InputParam(
"padding_mask_crop",
type_hint=Optional[Tuple[int, int]],
description="Size of margin in crop for image and mask",
),
"original_size": InputParam(
"original_size",
type_hint=Optional[Tuple[int, int]],
description="Original size of the image for SDXL's micro-conditioning",
),
"target_size": InputParam(
"target_size", type_hint=Optional[Tuple[int, int]], description="Target size for SDXL's micro-conditioning"
),
"negative_original_size": InputParam(
"negative_original_size",
type_hint=Optional[Tuple[int, int]],
description="Negative conditioning based on image resolution",
),
"negative_target_size": InputParam(
"negative_target_size",
type_hint=Optional[Tuple[int, int]],
description="Negative conditioning based on target resolution",
),
"crops_coords_top_left": InputParam(
"crops_coords_top_left",
type_hint=Tuple[int, int],
default=(0, 0),
description="Top-left coordinates for SDXL's micro-conditioning",
),
"negative_crops_coords_top_left": InputParam(
"negative_crops_coords_top_left",
type_hint=Tuple[int, int],
default=(0, 0),
description="Negative conditioning crop coordinates",
),
"aesthetic_score": InputParam(
"aesthetic_score", type_hint=float, default=6.0, description="Simulates aesthetic score of generated image"
),
"negative_aesthetic_score": InputParam(
"negative_aesthetic_score", type_hint=float, default=2.0, description="Simulates negative aesthetic score"
),
"eta": InputParam("eta", type_hint=float, default=0.0, description="Parameter η in the DDIM paper"),
"output_type": InputParam(
"output_type", type_hint=str, default="pil", description="Output format (pil/tensor/np.array)"
),
"ip_adapter_image": InputParam(
"ip_adapter_image",
type_hint=PipelineImageInput,
required=True,
description="Image(s) to be used as IP adapter",
),
"control_image": InputParam(
"control_image", type_hint=PipelineImageInput, required=True, description="ControlNet input condition"
),
"control_guidance_start": InputParam(
"control_guidance_start",
type_hint=Union[float, List[float]],
default=0.0,
description="When ControlNet starts applying",
),
"control_guidance_end": InputParam(
"control_guidance_end",
type_hint=Union[float, List[float]],
default=1.0,
description="When ControlNet stops applying",
),
"controlnet_conditioning_scale": InputParam(
"controlnet_conditioning_scale",
type_hint=Union[float, List[float]],
default=1.0,
description="Scale factor for ControlNet outputs",
),
"guess_mode": InputParam(
"guess_mode",
type_hint=bool,
default=False,
description="Enables ControlNet encoder to recognize input without prompts",
),
"control_mode": InputParam(
"control_mode", type_hint=List[int], required=True, description="Control mode for union controlnet"
),
}
SDXL_INTERMEDIATE_INPUTS_SCHEMA = {
"prompt_embeds": InputParam(
"prompt_embeds",
type_hint=torch.Tensor,
required=True,
description="Text embeddings used to guide image generation",
),
"negative_prompt_embeds": InputParam(
"negative_prompt_embeds", type_hint=torch.Tensor, description="Negative text embeddings"
),
"pooled_prompt_embeds": InputParam(
"pooled_prompt_embeds", type_hint=torch.Tensor, required=True, description="Pooled text embeddings"
),
"negative_pooled_prompt_embeds": InputParam(
"negative_pooled_prompt_embeds", type_hint=torch.Tensor, description="Negative pooled text embeddings"
),
"batch_size": InputParam("batch_size", type_hint=int, required=True, description="Number of prompts"),
"dtype": InputParam("dtype", type_hint=torch.dtype, description="Data type of model tensor inputs"),
"preprocess_kwargs": InputParam(
"preprocess_kwargs", type_hint=Optional[dict], description="Kwargs for ImageProcessor"
),
"latents": InputParam(
"latents", type_hint=torch.Tensor, required=True, description="Initial latents for denoising process"
),
"timesteps": InputParam("timesteps", type_hint=torch.Tensor, required=True, description="Timesteps for inference"),
"num_inference_steps": InputParam(
"num_inference_steps", type_hint=int, required=True, description="Number of denoising steps"
),
"latent_timestep": InputParam(
"latent_timestep", type_hint=torch.Tensor, required=True, description="Initial noise level timestep"
),
"image_latents": InputParam(
"image_latents", type_hint=torch.Tensor, required=True, description="Latents representing reference image"
),
"mask": InputParam("mask", type_hint=torch.Tensor, required=True, description="Mask for inpainting"),
"masked_image_latents": InputParam(
"masked_image_latents", type_hint=torch.Tensor, description="Masked image latents for inpainting"
),
"add_time_ids": InputParam(
"add_time_ids", type_hint=torch.Tensor, required=True, description="Time ids for conditioning"
),
"negative_add_time_ids": InputParam(
"negative_add_time_ids", type_hint=torch.Tensor, description="Negative time ids"
),
"timestep_cond": InputParam("timestep_cond", type_hint=torch.Tensor, description="Timestep conditioning for LCM"),
"noise": InputParam("noise", type_hint=torch.Tensor, description="Noise added to image latents"),
"crops_coords": InputParam("crops_coords", type_hint=Optional[Tuple[int]], description="Crop coordinates"),
"ip_adapter_embeds": InputParam(
"ip_adapter_embeds", type_hint=List[torch.Tensor], description="Image embeddings for IP-Adapter"
),
"negative_ip_adapter_embeds": InputParam(
"negative_ip_adapter_embeds",
type_hint=List[torch.Tensor],
description="Negative image embeddings for IP-Adapter",
),
"images": InputParam(
"images",
type_hint=Union[List[PIL.Image.Image], List[torch.Tensor], List[np.array]],
required=True,
description="Generated images",
),
}
SDXL_PARAM_SCHEMA = {**SDXL_INPUTS_SCHEMA, **SDXL_INTERMEDIATE_INPUTS_SCHEMA}
DEFAULT_PARAM_MAPS = {
"prompt": {
"label": "Prompt",
"type": "string",
"default": "a bear sitting in a chair drinking a milkshake",
"display": "textarea",
},
"negative_prompt": {
"label": "Negative Prompt",
"type": "string",
"default": "deformed, ugly, wrong proportion, low res, bad anatomy, worst quality, low quality",
"display": "textarea",
},
"num_inference_steps": {
"label": "Steps",
"type": "int",
"default": 25,
"min": 1,
"max": 1000,
},
"seed": {
"label": "Seed",
"type": "int",
"default": 0,
"min": 0,
"display": "random",
},
"width": {
"label": "Width",
"type": "int",
"display": "text",
"default": 1024,
"min": 8,
"max": 8192,
"step": 8,
"group": "dimensions",
},
"height": {
"label": "Height",
"type": "int",
"display": "text",
"default": 1024,
"min": 8,
"max": 8192,
"step": 8,
"group": "dimensions",
},
"images": {
"label": "Images",
"type": "image",
"display": "output",
},
"image": {
"label": "Image",
"type": "image",
"display": "input",
},
}
DEFAULT_TYPE_MAPS = {
"int": {
"type": "int",
"default": 0,
"min": 0,
},
"float": {
"type": "float",
"default": 0.0,
"min": 0.0,
},
"str": {
"type": "string",
"default": "",
},
"bool": {
"type": "boolean",
"default": False,
},
"image": {
"type": "image",
},
}
DEFAULT_MODEL_KEYS = ["unet", "vae", "text_encoder", "tokenizer", "controlnet", "transformer", "image_encoder"]
DEFAULT_CATEGORY = "Modular Diffusers"
DEFAULT_EXCLUDE_MODEL_KEYS = ["processor", "feature_extractor", "safety_checker"]
DEFAULT_PARAMS_GROUPS_KEYS = {
"text_encoders": ["text_encoder", "tokenizer"],
"ip_adapter_embeds": ["ip_adapter_embeds"],
"prompt_embeddings": ["prompt_embeds"],
}
def get_group_name(name, group_params_keys=DEFAULT_PARAMS_GROUPS_KEYS):
"""
Get the group name for a given parameter name; if it is not part of a group, return None. e.g. "prompt_embeds" ->
"prompt_embeddings", "text_encoder" -> "text_encoders", "prompt" -> None
"""
if name is None:
return None
for group_name, group_keys in group_params_keys.items():
for group_key in group_keys:
if group_key in name:
return group_name
return None
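# For example, with DEFAULT_PARAMS_GROUPS_KEYS above:
# get_group_name("text_encoder") -> "text_encoders"
# get_group_name("prompt_embeds") -> "prompt_embeddings"
# get_group_name("prompt") -> None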
class ModularNode(ConfigMixin):
"""
A ModularNode is a base class for building UI nodes with diffusers; currently only Mellon is supported. It is a
wrapper around a ModularPipelineBlocks object.
<Tip warning={true}>
This is an experimental feature and is likely to change in the future.
</Tip>
"""
config_name = "node_config.json"
@classmethod
def from_pretrained(
cls,
pretrained_model_name_or_path: str,
trust_remote_code: Optional[bool] = None,
**kwargs,
):
blocks = ModularPipelineBlocks.from_pretrained(
pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
)
return cls(blocks, **kwargs)
def __init__(self, blocks, category=DEFAULT_CATEGORY, label=None, **kwargs):
self.blocks = blocks
if label is None:
label = self.blocks.__class__.__name__
# blocks param name -> mellon param name
self.name_mapping = {}
input_params = {}
# pass or create a default param dict for each input
# e.g. for prompt,
# prompt = {
# "name": "text_input", # the name of the input in node definition, could be different from the input name in diffusers
# "label": "Prompt",
# "type": "string",
# "default": "a bear sitting in a chair drinking a milkshake",
# "display": "textarea"}
# if type is not specified, it'll be a "custom" param of its own type
# e.g. you can pass ModularNode(scheduler = {name :"scheduler"})
# it will get this spec in node definition {"scheduler": {"label": "Scheduler", "type": "scheduler", "display": "input"}}
# name can be a dict, in that case, it is part of a "dict" input in mellon nodes, e.g. text_encoder= {name: {"text_encoders": "text_encoder"}}
inputs = self.blocks.inputs + self.blocks.intermediate_inputs
for inp in inputs:
param = kwargs.pop(inp.name, None)
if param:
# user can pass a param dict for all inputs, e.g. ModularNode(prompt = {...})
input_params[inp.name] = param
mellon_name = param.pop("name", inp.name)
if mellon_name != inp.name:
self.name_mapping[inp.name] = mellon_name
continue
if inp.name not in DEFAULT_PARAM_MAPS and not inp.required and not get_group_name(inp.name):
continue
if inp.name in DEFAULT_PARAM_MAPS:
# first check if it's in the default param map, if so, directly use that
param = DEFAULT_PARAM_MAPS[inp.name].copy()
elif get_group_name(inp.name):
param = get_group_name(inp.name)
if inp.name not in self.name_mapping:
self.name_mapping[inp.name] = param
else:
# if not, check if it's in the SDXL input schema, if so,
# 1. use the type hint to determine the type
# 2. use the default param dict for the type e.g. if "steps" is a "int" type, {"steps": {"type": "int", "default": 0, "min": 0}}
if inp.type_hint is not None:
type_str = str(inp.type_hint).lower()
else:
inp_spec = SDXL_PARAM_SCHEMA.get(inp.name, None)
type_str = str(inp_spec.type_hint).lower() if inp_spec else ""
for type_key, type_param in DEFAULT_TYPE_MAPS.items():
if type_key in type_str:
param = type_param.copy()
param["label"] = inp.name
param["display"] = "input"
break
else:
param = inp.name
# add the param dict to the inp_params dict
input_params[inp.name] = param
component_params = {}
for comp in self.blocks.expected_components:
param = kwargs.pop(comp.name, None)
if param:
component_params[comp.name] = param
mellon_name = param.pop("name", comp.name)
if mellon_name != comp.name:
self.name_mapping[comp.name] = mellon_name
continue
to_exclude = False
for exclude_key in DEFAULT_EXCLUDE_MODEL_KEYS:
if exclude_key in comp.name:
to_exclude = True
break
if to_exclude:
continue
if get_group_name(comp.name):
param = get_group_name(comp.name)
if comp.name not in self.name_mapping:
self.name_mapping[comp.name] = param
elif comp.name in DEFAULT_MODEL_KEYS:
param = {"label": comp.name, "type": "diffusers_auto_model", "display": "input"}
else:
param = comp.name
# add the param dict to the model_params dict
component_params[comp.name] = param
output_params = {}
if isinstance(self.blocks, SequentialPipelineBlocks):
last_block_name = list(self.blocks.sub_blocks.keys())[-1]
outputs = self.blocks.sub_blocks[last_block_name].intermediate_outputs
else:
outputs = self.blocks.intermediate_outputs
for out in outputs:
param = kwargs.pop(out.name, None)
if param:
output_params[out.name] = param
mellon_name = param.pop("name", out.name)
if mellon_name != out.name:
self.name_mapping[out.name] = mellon_name
continue
if out.name in DEFAULT_PARAM_MAPS:
param = DEFAULT_PARAM_MAPS[out.name].copy()
param["display"] = "output"
else:
group_name = get_group_name(out.name)
if group_name:
param = group_name
if out.name not in self.name_mapping:
self.name_mapping[out.name] = param
else:
param = out.name
# add the param dict to the outputs dict
output_params[out.name] = param
if len(kwargs) > 0:
logger.warning(f"Unused kwargs: {kwargs}")
register_dict = {
"category": category,
"label": label,
"input_params": input_params,
"component_params": component_params,
"output_params": output_params,
"name_mapping": self.name_mapping,
}
self.register_to_config(**register_dict)
def setup(self, components_manager, collection=None):
self.pipeline = self.blocks.init_pipeline(components_manager=components_manager, collection=collection)
self._components_manager = components_manager
@property
def mellon_config(self):
return self._convert_to_mellon_config()
def _convert_to_mellon_config(self):
node = {}
node["label"] = self.config.label
node["category"] = self.config.category
node_param = {}
for inp_name, inp_param in self.config.input_params.items():
if inp_name in self.name_mapping:
mellon_name = self.name_mapping[inp_name]
else:
mellon_name = inp_name
if isinstance(inp_param, str):
param = {
"label": inp_param,
"type": inp_param,
"display": "input",
}
else:
param = inp_param
if mellon_name not in node_param:
node_param[mellon_name] = param
else:
logger.debug(f"Input param {mellon_name} already exists in node_param, skipping {inp_name}")
for comp_name, comp_param in self.config.component_params.items():
if comp_name in self.name_mapping:
mellon_name = self.name_mapping[comp_name]
else:
mellon_name = comp_name
if isinstance(comp_param, str):
param = {
"label": comp_param,
"type": comp_param,
"display": "input",
}
else:
param = comp_param
if mellon_name not in node_param:
node_param[mellon_name] = param
else:
logger.debug(f"Component param {comp_param} already exists in node_param, skipping {comp_name}")
for out_name, out_param in self.config.output_params.items():
if out_name in self.name_mapping:
mellon_name = self.name_mapping[out_name]
else:
mellon_name = out_name
if isinstance(out_param, str):
param = {
"label": out_param,
"type": out_param,
"display": "output",
}
else:
param = out_param
if mellon_name not in node_param:
node_param[mellon_name] = param
else:
logger.debug(f"Output param {out_param} already exists in node_param, skipping {out_name}")
node["params"] = node_param
return node
def save_mellon_config(self, file_path):
"""
Save the Mellon configuration to a JSON file.
Args:
file_path (str or Path): Path where the JSON file will be saved
Returns:
Path: Path to the saved config file
"""
file_path = Path(file_path)
# Create directory if it doesn't exist
os.makedirs(file_path.parent, exist_ok=True)
# Create a combined dictionary with module definition and name mapping
config = {"module": self.mellon_config, "name_mapping": self.name_mapping}
# Save the config to file
with open(file_path, "w", encoding="utf-8") as f:
json.dump(config, f, indent=2)
logger.info(f"Mellon config and name mapping saved to {file_path}")
return file_path
@classmethod
def load_mellon_config(cls, file_path):
"""
Load a Mellon configuration from a JSON file.
Args:
file_path (str or Path): Path to the JSON file containing Mellon config
Returns:
dict: The loaded combined configuration containing 'module' and 'name_mapping'
"""
file_path = Path(file_path)
if not file_path.exists():
raise FileNotFoundError(f"Config file not found: {file_path}")
with open(file_path, "r", encoding="utf-8") as f:
config = json.load(f)
logger.info(f"Mellon config loaded from {file_path}")
return config
def process_inputs(self, **kwargs):
params_components = {}
for comp_name, comp_param in self.config.component_params.items():
logger.debug(f"component: {comp_name}")
mellon_comp_name = self.name_mapping.get(comp_name, comp_name)
if mellon_comp_name in kwargs:
if isinstance(kwargs[mellon_comp_name], dict) and comp_name in kwargs[mellon_comp_name]:
comp = kwargs[mellon_comp_name].pop(comp_name)
else:
comp = kwargs.pop(mellon_comp_name)
if comp:
params_components[comp_name] = self._components_manager.get_one(comp["model_id"])
params_run = {}
for inp_name, inp_param in self.config.input_params.items():
logger.debug(f"input: {inp_name}")
mellon_inp_name = self.name_mapping.get(inp_name, inp_name)
if mellon_inp_name in kwargs:
if isinstance(kwargs[mellon_inp_name], dict) and inp_name in kwargs[mellon_inp_name]:
inp = kwargs[mellon_inp_name].pop(inp_name)
else:
inp = kwargs.pop(mellon_inp_name)
if inp is not None:
params_run[inp_name] = inp
return_output_names = list(self.config.output_params.keys())
return params_components, params_run, return_output_names
def execute(self, **kwargs):
params_components, params_run, return_output_names = self.process_inputs(**kwargs)
self.pipeline.update_components(**params_components)
output = self.pipeline(**params_run, output=return_output_names)
return output

View File

@@ -577,9 +577,8 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
def inputs(self) -> List[InputParam]:
return [
InputParam(name="batch_size", required=True),
InputParam(
name="resized_image", required=True, type_hint=torch.Tensor, description="The resized image input"
),
InputParam(name="image_height", required=True),
InputParam(name="image_width", required=True),
InputParam(name="height", required=True),
InputParam(name="width", required=True),
InputParam(name="prompt_embeds_mask"),
@@ -612,10 +611,6 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
block_state = self.get_block_state(state)
# for edit, image size can be different from the target size (height/width)
image = (
block_state.resized_image[0] if isinstance(block_state.resized_image, list) else block_state.resized_image
)
image_width, image_height = image.size
block_state.img_shapes = [
[
@@ -624,7 +619,11 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
block_state.height // components.vae_scale_factor // 2,
block_state.width // components.vae_scale_factor // 2,
),
(1, image_height // components.vae_scale_factor // 2, image_width // components.vae_scale_factor // 2),
(
1,
block_state.image_height // components.vae_scale_factor // 2,
block_state.image_width // components.vae_scale_factor // 2,
),
]
] * block_state.batch_size
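# Illustrative (values assumed): with vae_scale_factor=8 and height=width=1024,
# 1024 // 8 // 2 == 64, so each latent grid entry is (1, 64, 64), repeated batch_size times.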

View File

@@ -496,7 +496,7 @@ class QwenImageEditTextEncoderStep(ModularPipelineBlocks):
)
if components.requires_unconditional_embeds:
negative_prompt = block_state.negative_prompt or ""
negative_prompt = block_state.negative_prompt or " "
block_state.negative_prompt_embeds, block_state.negative_prompt_embeds_mask = get_qwen_prompt_embeds_edit(
components.text_encoder,
components.processor,

View File

@@ -307,6 +307,13 @@ class QwenImageInputsDynamicStep(ModularPipelineBlocks):
return inputs
@property
def intermediate_outputs(self) -> List[OutputParam]:
return [
OutputParam(name="image_height", type_hint=int, description="The height of the image latents"),
OutputParam(name="image_width", type_hint=int, description="The width of the image latents"),
]
@property
def expected_components(self) -> List[ComponentSpec]:
return [
@@ -327,6 +334,11 @@ class QwenImageInputsDynamicStep(ModularPipelineBlocks):
block_state.height = block_state.height or height
block_state.width = block_state.width or width
if not hasattr(block_state, "image_height"):
block_state.image_height = height
if not hasattr(block_state, "image_width"):
block_state.image_width = width
# 2. Patchify the image latent tensor
image_latent_tensor = components.pachifier.pack_latents(image_latent_tensor)

View File

@@ -511,17 +511,42 @@ class QwenImageAutoDecodeStep(AutoPipelineBlocks):
)
class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
model_name = "qwenimage"
block_classes = [
QwenImageAutoInputStep,
QwenImageOptionalControlNetInputStep,
QwenImageAutoBeforeDenoiseStep,
QwenImageOptionalControlNetBeforeDenoiseStep,
QwenImageAutoDenoiseStep,
]
block_names = ["input", "controlnet_input", "before_denoise", "controlnet_before_denoise", "denoise", "decode"]
@property
def description(self):
return (
"Core step that performs the denoising process. \n"
+ " - `QwenImageAutoInputStep` (input) standardizes the inputs for the denoising step.\n"
+ " - `QwenImageOptionalControlNetInputStep` (controlnet_input) prepares the controlnet input.\n"
+ " - `QwenImageAutoBeforeDenoiseStep` (before_denoise) prepares the inputs for the denoising step.\n"
+ " - `QwenImageOptionalControlNetBeforeDenoiseStep` (controlnet_before_denoise) prepares the controlnet input for the denoising step.\n"
+ " - `QwenImageAutoDenoiseStep` (denoise) iteratively denoises the latents.\n"
+ " - `QwenImageAutoDecodeStep` (decode) decodes the latents into images.\n\n"
+ "This step support text-to-image, image-to-image, inpainting, and controlnet tasks for QwenImage:\n"
+ " - for image-to-image generation, you need to provide `image_latents`\n"
+ " - for inpainting, you need to provide `processed_mask_image` and `image_latents`\n"
+ " - to run the controlnet workflow, you need to provide `control_image_latents`\n"
+ " - for text-to-image generation, all you need to provide is prompt embeddings"
)
## 1.10 QwenImage/auto block & presets
AUTO_BLOCKS = InsertableDict(
[
("text_encoder", QwenImageTextEncoderStep()),
("vae_encoder", QwenImageAutoVaeEncoderStep()),
("controlnet_vae_encoder", QwenImageOptionalControlNetVaeEncoderStep()),
("input", QwenImageAutoInputStep()),
("controlnet_input", QwenImageOptionalControlNetInputStep()),
("before_denoise", QwenImageAutoBeforeDenoiseStep()),
("controlnet_before_denoise", QwenImageOptionalControlNetBeforeDenoiseStep()),
("denoise", QwenImageAutoDenoiseStep()),
("denoise", QwenImageCoreDenoiseStep()),
("decode", QwenImageAutoDecodeStep()),
]
)
@@ -699,7 +724,7 @@ class QwenImageEditAutoVaeEncoderStep(AutoPipelineBlocks):
class QwenImageEditAutoInputStep(AutoPipelineBlocks):
block_classes = [QwenImageInpaintInputStep, QwenImageEditInputStep]
block_names = ["edit_inpaint", "edit"]
block_trigger_inputs = ["processed_mask_image", "image"]
block_trigger_inputs = ["processed_mask_image", "image_latents"]
@property
def description(self):
@@ -800,13 +825,34 @@ class QwenImageEditAutoDenoiseStep(AutoPipelineBlocks):
## 2.7 QwenImage-Edit/auto blocks & presets
class QwenImageEditCoreDenoiseStep(SequentialPipelineBlocks):
model_name = "qwenimage-edit"
block_classes = [
QwenImageEditAutoInputStep,
QwenImageEditAutoBeforeDenoiseStep,
QwenImageEditAutoDenoiseStep,
]
block_names = ["input", "before_denoise", "denoise"]
@property
def description(self):
return (
"Core step that performs the denoising process. \n"
+ " - `QwenImageEditAutoInputStep` (input) standardizes the inputs for the denoising step.\n"
+ " - `QwenImageEditAutoBeforeDenoiseStep` (before_denoise) prepares the inputs for the denoising step.\n"
+ " - `QwenImageEditAutoDenoiseStep` (denoise) iteratively denoises the latents.\n\n"
+ "This step support edit (img2img) and edit inpainting workflow for QwenImage Edit:\n"
+ " - When `processed_mask_image` is provided, it will be used for edit inpainting task.\n"
+ " - When `image_latents` is provided, it will be used for edit (img2img) task.\n"
)
EDIT_AUTO_BLOCKS = InsertableDict(
[
("text_encoder", QwenImageEditVLEncoderStep()),
("vae_encoder", QwenImageEditAutoVaeEncoderStep()),
("input", QwenImageEditAutoInputStep()),
("before_denoise", QwenImageEditAutoBeforeDenoiseStep()),
("denoise", QwenImageEditAutoDenoiseStep()),
("denoise", QwenImageEditCoreDenoiseStep()),
("decode", QwenImageAutoDecodeStep()),
]
)

View File

@@ -104,6 +104,8 @@ class QwenImageModularPipeline(ModularPipeline, QwenImageLoraLoaderMixin):
</Tip>
"""
default_blocks_name = "QwenImageAutoBlocks"
@property
def default_height(self):
return self.default_sample_size * self.vae_scale_factor
@@ -158,6 +160,8 @@ class QwenImageEditModularPipeline(ModularPipeline, QwenImageLoraLoaderMixin):
</Tip>
"""
default_blocks_name = "QwenImageEditAutoBlocks"
# YiYi TODO: qwen edit should not provide default height/width, should be derived from the resized input image (after adjustment) produced by the resize step.
@property
def default_height(self):

View File

@@ -0,0 +1,95 @@
# Copyright 2025 Qwen-Image Team and The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# mellon nodes
QwenImage_NODE_TYPES_PARAMS_MAP = {
"controlnet": {
"inputs": [
"control_image",
"controlnet_conditioning_scale",
"control_guidance_start",
"control_guidance_end",
"height",
"width",
],
"model_inputs": [
"controlnet",
"vae",
],
"outputs": [
"controlnet_out",
],
"block_names": ["controlnet_vae_encoder"],
},
"denoise": {
"inputs": [
"embeddings",
"width",
"height",
"seed",
"num_inference_steps",
"guidance_scale",
"image_latents",
"strength",
"controlnet",
],
"model_inputs": [
"unet",
"guider",
"scheduler",
],
"outputs": [
"latents",
"latents_preview",
],
"block_names": ["denoise"],
},
"vae_encoder": {
"inputs": [
"image",
"width",
"height",
],
"model_inputs": [
"vae",
],
"outputs": [
"image_latents",
],
},
"text_encoder": {
"inputs": [
"prompt",
"negative_prompt",
],
"model_inputs": [
"text_encoders",
],
"outputs": [
"embeddings",
],
},
"decoder": {
"inputs": [
"latents",
],
"model_inputs": [
"vae",
],
"outputs": [
"images",
],
},
}
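
This preset map is consumed by the Mellon node registry added in this commit. A minimal lookup sketch (import paths are assumptions; the class names come from the registry code above):

from diffusers.modular_pipelines.mellon_node_utils import ModularMellonNodeRegistry  # path assumed
from diffusers.modular_pipelines.qwenimage.modular_pipeline import QwenImageModularPipeline

registry = ModularMellonNodeRegistry()
node_configs = registry.get(QwenImageModularPipeline)  # lazily initializes and registers the presets
denoise_config = node_configs["denoise"]  # a MellonNodeConfig built from the map above
print(sorted(denoise_config.inputs))  # resolved runtime params: ['controlnet', 'embeddings', 'guidance_scale', ...]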

View File

@@ -262,37 +262,37 @@ class StableDiffusionXLInputStep(ModularPipelineBlocks):
OutputParam(
"prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
kwargs_type="denoiser_input_fields", # already in intermedites state but declare here again for denoiser_input_fields
description="text embeddings used to guide the image generation",
),
OutputParam(
"negative_prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
kwargs_type="denoiser_input_fields", # already in intermedites state but declare here again for denoiser_input_fields
description="negative text embeddings used to guide the image generation",
),
OutputParam(
"pooled_prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
kwargs_type="denoiser_input_fields", # already in intermedites state but declare here again for denoiser_input_fields
description="pooled text embeddings used to guide the image generation",
),
OutputParam(
"negative_pooled_prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
kwargs_type="denoiser_input_fields", # already in intermedites state but declare here again for denoiser_input_fields
description="negative pooled text embeddings used to guide the image generation",
),
OutputParam(
"ip_adapter_embeds",
type_hint=List[torch.Tensor],
kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
kwargs_type="denoiser_input_fields", # already in intermedites state but declare here again for denoiser_input_fields
description="image embeddings for IP-Adapter",
),
OutputParam(
"negative_ip_adapter_embeds",
type_hint=List[torch.Tensor],
kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
kwargs_type="denoiser_input_fields", # already in intermedites state but declare here again for denoiser_input_fields
description="negative image embeddings for IP-Adapter",
),
]
@@ -1120,13 +1120,13 @@ class StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep(ModularPipelineB
OutputParam(
"add_time_ids",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="The time ids to condition the denoising process",
),
OutputParam(
"negative_add_time_ids",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="The negative time ids to condition the denoising process",
),
OutputParam("timestep_cond", type_hint=torch.Tensor, description="The timestep cond to use for LCM"),
@@ -1331,13 +1331,13 @@ class StableDiffusionXLPrepareAdditionalConditioningStep(ModularPipelineBlocks):
OutputParam(
"add_time_ids",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="The time ids to condition the denoising process",
),
OutputParam(
"negative_add_time_ids",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="The negative time ids to condition the denoising process",
),
OutputParam("timestep_cond", type_hint=torch.Tensor, description="The timestep cond to use for LCM"),

View File

@@ -183,14 +183,14 @@ class StableDiffusionXLLoopDenoiser(ModularPipelineBlocks):
description="The guidance scale embedding to use for Latent Consistency Models(LCMs). Can be generated in prepare_additional_conditioning step.",
),
InputParam(
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description=(
"All conditional model inputs that need to be prepared with guider. "
"It should contain prompt_embeds/negative_prompt_embeds, "
"add_time_ids/negative_add_time_ids, "
"pooled_prompt_embeds/negative_pooled_prompt_embeds, "
"and ip_adapter_embeds/negative_ip_adapter_embeds (optional)."
"please add `kwargs_type=guider_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
"please add `kwargs_type=denoiser_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
),
),
]
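
For context, a hedged sketch of what a loop denoiser does with the tagged fields: each positive tensor is paired with its negative_ counterpart so the guider can assemble conditional/unconditional batches. The helper below is illustrative only, not the actual diffusers internals:

import torch

def split_denoiser_input_fields(fields: dict) -> tuple[dict, dict]:
    # Split tagged state entries into conditional and unconditional groups;
    # names prefixed with "negative_" feed the unconditional branch.
    cond, uncond = {}, {}
    for name, value in fields.items():
        if name.startswith("negative_"):
            uncond[name.removeprefix("negative_")] = value
        else:
            cond[name] = value
    return cond, uncond

fields = {
    "prompt_embeds": torch.zeros(1, 77, 2048),
    "negative_prompt_embeds": torch.zeros(1, 77, 2048),
}
cond, uncond = split_denoiser_input_fields(fields)  # both keyed by "prompt_embeds"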
@@ -307,14 +307,14 @@ class StableDiffusionXLControlNetLoopDenoiser(ModularPipelineBlocks):
description="The number of inference steps to use for the denoising process. Can be generated in set_timesteps step.",
),
InputParam(
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description=(
"All conditional model inputs that need to be prepared with guider. "
"It should contain prompt_embeds/negative_prompt_embeds, "
"add_time_ids/negative_add_time_ids, "
"pooled_prompt_embeds/negative_pooled_prompt_embeds, "
"and ip_adapter_embeds/negative_ip_adapter_embeds (optional)."
"please add `kwargs_type=guider_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
"please add `kwargs_type=denoiser_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
),
),
InputParam(

View File

@@ -258,25 +258,25 @@ class StableDiffusionXLTextEncoderStep(ModularPipelineBlocks):
OutputParam(
"prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="text embeddings used to guide the image generation",
),
OutputParam(
"negative_prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="negative text embeddings used to guide the image generation",
),
OutputParam(
"pooled_prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="pooled text embeddings used to guide the image generation",
),
OutputParam(
"negative_pooled_prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="negative pooled text embeddings used to guide the image generation",
),
]

View File

@@ -82,19 +82,17 @@ class StableDiffusionXLAutoIPAdapterStep(AutoPipelineBlocks):
# before_denoise: text2img
class StableDiffusionXLBeforeDenoiseStep(SequentialPipelineBlocks):
block_classes = [
StableDiffusionXLInputStep,
StableDiffusionXLSetTimestepsStep,
StableDiffusionXLPrepareLatentsStep,
StableDiffusionXLPrepareAdditionalConditioningStep,
]
block_names = ["input", "set_timesteps", "prepare_latents", "prepare_add_cond"]
block_names = ["set_timesteps", "prepare_latents", "prepare_add_cond"]
@property
def description(self):
return (
"Before denoise step that prepare the inputs for the denoise step.\n"
+ "This is a sequential pipeline blocks:\n"
+ " - `StableDiffusionXLInputStep` is used to adjust the batch size of the model inputs\n"
+ " - `StableDiffusionXLSetTimestepsStep` is used to set the timesteps\n"
+ " - `StableDiffusionXLPrepareLatentsStep` is used to prepare the latents\n"
+ " - `StableDiffusionXLPrepareAdditionalConditioningStep` is used to prepare the additional conditioning\n"
@@ -104,19 +102,17 @@ class StableDiffusionXLBeforeDenoiseStep(SequentialPipelineBlocks):
# before_denoise: img2img
class StableDiffusionXLImg2ImgBeforeDenoiseStep(SequentialPipelineBlocks):
block_classes = [
StableDiffusionXLInputStep,
StableDiffusionXLImg2ImgSetTimestepsStep,
StableDiffusionXLImg2ImgPrepareLatentsStep,
StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep,
]
block_names = ["input", "set_timesteps", "prepare_latents", "prepare_add_cond"]
block_names = ["set_timesteps", "prepare_latents", "prepare_add_cond"]
@property
def description(self):
return (
"Before denoise step that prepare the inputs for the denoise step for img2img task.\n"
+ "This is a sequential pipeline blocks:\n"
+ " - `StableDiffusionXLInputStep` is used to adjust the batch size of the model inputs\n"
+ " - `StableDiffusionXLImg2ImgSetTimestepsStep` is used to set the timesteps\n"
+ " - `StableDiffusionXLImg2ImgPrepareLatentsStep` is used to prepare the latents\n"
+ " - `StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep` is used to prepare the additional conditioning\n"
@@ -126,19 +122,17 @@ class StableDiffusionXLImg2ImgBeforeDenoiseStep(SequentialPipelineBlocks):
# before_denoise: inpainting
class StableDiffusionXLInpaintBeforeDenoiseStep(SequentialPipelineBlocks):
block_classes = [
StableDiffusionXLInputStep,
StableDiffusionXLImg2ImgSetTimestepsStep,
StableDiffusionXLInpaintPrepareLatentsStep,
StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep,
]
block_names = ["input", "set_timesteps", "prepare_latents", "prepare_add_cond"]
block_names = ["set_timesteps", "prepare_latents", "prepare_add_cond"]
@property
def description(self):
return (
"Before denoise step that prepare the inputs for the denoise step for inpainting task.\n"
+ "This is a sequential pipeline blocks:\n"
+ " - `StableDiffusionXLInputStep` is used to adjust the batch size of the model inputs\n"
+ " - `StableDiffusionXLImg2ImgSetTimestepsStep` is used to set the timesteps\n"
+ " - `StableDiffusionXLInpaintPrepareLatentsStep` is used to prepare the latents\n"
+ " - `StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep` is used to prepare the additional conditioning\n"
@@ -255,25 +249,48 @@ class StableDiffusionXLAutoDecodeStep(AutoPipelineBlocks):
)
class StableDiffusionXLCoreDenoiseStep(SequentialPipelineBlocks):
block_classes = [
StableDiffusionXLInputStep,
StableDiffusionXLAutoBeforeDenoiseStep,
StableDiffusionXLAutoControlNetInputStep,
StableDiffusionXLAutoDenoiseStep,
]
block_names = ["input", "before_denoise", "controlnet_input", "denoise"]
@property
def description(self):
return (
"Core step that performs the denoising process. \n"
+ " - `StableDiffusionXLInputStep` (input) standardizes the inputs for the denoising step.\n"
+ " - `StableDiffusionXLAutoBeforeDenoiseStep` (before_denoise) prepares the inputs for the denoising step.\n"
+ " - `StableDiffusionXLAutoControlNetInputStep` (controlnet_input) prepares the controlnet input.\n"
+ " - `StableDiffusionXLAutoDenoiseStep` (denoise) iteratively denoises the latents.\n\n"
+ "This step support text-to-image, image-to-image, inpainting, with or without controlnet/controlnet_union/ip_adapter for Stable Diffusion XL:\n"
+ "- for image-to-image generation, you need to provide `image_latents`\n"
+ "- for inpainting, you need to provide `mask_image` and `image_latents`\n"
+ "- to run the controlnet workflow, you need to provide `control_image`\n"
+ "- to run the controlnet_union workflow, you need to provide `control_image` and `control_mode`\n"
+ "- to run the ip_adapter workflow, you need to load ip_adapter into your unet and provide `ip_adapter_embeds`\n"
+ "- for text-to-image generation, all you need to provide is prompt embeddings\n"
)
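
In practice the auto blocks dispatch on which runtime inputs you pass. A usage sketch under the assumptions that the import path and method names below match the modular pipeline docs and that the repo id points at a modular checkpoint (all of these are assumptions, not taken from this commit):

from diffusers.modular_pipelines.stable_diffusion_xl import StableDiffusionXLAutoBlocks

pipe = StableDiffusionXLAutoBlocks().init_pipeline("stabilityai/stable-diffusion-xl-base-1.0")
pipe.load_default_components()

# text-to-image: prompt embeddings are all that is required
image = pipe(prompt="a cat", num_inference_steps=25, output="images")

# image-to-image: supplying an input image routes the same blocks
# through the img2img branch (strength controls the noising level)
image = pipe(prompt="a cat", image=image, strength=0.5, output="images")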
# ip-adapter, controlnet, text2img, img2img, inpainting
class StableDiffusionXLAutoBlocks(SequentialPipelineBlocks):
block_classes = [
StableDiffusionXLTextEncoderStep,
StableDiffusionXLAutoIPAdapterStep,
StableDiffusionXLAutoVaeEncoderStep,
StableDiffusionXLAutoBeforeDenoiseStep,
StableDiffusionXLAutoControlNetInputStep,
StableDiffusionXLAutoDenoiseStep,
StableDiffusionXLCoreDenoiseStep,
StableDiffusionXLAutoDecodeStep,
]
block_names = [
"text_encoder",
"ip_adapter",
"image_encoder",
"before_denoise",
"controlnet_input",
"vae_encoder",
"denoise",
"decoder",
"decode",
]
@property
@@ -321,7 +338,7 @@ TEXT2IMAGE_BLOCKS = InsertableDict(
IMAGE2IMAGE_BLOCKS = InsertableDict(
[
("text_encoder", StableDiffusionXLTextEncoderStep),
("image_encoder", StableDiffusionXLVaeEncoderStep),
("vae_encoder", StableDiffusionXLVaeEncoderStep),
("input", StableDiffusionXLInputStep),
("set_timesteps", StableDiffusionXLImg2ImgSetTimestepsStep),
("prepare_latents", StableDiffusionXLImg2ImgPrepareLatentsStep),
@@ -334,7 +351,7 @@ IMAGE2IMAGE_BLOCKS = InsertableDict(
INPAINT_BLOCKS = InsertableDict(
[
("text_encoder", StableDiffusionXLTextEncoderStep),
("image_encoder", StableDiffusionXLInpaintVaeEncoderStep),
("vae_encoder", StableDiffusionXLInpaintVaeEncoderStep),
("input", StableDiffusionXLInputStep),
("set_timesteps", StableDiffusionXLImg2ImgSetTimestepsStep),
("prepare_latents", StableDiffusionXLInpaintPrepareLatentsStep),
@@ -361,10 +378,8 @@ AUTO_BLOCKS = InsertableDict(
[
("text_encoder", StableDiffusionXLTextEncoderStep),
("ip_adapter", StableDiffusionXLAutoIPAdapterStep),
("image_encoder", StableDiffusionXLAutoVaeEncoderStep),
("before_denoise", StableDiffusionXLAutoBeforeDenoiseStep),
("controlnet_input", StableDiffusionXLAutoControlNetInputStep),
("denoise", StableDiffusionXLAutoDenoiseStep),
("vae_encoder", StableDiffusionXLAutoVaeEncoderStep),
("denoise", StableDiffusionXLCoreDenoiseStep),
("decode", StableDiffusionXLAutoDecodeStep),
]
)
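
Note that the preset dicts now use "vae_encoder" as the key for the VAE encode step (previously "image_encoder"), so custom code that looks blocks up by name must follow the rename. A sketch of composing a pipeline from a preset, assuming from_blocks_dict and sub_blocks behave as in the modular pipeline docs and using a hypothetical repo id:

from diffusers.modular_pipelines import SequentialPipelineBlocks
from diffusers.modular_pipelines.stable_diffusion_xl import IMAGE2IMAGE_BLOCKS

# The dict keys become the block names on the composed pipeline.
blocks = SequentialPipelineBlocks.from_blocks_dict(IMAGE2IMAGE_BLOCKS)
vae_block = blocks.sub_blocks["vae_encoder"]  # was "image_encoder" before this commit
pipe = blocks.init_pipeline("YiYiXu/modular-demo")  # hypothetical repo id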

View File

@@ -54,6 +54,8 @@ class StableDiffusionXLModularPipeline(
</Tip>
"""
default_blocks_name = "StableDiffusionXLAutoBlocks"
@property
def default_height(self):
return self.default_sample_size * self.vae_scale_factor
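
Together with default_blocks_name above, these properties let the pipeline derive its default resolution from the loaded components. For SDXL the arithmetic works out as below (constants assume the stock SDXL UNet and VAE configs):

# Stock SDXL: UNet config sample_size = 128, VAE downscale factor = 8.
default_sample_size = 128
vae_scale_factor = 8
assert default_sample_size * vae_scale_factor == 1024  # default height/width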

View File

@@ -0,0 +1,99 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
SDXL_NODE_TYPES_PARAMS_MAP = {
"controlnet": {
"inputs": [
"control_image",
"controlnet_conditioning_scale",
"control_guidance_start",
"control_guidance_end",
"height",
"width",
],
"model_inputs": [
"controlnet",
],
"outputs": [
"controlnet_out",
],
"block_names": [None],
},
"denoise": {
"inputs": [
"embeddings",
"width",
"height",
"seed",
"num_inference_steps",
"guidance_scale",
"image_latents",
"strength",
# custom adapters coming in as inputs
"controlnet",
# ip_adapter is optional and custom; include if available
"ip_adapter",
],
"model_inputs": [
"unet",
"guider",
"scheduler",
],
"outputs": [
"latents",
"latents_preview",
],
"block_names": ["denoise"],
},
"vae_encoder": {
"inputs": [
"image",
"width",
"height",
],
"model_inputs": [
"vae",
],
"outputs": [
"image_latents",
],
"block_names": ["vae_encoder"],
},
"text_encoder": {
"inputs": [
"prompt",
"negative_prompt",
],
"model_inputs": [
"text_encoders",
],
"outputs": [
"embeddings",
],
"block_names": ["text_encoder"],
},
"decoder": {
"inputs": [
"latents",
],
"model_inputs": [
"vae",
],
"outputs": [
"images",
],
"block_names": ["decode"],
},
}
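
These per-node specs are meant to be resolved against the shared Mellon parameter tables defined elsewhere in this commit to build each node's UI schema. A self-contained sketch of that join; the helper and the stand-in widget values are illustrative, not part of the commit:

# Stand-in widget table; the real definitions live in the Mellon param tables.
WIDGET_PARAMS = {
    "prompt": {"label": "Prompt", "type": "string"},
    "negative_prompt": {"label": "Negative Prompt", "type": "string"},
}

NODE_TYPES = {  # excerpt of SDXL_NODE_TYPES_PARAMS_MAP above
    "text_encoder": {"inputs": ["prompt", "negative_prompt"]},
}

def build_node_inputs(node_type: str) -> dict:
    # Join a node type's declared inputs with their widget specs.
    return {name: WIDGET_PARAMS[name] for name in NODE_TYPES[node_type]["inputs"]}

print(build_node_inputs("text_encoder"))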

View File

@@ -146,13 +146,13 @@ class WanInputStep(ModularPipelineBlocks):
OutputParam(
"prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
kwargs_type="denoiser_input_fields", # already in intermedites state but declare here again for denoiser_input_fields
description="text embeddings used to guide the image generation",
),
OutputParam(
"negative_prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
kwargs_type="denoiser_input_fields", # already in intermedites state but declare here again for denoiser_input_fields
description="negative text embeddings used to guide the image generation",
),
]

View File

@@ -79,11 +79,11 @@ class WanLoopDenoiser(ModularPipelineBlocks):
description="The number of inference steps to use for the denoising process. Can be generated in set_timesteps step.",
),
InputParam(
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description=(
"All conditional model inputs that need to be prepared with guider. "
"It should contain prompt_embeds/negative_prompt_embeds. "
"Please add `kwargs_type=guider_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
"Please add `kwargs_type=denoiser_input_fields` to their parameter spec (`OutputParam`) when they are created and added to the pipeline state"
),
),
]

View File

@@ -89,13 +89,13 @@ class WanTextEncoderStep(ModularPipelineBlocks):
OutputParam(
"prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="text embeddings used to guide the image generation",
),
OutputParam(
"negative_prompt_embeds",
type_hint=torch.Tensor,
kwargs_type="guider_input_fields",
kwargs_type="denoiser_input_fields",
description="negative text embeddings used to guide the image generation",
),
]

View File

@@ -37,6 +37,8 @@ class WanModularPipeline(
</Tip>
"""
default_blocks_name = "WanAutoBlocks"
@property
def default_height(self):
return self.default_sample_height * self.vae_scale_factor_spatial