diff --git a/src/diffusers/modular_pipelines/modular_pipeline_utils.py b/src/diffusers/modular_pipelines/modular_pipeline_utils.py
index a65aa43b2a..5ef1b98f1b 100644
--- a/src/diffusers/modular_pipelines/modular_pipeline_utils.py
+++ b/src/diffusers/modular_pipelines/modular_pipeline_utils.py
@@ -397,6 +397,7 @@ INPUT_PARAM_TEMPLATES = {
         "description": "Additional kwargs for attention processors.",
     },
     "denoiser_input_fields": {
+        "name": None,
         "kwargs_type": "denoiser_input_fields",
         "description": "conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.",
     },
@@ -509,6 +510,7 @@ OUTPUT_PARAM_TEMPLATES = {
 }
 
 
+@dataclass
 class InputParam:
     """Specification for an input parameter."""
 
@@ -519,20 +521,22 @@ class InputParam:
     description: str = ""
     kwargs_type: str = None
 
-    def __post_init__(self):
-        if self.required and self.default is not None:
-            raise ValueError(f"InputParam '{self.name}' cannot be both required and have a default value")
-
     def __repr__(self):
         return f"<{self.name}: {'required' if self.required else 'optional'}, default={self.default}>"
 
     @classmethod
-    def template(cls, name: str, note: str = None, **overrides) -> "InputParam":
+    def template(cls, template_name: str, note: str = None, **overrides) -> "InputParam":
         """Get template for name if exists, otherwise raise ValueError."""
-        if name not in INPUT_PARAM_TEMPLATES:
-            raise ValueError(f"InputParam template for {name} not found")
+        if template_name not in INPUT_PARAM_TEMPLATES:
+            raise ValueError(f"InputParam template for {template_name} not found")
 
-        template_kwargs = INPUT_PARAM_TEMPLATES[name].copy()
+        template_kwargs = INPUT_PARAM_TEMPLATES[template_name].copy()
+
+        # Determine the actual param name:
+        # 1. From overrides if provided
+        # 2. From template if present
+        # 3. Fall back to template_name
+        name = overrides.pop("name", template_kwargs.pop("name", template_name))
 
         if note and "description" in template_kwargs:
             template_kwargs["description"] = f"{template_kwargs['description']} ({note})"
@@ -541,6 +545,7 @@ class InputParam:
         return cls(name=name, **template_kwargs)
 
 
+@dataclass
 class OutputParam:
     """Specification for an output parameter."""
 
@@ -555,12 +560,18 @@ class OutputParam:
     )
 
     @classmethod
-    def template(cls, name: str, note: str = None, **overrides) -> "OutputParam":
+    def template(cls, template_name: str, note: str = None, **overrides) -> "OutputParam":
         """Get template for name if exists, otherwise raise ValueError."""
-        if name not in OUTPUT_PARAM_TEMPLATES:
-            raise ValueError(f"OutputParam template for {name} not found")
+        if template_name not in OUTPUT_PARAM_TEMPLATES:
+            raise ValueError(f"OutputParam template for {template_name} not found")
 
-        template_kwargs = OUTPUT_PARAM_TEMPLATES[name].copy()
+        template_kwargs = OUTPUT_PARAM_TEMPLATES[template_name].copy()
+
+        # Determine the actual param name:
+        # 1. From overrides if provided
+        # 2. From template if present
+        # 3. Fall back to template_name
+        name = overrides.pop("name", template_kwargs.pop("name", template_name))
 
         if note and "description" in template_kwargs:
             template_kwargs["description"] = f"{template_kwargs['description']} ({note})"
diff --git a/src/diffusers/modular_pipelines/qwenimage/before_denoise.py b/src/diffusers/modular_pipelines/qwenimage/before_denoise.py
index b87c3555aa..fc795b5f5a 100644
--- a/src/diffusers/modular_pipelines/qwenimage/before_denoise.py
+++ b/src/diffusers/modular_pipelines/qwenimage/before_denoise.py
@@ -146,8 +146,8 @@ class QwenImagePrepareLatentsStep(ModularPipelineBlocks):
     @property
     def intermediate_outputs(self) -> List[OutputParam]:
         return [
-            OutputParam(name="height", type_hint=int, description="updated to default value if not provided"),
-            OutputParam(name="width", type_hint=int, description="updated to default value if not provided"),
+            OutputParam(name="height", type_hint=int, description="if not set, updated to default value"),
+            OutputParam(name="width", type_hint=int, description="if not set, updated to default value"),
             OutputParam(
                 name="latents",
                 type_hint=torch.Tensor,
@@ -230,8 +230,8 @@ class QwenImageLayeredPrepareLatentsStep(ModularPipelineBlocks):
     @property
     def intermediate_outputs(self) -> List[OutputParam]:
         return [
-            OutputParam(name="height", type_hint=int, description="updated to default value if not provided"),
-            OutputParam(name="width", type_hint=int, description="updated to default value if not provided"),
+            OutputParam(name="height", type_hint=int, description="if not set, updated to default value"),
+            OutputParam(name="width", type_hint=int, description="if not set, updated to default value"),
             OutputParam(
                 name="latents",
                 type_hint=torch.Tensor,
@@ -307,8 +307,13 @@ class QwenImagePrepareLatentsWithStrengthStep(ModularPipelineBlocks):
                 type_hint=torch.Tensor,
                 description="The initial random noised, can be generated in prepare latent step.",
             ),
-            InputParam.template("image_latents", note="Can be generated from vae encoder and packed in input step."),
-            InputParam.template("timesteps", required=True, note="can be generated in set_timesteps step."),
+            InputParam.template("image_latents", note="Can be generated from vae encoder and updated in input step."),
+            InputParam(
+                name="timesteps",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The timesteps to use for the denoising process. Can be generated in set_timesteps step."
+            ),
         ]
 
     @property
@@ -322,7 +327,7 @@ class QwenImagePrepareLatentsWithStrengthStep(ModularPipelineBlocks):
             OutputParam(
                 name="latents",
                 type_hint=torch.Tensor,
-                description="The scalednoisy latents to use for inpainting/image-to-image denoising.",
+                description="The scaled noisy latents to use for inpainting/image-to-image denoising.",
             ),
         ]
 
@@ -383,8 +388,8 @@ class QwenImageCreateMaskLatentsStep(ModularPipelineBlocks):
                 type_hint=torch.Tensor,
                 description="The processed mask to use for the inpainting process.",
             ),
-            InputParam.template("height", required=True, note="should be updated in prepare latents step."),
-            InputParam.template("width", required=True, note="should be updated in prepare latents step."),
+            InputParam.template("height", required=True),
+            InputParam.template("width", required=True),
             InputParam.template("dtype"),
         ]
 
@@ -447,7 +452,12 @@ class QwenImageSetTimestepsStep(ModularPipelineBlocks):
         return [
             InputParam.template("num_inference_steps"),
             InputParam.template("sigmas"),
-            InputParam.template("latents", required=True, description="The initial random noised latents for the denoising process, used to calculate the image sequence length. Can be generated in prepare latents step."),
+            InputParam(
+                name="latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The initial random noised latents for the denoising process. Can be generated in prepare latents step."
+            ),
         ]
 
     @property
@@ -456,7 +466,6 @@ class QwenImageSetTimestepsStep(ModularPipelineBlocks):
             OutputParam(
                 name="timesteps", type_hint=torch.Tensor, description="The timesteps to use for the denoising process"
             ),
-            OutputParam(name="num_inference_steps", type_hint=int, description="The number of denoising steps to perform at inference time"),
         ]
 
     def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -> PipelineState:
@@ -515,8 +524,11 @@ class QwenImageLayeredSetTimestepsStep(ModularPipelineBlocks):
     @property
     def intermediate_outputs(self) -> List[OutputParam]:
         return [
-            OutputParam(name="timesteps", type_hint=torch.Tensor, description="The timesteps to use for the denoising process"),
-            OutputParam(name="num_inference_steps", type_hint=int, description="The number of denoising steps to perform at inference time"),
+            OutputParam(
+                name="timesteps",
+                type_hint=torch.Tensor,
+                description="The timesteps to use for the denoising process."
+            ),
         ]
 
     @torch.no_grad()
@@ -568,7 +580,12 @@ class QwenImageSetTimestepsWithStrengthStep(ModularPipelineBlocks):
         return [
             InputParam.template("num_inference_steps"),
             InputParam.template("sigmas"),
-            InputParam.template("latents", required=True, description="The latents to use for the denoising process. Can be generated in prepare latents step."),
+            InputParam(
+                "latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The latents to use for the denoising process. Can be generated in prepare latents step."
+            ),
             InputParam.template("strength", default=0.9),
         ]
 
@@ -583,7 +600,7 @@ class QwenImageSetTimestepsWithStrengthStep(ModularPipelineBlocks):
             OutputParam(
                 name="num_inference_steps",
                 type_hint=int,
-                description="The number of denoising steps to perform at inference time",
+                description="The number of denoising steps to perform at inference time. Updated based on strength.",
             ),
         ]
 
@@ -643,8 +660,8 @@ class QwenImageRoPEInputsStep(ModularPipelineBlocks):
     def inputs(self) -> List[InputParam]:
         return [
             InputParam.template("batch_size"),
-            InputParam.template("height", note="should be updated in prepare latents step."),
-            InputParam.template("width", note="should be updated in prepare latents step."),
+            InputParam.template("height", required=True),
+            InputParam.template("width", required=True),
             InputParam.template("prompt_embeds_mask"),
             InputParam.template("negative_prompt_embeds_mask"),
         ]
@@ -711,8 +728,8 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
             InputParam.template("batch_size"),
             InputParam(name="image_height", required=True, type_hint=int, description="The height of the reference image. Can be generated in input step."),
             InputParam(name="image_width", required=True, type_hint=int, description="The width of the reference image. Can be generated in input step."),
-            InputParam.template("height", required=True, note="should be updated in prepare latents step."),
-            InputParam.template("width", required=True, note="should be updated in prepare latents step."),
+            InputParam.template("height", required=True),
+            InputParam.template("width", required=True),
             InputParam.template("prompt_embeds_mask"),
             InputParam.template("negative_prompt_embeds_mask"),
         ]
@@ -788,10 +805,10 @@ class QwenImageEditPlusRoPEInputsStep(ModularPipelineBlocks):
     def inputs(self) -> List[InputParam]:
         return [
             InputParam.template("batch_size"),
-            InputParam(name="image_height", required=True, type_hint=List[int], descrption="The heights of the reference images. Can be generated in input step."),
+            InputParam(name="image_height", required=True, type_hint=List[int], description="The heights of the reference images. Can be generated in input step."),
             InputParam(name="image_width", required=True, type_hint=List[int], description="The widths of the reference images. Can be generated in input step."),
-            InputParam.template("height", required=True, note="should be updated in prepare latents step."),
-            InputParam.template("width", required=True, note="should be updated in prepare latents step."),
+            InputParam.template("height", required=True),
+            InputParam.template("width", required=True),
             InputParam.template("prompt_embeds_mask"),
             InputParam.template("negative_prompt_embeds_mask"),
         ]
@@ -863,8 +880,8 @@ class QwenImageLayeredRoPEInputsStep(ModularPipelineBlocks):
         return [
             InputParam.template("batch_size"),
             InputParam.template("layers"),
-            InputParam.template("height", required=True, note="should be updated in prepare latents step."),
-            InputParam.template("width", required=True, note="should be updated in prepare latents step."),
+            InputParam.template("height", required=True),
+            InputParam.template("width", required=True),
             InputParam.template("prompt_embeds_mask"),
             InputParam.template("negative_prompt_embeds_mask"),
         ]
@@ -950,8 +967,18 @@ class QwenImageControlNetBeforeDenoiserStep(ModularPipelineBlocks):
             InputParam.template("control_guidance_start"),
             InputParam.template("control_guidance_end"),
             InputParam.template("controlnet_conditioning_scale"),
-            InputParam("control_image_latents", required=True, type_hint=torch.Tensor, description="The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step."),
-            InputParam.template("timesteps", required=True, note="Can be generated in set_timesteps step."),
+            InputParam(
+                name="control_image_latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The control image latents to use for the denoising process. Can be generated in controlnet vae encoder step."
+            ),
+            InputParam(
+                name="timesteps",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The timesteps to use for the denoising process. Can be generated in set_timesteps step."
+            ),
         ]
 
     @property
diff --git a/src/diffusers/modular_pipelines/qwenimage/decoders.py b/src/diffusers/modular_pipelines/qwenimage/decoders.py
index 499f017288..4476e1db9b 100644
--- a/src/diffusers/modular_pipelines/qwenimage/decoders.py
+++ b/src/diffusers/modular_pipelines/qwenimage/decoders.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List
+from typing import Any, Dict, List
 
 import torch
 
@@ -47,15 +47,24 @@ class QwenImageAfterDenoiseStep(ModularPipelineBlocks):
     @property
     def inputs(self) -> List[InputParam]:
         return [
-            InputParam.template("height", required=True, note="should be updated in input and prepare latents step."),
-            InputParam.template("width", required=True, note="should be updated in input and prepare latents step."),
-            InputParam.template("latents", required=True, description="The latents to decode, can be generated in the denoise step."),
+            InputParam.template("height", required=True),
+            InputParam.template("width", required=True),
+            InputParam(
+                name="latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The latents to decode, can be generated in the denoise step."
+            ),
         ]
 
     @property
     def intermediate_outputs(self) -> List[OutputParam]:
         return [
-            OutputParam.template("latents", note="unpacked to B, C, 1, H, W"),
+            OutputParam(
+                name="latents",
+                type_hint=torch.Tensor,
+                description="The denoised latents unpacked to B, C, 1, H, W"
+            ),
         ]
 
     @torch.no_grad()
@@ -87,9 +96,14 @@ class QwenImageLayeredAfterDenoiseStep(ModularPipelineBlocks):
     @property
     def inputs(self) -> List[InputParam]:
         return [
-            InputParam.template("latents", required=True, description="The latents to decode, can be generated in the denoise step."),
-            InputParam.template("height", required=True, note="should be updated in prepare latents step."),
-            InputParam.template("width", required=True, note="should be updated in prepare latents step."),
+            InputParam(
+                name="latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The denoised latents to decode, can be generated in the denoise step."
+            ),
+            InputParam.template("height", required=True),
+            InputParam.template("width", required=True),
             InputParam.template("layers"),
         ]
 
@@ -135,7 +149,12 @@ class QwenImageDecoderStep(ModularPipelineBlocks):
     @property
     def inputs(self) -> List[InputParam]:
         return [
-            InputParam.template("latents", required=True, description="The latents to decode, can be generated in the denoise step and unpacked in the after denoise step."),
+            InputParam(
+                name="latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step."
+            ),
         ]
 
     @property
@@ -192,7 +211,12 @@ class QwenImageLayeredDecoderStep(ModularPipelineBlocks):
     @property
     def inputs(self) -> List[InputParam]:
         return [
-            InputParam.template("latents", required=True, description="The latents to decode, can be generated in the denoise step and unpacked in the after denoise step."),
+            InputParam(
+                name="latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The denoised latents to decode, can be generated in the denoise step and unpacked in the after denoise step."
+            ),
             InputParam.template("output_type"),
         ]
 
@@ -266,7 +290,12 @@ class QwenImageProcessImagesOutputStep(ModularPipelineBlocks):
     @property
     def inputs(self) -> List[InputParam]:
         return [
-            InputParam("images", required=True, description="the generated image tensor from decoders step"),
+            InputParam(
+                name="images",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The generated image tensor from the decoders step"
+            ),
             InputParam.template("output_type"),
         ]
 
@@ -315,9 +344,17 @@ class QwenImageInpaintProcessImagesOutputStep(ModularPipelineBlocks):
     @property
    def inputs(self) -> List[InputParam]:
        return [
-            InputParam("images", required=True, description="the generated image tensor from decoders step"),
+            InputParam(
+                name="images",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The generated image tensor from the decoders step"
+            ),
             InputParam.template("output_type"),
-            InputParam("mask_overlay_kwargs", description="The kwargs for the postprocess step to apply the mask overlay. generated in InpaintProcessImagesInputStep."),
+            InputParam(
+                name="mask_overlay_kwargs",
+                type_hint=Dict[str, Any],
+                description="The kwargs for the postprocess step to apply the mask overlay. Generated in InpaintProcessImagesInputStep."),
         ]
 
     @property
diff --git a/src/diffusers/modular_pipelines/qwenimage/denoise.py b/src/diffusers/modular_pipelines/qwenimage/denoise.py
index 49fde3fd6a..ad6a9677ac 100644
--- a/src/diffusers/modular_pipelines/qwenimage/denoise.py
+++ b/src/diffusers/modular_pipelines/qwenimage/denoise.py
@@ -49,7 +49,12 @@ class QwenImageLoopBeforeDenoiser(ModularPipelineBlocks):
     @property
     def inputs(self) -> List[InputParam]:
         return [
-            InputParam.template("latents", required=True, description="The initial latents to use for the denoising process. Can be generated in prepare_latent step."),
+            InputParam(
+                name="latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The initial latents to use for the denoising process. Can be generated in prepare_latent step."
+            ),
         ]
 
     @torch.no_grad()
@@ -74,8 +79,13 @@ class QwenImageEditLoopBeforeDenoiser(ModularPipelineBlocks):
     @property
     def inputs(self) -> List[InputParam]:
         return [
-            InputParam.template("latents", required=True, description="The initial latents to use for the denoising process. Can be generated in prepare_latent step."),
-            InputParam.template("image_latents", note="Can be encoded in vae_encoder step and packed in prepare_image_latents step."),
+            InputParam(
+                name="latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The initial latents to use for the denoising process. Can be generated in prepare_latent step."
+            ),
+            InputParam.template("image_latents", note="generated in vae encoder step and updated in input step."),
         ]
 
     @torch.no_grad()
@@ -119,10 +129,13 @@ class QwenImageLoopBeforeDenoiserControlNet(ModularPipelineBlocks):
                 type_hint=torch.Tensor,
                 description="The control image to use for the denoising process. Can be generated in prepare_controlnet_inputs step.",
             ),
-            InputParam.template("controlnet_conditioning_scale", note="Can be generated in prepare_controlnet_inputs step."),
-            InputParam.template("controlnet_keep", note="Can be generated in prepare_controlnet_inputs step."),
-            InputParam.template("num_inference_steps", required=True, note="Can be updated in set_timesteps step."),
-            InputParam.template("denoiser_input_fields")
+            InputParam.template("controlnet_conditioning_scale", note="updated in prepare_controlnet_inputs step."),
+            InputParam(
+                name="controlnet_keep",
+                required=True,
+                type_hint=List[float],
+                description="The controlnet keep values. Can be generated in prepare_controlnet_inputs step."
+            ),
         ]
 
     @torch.no_grad()
@@ -184,8 +197,13 @@ class QwenImageLoopDenoiser(ModularPipelineBlocks):
     def inputs(self) -> List[InputParam]:
         return [
             InputParam.template("attention_kwargs"),
-            InputParam.template("latents", required=True, description="The latents to use for the denoising process. Can be generated in prepare_latents step."),
-            InputParam.template("num_inference_steps", required=True, note="should be updated in set_timesteps step."),
+            InputParam(
+                name="latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The latents to use for the denoising process. Can be generated in prepare_latents step."
+            ),
+            InputParam.template("num_inference_steps"),
             InputParam.template("denoiser_input_fields"),
             InputParam(
                 "img_shapes",
@@ -275,8 +293,13 @@ class QwenImageEditLoopDenoiser(ModularPipelineBlocks):
     def inputs(self) -> List[InputParam]:
         return [
             InputParam.template("attention_kwargs"),
-            InputParam.template("latents", required=True, description="The latents to use for the denoising process. Can be generated in prepare_latents step."),
-            InputParam.template("num_inference_steps", required=True, note="should be updated in set_timesteps step."),
+            InputParam(
+                name="latents",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The latents to use for the denoising process. Can be generated in prepare_latents step."
+            ),
+            InputParam.template("num_inference_steps"),
             InputParam.template("denoiser_input_fields"),
             InputParam(
                 "img_shapes",
@@ -404,14 +427,19 @@ class QwenImageLoopAfterDenoiserInpaint(ModularPipelineBlocks):
                 type_hint=torch.Tensor,
                 description="The mask to use for the inpainting process. Can be generated in inpaint prepare latents step.",
             ),
-            InputParam.template("image_latents", note="Can be generated from vae encoder and packed in input step."),
+            InputParam.template("image_latents", note="Can be generated from vae encoder step and updated in input step."),
             InputParam(
                 "initial_noise",
                 required=True,
                 type_hint=torch.Tensor,
                 description="The initial noise to use for the inpainting process. Can be generated in inpaint prepare latents step.",
             ),
-            InputParam.template("timesteps", required=True, note="should be updated in set_timesteps step."),
+            InputParam(
+                "timesteps",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The timesteps to use for the denoising process. Can be generated in set_timesteps step."
+            ),
         ]
 
     @torch.no_grad()
@@ -452,8 +480,13 @@ class QwenImageDenoiseLoopWrapper(LoopSequentialPipelineBlocks):
     @property
     def loop_inputs(self) -> List[InputParam]:
         return [
-            InputParam.template("timesteps", required=True, note="should be generated in set_timesteps step."),
-            InputParam.template("num_inference_steps", required=True, note="should be updated in set_timesteps step."),
+            InputParam(
+                name="timesteps",
+                required=True,
+                type_hint=torch.Tensor,
+                description="The timesteps to use for the denoising process. Can be generated in set_timesteps step."
+            ),
+            InputParam.template("num_inference_steps", required=True),
         ]
 
     @torch.no_grad()
diff --git a/src/diffusers/modular_pipelines/qwenimage/encoders.py b/src/diffusers/modular_pipelines/qwenimage/encoders.py
index 82a3b68119..9a83f0d717 100644
--- a/src/diffusers/modular_pipelines/qwenimage/encoders.py
+++ b/src/diffusers/modular_pipelines/qwenimage/encoders.py
@@ -1145,7 +1145,7 @@ class QwenImageEditPlusProcessImagesInputStep(ModularPipelineBlocks):
 
     @property
     def description(self) -> str:
-        return "Image Preprocess step. Images can be resized first using QwenImageEditResizeStep."
+        return "Image Preprocess step. Images can be resized first. If a list of images is provided, a list of processed images is returned."
 
     @property
     def expected_components(self) -> List[ComponentSpec]:
diff --git a/src/diffusers/modular_pipelines/qwenimage/inputs.py b/src/diffusers/modular_pipelines/qwenimage/inputs.py
index bd2f79ae7c..b237031b91 100644
--- a/src/diffusers/modular_pipelines/qwenimage/inputs.py
+++ b/src/diffusers/modular_pipelines/qwenimage/inputs.py
@@ -139,8 +139,8 @@ class QwenImageTextInputsStep(ModularPipelineBlocks):
     @property
     def intermediate_outputs(self) -> List[OutputParam]:
         return [
-            OutputParam.template("batch_size"),
-            OutputParam.template("dtype"),
+            OutputParam(name="batch_size", type_hint=int, description="The batch size of the prompt embeddings"),
+            OutputParam(name="dtype", type_hint=torch.dtype, description="The data type of the prompt embeddings"),
             OutputParam.template("prompt_embeds", note="batch-expanded"),
             OutputParam.template("prompt_embeds_mask", note="batch-expanded"),
             OutputParam.template("negative_prompt_embeds", note="batch-expanded"),
@@ -307,8 +307,8 @@ class QwenImageAdditionalInputsStep(ModularPipelineBlocks):
 
         # `height`/`width` are not new outputs, but they will be updated if any image latent inputs are provided
         if len(self._image_latent_inputs) > 0:
-            outputs.append(OutputParam(name="height", type_hint=int, note="updated based on image size if not provided"))
-            outputs.append(OutputParam(name="width", type_hint=int, note="updated based on image size if not provided"))
+            outputs.append(OutputParam(name="height", type_hint=int, description="if not provided, updated to image height"))
+            outputs.append(OutputParam(name="width", type_hint=int, description="if not provided, updated to image width"))
 
         # image latent inputs are modified in place (patchified and batch-expanded)
         for input_param in self._image_latent_inputs:
@@ -476,8 +476,8 @@ class QwenImageEditPlusAdditionalInputsStep(ModularPipelineBlocks):
 
         # `height`/`width` are updated if any image latent inputs are provided
         if len(self._image_latent_inputs) > 0:
-            outputs.append(OutputParam(name="height", type_hint=int, description="updated based on image size if not provided"))
-            outputs.append(OutputParam(name="width", type_hint=int, description="updated based on image size if not provided"))
+            outputs.append(OutputParam(name="height", type_hint=int, description="if not provided, updated to image height"))
+            outputs.append(OutputParam(name="width", type_hint=int, description="if not provided, updated to image width"))
 
         # image latent inputs are modified in place (patchified, concatenated, and batch-expanded)
         for input_param in self._image_latent_inputs:
@@ -658,8 +658,8 @@ class QwenImageLayeredAdditionalInputsStep(ModularPipelineBlocks):
         ]
 
         if len(self._image_latent_inputs) > 0:
-            outputs.append(OutputParam(name="height", type_hint=int, description="updated based on image size if not provided"))
-            outputs.append(OutputParam(name="width", type_hint=int, description="updated based on image size if not provided"))
+            outputs.append(OutputParam(name="height", type_hint=int, description="if not provided, updated to image height"))
+            outputs.append(OutputParam(name="width", type_hint=int, description="if not provided, updated to image width"))
 
         # Add outputs for image latent inputs (patchified with layered pachifier and batch-expanded)
         for input_param in self._image_latent_inputs:
@@ -759,8 +759,8 @@ class QwenImageControlNetInputsStep(ModularPipelineBlocks):
     def intermediate_outputs(self) -> List[OutputParam]:
         return [
             OutputParam(name="control_image_latents", type_hint=torch.Tensor, description="The control image latents (patchified and batch-expanded)."),
-            OutputParam(name="height", type_hint=int, description="updated based on control image size if not provided"),
-            OutputParam(name="width", type_hint=int, description="updated based on control image size if not provided"),
+            OutputParam(name="height", type_hint=int, description="if not provided, updated to control image height"),
+            OutputParam(name="width", type_hint=int, description="if not provided, updated to control image width"),
         ]
 
     @torch.no_grad()
diff --git a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage.py b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage.py
index 42593a93f9..46f0b6f6ff 100644
--- a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage.py
+++ b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage.py
@@ -12,10 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
+import torch
 from ...utils import logging
 from ..modular_pipeline import AutoPipelineBlocks, ConditionalPipelineBlocks, SequentialPipelineBlocks
-from ..modular_pipeline_utils import InsertableDict, OutputParam
+from ..modular_pipeline_utils import InsertableDict, OutputParam, InputParam
 from .before_denoise import (
     QwenImageControlNetBeforeDenoiserStep,
     QwenImageCreateMaskLatentsStep,
@@ -319,7 +319,7 @@ class QwenImageImg2ImgInputStep(SequentialPipelineBlocks):
     """
 
     model_name = "qwenimage"
-    block_classes = [QwenImageTextInputsStep(), QwenImageAdditionalInputsStep(image_latent_inputs=["image_latents"])]
+    block_classes = [QwenImageTextInputsStep(), QwenImageAdditionalInputsStep()]
     block_names = ["text_inputs", "additional_inputs"]
 
     @property
@@ -373,7 +373,7 @@ class QwenImageInpaintInputStep(SequentialPipelineBlocks):
     block_classes = [
         QwenImageTextInputsStep(),
         QwenImageAdditionalInputsStep(
-            image_latent_inputs=["image_latents"], additional_batch_inputs=["processed_mask_image"]
+            additional_batch_inputs=[InputParam(name="processed_mask_image", type_hint=torch.Tensor, description="The processed mask image")]
         ),
     ]
     block_names = ["text_inputs", "additional_inputs"]
@@ -512,7 +512,7 @@ class QwenImageCoreDenoiseStep(SequentialPipelineBlocks):
     @property
     def outputs(self):
         return [
-            OutputParam.latents(),
+            OutputParam.template("latents"),
         ]
 
 
@@ -598,7 +598,7 @@ class QwenImageInpaintCoreDenoiseStep(SequentialPipelineBlocks):
     @property
     def outputs(self):
         return [
-            OutputParam.latents(),
+            OutputParam.template("latents"),
         ]
 
 
@@ -682,7 +682,7 @@ class QwenImageImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
     @property
     def outputs(self):
         return [
-            OutputParam.latents(),
+            OutputParam.template("latents"),
         ]
 
 
@@ -777,7 +777,7 @@ class QwenImageControlNetCoreDenoiseStep(SequentialPipelineBlocks):
     @property
     def outputs(self):
         return [
-            OutputParam.latents(),
+            OutputParam.template("latents"),
         ]
 
 
@@ -880,7 +880,7 @@ class QwenImageControlNetInpaintCoreDenoiseStep(SequentialPipelineBlocks):
     @property
     def outputs(self):
         return [
-            OutputParam.latents(),
+            OutputParam.template("latents"),
         ]
 
 
@@ -981,7 +981,7 @@ class QwenImageControlNetImg2ImgCoreDenoiseStep(SequentialPipelineBlocks):
     @property
     def outputs(self):
         return [
-            OutputParam.latents(),
+            OutputParam.template("latents"),
         ]
 
 
@@ -1042,7 +1042,7 @@ class QwenImageAutoCoreDenoiseStep(ConditionalPipelineBlocks):
     @property
     def outputs(self):
         return [
-            OutputParam.latents(),
+            OutputParam.template("latents"),
         ]
 
 
@@ -1279,5 +1279,5 @@ class QwenImageAutoBlocks(SequentialPipelineBlocks):
     @property
     def outputs(self):
         return [
-            OutputParam.images(),
+            OutputParam.template("images"),
         ]
diff --git a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit.py b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit.py
index 46e8881b95..158763ce91 100644
--- a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit.py
+++ b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit.py
@@ -13,10 +13,11 @@
 # limitations under the License.
 
 from typing import Optional
+import torch
 
 from ...utils import logging
 from ..modular_pipeline import AutoPipelineBlocks, ConditionalPipelineBlocks, SequentialPipelineBlocks
-from ..modular_pipeline_utils import InsertableDict, OutputParam
+from ..modular_pipeline_utils import InsertableDict, OutputParam, InputParam
 from .before_denoise import (
     QwenImageCreateMaskLatentsStep,
     QwenImageEditRoPEInputsStep,
@@ -206,7 +207,7 @@ class QwenImageEditInpaintVaeEncoderStep(SequentialPipelineBlocks):
     block_classes = [
         QwenImageEditResizeStep(),
         QwenImageEditInpaintProcessImagesInputStep(),
-        QwenImageVaeEncoderStep(input_name="processed_image", output_name="image_latents"),
+        QwenImageVaeEncoderStep(),
     ]
     block_names = ["resize", "preprocess", "encode"]
 
@@ -286,7 +287,7 @@ class QwenImageEditInputStep(SequentialPipelineBlocks):
     model_name = "qwenimage-edit"
     block_classes = [
         QwenImageTextInputsStep(),
-        QwenImageAdditionalInputsStep(image_latent_inputs=["image_latents"]),
+        QwenImageAdditionalInputsStep(),
     ]
     block_names = ["text_inputs", "additional_inputs"]
 
@@ -344,8 +345,7 @@ class QwenImageEditInpaintInputStep(SequentialPipelineBlocks):
     model_name = "qwenimage-edit"
     block_classes = [
         QwenImageTextInputsStep(),
-        QwenImageAdditionalInputsStep(
-            image_latent_inputs=["image_latents"], additional_batch_inputs=["processed_mask_image"]
+        QwenImageAdditionalInputsStep(additional_batch_inputs=[InputParam(name="processed_mask_image", type_hint=torch.Tensor, description="The processed mask image")]
         ),
     ]
     block_names = ["text_inputs", "additional_inputs"]
@@ -485,7 +485,7 @@ class QwenImageEditCoreDenoiseStep(SequentialPipelineBlocks):
     @property
     def outputs(self):
         return [
-            OutputParam.latents(),
+            OutputParam.template("latents"),
         ]
 
 
@@ -571,7 +571,7 @@ class QwenImageEditInpaintCoreDenoiseStep(SequentialPipelineBlocks):
     @property
     def outputs(self):
         return [
-            OutputParam.latents(),
+            OutputParam.template("latents"),
         ]
 
 
@@ -605,7 +605,7 @@ class QwenImageEditAutoCoreDenoiseStep(ConditionalPipelineBlocks):
     @property
     def outputs(self):
         return [
-            OutputParam.latents(),
+            OutputParam.template("latents"),
         ]
 
 
@@ -698,7 +698,7 @@ class QwenImageEditAutoDecodeStep(AutoPipelineBlocks):
     @property
     def outputs(self):
         return [
-            OutputParam.latents(),
+            OutputParam.template("latents"),
         ]
 
 
@@ -816,5 +816,5 @@ class QwenImageEditAutoBlocks(SequentialPipelineBlocks):
     @property
     def outputs(self):
         return [
-            OutputParam.images(),
+            OutputParam.template("images"),
         ]
diff --git a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit_plus.py b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit_plus.py
index 1fb967bf13..a16dee1c75 100644
--- a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit_plus.py
+++ b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_edit_plus.py
@@ -12,10 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
+import torch
 from ...utils import logging
 from ..modular_pipeline import SequentialPipelineBlocks
-from ..modular_pipeline_utils import InsertableDict, OutputParam
+from ..modular_pipeline_utils import InsertableDict, OutputParam, InputParam
 from .before_denoise import (
     QwenImageEditPlusRoPEInputsStep,
     QwenImagePrepareLatentsStep,
@@ -211,7 +211,7 @@ class QwenImageEditPlusInputStep(SequentialPipelineBlocks):
     model_name = "qwenimage-edit-plus"
     block_classes = [
         QwenImageTextInputsStep(),
-        QwenImageEditPlusAdditionalInputsStep(image_latent_inputs=["image_latents"]),
+        QwenImageEditPlusAdditionalInputsStep(),
     ]
     block_names = ["text_inputs", "additional_inputs"]
 
@@ -302,7 +302,7 @@ class QwenImageEditPlusCoreDenoiseStep(SequentialPipelineBlocks):
     @property
     def outputs(self):
         return [
-            OutputParam.latents(),
+            OutputParam.template("latents"),
         ]
 
 
@@ -446,5 +446,5 @@ class QwenImageEditPlusAutoBlocks(SequentialPipelineBlocks):
     @property
     def outputs(self):
         return [
-            OutputParam.images(),
+            OutputParam.template("images"),
         ]
diff --git a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_layered.py b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_layered.py
index 7d6c2ea063..2471750f2e 100644
--- a/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_layered.py
+++ b/src/diffusers/modular_pipelines/qwenimage/modular_blocks_qwenimage_layered.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
+import torch
 from ...utils import logging
 from ..modular_pipeline import SequentialPipelineBlocks
 from ..modular_pipeline_utils import InsertableDict, OutputParam
@@ -255,7 +255,7 @@ class QwenImageLayeredInputStep(SequentialPipelineBlocks):
     model_name = "qwenimage-layered"
     block_classes = [
         QwenImageTextInputsStep(),
-        QwenImageLayeredAdditionalInputsStep(image_latent_inputs=["image_latents"]),
+        QwenImageLayeredAdditionalInputsStep(),
     ]
     block_names = ["text_inputs", "additional_inputs"]
 
@@ -342,7 +342,7 @@ class QwenImageLayeredCoreDenoiseStep(SequentialPipelineBlocks):
     @property
     def outputs(self):
         return [
-            OutputParam.latents(),
+            OutputParam.template("latents"),
         ]
 
 
@@ -484,5 +484,5 @@ class QwenImageLayeredAutoBlocks(SequentialPipelineBlocks):
     @property
     def outputs(self):
         return [
-            OutputParam.images(),
+            OutputParam.template("images"),
         ]
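Editor's note (not part of the patch): the name-resolution change in `InputParam.template()` / `OutputParam.template()` above is easy to misread, so here is a minimal, self-contained sketch of the new lookup order. The `height` template entry is an illustrative stand-in; the `denoiser_input_fields` entry mirrors the one this diff adds `"name": None` to.

    from dataclasses import dataclass
    from typing import Any

    # Illustrative stand-ins for the real template dicts in modular_pipeline_utils.py.
    INPUT_PARAM_TEMPLATES = {
        "height": {"type_hint": int, "description": "The height in pixels of the generated image."},
        "denoiser_input_fields": {
            "name": None,  # template supplies an explicit name (here: no single name)
            "kwargs_type": "denoiser_input_fields",
            "description": "conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.",
        },
    }

    @dataclass
    class InputParam:
        name: str = None
        type_hint: Any = None
        default: Any = None
        required: bool = False
        description: str = ""
        kwargs_type: str = None

        @classmethod
        def template(cls, template_name: str, note: str = None, **overrides) -> "InputParam":
            if template_name not in INPUT_PARAM_TEMPLATES:
                raise ValueError(f"InputParam template for {template_name} not found")
            template_kwargs = INPUT_PARAM_TEMPLATES[template_name].copy()
            # Name resolution: 1. overrides, 2. template entry, 3. the template key itself.
            name = overrides.pop("name", template_kwargs.pop("name", template_name))
            if note and "description" in template_kwargs:
                template_kwargs["description"] = f"{template_kwargs['description']} ({note})"
            template_kwargs.update(overrides)
            return cls(name=name, **template_kwargs)

    print(InputParam.template("height").name)                       # "height"  (falls back to the key)
    print(InputParam.template("height", name="image_height").name)  # "image_height"  (override wins)
    print(InputParam.template("denoiser_input_fields").name)        # None  (template's explicit name)

Because `"name"` is popped from both `overrides` and the template copy before `template_kwargs.update(overrides)`, the final `cls(name=name, **template_kwargs)` call never receives a duplicate `name` keyword.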