diff --git a/src/diffusers/modular_pipelines/stable_diffusion_xl/after_denoise.py b/src/diffusers/modular_pipelines/stable_diffusion_xl/after_denoise.py index 6ce59b5c35..ca848e2098 100644 --- a/src/diffusers/modular_pipelines/stable_diffusion_xl/after_denoise.py +++ b/src/diffusers/modular_pipelines/stable_diffusion_xl/after_denoise.py @@ -41,7 +41,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name -class StableDiffusionXLDecodeLatentsStep(PipelineBlock): +class StableDiffusionXLDecodeStep(PipelineBlock): model_name = "stable-diffusion-xl" @@ -187,63 +187,17 @@ class StableDiffusionXLInpaintOverlayMaskStep(PipelineBlock): return components, state -# YiYi TODO: remove this, we don't need this in modular -class StableDiffusionXLOutputStep(PipelineBlock): - model_name = "stable-diffusion-xl" - - @property - def description(self) -> str: - return "final step to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] or a plain tuple." - - @property - def inputs(self) -> List[Tuple[str, Any]]: - return [InputParam("return_dict", default=True)] - - @property - def intermediates_inputs(self) -> List[str]: - return [InputParam("images", required=True, type_hint=Union[List[PIL.Image.Image], List[torch.Tensor], List[np.array]], description="The generated images from the decode step.")] - - @property - def intermediates_outputs(self) -> List[str]: - return [OutputParam("images", description="The final images output, can be a tuple or a `StableDiffusionXLPipelineOutput`")] - - - @torch.no_grad() - def __call__(self, components, state: PipelineState) -> PipelineState: - block_state = self.get_block_state(state) - - if not block_state.return_dict: - block_state.images = (block_state.images,) - else: - block_state.images = StableDiffusionXLPipelineOutput(images=block_state.images) - self.add_block_state(state, block_state) - return components, state - - -# After denoise -class StableDiffusionXLDecodeStep(SequentialPipelineBlocks): - block_classes = [StableDiffusionXLDecodeLatentsStep, StableDiffusionXLOutputStep] - block_names = ["decode", "output"] - - @property - def description(self): - return """Decode step that decode the denoised latents into images outputs. -This is a sequential pipeline blocks: - - `StableDiffusionXLDecodeLatentsStep` is used to decode the denoised latents into images - - `StableDiffusionXLOutputStep` is used to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] or a plain tuple.""" - class StableDiffusionXLInpaintDecodeStep(SequentialPipelineBlocks): - block_classes = [StableDiffusionXLDecodeLatentsStep, StableDiffusionXLInpaintOverlayMaskStep, StableDiffusionXLOutputStep] - block_names = ["decode", "mask_overlay", "output"] + block_classes = [StableDiffusionXLDecodeStep, StableDiffusionXLInpaintOverlayMaskStep] + block_names = ["decode", "mask_overlay"] @property def description(self): return "Inpaint decode step that decode the denoised latents into images outputs.\n" + \ "This is a sequential pipeline blocks:\n" + \ - " - `StableDiffusionXLDecodeLatentsStep` is used to decode the denoised latents into images\n" + \ - " - `StableDiffusionXLInpaintOverlayMaskStep` is used to overlay the mask on the image\n" + \ - " - `StableDiffusionXLOutputStep` is used to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] or a plain tuple." + " - `StableDiffusionXLDecodeStep` is used to decode the denoised latents into images\n" + \ + " - `StableDiffusionXLInpaintOverlayMaskStep` is used to overlay the mask on the image" class StableDiffusionXLAutoDecodeStep(AutoPipelineBlocks): diff --git a/src/diffusers/modular_pipelines/stable_diffusion_xl/modular_loader.py b/src/diffusers/modular_pipelines/stable_diffusion_xl/modular_loader.py index 53f2757109..4af942af64 100644 --- a/src/diffusers/modular_pipelines/stable_diffusion_xl/modular_loader.py +++ b/src/diffusers/modular_pipelines/stable_diffusion_xl/modular_loader.py @@ -107,7 +107,6 @@ SDXL_INPUTS_SCHEMA = { "negative_aesthetic_score": InputParam("negative_aesthetic_score", type_hint=float, default=2.0, description="Simulates negative aesthetic score"), "eta": InputParam("eta", type_hint=float, default=0.0, description="Parameter η in the DDIM paper"), "output_type": InputParam("output_type", type_hint=str, default="pil", description="Output format (pil/tensor/np.array)"), - "return_dict": InputParam("return_dict", type_hint=bool, default=True, description="Whether to return a StableDiffusionXLPipelineOutput"), "ip_adapter_image": InputParam("ip_adapter_image", type_hint=PipelineImageInput, required=True, description="Image(s) to be used as IP adapter"), "control_image": InputParam("control_image", type_hint=PipelineImageInput, required=True, description="ControlNet input condition"), "control_guidance_start": InputParam("control_guidance_start", type_hint=Union[float, List[float]], default=0.0, description="When ControlNet starts applying"),