From 9f3c0fdcd859905c2c13ec47f10eb0250d2576ac Mon Sep 17 00:00:00 2001 From: Pavle Padjin Date: Thu, 30 Oct 2025 04:09:40 +0100 Subject: [PATCH 1/4] Avoiding graph break by changing the way we infer dtype in vae.decoder (#12512) * Changing the way we infer dtype to avoid force evaluation of lazy tensors * changing way to infer dtype to ensure type consistency * more robust inferring of dtype * removing the upscale dtype entirely --- src/diffusers/models/autoencoders/vae.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/diffusers/models/autoencoders/vae.py b/src/diffusers/models/autoencoders/vae.py index 7b17196125..9c6031a988 100644 --- a/src/diffusers/models/autoencoders/vae.py +++ b/src/diffusers/models/autoencoders/vae.py @@ -286,11 +286,9 @@ class Decoder(nn.Module): sample = self.conv_in(sample) - upscale_dtype = next(iter(self.up_blocks.parameters())).dtype if torch.is_grad_enabled() and self.gradient_checkpointing: # middle sample = self._gradient_checkpointing_func(self.mid_block, sample, latent_embeds) - sample = sample.to(upscale_dtype) # up for up_block in self.up_blocks: @@ -298,7 +296,6 @@ class Decoder(nn.Module): else: # middle sample = self.mid_block(sample, latent_embeds) - sample = sample.to(upscale_dtype) # up for up_block in self.up_blocks: From df8dd778177c3d2272f74cbebd880d7abd9f5ec9 Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Fri, 31 Oct 2025 00:14:24 +0530 Subject: [PATCH 2/4] [Modular] Fix for custom block kwargs (#12561) update --- src/diffusers/modular_pipelines/modular_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/modular_pipelines/modular_pipeline.py b/src/diffusers/modular_pipelines/modular_pipeline.py index bf067555a8..55c261ab2f 100644 --- a/src/diffusers/modular_pipelines/modular_pipeline.py +++ b/src/diffusers/modular_pipelines/modular_pipeline.py @@ -335,7 +335,7 @@ class ModularPipelineBlocks(ConfigMixin, PushToHubMixin): ) expected_kwargs, optional_kwargs = 
block_cls._get_signature_keys(block_cls) block_kwargs = { - name: kwargs.pop(name) for name in kwargs if name in expected_kwargs or name in optional_kwargs + name: kwargs.get(name) for name in kwargs if name in expected_kwargs or name in optional_kwargs } return block_cls(**block_kwargs) From d54622c2679d700b425ad61abce9b80fc36212c0 Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Fri, 31 Oct 2025 13:47:02 +0530 Subject: [PATCH 3/4] [Modular] Allow custom blocks to be saved to `local_dir` (#12381) update Co-authored-by: YiYi Xu --- src/diffusers/modular_pipelines/modular_pipeline.py | 2 +- src/diffusers/utils/dynamic_modules_utils.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/diffusers/modular_pipelines/modular_pipeline.py b/src/diffusers/modular_pipelines/modular_pipeline.py index 55c261ab2f..ef1673c057 100644 --- a/src/diffusers/modular_pipelines/modular_pipeline.py +++ b/src/diffusers/modular_pipelines/modular_pipeline.py @@ -305,6 +305,7 @@ class ModularPipelineBlocks(ConfigMixin, PushToHubMixin): "cache_dir", "force_download", "local_files_only", + "local_dir", "proxies", "resume_download", "revision", @@ -331,7 +332,6 @@ class ModularPipelineBlocks(ConfigMixin, PushToHubMixin): module_file=module_file, class_name=class_name, **hub_kwargs, - **kwargs, ) expected_kwargs, optional_kwargs = block_cls._get_signature_keys(block_cls) block_kwargs = { diff --git a/src/diffusers/utils/dynamic_modules_utils.py b/src/diffusers/utils/dynamic_modules_utils.py index 627b1e0604..b2ef5a29e0 100644 --- a/src/diffusers/utils/dynamic_modules_utils.py +++ b/src/diffusers/utils/dynamic_modules_utils.py @@ -254,6 +254,7 @@ def get_cached_module_file( token: Optional[Union[bool, str]] = None, revision: Optional[str] = None, local_files_only: bool = False, + local_dir: Optional[str] = None, ): """ Prepares Downloads a module from a local folder or a distant repo and returns its path inside the cached @@ -332,6 +333,7 @@ def get_cached_module_file( 
force_download=force_download, proxies=proxies, local_files_only=local_files_only, + local_dir=local_dir, ) submodule = "git" module_file = pretrained_model_name_or_path + ".py" @@ -355,6 +357,7 @@ def get_cached_module_file( force_download=force_download, proxies=proxies, local_files_only=local_files_only, + local_dir=local_dir, token=token, ) submodule = os.path.join("local", "--".join(pretrained_model_name_or_path.split("/"))) @@ -415,6 +418,7 @@ def get_cached_module_file( token=token, revision=revision, local_files_only=local_files_only, + local_dir=local_dir, ) return os.path.join(full_submodule, module_file) @@ -431,7 +435,7 @@ def get_class_from_dynamic_module( token: Optional[Union[bool, str]] = None, revision: Optional[str] = None, local_files_only: bool = False, - **kwargs, + local_dir: Optional[str] = None, ): """ Extracts a class from a module file, present in the local folder or repository of a model. @@ -496,5 +500,6 @@ def get_class_from_dynamic_module( token=token, revision=revision, local_files_only=local_files_only, + local_dir=local_dir, ) return get_class_in_module(class_name, final_module) From 051c8a1c0f5c393a447bef18081fdf94c2a3ab9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Friedrich=20Sch=C3=B6ller?= Date: Fri, 31 Oct 2025 21:25:13 +0100 Subject: [PATCH 4/4] Fix Stable Diffusion 3.x pooled prompt embedding with multiple images (#12306) --- .../controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py | 2 +- .../pipeline_stable_diffusion_3_controlnet_inpainting.py | 2 +- src/diffusers/pipelines/pag/pipeline_pag_sd_3.py | 2 +- src/diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py | 2 +- .../pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py | 2 +- .../stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py | 2 +- .../stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py 
b/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py index f67a0e2112..d605eac1f2 100644 --- a/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +++ b/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py @@ -355,7 +355,7 @@ class StableDiffusion3ControlNetPipeline( prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1) prompt_embeds = prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt, 1) + pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt) pooled_prompt_embeds = pooled_prompt_embeds.view(batch_size * num_images_per_prompt, -1) return prompt_embeds, pooled_prompt_embeds diff --git a/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py b/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py index 68984da4dc..9d0158c6b6 100644 --- a/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +++ b/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py @@ -373,7 +373,7 @@ class StableDiffusion3ControlNetInpaintingPipeline( prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1) prompt_embeds = prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt, 1) + pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt) pooled_prompt_embeds = pooled_prompt_embeds.view(batch_size * num_images_per_prompt, -1) return prompt_embeds, pooled_prompt_embeds diff --git a/src/diffusers/pipelines/pag/pipeline_pag_sd_3.py b/src/diffusers/pipelines/pag/pipeline_pag_sd_3.py index bc281428e2..941b675099 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_sd_3.py +++ 
b/src/diffusers/pipelines/pag/pipeline_pag_sd_3.py @@ -326,7 +326,7 @@ class StableDiffusion3PAGPipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSin prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1) prompt_embeds = prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt, 1) + pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt) pooled_prompt_embeds = pooled_prompt_embeds.view(batch_size * num_images_per_prompt, -1) return prompt_embeds, pooled_prompt_embeds diff --git a/src/diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py b/src/diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py index 22a8dac238..f40dd52fc2 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py @@ -342,7 +342,7 @@ class StableDiffusion3PAGImg2ImgPipeline(DiffusionPipeline, SD3LoraLoaderMixin, prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1) prompt_embeds = prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt, 1) + pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt) pooled_prompt_embeds = pooled_prompt_embeds.view(batch_size * num_images_per_prompt, -1) return prompt_embeds, pooled_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py index 3b7b26dc63..660d9801df 100644 --- a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +++ b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py @@ -336,7 +336,7 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1) prompt_embeds = 
prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt, 1) + pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt) pooled_prompt_embeds = pooled_prompt_embeds.view(batch_size * num_images_per_prompt, -1) return prompt_embeds, pooled_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py index db047f1992..9b11bc8781 100644 --- a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py @@ -361,7 +361,7 @@ class StableDiffusion3Img2ImgPipeline(DiffusionPipeline, SD3LoraLoaderMixin, Fro prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1) prompt_embeds = prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt, 1) + pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt) pooled_prompt_embeds = pooled_prompt_embeds.view(batch_size * num_images_per_prompt, -1) return prompt_embeds, pooled_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py index c95fa530c8..b947cbff09 100644 --- a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py @@ -367,7 +367,7 @@ class StableDiffusion3InpaintPipeline(DiffusionPipeline, SD3LoraLoaderMixin, Fro prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1) prompt_embeds = prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, 
num_images_per_prompt, 1) + pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt) pooled_prompt_embeds = pooled_prompt_embeds.view(batch_size * num_images_per_prompt, -1) return prompt_embeds, pooled_prompt_embeds