diff --git a/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py b/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py index e5083df286..c51d94e86b 100644 --- a/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +++ b/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py @@ -1137,6 +1137,9 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin): cross_attention_kwargs=cross_attention_kwargs, encoder_attention_mask=encoder_attention_mask, ) + # To support T2I-Adapter-XL + if is_adapter and len(down_block_additional_residuals) > 0: + sample += down_block_additional_residuals.pop(0) if is_controlnet: sample = sample + mid_block_additional_residual