From bc7a4d4917456afd70913be85bd25c556c25862c Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Sat, 21 Oct 2023 18:47:18 +0200 Subject: [PATCH] [`PEFT`] Fix scale unscale with LoRA adapters (#5417) * fix scale unscale v1 * final fixes + CI * fix slow trst * oops * fix copies * oops * oops * fix * style * fix copies --------- Co-authored-by: Sayak Paul --- src/diffusers/models/unet_2d_condition.py | 2 +- .../alt_diffusion/pipeline_alt_diffusion.py | 2 +- .../pipeline_alt_diffusion_img2img.py | 2 +- .../controlnet/pipeline_controlnet.py | 2 +- .../controlnet/pipeline_controlnet_img2img.py | 2 +- .../controlnet/pipeline_controlnet_inpaint.py | 2 +- .../pipeline_controlnet_inpaint_sd_xl.py | 4 +- .../controlnet/pipeline_controlnet_sd_xl.py | 4 +- .../pipeline_controlnet_sd_xl_img2img.py | 4 +- .../pipeline_cycle_diffusion.py | 2 +- .../pipeline_stable_diffusion.py | 2 +- ...line_stable_diffusion_attend_and_excite.py | 2 +- .../pipeline_stable_diffusion_depth2img.py | 2 +- .../pipeline_stable_diffusion_diffedit.py | 2 +- .../pipeline_stable_diffusion_gligen.py | 2 +- ...line_stable_diffusion_gligen_text_image.py | 2 +- .../pipeline_stable_diffusion_img2img.py | 2 +- .../pipeline_stable_diffusion_inpaint.py | 2 +- ...ipeline_stable_diffusion_inpaint_legacy.py | 2 +- .../pipeline_stable_diffusion_k_diffusion.py | 2 +- .../pipeline_stable_diffusion_ldm3d.py | 2 +- ...pipeline_stable_diffusion_model_editing.py | 2 +- .../pipeline_stable_diffusion_panorama.py | 2 +- .../pipeline_stable_diffusion_paradigms.py | 2 +- .../pipeline_stable_diffusion_pix2pix_zero.py | 2 +- .../pipeline_stable_diffusion_sag.py | 2 +- .../pipeline_stable_diffusion_upscale.py | 2 +- .../pipeline_stable_unclip.py | 2 +- .../pipeline_stable_unclip_img2img.py | 2 +- .../pipeline_stable_diffusion_xl.py | 4 +- .../pipeline_stable_diffusion_xl_img2img.py | 4 +- .../pipeline_stable_diffusion_xl_inpaint.py | 4 +- .../pipeline_stable_diffusion_adapter.py | 
2 +- .../pipeline_stable_diffusion_xl_adapter.py | 4 +- .../pipeline_text_to_video_synth.py | 2 +- .../pipeline_text_to_video_synth_img2img.py | 2 +- .../unidiffuser/pipeline_unidiffuser.py | 2 +- .../versatile_diffusion/modeling_text_unet.py | 2 +- src/diffusers/utils/peft_utils.py | 18 +++-- tests/lora/test_lora_layers_peft.py | 78 ++++++++++++++++++- 40 files changed, 133 insertions(+), 53 deletions(-) diff --git a/src/diffusers/models/unet_2d_condition.py b/src/diffusers/models/unet_2d_condition.py index 1a242ff165..6a59e5ea5c 100644 --- a/src/diffusers/models/unet_2d_condition.py +++ b/src/diffusers/models/unet_2d_condition.py @@ -1153,7 +1153,7 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin) if USE_PEFT_BACKEND: # remove `lora_scale` from each PEFT layer - unscale_lora_layers(self) + unscale_lora_layers(self, lora_scale) if not return_dict: return (sample,) diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py index f2e3c457bc..3c24db1fdc 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py @@ -442,7 +442,7 @@ class AltDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py index 1e10c9b04d..d9acf9daf2 100644 --- a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +++ b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py @@ -441,7 +441,7 @@ class AltDiffusionImg2ImgPipeline( 
if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py index f52b222ee1..41df612588 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py @@ -424,7 +424,7 @@ class StableDiffusionControlNetPipeline( if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py index edeadb1189..83eafc1040 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py @@ -448,7 +448,7 @@ class StableDiffusionControlNetImg2ImgPipeline( if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py index d25809a2e7..3c0d1943a3 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py @@ -575,7 +575,7 @@ class StableDiffusionControlNetInpaintPipeline( if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the 
original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py index cf51fbe571..2b66bfd1c7 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py @@ -476,12 +476,12 @@ class StableDiffusionXLControlNetInpaintPipeline( if self.text_encoder is not None: if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) if self.text_encoder_2 is not None: if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder_2) + unscale_lora_layers(self.text_encoder_2, lora_scale) return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py index 5957366586..786c208e16 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py @@ -444,12 +444,12 @@ class StableDiffusionXLControlNetPipeline( if self.text_encoder is not None: if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) if self.text_encoder_2 is not None: if isinstance(self, StableDiffusionXLLoraLoaderMixin) and 
USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder_2) + unscale_lora_layers(self.text_encoder_2, lora_scale) return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py index 033544e893..5f9abb444f 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py @@ -488,12 +488,12 @@ class StableDiffusionXLControlNetImg2ImgPipeline( if self.text_encoder is not None: if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) if self.text_encoder_2 is not None: if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder_2) + unscale_lora_layers(self.text_encoder_2, lora_scale) return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py index d45e35d5cb..16024bb544 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py @@ -438,7 +438,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return 
prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index a9d28144e5..3b86da9ad5 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -434,7 +434,7 @@ class StableDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py index 153efae876..f897b51941 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py @@ -469,7 +469,7 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, TextualInversion if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py index d73cf769e3..2acdc1c529 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py @@ -343,7 +343,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, 
TextualInversionLoader if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py index 451ef690a7..9a9acba214 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py @@ -614,7 +614,7 @@ class StableDiffusionDiffEditPipeline(DiffusionPipeline, TextualInversionLoaderM if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py index ce7faaed2a..90c3885168 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py @@ -411,7 +411,7 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline): if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py index 67f3fe0e94..eef5fbef58 100644 --- 
a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py @@ -436,7 +436,7 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline): if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 2532c15696..a6c25987a7 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -435,7 +435,7 @@ class StableDiffusionImg2ImgPipeline( if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index c6361c6166..62f1186eb0 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -505,7 +505,7 @@ class StableDiffusionInpaintPipeline( if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git 
a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py index 513c660c30..364c52d738 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py @@ -427,7 +427,7 @@ class StableDiffusionInpaintPipelineLegacy( if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py index e0bb9b6e0b..69ba8796b0 100755 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py @@ -341,7 +341,7 @@ class StableDiffusionKDiffusionPipeline(DiffusionPipeline, TextualInversionLoade if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py index 2e514a5510..c33956fe2f 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py @@ -405,7 +405,7 @@ class StableDiffusionLDM3DPipeline( if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the 
LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py index 6c78d190d9..e006c808b4 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py @@ -374,7 +374,7 @@ class StableDiffusionModelEditingPipeline(DiffusionPipeline, TextualInversionLoa if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py index bac1f83fb3..c479a33bfb 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py @@ -358,7 +358,7 @@ class StableDiffusionPanoramaPipeline(DiffusionPipeline, TextualInversionLoaderM if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py index 161f656fee..3f14971902 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +++ 
b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py @@ -389,7 +389,7 @@ class StableDiffusionParadigmsPipeline( if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py index 6d4286a046..4877041399 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py @@ -579,7 +579,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline): if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py index 6a78d4da45..fc2b827ccc 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py @@ -381,7 +381,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, TextualInversionLoaderMixin) if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py 
b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py index f3d92119b8..00ed46ffc6 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py @@ -372,7 +372,7 @@ class StableDiffusionUpscalePipeline( if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py index 3bce80fdb5..6539a4c629 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py @@ -479,7 +479,7 @@ class StableUnCLIPPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py index a17a674b70..4441e643e2 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py @@ -433,7 +433,7 @@ class StableUnCLIPImg2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, 
negative_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py index 2658b58de2..3722782b1f 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py @@ -436,12 +436,12 @@ class StableDiffusionXLPipeline( if self.text_encoder is not None: if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) if self.text_encoder_2 is not None: if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder_2) + unscale_lora_layers(self.text_encoder_2, lora_scale) return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index 75eb02a486..825c74ce07 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -440,12 +440,12 @@ class StableDiffusionXLImg2ImgPipeline( if self.text_encoder is not None: if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) if self.text_encoder_2 is not None: if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - 
unscale_lora_layers(self.text_encoder_2) + unscale_lora_layers(self.text_encoder_2, lora_scale) return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py index 4af25afbeb..535cc72683 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py @@ -590,12 +590,12 @@ class StableDiffusionXLInpaintPipeline( if self.text_encoder is not None: if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) if self.text_encoder_2 is not None: if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder_2) + unscale_lora_layers(self.text_encoder_2, lora_scale) return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py index dca9e5fc3d..b0f20199b4 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py @@ -429,7 +429,7 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline): if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git 
a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py index b606b9b50c..4e8f6a9d83 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py @@ -450,12 +450,12 @@ class StableDiffusionXLAdapterPipeline( if self.text_encoder is not None: if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) if self.text_encoder_2 is not None: if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder_2) + unscale_lora_layers(self.text_encoder_2, lora_scale) return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py index 83c3159694..6ca2e48414 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py @@ -361,7 +361,7 @@ class TextToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lora if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py index 
f5ac19c29d..2f128aa448 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py @@ -423,7 +423,7 @@ class VideoToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py index 0d5880ac0d..4f3e003de0 100644 --- a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +++ b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py @@ -556,7 +556,7 @@ class UniDiffuserPipeline(DiffusionPipeline): if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND: # Retrieve the original scale by scaling back the LoRA layers - unscale_lora_layers(self.text_encoder) + unscale_lora_layers(self.text_encoder, lora_scale) return prompt_embeds, negative_prompt_embeds diff --git a/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py b/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py index 63f6e7d638..4462963a8e 100644 --- a/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +++ b/src/diffusers/pipelines/versatile_diffusion/modeling_text_unet.py @@ -1371,7 +1371,7 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin): if USE_PEFT_BACKEND: # remove `lora_scale` from each PEFT layer - unscale_lora_layers(self) + unscale_lora_layers(self, lora_scale) if not return_dict: return (sample,) diff --git a/src/diffusers/utils/peft_utils.py b/src/diffusers/utils/peft_utils.py index efc977518b..940ad7fa14 100644 --- a/src/diffusers/utils/peft_utils.py +++ b/src/diffusers/utils/peft_utils.py @@ -16,6 +16,7 @@ 
PEFT utilities: Utilities related to peft library """ import collections import importlib +from typing import Optional from packaging import version @@ -91,21 +92,28 @@ def scale_lora_layers(model, weight): module.scale_layer(weight) -def unscale_lora_layers(model): +def unscale_lora_layers(model, weight: Optional[float] = None): """ Removes the previously passed weight given to the LoRA layers of the model. Args: model (`torch.nn.Module`): The model to scale. - weight (`float`): - The weight to be given to the LoRA layers. + weight (`float`, *optional*): + The weight that was previously applied to the LoRA layers and should now be removed. If no weight is + passed, the LoRA layers are left untouched. If 0.0 is passed, the scale of each active adapter is reset + to 1.0. """ from peft.tuners.tuners_utils import BaseTunerLayer for module in model.modules(): if isinstance(module, BaseTunerLayer): - module.unscale_layer() + if weight is not None and weight != 0: + module.unscale_layer(weight) + elif weight is not None and weight == 0: + for adapter_name in module.active_adapters: + # if weight == 0 unscale should re-set the scale to the original value. 
+ module.set_scale(adapter_name, 1.0) def get_peft_kwargs(rank_dict, network_alpha_dict, peft_state_dict, is_unet=True): @@ -184,7 +192,7 @@ def set_weights_and_activate_adapters(model, adapter_names, weights): module.set_adapter(adapter_name) else: module.active_adapter = adapter_name - module.scale_layer(weight) + module.set_scale(adapter_name, weight) # set multiple active adapters for module in model.modules(): diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py index 198ff53340..0f61218e4f 100644 --- a/tests/lora/test_lora_layers_peft.py +++ b/tests/lora/test_lora_layers_peft.py @@ -775,6 +775,79 @@ class PeftLoraLoaderMixinTests: "output with no lora and output with lora disabled should give same results", ) + def test_simple_inference_with_text_unet_multi_adapter_weighted(self): + """ + Tests a simple inference with lora attached to text encoder and unet, attaches + multiple adapters and set them + """ + components, _, text_lora_config, unet_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(self.torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0)).images + + pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") + pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") + + pipe.unet.add_adapter(unet_lora_config, "adapter-1") + pipe.unet.add_adapter(unet_lora_config, "adapter-2") + + self.assertTrue(self.check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + self.assertTrue(self.check_if_lora_correctly_set(pipe.unet), "Lora not correctly set in Unet") + + if self.has_two_text_encoders: + pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") + pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") + self.assertTrue( + self.check_if_lora_correctly_set(pipe.text_encoder_2), "Lora 
not correctly set in text encoder 2" + ) + + pipe.set_adapters("adapter-1") + + output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0)).images + + pipe.set_adapters("adapter-2") + output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0)).images + + pipe.set_adapters(["adapter-1", "adapter-2"]) + + output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0)).images + + # Fuse and unfuse should lead to the same results + self.assertFalse( + np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3), + "Adapter 1 and 2 should give different results", + ) + + self.assertFalse( + np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3), + "Adapter 1 and mixed adapters should give different results", + ) + + self.assertFalse( + np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3), + "Adapter 2 and mixed adapters should give different results", + ) + + pipe.set_adapters(["adapter-1", "adapter-2"], [0.5, 0.6]) + output_adapter_mixed_weighted = pipe(**inputs, generator=torch.manual_seed(0)).images + + self.assertFalse( + np.allclose(output_adapter_mixed_weighted, output_adapter_mixed, atol=1e-3, rtol=1e-3), + "Weighted adapter and mixed adapter should give different results", + ) + + pipe.disable_lora() + + output_disabled = pipe(**inputs, generator=torch.manual_seed(0)).images + + self.assertTrue( + np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3), + "output with no lora and output with lora disabled should give same results", + ) + def test_lora_fuse_nan(self): components, _, text_lora_config, unet_lora_config = self.get_dummy_components() pipe = self.pipeline_class(**components) @@ -1073,7 +1146,6 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): expected_slice_scale = np.array([0.538, 0.539, 0.540, 0.540, 0.542, 0.539, 0.538, 0.541, 0.539]) predicted_slice = images[0, -3:, -3:, -1].flatten() - # import pdb; pdb.set_trace() 
self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel") @@ -1106,7 +1178,7 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): output_type="np", ).images predicted_slice = images[0, -3:, -3:, -1].flatten() - expected_slice_scale = np.array([0.5977, 0.5985, 0.6039, 0.5976, 0.6025, 0.6036, 0.5946, 0.5979, 0.5998]) + expected_slice_scale = np.array([0.5888, 0.5897, 0.5946, 0.5888, 0.5935, 0.5946, 0.5857, 0.5891, 0.5909]) self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3)) # Lora disabled @@ -1120,7 +1192,7 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase): output_type="np", ).images predicted_slice = images[0, -3:, -3:, -1].flatten() - expected_slice_scale = np.array([0.54625, 0.5473, 0.5495, 0.5465, 0.5476, 0.5461, 0.5452, 0.5485, 0.5493]) + expected_slice_scale = np.array([0.5456, 0.5466, 0.5487, 0.5458, 0.5469, 0.5454, 0.5446, 0.5479, 0.5487]) self.assertTrue(np.allclose(expected_slice_scale, predicted_slice, atol=1e-3, rtol=1e-3))