From 76810eca2bbc5ce1e47997e7d9ff690a003fa70f Mon Sep 17 00:00:00 2001 From: Steven Liu <59462357+stevhliu@users.noreply.github.com> Date: Tue, 23 Sep 2025 10:29:16 -0700 Subject: [PATCH 1/6] [docs] Schedulers (#12246) * init * toctree * scheduler suggestions * toctree --- docs/source/en/_toctree.yml | 6 +- docs/source/en/using-diffusers/models.md | 120 ------ .../en/using-diffusers/scheduler_features.md | 235 ----------- docs/source/en/using-diffusers/schedulers.md | 397 +++++++++++------- 4 files changed, 236 insertions(+), 522 deletions(-) delete mode 100644 docs/source/en/using-diffusers/models.md delete mode 100644 docs/source/en/using-diffusers/scheduler_features.md diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 14dbfe3ea1..856874d519 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -23,11 +23,7 @@ - local: using-diffusers/reusing_seeds title: Reproducibility - local: using-diffusers/schedulers - title: Load schedulers and models - - local: using-diffusers/models - title: Models - - local: using-diffusers/scheduler_features - title: Scheduler features + title: Schedulers - local: using-diffusers/other-formats title: Model files and layouts - local: using-diffusers/push_to_hub diff --git a/docs/source/en/using-diffusers/models.md b/docs/source/en/using-diffusers/models.md deleted file mode 100644 index 22c78d490a..0000000000 --- a/docs/source/en/using-diffusers/models.md +++ /dev/null @@ -1,120 +0,0 @@ - - -[[open-in-colab]] - -# Models - -A diffusion model relies on a few individual models working together to generate an output. These models are responsible for denoising, encoding inputs, and decoding latents into the actual outputs. - -This guide will show you how to load models. - -## Loading a model - -All models are loaded with the [`~ModelMixin.from_pretrained`] method, which downloads and caches the latest model version. If the latest files are available in the local cache, [`~ModelMixin.from_pretrained`] reuses files in the cache. - -Pass the `subfolder` argument to [`~ModelMixin.from_pretrained`] to specify where to load the model weights from. Omit the `subfolder` argument if the repository doesn't have a subfolder structure or if you're loading a standalone model. - -```py -from diffusers import QwenImageTransformer2DModel - -model = QwenImageTransformer2DModel.from_pretrained("Qwen/Qwen-Image", subfolder="transformer") -``` - -## AutoModel - -[`AutoModel`] detects the model class from a `model_index.json` file or a model's `config.json` file. It fetches the correct model class from these files and delegates the actual loading to the model class. [`AutoModel`] is useful for automatic model type detection without needing to know the exact model class beforehand. - -```py -from diffusers import AutoModel - -model = AutoModel.from_pretrained( - "Qwen/Qwen-Image", subfolder="transformer" -) -``` - -## Model data types - -Use the `torch_dtype` argument in [`~ModelMixin.from_pretrained`] to load a model with a specific data type. This allows you to load a model in a lower precision to reduce memory usage. - -```py -import torch -from diffusers import QwenImageTransformer2DModel - -model = QwenImageTransformer2DModel.from_pretrained( - "Qwen/Qwen-Image", - subfolder="transformer", - torch_dtype=torch.bfloat16 -) -``` - -[nn.Module.to](https://docs.pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module.to) can also convert to a specific data type on the fly. 
However, it converts *all* weights to the requested data type unlike `torch_dtype` which respects `_keep_in_fp32_modules`. This argument preserves layers in `torch.float32` for numerical stability and best generation quality (see example [_keep_in_fp32_modules](https://github.com/huggingface/diffusers/blob/f864a9a352fa4a220d860bfdd1782e3e5af96382/src/diffusers/models/transformers/transformer_wan.py#L374)) - -```py -from diffusers import QwenImageTransformer2DModel - -model = QwenImageTransformer2DModel.from_pretrained( - "Qwen/Qwen-Image", subfolder="transformer" -) -model = model.to(dtype=torch.float16) -``` - -## Device placement - -Use the `device_map` argument in [`~ModelMixin.from_pretrained`] to place a model on an accelerator like a GPU. It is especially helpful where there are multiple GPUs. - -Diffusers currently provides three options to `device_map` for individual models, `"cuda"`, `"balanced"` and `"auto"`. Refer to the table below to compare the three placement strategies. - -| parameter | description | -|---|---| -| `"cuda"` | places pipeline on a supported accelerator (CUDA) | -| `"balanced"` | evenly distributes pipeline on all GPUs | -| `"auto"` | distribute model from fastest device first to slowest | - -Use the `max_memory` argument in [`~ModelMixin.from_pretrained`] to allocate a maximum amount of memory to use on each device. By default, Diffusers uses the maximum amount available. - -```py -import torch -from diffusers import QwenImagePipeline - -max_memory = {0: "16GB", 1: "16GB"} -pipeline = QwenImagePipeline.from_pretrained( - "Qwen/Qwen-Image", - torch_dtype=torch.bfloat16, - device_map="cuda", - max_memory=max_memory -) -``` - -The `hf_device_map` attribute allows you to access and view the `device_map`. - -```py -print(transformer.hf_device_map) -# {'': device(type='cuda')} -``` - -## Saving models - -Save a model with the [`~ModelMixin.save_pretrained`] method. - -```py -from diffusers import QwenImageTransformer2DModel - -model = QwenImageTransformer2DModel.from_pretrained("Qwen/Qwen-Image", subfolder="transformer") -model.save_pretrained("./local/model") -``` - -For large models, it is helpful to use `max_shard_size` to save a model as multiple shards. A shard can be loaded faster and save memory (refer to the [parallel loading](./loading#parallel-loading) docs for more details), especially if there is more than one GPU. - -```py -model.save_pretrained("./local/model", max_shard_size="5GB") -``` diff --git a/docs/source/en/using-diffusers/scheduler_features.md b/docs/source/en/using-diffusers/scheduler_features.md deleted file mode 100644 index f7977d53d5..0000000000 --- a/docs/source/en/using-diffusers/scheduler_features.md +++ /dev/null @@ -1,235 +0,0 @@ - - -# Scheduler features - -The scheduler is an important component of any diffusion model because it controls the entire denoising (or sampling) process. There are many types of schedulers, some are optimized for speed and some for quality. With Diffusers, you can modify the scheduler configuration to use custom noise schedules, sigmas, and rescale the noise schedule. Changing these parameters can have profound effects on inference quality and speed. - -This guide will demonstrate how to use these features to improve inference quality. - -> [!TIP] -> Diffusers currently only supports the `timesteps` and `sigmas` parameters for a select list of schedulers and pipelines. 
Feel free to open a [feature request](https://github.com/huggingface/diffusers/issues/new/choose) if you want to extend these parameters to a scheduler and pipeline that does not currently support it! - -## Timestep schedules - -The timestep or noise schedule determines the amount of noise at each sampling step. The scheduler uses this to generate an image with the corresponding amount of noise at each step. The timestep schedule is generated from the scheduler's default configuration, but you can customize the scheduler to use new and optimized sampling schedules that aren't in Diffusers yet. - -For example, [Align Your Steps (AYS)](https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/) is a method for optimizing a sampling schedule to generate a high-quality image in as little as 10 steps. The optimal [10-step schedule](https://github.com/huggingface/diffusers/blob/a7bf77fc284810483f1e60afe34d1d27ad91ce2e/src/diffusers/schedulers/scheduling_utils.py#L51) for Stable Diffusion XL is: - -```py -from diffusers.schedulers import AysSchedules - -sampling_schedule = AysSchedules["StableDiffusionXLTimesteps"] -print(sampling_schedule) -"[999, 845, 730, 587, 443, 310, 193, 116, 53, 13]" -``` - -You can use the AYS sampling schedule in a pipeline by passing it to the `timesteps` parameter. - -```py -pipeline = StableDiffusionXLPipeline.from_pretrained( - "SG161222/RealVisXL_V4.0", - torch_dtype=torch.float16, - variant="fp16", -).to("cuda") -pipeline.scheduler = DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config, algorithm_type="sde-dpmsolver++") - -prompt = "A cinematic shot of a cute little rabbit wearing a jacket and doing a thumbs up" -generator = torch.Generator(device="cpu").manual_seed(2487854446) -image = pipeline( - prompt=prompt, - negative_prompt="", - generator=generator, - timesteps=sampling_schedule, -).images[0] -``` - -
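AYS schedules are model-specific, so rather than hard-coding a key you can first list the entries bundled with Diffusers (a small sketch; `AysSchedules` is a plain dictionary):

```py
from diffusers.schedulers import AysSchedules

# List which model families ship with a precomputed AYS schedule.
print(AysSchedules.keys())
```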
-
- -
[figures: "AYS timestep schedule 10 steps", "Linearly-spaced timestep schedule 10 steps", "Linearly-spaced timestep schedule 25 steps"]
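For reference, the linearly-spaced baselines in the comparison above can be reproduced by dropping the custom `timesteps` argument, assuming the scheduler's default schedule at the same step count matches the linearly-spaced one shown:

```py
# Default schedule at the same step count, for comparison with the AYS result.
image_default = pipeline(
    prompt=prompt,
    negative_prompt="",
    generator=torch.Generator(device="cpu").manual_seed(2487854446),
    num_inference_steps=10,
).images[0]
```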
-
-
- -## Timestep spacing - -The way sample steps are selected in the schedule can affect the quality of the generated image, especially with respect to [rescaling the noise schedule](#rescale-noise-schedule), which can enable a model to generate much brighter or darker images. Diffusers provides three timestep spacing methods: - -- `leading` creates evenly spaced steps -- `linspace` includes the first and last steps and evenly selects the remaining intermediate steps -- `trailing` only includes the last step and evenly selects the remaining intermediate steps starting from the end - -It is recommended to use the `trailing` spacing method because it generates higher quality images with more details when there are fewer sample steps. But the difference in quality is not as obvious for more standard sample step values. - -```py -import torch -from diffusers import StableDiffusionXLPipeline, DPMSolverMultistepScheduler - -pipeline = StableDiffusionXLPipeline.from_pretrained( - "SG161222/RealVisXL_V4.0", - torch_dtype=torch.float16, - variant="fp16", -).to("cuda") -pipeline.scheduler = DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config, timestep_spacing="trailing") - -prompt = "A cinematic shot of a cute little black cat sitting on a pumpkin at night" -generator = torch.Generator(device="cpu").manual_seed(2487854446) -image = pipeline( - prompt=prompt, - negative_prompt="", - generator=generator, - num_inference_steps=5, -).images[0] -image -``` - -
-
- -
[figures: "trailing spacing after 5 steps", "leading spacing after 5 steps"]
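The `leading` comparison image comes from switching the spacing strategy on the same pipeline, as in this minimal sketch:

```py
# Re-run the example with "leading" spacing to reproduce the second image.
pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
    pipeline.scheduler.config, timestep_spacing="leading"
)
image = pipeline(
    prompt=prompt,
    negative_prompt="",
    generator=torch.Generator(device="cpu").manual_seed(2487854446),
    num_inference_steps=5,
).images[0]
```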
-
-
- -## Sigmas - -The `sigmas` parameter is the amount of noise added at each timestep according to the timestep schedule. Like the `timesteps` parameter, you can customize the `sigmas` parameter to control how much noise is added at each step. When you use a custom `sigmas` value, the `timesteps` are calculated from the custom `sigmas` value and the default scheduler configuration is ignored. - -For example, you can manually pass the [sigmas](https://github.com/huggingface/diffusers/blob/6529ee67ec02fcf58d2fd9242164ea002b351d75/src/diffusers/schedulers/scheduling_utils.py#L55) for something like the 10-step AYS schedule from before to the pipeline. - -```py -import torch - -from diffusers import DiffusionPipeline, EulerDiscreteScheduler - -model_id = "stabilityai/stable-diffusion-xl-base-1.0" -pipeline = DiffusionPipeline.from_pretrained( - "stabilityai/stable-diffusion-xl-base-1.0", - torch_dtype=torch.float16, - variant="fp16", -).to("cuda") -pipeline.scheduler = EulerDiscreteScheduler.from_config(pipeline.scheduler.config) - -sigmas = [14.615, 6.315, 3.771, 2.181, 1.342, 0.862, 0.555, 0.380, 0.234, 0.113, 0.0] -prompt = "anthropomorphic capybara wearing a suit and working with a computer" -generator = torch.Generator(device='cuda').manual_seed(123) -image = pipeline( - prompt=prompt, - num_inference_steps=10, - sigmas=sigmas, - generator=generator -).images[0] -``` - -When you take a look at the scheduler's `timesteps` parameter, you'll see that it is the same as the AYS timestep schedule because the `timestep` schedule is calculated from the `sigmas`. - -```py -print(f" timesteps: {pipe.scheduler.timesteps}") -"timesteps: tensor([999., 845., 730., 587., 443., 310., 193., 116., 53., 13.], device='cuda:0')" -``` - -### Karras sigmas - -> [!TIP] -> Refer to the scheduler API [overview](../api/schedulers/overview) for a list of schedulers that support Karras sigmas. -> -> Karras sigmas should not be used for models that weren't trained with them. For example, the base Stable Diffusion XL model shouldn't use Karras sigmas but the [DreamShaperXL](https://hf.co/Lykon/dreamshaper-xl-1-0) model can since they are trained with Karras sigmas. - -Karras scheduler's use the timestep schedule and sigmas from the [Elucidating the Design Space of Diffusion-Based Generative Models](https://hf.co/papers/2206.00364) paper. This scheduler variant applies a smaller amount of noise per step as it approaches the end of the sampling process compared to other schedulers, and can increase the level of details in the generated image. - -Enable Karras sigmas by setting `use_karras_sigmas=True` in the scheduler. - -```py -import torch -from diffusers import StableDiffusionXLPipeline, DPMSolverMultistepScheduler - -pipeline = StableDiffusionXLPipeline.from_pretrained( - "SG161222/RealVisXL_V4.0", - torch_dtype=torch.float16, - variant="fp16", -).to("cuda") -pipeline.scheduler = DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config, algorithm_type="sde-dpmsolver++", use_karras_sigmas=True) - -prompt = "A cinematic shot of a cute little rabbit wearing a jacket and doing a thumbs up" -generator = torch.Generator(device="cpu").manual_seed(2487854446) -image = pipeline( - prompt=prompt, - negative_prompt="", - generator=generator, -).images[0] -``` - -
-
- -
[figures: "Karras sigmas enabled", "Karras sigmas disabled"]
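Since Karras sigmas only suit models trained with them, a checkpoint like [DreamShaperXL](https://hf.co/Lykon/dreamshaper-xl-1-0) mentioned above is the kind of model to enable them for. A sketch, assuming the checkpoint loads with `StableDiffusionXLPipeline`:

```py
import torch
from diffusers import StableDiffusionXLPipeline, DPMSolverMultistepScheduler

# DreamShaperXL was trained with Karras sigmas, so the flag is appropriate here.
pipeline = StableDiffusionXLPipeline.from_pretrained(
    "Lykon/dreamshaper-xl-1-0", torch_dtype=torch.float16
).to("cuda")
pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
    pipeline.scheduler.config, use_karras_sigmas=True
)
```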
-
-
- -## Rescale noise schedule - -In the [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://hf.co/papers/2305.08891) paper, the authors discovered that common noise schedules allowed some signal to leak into the last timestep. This signal leakage at inference can cause models to only generate images with medium brightness. By enforcing a zero signal-to-noise ratio (SNR) for the timstep schedule and sampling from the last timestep, the model can be improved to generate very bright or dark images. - -> [!TIP] -> For inference, you need a model that has been trained with *v_prediction*. To train your own model with *v_prediction*, add the following flag to the [train_text_to_image.py](https://github.com/huggingface/diffusers/blob/main/examples/text_to_image/train_text_to_image.py) or [train_text_to_image_lora.py](https://github.com/huggingface/diffusers/blob/main/examples/text_to_image/train_text_to_image_lora.py) scripts. -> -> ```bash -> --prediction_type="v_prediction" -> ``` - -For example, load the [ptx0/pseudo-journey-v2](https://hf.co/ptx0/pseudo-journey-v2) checkpoint which was trained with `v_prediction` and the [`DDIMScheduler`]. Configure the following parameters in the [`DDIMScheduler`]: - -* `rescale_betas_zero_snr=True` to rescale the noise schedule to zero SNR -* `timestep_spacing="trailing"` to start sampling from the last timestep - -Set `guidance_rescale` in the pipeline to prevent over-exposure. A lower value increases brightness but some of the details may appear washed out. - -```py -from diffusers import DiffusionPipeline, DDIMScheduler - -pipeline = DiffusionPipeline.from_pretrained("ptx0/pseudo-journey-v2", use_safetensors=True) - -pipeline.scheduler = DDIMScheduler.from_config( - pipeline.scheduler.config, rescale_betas_zero_snr=True, timestep_spacing="trailing" -) -pipeline.to("cuda") -prompt = "cinematic photo of a snowy mountain at night with the northern lights aurora borealis overhead, 35mm photograph, film, professional, 4k, highly detailed" -generator = torch.Generator(device="cpu").manual_seed(23) -image = pipeline(prompt, guidance_rescale=0.7, generator=generator).images[0] -image -``` - -
-
- -
[figures: "default Stable Diffusion v2-1 image", "image with zero SNR and trailing timestep spacing enabled"]
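Because zero-SNR rescaling assumes a *v_prediction* model, a quick sanity check is the scheduler's configured prediction type; `prediction_type` is a standard field in scheduler configs:

```py
# For a v_prediction checkpoint like ptx0/pseudo-journey-v2, this prints "v_prediction".
print(pipeline.scheduler.config.prediction_type)
```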
-
-
diff --git a/docs/source/en/using-diffusers/schedulers.md b/docs/source/en/using-diffusers/schedulers.md index 6d928f8037..0e236e4e3e 100644 --- a/docs/source/en/using-diffusers/schedulers.md +++ b/docs/source/en/using-diffusers/schedulers.md @@ -10,200 +10,273 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o specific language governing permissions and limitations under the License. --> -# Load schedulers and models - [[open-in-colab]] -Diffusion pipelines are a collection of interchangeable schedulers and models that can be mixed and matched to tailor a pipeline to a specific use case. The scheduler encapsulates the entire denoising process such as the number of denoising steps and the algorithm for finding the denoised sample. A scheduler is not parameterized or trained so they don't take very much memory. The model is usually only concerned with the forward pass of going from a noisy input to a less noisy sample. +# Schedulers -This guide will show you how to load schedulers and models to customize a pipeline. You'll use the [stable-diffusion-v1-5/stable-diffusion-v1-5](https://hf.co/stable-diffusion-v1-5/stable-diffusion-v1-5) checkpoint throughout this guide, so let's load it first. +A scheduler is an algorithm that provides instructions to the denoising process such as how much noise to remove at a certain step. It takes the model prediction from step *t* and applies an update for how to compute the next sample at step *t-1*. Different schedulers produce different results; some are faster while others are more accurate. + +Diffusers supports many schedulers and allows you to modify their timestep schedules, timestep spacing, and more, to generate high-quality images in fewer steps. + +This guide will show you how to load and customize schedulers. + +## Loading schedulers + +Schedulers don't have any parameters and are defined in a configuration file. Access the `.scheduler` attribute of a pipeline to view the configuration. ```py import torch from diffusers import DiffusionPipeline pipeline = DiffusionPipeline.from_pretrained( - "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16, use_safetensors=True -).to("cuda") -``` - -You can see what scheduler this pipeline uses with the `pipeline.scheduler` attribute. - -```py + "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, device_map="cuda" +) pipeline.scheduler -PNDMScheduler { - "_class_name": "PNDMScheduler", - "_diffusers_version": "0.21.4", - "beta_end": 0.012, - "beta_schedule": "scaled_linear", - "beta_start": 0.00085, - "clip_sample": false, - "num_train_timesteps": 1000, - "set_alpha_to_one": false, - "skip_prk_steps": true, - "steps_offset": 1, - "timestep_spacing": "leading", - "trained_betas": null -} ``` -## Load a scheduler - -Schedulers are defined by a configuration file that can be used by a variety of schedulers. Load a scheduler with the [`SchedulerMixin.from_pretrained`] method, and specify the `subfolder` parameter to load the configuration file into the correct subfolder of the pipeline repository. - -For example, to load the [`DDIMScheduler`]: - -```py -from diffusers import DDIMScheduler, DiffusionPipeline - -ddim = DDIMScheduler.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="scheduler") -``` - -Then you can pass the newly loaded scheduler to the pipeline. 
- -```python -pipeline = DiffusionPipeline.from_pretrained( - "stable-diffusion-v1-5/stable-diffusion-v1-5", scheduler=ddim, torch_dtype=torch.float16, use_safetensors=True -).to("cuda") -``` - -## Compare schedulers - -Schedulers have their own unique strengths and weaknesses, making it difficult to quantitatively compare which scheduler works best for a pipeline. You typically have to make a trade-off between denoising speed and denoising quality. We recommend trying out different schedulers to find one that works best for your use case. Call the `pipeline.scheduler.compatibles` attribute to see what schedulers are compatible with a pipeline. - -Let's compare the [`LMSDiscreteScheduler`], [`EulerDiscreteScheduler`], [`EulerAncestralDiscreteScheduler`], and the [`DPMSolverMultistepScheduler`] on the following prompt and seed. - -```py -import torch -from diffusers import DiffusionPipeline - -pipeline = DiffusionPipeline.from_pretrained( - "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16, use_safetensors=True -).to("cuda") - -prompt = "A photograph of an astronaut riding a horse on Mars, high resolution, high definition." -generator = torch.Generator(device="cuda").manual_seed(8) -``` - -To change the pipelines scheduler, use the [`~ConfigMixin.from_config`] method to load a different scheduler's `pipeline.scheduler.config` into the pipeline. - - - - -[`LMSDiscreteScheduler`] typically generates higher quality images than the default scheduler. - -```py -from diffusers import LMSDiscreteScheduler - -pipeline.scheduler = LMSDiscreteScheduler.from_config(pipeline.scheduler.config) -image = pipeline(prompt, generator=generator).images[0] -image -``` - - - - -[`EulerDiscreteScheduler`] can generate higher quality images in just 30 steps. - -```py -from diffusers import EulerDiscreteScheduler - -pipeline.scheduler = EulerDiscreteScheduler.from_config(pipeline.scheduler.config) -image = pipeline(prompt, generator=generator).images[0] -image -``` - - - - -[`EulerAncestralDiscreteScheduler`] can generate higher quality images in just 30 steps. - -```py -from diffusers import EulerAncestralDiscreteScheduler - -pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(pipeline.scheduler.config) -image = pipeline(prompt, generator=generator).images[0] -image -``` - - - - -[`DPMSolverMultistepScheduler`] provides a balance between speed and quality and can generate higher quality images in just 20 steps. +Load a different scheduler with [`~SchedulerMixin.from_pretrained`] and specify the `subfolder` argument to load the configuration file into the correct subfolder of the pipeline repository. Pass the new scheduler to the existing pipeline. ```py from diffusers import DPMSolverMultistepScheduler -pipeline.scheduler = DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config) -image = pipeline(prompt, generator=generator).images[0] +dpm = DPMSolverMultistepScheduler.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler" +) +pipeline = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", + scheduler=dpm, + torch_dtype=torch.float16, + device_map="cuda" +) +pipeline.scheduler +``` + +## Timestep schedules + +Timestep or noise schedule decides how noise is distributed over the denoising process. The schedule can be linear or more concentrated toward the beginning or end. 
It is a precomputed sequence of noise levels generated from the scheduler's default configuration, but it can be customized to use other schedules. + +> [!TIP] +> The `timesteps` argument is only supported for a select list of schedulers and pipelines. Feel free to open a feature request if you want to extend these parameters to a scheduler and pipeline that does not currently support it! + +The example below uses the [Align Your Steps (AYS)](https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/) schedule which can generate a high-quality image in 10 steps, significantly speeding up generation and reducing computation time. + +Import the schedule and pass it to the `timesteps` argument in the pipeline. + +```py +import torch +from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler +from diffusers.schedulers import AysSchedules + +sampling_schedule = AysSchedules["StableDiffusionXLTimesteps"] +print(sampling_schedule) +"[999, 845, 730, 587, 443, 310, 193, 116, 53, 13]" + +pipeline = DiffusionPipeline.from_pretrained( + "SG161222/RealVisXL_V4.0", + torch_dtype=torch.float16, + device_map="cuda" +) +pipeline.scheduler = DPMSolverMultistepScheduler.from_config( + pipeline.scheduler.config, algorithm_type="sde-dpmsolver++" +) + +prompt = "A cinematic shot of a cute little rabbit wearing a jacket and doing a thumbs up" +image = pipeline( + prompt=prompt, + negative_prompt="", + timesteps=sampling_schedule, +).images[0] +``` + +
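To confirm the custom schedule is actually used, inspect the scheduler's `timesteps` after running the pipeline. This check appeared in an earlier version of this guide; the printed values below are illustrative:

```py
# The scheduler derived its schedule from the custom AYS timesteps.
print(pipeline.scheduler.timesteps)
# tensor([999., 845., 730., 587., 443., 310., 193., 116., 53., 13.], device='cuda:0')
```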
+
+ +
[figures: "AYS timestep schedule 10 steps", "Linearly-spaced timestep schedule 10 steps", "Linearly-spaced timestep schedule 25 steps"]

### Rescaling schedules

Denoising should begin from pure noise, where the signal-to-noise ratio (SNR) is zero. However, some models don't actually start from pure noise, which makes it difficult to generate images at brightness extremes.

> [!TIP]
> Train your own model with `v_prediction` by adding the `--prediction_type="v_prediction"` flag to your training script. You can also [search](https://huggingface.co/search/full-text?q=v_prediction&type=model) for existing models trained with `v_prediction`.

Fixing this requires a model trained with `v_prediction`. For such a model, enable the following arguments in the scheduler.

- Set `rescale_betas_zero_snr=True` to rescale the noise schedule so the very last timestep has exactly zero SNR
- Set `timestep_spacing="trailing"` to force sampling from the last timestep with pure noise

```py
from diffusers import DiffusionPipeline, DDIMScheduler

pipeline = DiffusionPipeline.from_pretrained("ptx0/pseudo-journey-v2", device_map="cuda")

pipeline.scheduler = DDIMScheduler.from_config(
    pipeline.scheduler.config, rescale_betas_zero_snr=True, timestep_spacing="trailing"
)
```

Set `guidance_rescale` in the pipeline to avoid overexposed images. A lower value increases brightness, but some details may appear washed out.

```py
prompt = """
cinematic photo of a snowy mountain at night with the northern lights aurora borealis
overhead, 35mm photograph, film, professional, 4k, highly detailed
"""
image = pipeline(prompt, guidance_rescale=0.7).images[0]
```
+
+ +
[figures: "default Stable Diffusion v2-1 image", "image with zero SNR and trailing timestep spacing enabled"]

## Timestep spacing

Timestep spacing refers to the specific steps *t* to sample from the schedule. Diffusers provides three spacing types as shown below.

| spacing strategy | spacing calculation | example timesteps |
|---|---|---|
| `leading` | evenly spaced steps | `[900, 800, 700, ..., 100, 0]` |
| `linspace` | include first and last steps and evenly divide remaining intermediate steps | `[1000, 888.89, 777.78, ..., 111.11, 0]` |
| `trailing` | include last step and evenly divide remaining intermediate steps beginning from the end | `[999, 899, 799, 699, 599, 499, 399, 299, 199, 99]` |

Pass the spacing strategy to the `timestep_spacing` argument in the scheduler.

> [!TIP]
> The `trailing` strategy typically produces higher quality images with more detail when using fewer steps, but the difference in quality is not as obvious at more standard step values.

```py
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler

pipeline = DiffusionPipeline.from_pretrained(
    "SG161222/RealVisXL_V4.0",
    torch_dtype=torch.float16,
    device_map="cuda"
)
pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
    pipeline.scheduler.config, timestep_spacing="trailing"
)

prompt = "A cinematic shot of a cute little black cat sitting on a pumpkin at night"
image = pipeline(
    prompt=prompt,
    negative_prompt="",
    num_inference_steps=5,
).images[0]
```
-
-
- -
[figure diff: captions "LMSDiscreteScheduler", "EulerDiscreteScheduler", "EulerAncestralDiscreteScheduler", "DPMSolverMultistepScheduler" removed; captions "trailing spacing after 5 steps", "leading spacing after 5 steps" added]
-Most images look very similar and are comparable in quality. Again, it often comes down to your specific use case so a good approach is to run multiple different schedulers and compare the results. +## Sigmas -## Models +Sigmas is a measure of how noisy a sample is at a certain step as defined by the schedule. When using custom `sigmas`, the `timesteps` are calculated from these values instead of the default scheduler configuration. -Models are loaded from the [`ModelMixin.from_pretrained`] method, which downloads and caches the latest version of the model weights and configurations. If the latest files are available in the local cache, [`~ModelMixin.from_pretrained`] reuses files in the cache instead of re-downloading them. +> [!TIP] +> The `sigmas` argument is only supported for a select list of schedulers and pipelines. Feel free to open a feature request if you want to extend these parameters to a scheduler and pipeline that does not currently support it! -Models can be loaded from a subfolder with the `subfolder` argument. For example, the model weights for [stable-diffusion-v1-5/stable-diffusion-v1-5](https://hf.co/stable-diffusion-v1-5/stable-diffusion-v1-5) are stored in the [unet](https://hf.co/stable-diffusion-v1-5/stable-diffusion-v1-5/tree/main/unet) subfolder. - -```python -from diffusers import UNet2DConditionModel - -unet = UNet2DConditionModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="unet", use_safetensors=True) -``` - -They can also be directly loaded from a [repository](https://huggingface.co/google/ddpm-cifar10-32/tree/main). - -```python -from diffusers import UNet2DModel - -unet = UNet2DModel.from_pretrained("google/ddpm-cifar10-32", use_safetensors=True) -``` - -To load and save model variants, specify the `variant` argument in [`ModelMixin.from_pretrained`] and [`ModelMixin.save_pretrained`]. - -```python -from diffusers import UNet2DConditionModel - -unet = UNet2DConditionModel.from_pretrained( - "stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="unet", variant="non_ema", use_safetensors=True -) -unet.save_pretrained("./local-unet", variant="non_ema") -``` - -Use the `torch_dtype` argument in [`~ModelMixin.from_pretrained`] to specify the dtype to load a model in. +Pass the custom sigmas to the `sigmas` argument in the pipeline. The example below uses the [sigmas](https://github.com/huggingface/diffusers/blob/6529ee67ec02fcf58d2fd9242164ea002b351d75/src/diffusers/schedulers/scheduling_utils.py#L55) from the 10-step AYS schedule. ```py -from diffusers import AutoModel +import torch +from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler -unet = AutoModel.from_pretrained( - "stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", torch_dtype=torch.float16 +pipeline = DiffusionPipeline.from_pretrained( + "SG161222/RealVisXL_V4.0", + torch_dtype=torch.float16, + device_map="cuda" ) +pipeline.scheduler = DPMSolverMultistepScheduler.from_config( + pipeline.scheduler.config, algorithm_type="sde-dpmsolver++" +) + +sigmas = [14.615, 6.315, 3.771, 2.181, 1.342, 0.862, 0.555, 0.380, 0.234, 0.113, 0.0] +prompt = "A cinematic shot of a cute little rabbit wearing a jacket and doing a thumbs up" +image = pipeline( + prompt=prompt, + negative_prompt="", + sigmas=sigmas, +).images[0] ``` -You can also use the [torch.Tensor.to](https://docs.pytorch.org/docs/stable/generated/torch.Tensor.to.html) method to convert to the specified dtype on the fly. 
It converts *all* weights, unlike the `torch_dtype` argument, which respects `_keep_in_fp32_modules`. This is important for models whose layers must remain in fp32 for numerical stability and best generation quality (see example [here](https://github.com/huggingface/diffusers/blob/f864a9a352fa4a220d860bfdd1782e3e5af96382/src/diffusers/models/transformers/transformer_wan.py#L374)).

### Karras sigmas

[Karras sigmas](https://huggingface.co/papers/2206.00364) resample the noise schedule for more efficient sampling by clustering sigmas more densely in the middle of the sequence, where structure reconstruction is critical, while using fewer sigmas at the beginning and end where noise changes have less impact. This can increase the level of detail in a generated image.

Set `use_karras_sigmas=True` in the scheduler to enable it.

```py
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler

pipeline = DiffusionPipeline.from_pretrained(
    "SG161222/RealVisXL_V4.0",
    torch_dtype=torch.float16,
    device_map="cuda"
)
pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
    pipeline.scheduler.config,
    algorithm_type="sde-dpmsolver++",
    use_karras_sigmas=True,
)

prompt = "A cinematic shot of a cute little rabbit wearing a jacket and doing a thumbs up"
image = pipeline(
    prompt=prompt,
    negative_prompt="",
).images[0]
```
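To see the resampling effect on the schedule itself, you can inspect the scheduler's sigmas after setting a step count. This is a quick check; `set_timesteps` and the `sigmas` attribute are standard on schedulers like [`DPMSolverMultistepScheduler`]:

```py
# Karras sigmas cluster more densely in the middle of the sequence.
pipeline.scheduler.set_timesteps(num_inference_steps=10)
print(pipeline.scheduler.sigmas)
```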
+
+ +
[figures: "Karras sigmas enabled", "Karras sigmas disabled"]
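When deciding which scheduler to try next, the `compatibles` attribute (used in the previous version of this guide) lists schedulers that can be swapped in through `from_config`:

```py
# Schedulers that can reuse this pipeline's scheduler configuration.
print(pipeline.scheduler.compatibles)
```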
+
+
+ +Refer to the scheduler API [overview](../api/schedulers/overview) for a list of schedulers that support Karras sigmas. It should only be used for models trained with Karras sigmas. + +## Choosing a scheduler + +It's important to try different schedulers to find the best one for your use case. Here are a few recommendations to help you get started. + +- DPM++ 2M SDE Karras is generally a good all-purpose option. +- [`TCDScheduler`] works well for distilled models. +- [`FlowMatchEulerDiscreteScheduler`] and [`FlowMatchHeunDiscreteScheduler`] for FlowMatch models. +- [`EulerDiscreteScheduler`] or [`EulerAncestralDiscreteScheduler`] for generating anime style images. +- DPM++ 2M paired with [`LCMScheduler`] on SDXL for generating realistic images. + +## Resources + +- Read the [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) paper for more details about rescaling the noise schedule to enforce zero SNR. \ No newline at end of file From 80de641c1c7973dd83cdb9ebb0946affb28e00f1 Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Tue, 23 Sep 2025 19:31:42 +0200 Subject: [PATCH 2/6] Allow Automodel to support custom model code (#12353) * update * update --- src/diffusers/models/auto_model.py | 67 +++++++++++++------- src/diffusers/utils/dynamic_modules_utils.py | 5 ++ 2 files changed, 50 insertions(+), 22 deletions(-) diff --git a/src/diffusers/models/auto_model.py b/src/diffusers/models/auto_model.py index bfe386f1f6..ada0d54e54 100644 --- a/src/diffusers/models/auto_model.py +++ b/src/diffusers/models/auto_model.py @@ -19,6 +19,7 @@ from huggingface_hub.utils import validate_hf_hub_args from ..configuration_utils import ConfigMixin from ..utils import logging +from ..utils.dynamic_modules_utils import get_class_from_dynamic_module, resolve_trust_remote_code logger = logging.get_logger(__name__) @@ -114,6 +115,8 @@ class AutoModel(ConfigMixin): disable_mmap ('bool', *optional*, defaults to 'False'): Whether to disable mmap when loading a Safetensors model. This option can perform better when the model is on a network mount or hard drive, which may not handle the seeky-ness of mmap very well. + trust_remote_cocde (`bool`, *optional*, defaults to `False`): + Whether to trust remote code @@ -140,22 +143,22 @@ class AutoModel(ConfigMixin): You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. 
``` """ - cache_dir = kwargs.pop("cache_dir", None) - force_download = kwargs.pop("force_download", False) - proxies = kwargs.pop("proxies", None) - token = kwargs.pop("token", None) - local_files_only = kwargs.pop("local_files_only", False) - revision = kwargs.pop("revision", None) subfolder = kwargs.pop("subfolder", None) + trust_remote_code = kwargs.pop("trust_remote_code", False) - load_config_kwargs = { - "cache_dir": cache_dir, - "force_download": force_download, - "proxies": proxies, - "token": token, - "local_files_only": local_files_only, - "revision": revision, - } + hub_kwargs_names = [ + "cache_dir", + "force_download", + "local_files_only", + "proxies", + "resume_download", + "revision", + "token", + ] + hub_kwargs = {name: kwargs.pop(name, None) for name in hub_kwargs_names} + + # load_config_kwargs uses the same hub kwargs minus subfolder and resume_download + load_config_kwargs = {k: v for k, v in hub_kwargs.items() if k not in ["subfolder", "resume_download"]} library = None orig_class_name = None @@ -189,15 +192,35 @@ class AutoModel(ConfigMixin): else: raise ValueError(f"Couldn't find model associated with the config file at {pretrained_model_or_path}.") - from ..pipelines.pipeline_loading_utils import ALL_IMPORTABLE_CLASSES, get_class_obj_and_candidates + has_remote_code = "auto_map" in config and cls.__name__ in config["auto_map"] + trust_remote_code = resolve_trust_remote_code(trust_remote_code, pretrained_model_or_path, has_remote_code) + if not (has_remote_code and trust_remote_code): + raise ValueError( + "Selected model repository does not happear to have any custom code or does not have a valid `config.json` file." + ) - model_cls, _ = get_class_obj_and_candidates( - library_name=library, - class_name=orig_class_name, - importable_classes=ALL_IMPORTABLE_CLASSES, - pipelines=None, - is_pipeline_module=False, - ) + if has_remote_code and trust_remote_code: + class_ref = config["auto_map"][cls.__name__] + module_file, class_name = class_ref.split(".") + module_file = module_file + ".py" + model_cls = get_class_from_dynamic_module( + pretrained_model_or_path, + subfolder=subfolder, + module_file=module_file, + class_name=class_name, + **hub_kwargs, + **kwargs, + ) + else: + from ..pipelines.pipeline_loading_utils import ALL_IMPORTABLE_CLASSES, get_class_obj_and_candidates + + model_cls, _ = get_class_obj_and_candidates( + library_name=library, + class_name=orig_class_name, + importable_classes=ALL_IMPORTABLE_CLASSES, + pipelines=None, + is_pipeline_module=False, + ) if model_cls is None: raise ValueError(f"AutoModel can't find a model linked to {orig_class_name}.") diff --git a/src/diffusers/utils/dynamic_modules_utils.py b/src/diffusers/utils/dynamic_modules_utils.py index 674eb65773..de947a12e2 100644 --- a/src/diffusers/utils/dynamic_modules_utils.py +++ b/src/diffusers/utils/dynamic_modules_utils.py @@ -247,6 +247,7 @@ def find_pipeline_class(loaded_module): def get_cached_module_file( pretrained_model_name_or_path: Union[str, os.PathLike], module_file: str, + subfolder: Optional[str] = None, cache_dir: Optional[Union[str, os.PathLike]] = None, force_download: bool = False, proxies: Optional[Dict[str, str]] = None, @@ -353,6 +354,7 @@ def get_cached_module_file( resolved_module_file = hf_hub_download( pretrained_model_name_or_path, module_file, + subfolder=subfolder, cache_dir=cache_dir, force_download=force_download, proxies=proxies, @@ -410,6 +412,7 @@ def get_cached_module_file( get_cached_module_file( pretrained_model_name_or_path, f"{module_needed}.py", + 
subfolder=subfolder, cache_dir=cache_dir, force_download=force_download, proxies=proxies, @@ -424,6 +427,7 @@ def get_cached_module_file( def get_class_from_dynamic_module( pretrained_model_name_or_path: Union[str, os.PathLike], module_file: str, + subfolder: Optional[str] = None, class_name: Optional[str] = None, cache_dir: Optional[Union[str, os.PathLike]] = None, force_download: bool = False, @@ -497,6 +501,7 @@ def get_class_from_dynamic_module( final_module = get_cached_module_file( pretrained_model_name_or_path, module_file, + subfolder=subfolder, cache_dir=cache_dir, force_download=force_download, proxies=proxies, From a72bc0c4bb817d382a59b38bba8d9a71661f56cb Mon Sep 17 00:00:00 2001 From: Steven Liu <59462357+stevhliu@users.noreply.github.com> Date: Tue, 23 Sep 2025 10:59:46 -0700 Subject: [PATCH 3/6] [docs] Attention backends (#12320) * init * feedback * update * feedback * fixes --- docs/source/en/_toctree.yml | 2 + .../en/optimization/attention_backends.md | 106 ++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 docs/source/en/optimization/attention_backends.md diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 856874d519..4879a7bf04 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -64,6 +64,8 @@ title: Accelerate inference - local: optimization/cache title: Caching + - local: optimization/attention_backends + title: Attention backends - local: optimization/memory title: Reduce memory usage - local: optimization/speed-memory-optims diff --git a/docs/source/en/optimization/attention_backends.md b/docs/source/en/optimization/attention_backends.md new file mode 100644 index 0000000000..04c8b4ba92 --- /dev/null +++ b/docs/source/en/optimization/attention_backends.md @@ -0,0 +1,106 @@ + + +# Attention backends + +> [!TIP] +> The attention dispatcher is an experimental feature. Please open an issue if you have any feedback or encounter any problems. + +Diffusers provides several optimized attention algorithms that are more memory and computationally efficient through it's *attention dispatcher*. The dispatcher acts as a router for managing and switching between different attention implementations and provides a unified interface for interacting with them. + +Refer to the table below for an overview of the available attention families and to the [Available backends](#available-backends) section for a more complete list. + +| attention family | main feature | +|---|---| +| FlashAttention | minimizes memory reads/writes through tiling and recomputation | +| SageAttention | quantizes attention to int8 | +| PyTorch native | built-in PyTorch implementation using [scaled_dot_product_attention](./fp16#scaled-dot-product-attention) | +| xFormers | memory-efficient attention with support for various attention kernels | + +This guide will show you how to set and use the different attention backends. + +## set_attention_backend + +The [`~ModelMixin.set_attention_backend`] method iterates through all the modules in the model and sets the appropriate attention backend to use. The attention backend setting persists until [`~ModelMixin.reset_attention_backend`] is called. + +The example below demonstrates how to enable the `_flash_3_hub` implementation for FlashAttention-3 from the [kernel](https://github.com/huggingface/kernels) library, which allows you to instantly use optimized compute kernels from the Hub without requiring any setup. 
> [!TIP]
> FlashAttention-3 is not supported on non-Hopper architectures; on those GPUs, use FlashAttention with `set_attention_backend("flash")`.

```py
import torch
from diffusers import QwenImagePipeline

pipeline = QwenImagePipeline.from_pretrained(
    "Qwen/Qwen-Image", torch_dtype=torch.bfloat16, device_map="cuda"
)
pipeline.transformer.set_attention_backend("_flash_3_hub")

prompt = """
cinematic film still of a cat sipping a margarita in a pool in Palm Springs, California
highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain
"""
pipeline(prompt).images[0]
```

To restore the default attention backend, call [`~ModelMixin.reset_attention_backend`].

```py
pipeline.transformer.reset_attention_backend()
```

## attention_backend context manager

The [attention_backend](https://github.com/huggingface/diffusers/blob/5e181eddfe7e44c1444a2511b0d8e21d177850a0/src/diffusers/models/attention_dispatch.py#L225) context manager temporarily sets an attention backend for a model within the context. Outside the context, the default attention (PyTorch's native scaled dot product attention) is used. This is useful if you want to use different backends for different parts of a pipeline or test several backends.

```py
import torch
from diffusers import QwenImagePipeline
from diffusers.models.attention_dispatch import attention_backend

pipeline = QwenImagePipeline.from_pretrained(
    "Qwen/Qwen-Image", torch_dtype=torch.bfloat16, device_map="cuda"
)
prompt = """
cinematic film still of a cat sipping a margarita in a pool in Palm Springs, California
highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain
"""

with attention_backend("_flash_3_hub"):
    image = pipeline(prompt).images[0]
```

## Available backends

Refer to the table below for a complete list of available attention backends and their variants.
+ +| Backend Name | Family | Description | +|--------------|--------|-------------| +| `native` | [PyTorch native](https://docs.pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html#torch.nn.attention.SDPBackend) | Default backend using PyTorch's scaled_dot_product_attention | +| `flex` | [FlexAttention](https://docs.pytorch.org/docs/stable/nn.attention.flex_attention.html#module-torch.nn.attention.flex_attention) | PyTorch FlexAttention implementation | +| `_native_cudnn` | [PyTorch native](https://docs.pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html#torch.nn.attention.SDPBackend) | CuDNN-optimized attention | +| `_native_efficient` | [PyTorch native](https://docs.pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html#torch.nn.attention.SDPBackend) | Memory-efficient attention | +| `_native_flash` | [PyTorch native](https://docs.pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html#torch.nn.attention.SDPBackend) | PyTorch's FlashAttention | +| `_native_math` | [PyTorch native](https://docs.pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html#torch.nn.attention.SDPBackend) | Math-based attention (fallback) | +| `_native_npu` | [PyTorch native](https://docs.pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html#torch.nn.attention.SDPBackend) | NPU-optimized attention | +| `_native_xla` | [PyTorch native](https://docs.pytorch.org/docs/stable/generated/torch.nn.attention.SDPBackend.html#torch.nn.attention.SDPBackend) | XLA-optimized attention | +| `flash` | [FlashAttention](https://github.com/Dao-AILab/flash-attention) | FlashAttention-2 | +| `flash_varlen` | [FlashAttention](https://github.com/Dao-AILab/flash-attention) | Variable length FlashAttention | +| `_flash_3` | [FlashAttention](https://github.com/Dao-AILab/flash-attention) | FlashAttention-3 | +| `_flash_varlen_3` | [FlashAttention](https://github.com/Dao-AILab/flash-attention) | Variable length FlashAttention-3 | +| `_flash_3_hub` | [FlashAttention](https://github.com/Dao-AILab/flash-attention) | FlashAttention-3 from kernels | +| `sage` | [SageAttention](https://github.com/thu-ml/SageAttention) | Quantized attention (INT8 QK) | +| `sage_varlen` | [SageAttention](https://github.com/thu-ml/SageAttention) | Variable length SageAttention | +| `_sage_qk_int8_pv_fp8_cuda` | [SageAttention](https://github.com/thu-ml/SageAttention) | INT8 QK + FP8 PV (CUDA) | +| `_sage_qk_int8_pv_fp8_cuda_sm90` | [SageAttention](https://github.com/thu-ml/SageAttention) | INT8 QK + FP8 PV (SM90) | +| `_sage_qk_int8_pv_fp16_cuda` | [SageAttention](https://github.com/thu-ml/SageAttention) | INT8 QK + FP16 PV (CUDA) | +| `_sage_qk_int8_pv_fp16_triton` | [SageAttention](https://github.com/thu-ml/SageAttention) | INT8 QK + FP16 PV (Triton) | +| `xformers` | [xFormers](https://github.com/facebookresearch/xformers) | Memory-efficient attention | From 09e777a3e13cf811e35da57abfe6ce239d9b0f15 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Wed, 24 Sep 2025 08:36:50 +0530 Subject: [PATCH 4/6] [tests] Single scheduler in lora tests (#12315) * single scheduler please. 
* up * up * up --- tests/lora/test_lora_layers_auraflow.py | 1 - tests/lora/test_lora_layers_cogvideox.py | 2 - tests/lora/test_lora_layers_cogview4.py | 36 +- tests/lora/test_lora_layers_flux.py | 6 +- tests/lora/test_lora_layers_hunyuanvideo.py | 1 - tests/lora/test_lora_layers_ltx_video.py | 1 - tests/lora/test_lora_layers_lumina2.py | 46 +- tests/lora/test_lora_layers_mochi.py | 1 - tests/lora/test_lora_layers_qwenimage.py | 1 - tests/lora/test_lora_layers_sana.py | 5 +- tests/lora/test_lora_layers_sd3.py | 1 - tests/lora/test_lora_layers_wan.py | 1 - tests/lora/test_lora_layers_wanvace.py | 4 +- tests/lora/utils.py | 2083 +++++++++---------- 14 files changed, 1045 insertions(+), 1144 deletions(-) diff --git a/tests/lora/test_lora_layers_auraflow.py b/tests/lora/test_lora_layers_auraflow.py index 67084dd6d0..91f63c4b56 100644 --- a/tests/lora/test_lora_layers_auraflow.py +++ b/tests/lora/test_lora_layers_auraflow.py @@ -43,7 +43,6 @@ from .utils import PeftLoraLoaderMixinTests # noqa: E402 class AuraFlowLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests): pipeline_class = AuraFlowPipeline scheduler_cls = FlowMatchEulerDiscreteScheduler - scheduler_classes = [FlowMatchEulerDiscreteScheduler] scheduler_kwargs = {} transformer_kwargs = { diff --git a/tests/lora/test_lora_layers_cogvideox.py b/tests/lora/test_lora_layers_cogvideox.py index 16147f35c7..fa57b4c9c2 100644 --- a/tests/lora/test_lora_layers_cogvideox.py +++ b/tests/lora/test_lora_layers_cogvideox.py @@ -21,7 +21,6 @@ from transformers import AutoTokenizer, T5EncoderModel from diffusers import ( AutoencoderKLCogVideoX, - CogVideoXDDIMScheduler, CogVideoXDPMScheduler, CogVideoXPipeline, CogVideoXTransformer3DModel, @@ -44,7 +43,6 @@ class CogVideoXLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests): pipeline_class = CogVideoXPipeline scheduler_cls = CogVideoXDPMScheduler scheduler_kwargs = {"timestep_spacing": "trailing"} - scheduler_classes = [CogVideoXDDIMScheduler, CogVideoXDPMScheduler] transformer_kwargs = { "num_attention_heads": 4, diff --git a/tests/lora/test_lora_layers_cogview4.py b/tests/lora/test_lora_layers_cogview4.py index 3b8a56c403..9c62d2f0b8 100644 --- a/tests/lora/test_lora_layers_cogview4.py +++ b/tests/lora/test_lora_layers_cogview4.py @@ -50,7 +50,6 @@ class TokenizerWrapper: class CogView4LoRATests(unittest.TestCase, PeftLoraLoaderMixinTests): pipeline_class = CogView4Pipeline scheduler_cls = FlowMatchEulerDiscreteScheduler - scheduler_classes = [FlowMatchEulerDiscreteScheduler] scheduler_kwargs = {} transformer_kwargs = { @@ -124,30 +123,29 @@ class CogView4LoRATests(unittest.TestCase, PeftLoraLoaderMixinTests): """ Tests a simple usecase where users could use saving utilities for LoRA through save_pretrained """ - for scheduler_cls in self.scheduler_classes: - components, _, _ = self.get_dummy_components(scheduler_cls) - pipe = self.pipeline_class(**components) - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) + components, _, _ = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) - output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] - self.assertTrue(output_no_lora.shape == self.output_shape) + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] + self.assertTrue(output_no_lora.shape == self.output_shape) - images_lora = 
pipe(**inputs, generator=torch.manual_seed(0))[0] + images_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] - with tempfile.TemporaryDirectory() as tmpdirname: - pipe.save_pretrained(tmpdirname) + with tempfile.TemporaryDirectory() as tmpdirname: + pipe.save_pretrained(tmpdirname) - pipe_from_pretrained = self.pipeline_class.from_pretrained(tmpdirname) - pipe_from_pretrained.to(torch_device) + pipe_from_pretrained = self.pipeline_class.from_pretrained(tmpdirname) + pipe_from_pretrained.to(torch_device) - images_lora_save_pretrained = pipe_from_pretrained(**inputs, generator=torch.manual_seed(0))[0] + images_lora_save_pretrained = pipe_from_pretrained(**inputs, generator=torch.manual_seed(0))[0] - self.assertTrue( - np.allclose(images_lora, images_lora_save_pretrained, atol=1e-3, rtol=1e-3), - "Loading from saved checkpoints should give same results.", - ) + self.assertTrue( + np.allclose(images_lora, images_lora_save_pretrained, atol=1e-3, rtol=1e-3), + "Loading from saved checkpoints should give same results.", + ) @parameterized.expand([("block_level", True), ("leaf_level", False)]) @require_torch_accelerator diff --git a/tests/lora/test_lora_layers_flux.py b/tests/lora/test_lora_layers_flux.py index e6048f509f..6c22a34889 100644 --- a/tests/lora/test_lora_layers_flux.py +++ b/tests/lora/test_lora_layers_flux.py @@ -55,9 +55,8 @@ from .utils import PeftLoraLoaderMixinTests, check_if_lora_correctly_set # noqa @require_peft_backend class FluxLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests): pipeline_class = FluxPipeline - scheduler_cls = FlowMatchEulerDiscreteScheduler() + scheduler_cls = FlowMatchEulerDiscreteScheduler scheduler_kwargs = {} - scheduler_classes = [FlowMatchEulerDiscreteScheduler] transformer_kwargs = { "patch_size": 1, "in_channels": 4, @@ -282,9 +281,8 @@ class FluxLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests): class FluxControlLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests): pipeline_class = FluxControlPipeline - scheduler_cls = FlowMatchEulerDiscreteScheduler() + scheduler_cls = FlowMatchEulerDiscreteScheduler scheduler_kwargs = {} - scheduler_classes = [FlowMatchEulerDiscreteScheduler] transformer_kwargs = { "patch_size": 1, "in_channels": 8, diff --git a/tests/lora/test_lora_layers_hunyuanvideo.py b/tests/lora/test_lora_layers_hunyuanvideo.py index 62d045f836..7ea0f1fcc9 100644 --- a/tests/lora/test_lora_layers_hunyuanvideo.py +++ b/tests/lora/test_lora_layers_hunyuanvideo.py @@ -51,7 +51,6 @@ from .utils import PeftLoraLoaderMixinTests # noqa: E402 class HunyuanVideoLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests): pipeline_class = HunyuanVideoPipeline scheduler_cls = FlowMatchEulerDiscreteScheduler - scheduler_classes = [FlowMatchEulerDiscreteScheduler] scheduler_kwargs = {} transformer_kwargs = { diff --git a/tests/lora/test_lora_layers_ltx_video.py b/tests/lora/test_lora_layers_ltx_video.py index a8ad30e448..6ab51a5e51 100644 --- a/tests/lora/test_lora_layers_ltx_video.py +++ b/tests/lora/test_lora_layers_ltx_video.py @@ -37,7 +37,6 @@ from .utils import PeftLoraLoaderMixinTests # noqa: E402 class LTXVideoLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests): pipeline_class = LTXPipeline scheduler_cls = FlowMatchEulerDiscreteScheduler - scheduler_classes = [FlowMatchEulerDiscreteScheduler] scheduler_kwargs = {} transformer_kwargs = { diff --git a/tests/lora/test_lora_layers_lumina2.py b/tests/lora/test_lora_layers_lumina2.py index 0ebc831b11..0417b05b33 100644 --- a/tests/lora/test_lora_layers_lumina2.py +++ 
b/tests/lora/test_lora_layers_lumina2.py @@ -39,7 +39,6 @@ from .utils import PeftLoraLoaderMixinTests, check_if_lora_correctly_set # noqa class Lumina2LoRATests(unittest.TestCase, PeftLoraLoaderMixinTests): pipeline_class = Lumina2Pipeline scheduler_cls = FlowMatchEulerDiscreteScheduler - scheduler_classes = [FlowMatchEulerDiscreteScheduler] scheduler_kwargs = {} transformer_kwargs = { @@ -141,33 +140,30 @@ class Lumina2LoRATests(unittest.TestCase, PeftLoraLoaderMixinTests): strict=False, ) def test_lora_fuse_nan(self): - for scheduler_cls in self.scheduler_classes: - components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) - pipe = self.pipeline_class(**components) - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) + components, text_lora_config, denoiser_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) - if "text_encoder" in self.pipeline_class._lora_loadable_modules: - pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") - self.assertTrue( - check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" - ) + if "text_encoder" in self.pipeline_class._lora_loadable_modules: + pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") + self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") - denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet - denoiser.add_adapter(denoiser_lora_config, "adapter-1") - self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") + denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet + denoiser.add_adapter(denoiser_lora_config, "adapter-1") + self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") - # corrupt one LoRA weight with `inf` values - with torch.no_grad(): - pipe.transformer.layers[0].attn.to_q.lora_A["adapter-1"].weight += float("inf") + # corrupt one LoRA weight with `inf` values + with torch.no_grad(): + pipe.transformer.layers[0].attn.to_q.lora_A["adapter-1"].weight += float("inf") - # with `safe_fusing=True` we should see an Error - with self.assertRaises(ValueError): - pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules, safe_fusing=True) + # with `safe_fusing=True` we should see an Error + with self.assertRaises(ValueError): + pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules, safe_fusing=True) - # without we should not see an error, but every image will be black - pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules, safe_fusing=False) - out = pipe(**inputs)[0] + # without we should not see an error, but every image will be black + pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules, safe_fusing=False) + out = pipe(**inputs)[0] - self.assertTrue(np.isnan(out).all()) + self.assertTrue(np.isnan(out).all()) diff --git a/tests/lora/test_lora_layers_mochi.py b/tests/lora/test_lora_layers_mochi.py index 21cc5f11a3..7be81273db 100644 --- a/tests/lora/test_lora_layers_mochi.py +++ b/tests/lora/test_lora_layers_mochi.py @@ -37,7 +37,6 @@ from .utils import PeftLoraLoaderMixinTests # noqa: E402 class MochiLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests): pipeline_class = MochiPipeline 
     scheduler_cls = FlowMatchEulerDiscreteScheduler
-    scheduler_classes = [FlowMatchEulerDiscreteScheduler]
     scheduler_kwargs = {}

     transformer_kwargs = {
diff --git a/tests/lora/test_lora_layers_qwenimage.py b/tests/lora/test_lora_layers_qwenimage.py
index 44ef9b0a37..51de2f8e20 100644
--- a/tests/lora/test_lora_layers_qwenimage.py
+++ b/tests/lora/test_lora_layers_qwenimage.py
@@ -37,7 +37,6 @@ from .utils import PeftLoraLoaderMixinTests  # noqa: E402
 class QwenImageLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
     pipeline_class = QwenImagePipeline
     scheduler_cls = FlowMatchEulerDiscreteScheduler
-    scheduler_classes = [FlowMatchEulerDiscreteScheduler]
     scheduler_kwargs = {}

     transformer_kwargs = {
diff --git a/tests/lora/test_lora_layers_sana.py b/tests/lora/test_lora_layers_sana.py
index a08908c610..3cdb28de75 100644
--- a/tests/lora/test_lora_layers_sana.py
+++ b/tests/lora/test_lora_layers_sana.py
@@ -31,9 +31,8 @@ from .utils import PeftLoraLoaderMixinTests  # noqa: E402
 @require_peft_backend
 class SanaLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
     pipeline_class = SanaPipeline
-    scheduler_cls = FlowMatchEulerDiscreteScheduler(shift=7.0)
-    scheduler_kwargs = {}
-    scheduler_classes = [FlowMatchEulerDiscreteScheduler]
+    scheduler_cls = FlowMatchEulerDiscreteScheduler
+    scheduler_kwargs = {"shift": 7.0}
     transformer_kwargs = {
         "patch_size": 1,
         "in_channels": 4,
diff --git a/tests/lora/test_lora_layers_sd3.py b/tests/lora/test_lora_layers_sd3.py
index 95f6f325e4..228460eaad 100644
--- a/tests/lora/test_lora_layers_sd3.py
+++ b/tests/lora/test_lora_layers_sd3.py
@@ -55,7 +55,6 @@ class SD3LoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
     pipeline_class = StableDiffusion3Pipeline
     scheduler_cls = FlowMatchEulerDiscreteScheduler
     scheduler_kwargs = {}
-    scheduler_classes = [FlowMatchEulerDiscreteScheduler]
     transformer_kwargs = {
         "sample_size": 32,
         "patch_size": 1,
diff --git a/tests/lora/test_lora_layers_wan.py b/tests/lora/test_lora_layers_wan.py
index 0ba80d2be1..5734509b41 100644
--- a/tests/lora/test_lora_layers_wan.py
+++ b/tests/lora/test_lora_layers_wan.py
@@ -42,7 +42,6 @@ from .utils import PeftLoraLoaderMixinTests  # noqa: E402
 class WanLoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
     pipeline_class = WanPipeline
     scheduler_cls = FlowMatchEulerDiscreteScheduler
-    scheduler_classes = [FlowMatchEulerDiscreteScheduler]
     scheduler_kwargs = {}

     transformer_kwargs = {
diff --git a/tests/lora/test_lora_layers_wanvace.py b/tests/lora/test_lora_layers_wanvace.py
index d8dde32dd8..c3244e150e 100644
--- a/tests/lora/test_lora_layers_wanvace.py
+++ b/tests/lora/test_lora_layers_wanvace.py
@@ -50,7 +50,6 @@ from .utils import PeftLoraLoaderMixinTests  # noqa: E402
 class WanVACELoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
     pipeline_class = WanVACEPipeline
     scheduler_cls = FlowMatchEulerDiscreteScheduler
-    scheduler_classes = [FlowMatchEulerDiscreteScheduler]
     scheduler_kwargs = {}

     transformer_kwargs = {
@@ -165,9 +164,8 @@ class WanVACELoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):

     @require_peft_version_greater("0.13.2")
     def test_lora_exclude_modules_wanvace(self):
-        scheduler_cls = self.scheduler_classes[0]
         exclude_module_name = "vace_blocks.0.proj_out"
-        components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls)
+        components, text_lora_config, denoiser_lora_config = self.get_dummy_components()
         pipe = self.pipeline_class(**components).to(torch_device)
         _, _, inputs = self.get_dummy_inputs(with_generator=False)

diff --git a/tests/lora/utils.py b/tests/lora/utils.py
index 72c1dddaa2..ecaa553ce4 100644
--- a/tests/lora/utils.py
+++ b/tests/lora/utils.py
@@ -26,8 +26,6 @@ from parameterized import parameterized

 from diffusers import (
     AutoencoderKL,
-    DDIMScheduler,
-    LCMScheduler,
     UNet2DConditionModel,
 )
 from diffusers.utils import logging
@@ -109,7 +107,6 @@ class PeftLoraLoaderMixinTests:

     scheduler_cls = None
     scheduler_kwargs = None
-    scheduler_classes = [DDIMScheduler, LCMScheduler]

     has_two_text_encoders = False
     has_three_text_encoders = False
@@ -129,13 +126,13 @@ class PeftLoraLoaderMixinTests:
     text_encoder_target_modules = ["q_proj", "k_proj", "v_proj", "out_proj"]
     denoiser_target_modules = ["to_q", "to_k", "to_v", "to_out.0"]

-    def get_dummy_components(self, scheduler_cls=None, use_dora=False, lora_alpha=None):
+    def get_dummy_components(self, use_dora=False, lora_alpha=None):
         if self.unet_kwargs and self.transformer_kwargs:
             raise ValueError("Both `unet_kwargs` and `transformer_kwargs` cannot be specified.")
         if self.has_two_text_encoders and self.has_three_text_encoders:
             raise ValueError("Both `has_two_text_encoders` and `has_three_text_encoders` cannot be True.")

-        scheduler_cls = self.scheduler_cls if scheduler_cls is None else scheduler_cls
+        scheduler_cls = self.scheduler_cls
         rank = 4
         lora_alpha = rank if lora_alpha is None else lora_alpha

@@ -319,152 +316,143 @@ class PeftLoraLoaderMixinTests:
         """
         Tests a simple inference and makes sure it works as expected
         """
-        for scheduler_cls in self.scheduler_classes:
-            components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
+        components, text_lora_config, _ = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)

-            _, _, inputs = self.get_dummy_inputs()
-            output_no_lora = pipe(**inputs)[0]
-            self.assertTrue(output_no_lora.shape == self.output_shape)
+        _, _, inputs = self.get_dummy_inputs()
+        output_no_lora = pipe(**inputs)[0]
+        self.assertTrue(output_no_lora.shape == self.output_shape)

     def test_simple_inference_with_text_lora(self):
         """
         Tests a simple inference with lora attached on the text encoder
         and makes sure it works as expected
         """
-        for scheduler_cls in self.scheduler_classes:
-            components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
-            _, _, inputs = self.get_dummy_inputs(with_generator=False)
+        components, text_lora_config, _ = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)

-            output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(output_no_lora.shape == self.output_shape)
+        output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(output_no_lora.shape == self.output_shape)

-            pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)
+        pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)

-            output_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(
-                not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output"
-            )
+        output_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(
+            not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output"
+        )

     @require_peft_version_greater("0.13.1")
     def test_low_cpu_mem_usage_with_injection(self):
         """Tests if we can inject LoRA state dict with low_cpu_mem_usage."""
-        for scheduler_cls in self.scheduler_classes:
-            components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls)
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
+        components, text_lora_config, denoiser_lora_config = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)

-            if "text_encoder" in self.pipeline_class._lora_loadable_modules:
-                inject_adapter_in_model(text_lora_config, pipe.text_encoder, low_cpu_mem_usage=True)
+        if "text_encoder" in self.pipeline_class._lora_loadable_modules:
+            inject_adapter_in_model(text_lora_config, pipe.text_encoder, low_cpu_mem_usage=True)
+            self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder.")
+            self.assertTrue(
+                "meta" in {p.device.type for p in pipe.text_encoder.parameters()},
+                "The LoRA params should be on 'meta' device.",
+            )
+
+            te_state_dict = initialize_dummy_state_dict(get_peft_model_state_dict(pipe.text_encoder))
+            set_peft_model_state_dict(pipe.text_encoder, te_state_dict, low_cpu_mem_usage=True)
+            self.assertTrue(
+                "meta" not in {p.device.type for p in pipe.text_encoder.parameters()},
+                "No param should be on 'meta' device.",
+            )
+
+        denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet
+        inject_adapter_in_model(denoiser_lora_config, denoiser, low_cpu_mem_usage=True)
+        self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.")
+        self.assertTrue(
+            "meta" in {p.device.type for p in denoiser.parameters()}, "The LoRA params should be on 'meta' device."
+        )
+
+        denoiser_state_dict = initialize_dummy_state_dict(get_peft_model_state_dict(denoiser))
+        set_peft_model_state_dict(denoiser, denoiser_state_dict, low_cpu_mem_usage=True)
+        self.assertTrue(
+            "meta" not in {p.device.type for p in denoiser.parameters()}, "No param should be on 'meta' device."
+        )
+
+        if self.has_two_text_encoders or self.has_three_text_encoders:
+            if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
+                inject_adapter_in_model(text_lora_config, pipe.text_encoder_2, low_cpu_mem_usage=True)
                 self.assertTrue(
-                    check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder."
+                    check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
                 )
                 self.assertTrue(
-                    "meta" in {p.device.type for p in pipe.text_encoder.parameters()},
+                    "meta" in {p.device.type for p in pipe.text_encoder_2.parameters()},
                     "The LoRA params should be on 'meta' device.",
                 )

-                te_state_dict = initialize_dummy_state_dict(get_peft_model_state_dict(pipe.text_encoder))
-                set_peft_model_state_dict(pipe.text_encoder, te_state_dict, low_cpu_mem_usage=True)
+                te2_state_dict = initialize_dummy_state_dict(get_peft_model_state_dict(pipe.text_encoder_2))
+                set_peft_model_state_dict(pipe.text_encoder_2, te2_state_dict, low_cpu_mem_usage=True)
                 self.assertTrue(
-                    "meta" not in {p.device.type for p in pipe.text_encoder.parameters()},
+                    "meta" not in {p.device.type for p in pipe.text_encoder_2.parameters()},
                     "No param should be on 'meta' device.",
                 )

-            denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet
-            inject_adapter_in_model(denoiser_lora_config, denoiser, low_cpu_mem_usage=True)
-            self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.")
-            self.assertTrue(
-                "meta" in {p.device.type for p in denoiser.parameters()}, "The LoRA params should be on 'meta' device."
-            )
-
-            denoiser_state_dict = initialize_dummy_state_dict(get_peft_model_state_dict(denoiser))
-            set_peft_model_state_dict(denoiser, denoiser_state_dict, low_cpu_mem_usage=True)
-            self.assertTrue(
-                "meta" not in {p.device.type for p in denoiser.parameters()}, "No param should be on 'meta' device."
-            )
-
-            if self.has_two_text_encoders or self.has_three_text_encoders:
-                if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
-                    inject_adapter_in_model(text_lora_config, pipe.text_encoder_2, low_cpu_mem_usage=True)
-                    self.assertTrue(
-                        check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
-                    )
-                    self.assertTrue(
-                        "meta" in {p.device.type for p in pipe.text_encoder_2.parameters()},
-                        "The LoRA params should be on 'meta' device.",
-                    )
-
-                    te2_state_dict = initialize_dummy_state_dict(get_peft_model_state_dict(pipe.text_encoder_2))
-                    set_peft_model_state_dict(pipe.text_encoder_2, te2_state_dict, low_cpu_mem_usage=True)
-                    self.assertTrue(
-                        "meta" not in {p.device.type for p in pipe.text_encoder_2.parameters()},
-                        "No param should be on 'meta' device.",
-                    )
-
-            _, _, inputs = self.get_dummy_inputs()
-            output_lora = pipe(**inputs)[0]
-            self.assertTrue(output_lora.shape == self.output_shape)
+        _, _, inputs = self.get_dummy_inputs()
+        output_lora = pipe(**inputs)[0]
+        self.assertTrue(output_lora.shape == self.output_shape)

     @require_peft_version_greater("0.13.1")
     @require_transformers_version_greater("4.45.2")
     def test_low_cpu_mem_usage_with_loading(self):
         """Tests if we can load LoRA state dict with low_cpu_mem_usage."""
+        components, text_lora_config, denoiser_lora_config = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)

-        for scheduler_cls in self.scheduler_classes:
-            components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls)
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
-            _, _, inputs = self.get_dummy_inputs(with_generator=False)
+        output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(output_no_lora.shape == self.output_shape)

-            output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(output_no_lora.shape == self.output_shape)
+        pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)

-            pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)
+        images_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]

-            images_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            modules_to_save = self._get_modules_to_save(pipe, has_denoiser=True)
+            lora_state_dicts = self._get_lora_state_dicts(modules_to_save)
+            self.pipeline_class.save_lora_weights(
+                save_directory=tmpdirname, safe_serialization=False, **lora_state_dicts
+            )

-            with tempfile.TemporaryDirectory() as tmpdirname:
-                modules_to_save = self._get_modules_to_save(pipe, has_denoiser=True)
-                lora_state_dicts = self._get_lora_state_dicts(modules_to_save)
-                self.pipeline_class.save_lora_weights(
-                    save_directory=tmpdirname, safe_serialization=False, **lora_state_dicts
-                )
+            self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin")))
+            pipe.unload_lora_weights()
+            pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin"), low_cpu_mem_usage=False)

-                self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin")))
-                pipe.unload_lora_weights()
-                pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin"), low_cpu_mem_usage=False)
+            for module_name, module in modules_to_save.items():
+                self.assertTrue(check_if_lora_correctly_set(module), f"Lora not correctly set in {module_name}")

-                for module_name, module in modules_to_save.items():
-                    self.assertTrue(check_if_lora_correctly_set(module), f"Lora not correctly set in {module_name}")
+            images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0))[0]
+            self.assertTrue(
+                np.allclose(images_lora, images_lora_from_pretrained, atol=1e-3, rtol=1e-3),
+                "Loading from saved checkpoints should give same results.",
+            )

-                images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0))[0]
-                self.assertTrue(
-                    np.allclose(images_lora, images_lora_from_pretrained, atol=1e-3, rtol=1e-3),
-                    "Loading from saved checkpoints should give same results.",
-                )
+            # Now, check for `low_cpu_mem_usage`.
+            pipe.unload_lora_weights()
+            pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin"), low_cpu_mem_usage=True)

-                # Now, check for `low_cpu_mem_usage.`
-                pipe.unload_lora_weights()
-                pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin"), low_cpu_mem_usage=True)
+            for module_name, module in modules_to_save.items():
+                self.assertTrue(check_if_lora_correctly_set(module), f"Lora not correctly set in {module_name}")

-                for module_name, module in modules_to_save.items():
-                    self.assertTrue(check_if_lora_correctly_set(module), f"Lora not correctly set in {module_name}")
-
-                images_lora_from_pretrained_low_cpu = pipe(**inputs, generator=torch.manual_seed(0))[0]
-                self.assertTrue(
-                    np.allclose(
-                        images_lora_from_pretrained_low_cpu, images_lora_from_pretrained, atol=1e-3, rtol=1e-3
-                    ),
-                    "Loading from saved checkpoints with `low_cpu_mem_usage` should give same results.",
-                )
+            images_lora_from_pretrained_low_cpu = pipe(**inputs, generator=torch.manual_seed(0))[0]
+            self.assertTrue(
+                np.allclose(images_lora_from_pretrained_low_cpu, images_lora_from_pretrained, atol=1e-3, rtol=1e-3),
+                "Loading from saved checkpoints with `low_cpu_mem_usage` should give same results.",
+            )
     def test_simple_inference_with_text_lora_and_scale(self):
         """
@@ -472,411 +460,393 @@ class PeftLoraLoaderMixinTests:
         and makes sure it works as expected
         """
         attention_kwargs_name = determine_attention_kwargs_name(self.pipeline_class)
+        components, text_lora_config, _ = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)

-        for scheduler_cls in self.scheduler_classes:
-            components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
-            _, _, inputs = self.get_dummy_inputs(with_generator=False)
+        output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(output_no_lora.shape == self.output_shape)

-            output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(output_no_lora.shape == self.output_shape)
+        pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)

-            pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)
+        output_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(
+            not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output"
+        )

-            output_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(
-                not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output"
-            )
+        attention_kwargs = {attention_kwargs_name: {"scale": 0.5}}
+        output_lora_scale = pipe(**inputs, generator=torch.manual_seed(0), **attention_kwargs)[0]

-            attention_kwargs = {attention_kwargs_name: {"scale": 0.5}}
-            output_lora_scale = pipe(**inputs, generator=torch.manual_seed(0), **attention_kwargs)[0]
+        self.assertTrue(
+            not np.allclose(output_lora, output_lora_scale, atol=1e-3, rtol=1e-3),
+            "Lora + scale should change the output",
+        )

-            self.assertTrue(
-                not np.allclose(output_lora, output_lora_scale, atol=1e-3, rtol=1e-3),
-                "Lora + scale should change the output",
-            )
+        attention_kwargs = {attention_kwargs_name: {"scale": 0.0}}
+        output_lora_0_scale = pipe(**inputs, generator=torch.manual_seed(0), **attention_kwargs)[0]

-            attention_kwargs = {attention_kwargs_name: {"scale": 0.0}}
-            output_lora_0_scale = pipe(**inputs, generator=torch.manual_seed(0), **attention_kwargs)[0]
-
-            self.assertTrue(
-                np.allclose(output_no_lora, output_lora_0_scale, atol=1e-3, rtol=1e-3),
-                "Lora + 0 scale should lead to same result as no LoRA",
-            )
+        self.assertTrue(
+            np.allclose(output_no_lora, output_lora_0_scale, atol=1e-3, rtol=1e-3),
+            "Lora + 0 scale should lead to same result as no LoRA",
+        )

     def test_simple_inference_with_text_lora_fused(self):
         """
         Tests a simple inference with lora attached into text encoder + fuses the lora weights into base model
         and makes sure it works as expected
         """
-        for scheduler_cls in self.scheduler_classes:
-            components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
-            _, _, inputs = self.get_dummy_inputs(with_generator=False)
+        components, text_lora_config, _ = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)

-            output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(output_no_lora.shape == self.output_shape)
+        output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(output_no_lora.shape == self.output_shape)

-            pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)
+        pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)

-            pipe.fuse_lora()
-            # Fusing should still keep the LoRA layers
-            self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
+        pipe.fuse_lora()
+        # Fusing should still keep the LoRA layers
+        self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")

-            if self.has_two_text_encoders or self.has_three_text_encoders:
-                if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
-                    self.assertTrue(
-                        check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
-                    )
+        if self.has_two_text_encoders or self.has_three_text_encoders:
+            if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
+                self.assertTrue(
+                    check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
+                )

-            ouput_fused = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertFalse(
-                np.allclose(ouput_fused, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output"
-            )
+        output_fused = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertFalse(
+            np.allclose(output_fused, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output"
+        )

     def test_simple_inference_with_text_lora_unloaded(self):
         """
         Tests a simple inference with lora attached to text encoder, then unloads the lora weights
         and makes sure it works as expected
         """
-        for scheduler_cls in self.scheduler_classes:
-            components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
-            _, _, inputs = self.get_dummy_inputs(with_generator=False)
+        components, text_lora_config, _ = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)

-            output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(output_no_lora.shape == self.output_shape)
+        output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(output_no_lora.shape == self.output_shape)

-            pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)
+        pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)

-            pipe.unload_lora_weights()
-            # unloading should remove the LoRA layers
-            self.assertFalse(
-                check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly unloaded in text encoder"
-            )
+        pipe.unload_lora_weights()
+        # unloading should remove the LoRA layers
+        self.assertFalse(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly unloaded in text encoder")

-            if self.has_two_text_encoders or self.has_three_text_encoders:
-                if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
-                    self.assertFalse(
-                        check_if_lora_correctly_set(pipe.text_encoder_2),
-                        "Lora not correctly unloaded in text encoder 2",
-                    )
+        if self.has_two_text_encoders or self.has_three_text_encoders:
+            if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
+                self.assertFalse(
+                    check_if_lora_correctly_set(pipe.text_encoder_2),
+                    "Lora not correctly unloaded in text encoder 2",
+                )

-            ouput_unloaded = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(
-                np.allclose(ouput_unloaded, output_no_lora, atol=1e-3, rtol=1e-3),
-                "Fused lora should change the output",
-            )
+        output_unloaded = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(
+            np.allclose(output_unloaded, output_no_lora, atol=1e-3, rtol=1e-3),
+            "Unloading LoRA should restore the original output",
+        )

     def test_simple_inference_with_text_lora_save_load(self):
         """
         Tests a simple usecase where users could use saving utilities for LoRA.
         """
-        for scheduler_cls in self.scheduler_classes:
-            components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
-            _, _, inputs = self.get_dummy_inputs(with_generator=False)
+        components, text_lora_config, _ = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)

-            output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(output_no_lora.shape == self.output_shape)
+        output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(output_no_lora.shape == self.output_shape)

-            pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)
+        pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)

-            images_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        images_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]

-            with tempfile.TemporaryDirectory() as tmpdirname:
-                modules_to_save = self._get_modules_to_save(pipe)
-                lora_state_dicts = self._get_lora_state_dicts(modules_to_save)
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            modules_to_save = self._get_modules_to_save(pipe)
+            lora_state_dicts = self._get_lora_state_dicts(modules_to_save)

-                self.pipeline_class.save_lora_weights(
-                    save_directory=tmpdirname, safe_serialization=False, **lora_state_dicts
-                )
-
-                self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin")))
-                pipe.unload_lora_weights()
-                pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))
-
-                for module_name, module in modules_to_save.items():
-                    self.assertTrue(check_if_lora_correctly_set(module), f"Lora not correctly set in {module_name}")
-
-                images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0))[0]
-
-                self.assertTrue(
-                    np.allclose(images_lora, images_lora_from_pretrained, atol=1e-3, rtol=1e-3),
-                    "Loading from saved checkpoints should give same results.",
+            self.pipeline_class.save_lora_weights(
+                save_directory=tmpdirname, safe_serialization=False, **lora_state_dicts
             )

+            self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin")))
+            pipe.unload_lora_weights()
+            pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))
+
+            for module_name, module in modules_to_save.items():
+                self.assertTrue(check_if_lora_correctly_set(module), f"Lora not correctly set in {module_name}")
+
+            images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0))[0]
+
+            self.assertTrue(
+                np.allclose(images_lora, images_lora_from_pretrained, atol=1e-3, rtol=1e-3),
+                "Loading from saved checkpoints should give same results.",
+            )
+
     def test_simple_inference_with_partial_text_lora(self):
         """
         Tests a simple inference with lora attached on the text encoder with different ranks and some adapters removed
         and makes sure it works as expected
         """
-        for scheduler_cls in self.scheduler_classes:
-            components, _, _ = self.get_dummy_components(scheduler_cls)
-            # Verify `StableDiffusionLoraLoaderMixin.load_lora_into_text_encoder` handles different ranks per module (PR#8324).
-            text_lora_config = LoraConfig(
-                r=4,
-                rank_pattern={self.text_encoder_target_modules[i]: i + 1 for i in range(3)},
-                lora_alpha=4,
-                target_modules=self.text_encoder_target_modules,
-                init_lora_weights=False,
-                use_dora=False,
-            )
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
-            _, _, inputs = self.get_dummy_inputs(with_generator=False)
+        components, _, _ = self.get_dummy_components()
+        # Verify `StableDiffusionLoraLoaderMixin.load_lora_into_text_encoder` handles different ranks per module (PR#8324).
+        text_lora_config = LoraConfig(
+            r=4,
+            rank_pattern={self.text_encoder_target_modules[i]: i + 1 for i in range(3)},
+            lora_alpha=4,
+            target_modules=self.text_encoder_target_modules,
+            init_lora_weights=False,
+            use_dora=False,
+        )
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)

-            output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(output_no_lora.shape == self.output_shape)
+        output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(output_no_lora.shape == self.output_shape)

-            pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)
+        pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)

-            state_dict = {}
-            if "text_encoder" in self.pipeline_class._lora_loadable_modules:
-                # Gather the state dict for the PEFT model, excluding `layers.4`, to ensure `load_lora_into_text_encoder`
-                # supports missing layers (PR#8324).
-                state_dict = {
-                    f"text_encoder.{module_name}": param
-                    for module_name, param in get_peft_model_state_dict(pipe.text_encoder).items()
-                    if "text_model.encoder.layers.4" not in module_name
-                }
+        state_dict = {}
+        if "text_encoder" in self.pipeline_class._lora_loadable_modules:
+            # Gather the state dict for the PEFT model, excluding `layers.4`, to ensure `load_lora_into_text_encoder`
+            # supports missing layers (PR#8324).
+            state_dict = {
+                f"text_encoder.{module_name}": param
+                for module_name, param in get_peft_model_state_dict(pipe.text_encoder).items()
+                if "text_model.encoder.layers.4" not in module_name
+            }

-            if self.has_two_text_encoders or self.has_three_text_encoders:
-                if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
-                    state_dict.update(
-                        {
-                            f"text_encoder_2.{module_name}": param
-                            for module_name, param in get_peft_model_state_dict(pipe.text_encoder_2).items()
-                            if "text_model.encoder.layers.4" not in module_name
-                        }
-                    )
+        if self.has_two_text_encoders or self.has_three_text_encoders:
+            if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
+                state_dict.update(
+                    {
+                        f"text_encoder_2.{module_name}": param
+                        for module_name, param in get_peft_model_state_dict(pipe.text_encoder_2).items()
+                        if "text_model.encoder.layers.4" not in module_name
+                    }
+                )

-            output_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(
-                not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output"
-            )
+        output_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(
+            not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output"
+        )

-            # Unload lora and load it back using the pipe.load_lora_weights machinery
-            pipe.unload_lora_weights()
-            pipe.load_lora_weights(state_dict)
+        # Unload lora and load it back using the pipe.load_lora_weights machinery
+        pipe.unload_lora_weights()
+        pipe.load_lora_weights(state_dict)

-            output_partial_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(
-                not np.allclose(output_partial_lora, output_lora, atol=1e-3, rtol=1e-3),
-                "Removing adapters should change the output",
-            )
+        output_partial_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(
+            not np.allclose(output_partial_lora, output_lora, atol=1e-3, rtol=1e-3),
+            "Removing adapters should change the output",
+        )

     def test_simple_inference_save_pretrained_with_text_lora(self):
         """
         Tests a simple usecase where users could use saving utilities for LoRA through save_pretrained
         """
-        for scheduler_cls in self.scheduler_classes:
-            components, text_lora_config, _ = self.get_dummy_components(scheduler_cls)
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
-            _, _, inputs = self.get_dummy_inputs(with_generator=False)
+        components, text_lora_config, _ = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)

-            output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(output_no_lora.shape == self.output_shape)
+        output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(output_no_lora.shape == self.output_shape)

-            pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)
-            images_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config=None)
+        images_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]

-            with tempfile.TemporaryDirectory() as tmpdirname:
-                pipe.save_pretrained(tmpdirname)
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            pipe.save_pretrained(tmpdirname)

-                pipe_from_pretrained = self.pipeline_class.from_pretrained(tmpdirname)
-                pipe_from_pretrained.to(torch_device)
+            pipe_from_pretrained = self.pipeline_class.from_pretrained(tmpdirname)
+            pipe_from_pretrained.to(torch_device)

-            if "text_encoder" in self.pipeline_class._lora_loadable_modules:
+        if "text_encoder" in self.pipeline_class._lora_loadable_modules:
+            self.assertTrue(
+                check_if_lora_correctly_set(pipe_from_pretrained.text_encoder),
+                "Lora not correctly set in text encoder",
+            )
+
+        if self.has_two_text_encoders or self.has_three_text_encoders:
+            if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
                 self.assertTrue(
-                    check_if_lora_correctly_set(pipe_from_pretrained.text_encoder),
-                    "Lora not correctly set in text encoder",
+                    check_if_lora_correctly_set(pipe_from_pretrained.text_encoder_2),
+                    "Lora not correctly set in text encoder 2",
                 )

-            if self.has_two_text_encoders or self.has_three_text_encoders:
-                if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
-                    self.assertTrue(
-                        check_if_lora_correctly_set(pipe_from_pretrained.text_encoder_2),
-                        "Lora not correctly set in text encoder 2",
-                    )
+        images_lora_save_pretrained = pipe_from_pretrained(**inputs, generator=torch.manual_seed(0))[0]

-            images_lora_save_pretrained = pipe_from_pretrained(**inputs, generator=torch.manual_seed(0))[0]
-
-            self.assertTrue(
-                np.allclose(images_lora, images_lora_save_pretrained, atol=1e-3, rtol=1e-3),
-                "Loading from saved checkpoints should give same results.",
-            )
+        self.assertTrue(
+            np.allclose(images_lora, images_lora_save_pretrained, atol=1e-3, rtol=1e-3),
+            "Loading from saved checkpoints should give same results.",
+        )

     def test_simple_inference_with_text_denoiser_lora_save_load(self):
         """
         Tests a simple usecase where users could use saving utilities for LoRA for Unet + text encoder
         """
-        for scheduler_cls in self.scheduler_classes:
-            components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls)
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
-            _, _, inputs = self.get_dummy_inputs(with_generator=False)
+        components, text_lora_config, denoiser_lora_config = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)

-            output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(output_no_lora.shape == self.output_shape)
+        output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(output_no_lora.shape == self.output_shape)

-            pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)
+        pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)

-            images_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        images_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]

-            with tempfile.TemporaryDirectory() as tmpdirname:
-                modules_to_save = self._get_modules_to_save(pipe, has_denoiser=True)
-                lora_state_dicts = self._get_lora_state_dicts(modules_to_save)
-                self.pipeline_class.save_lora_weights(
-                    save_directory=tmpdirname, safe_serialization=False, **lora_state_dicts
-                )
-
-                self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin")))
-                pipe.unload_lora_weights()
-                pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))
-
-                for module_name, module in modules_to_save.items():
-                    self.assertTrue(check_if_lora_correctly_set(module), f"Lora not correctly set in {module_name}")
-
-                images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0))[0]
-                self.assertTrue(
-                    np.allclose(images_lora, images_lora_from_pretrained, atol=1e-3, rtol=1e-3),
-                    "Loading from saved checkpoints should give same results.",
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            modules_to_save = self._get_modules_to_save(pipe, has_denoiser=True)
+            lora_state_dicts = self._get_lora_state_dicts(modules_to_save)
+            self.pipeline_class.save_lora_weights(
+                save_directory=tmpdirname, safe_serialization=False, **lora_state_dicts
             )

+            self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.bin")))
+            pipe.unload_lora_weights()
+            pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.bin"))
+
+            for module_name, module in modules_to_save.items():
+                self.assertTrue(check_if_lora_correctly_set(module), f"Lora not correctly set in {module_name}")
+
+            images_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0))[0]
+            self.assertTrue(
+                np.allclose(images_lora, images_lora_from_pretrained, atol=1e-3, rtol=1e-3),
+                "Loading from saved checkpoints should give same results.",
+            )
+
     def test_simple_inference_with_text_denoiser_lora_and_scale(self):
         """
         Tests a simple inference with lora attached on the text encoder + Unet + scale argument
         and makes sure it works as expected
         """
         attention_kwargs_name = determine_attention_kwargs_name(self.pipeline_class)
+        components, text_lora_config, denoiser_lora_config = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)

-        for scheduler_cls in self.scheduler_classes:
-            components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls)
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
-            _, _, inputs = self.get_dummy_inputs(with_generator=False)
+        output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(output_no_lora.shape == self.output_shape)

-            output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(output_no_lora.shape == self.output_shape)
+        pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)

-            pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)
+        output_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(
+            not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output"
+        )

-            output_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        attention_kwargs = {attention_kwargs_name: {"scale": 0.5}}
+        output_lora_scale = pipe(**inputs, generator=torch.manual_seed(0), **attention_kwargs)[0]
+
+        self.assertTrue(
+            not np.allclose(output_lora, output_lora_scale, atol=1e-3, rtol=1e-3),
+            "Lora + scale should change the output",
+        )
+
+        attention_kwargs = {attention_kwargs_name: {"scale": 0.0}}
+        output_lora_0_scale = pipe(**inputs, generator=torch.manual_seed(0), **attention_kwargs)[0]
+
+        self.assertTrue(
+            np.allclose(output_no_lora, output_lora_0_scale, atol=1e-3, rtol=1e-3),
+            "Lora + 0 scale should lead to same result as no LoRA",
+        )
+
+        if "text_encoder" in self.pipeline_class._lora_loadable_modules:
             self.assertTrue(
-                not np.allclose(output_lora, output_no_lora, atol=1e-3, rtol=1e-3), "Lora should change the output"
+                pipe.text_encoder.text_model.encoder.layers[0].self_attn.q_proj.scaling["default"] == 1.0,
+                "The scaling parameter has not been correctly restored!",
             )

-            attention_kwargs = {attention_kwargs_name: {"scale": 0.5}}
-            output_lora_scale = pipe(**inputs, generator=torch.manual_seed(0), **attention_kwargs)[0]
-
-            self.assertTrue(
-                not np.allclose(output_lora, output_lora_scale, atol=1e-3, rtol=1e-3),
-                "Lora + scale should change the output",
-            )
-
-            attention_kwargs = {attention_kwargs_name: {"scale": 0.0}}
-            output_lora_0_scale = pipe(**inputs, generator=torch.manual_seed(0), **attention_kwargs)[0]
-
-            self.assertTrue(
-                np.allclose(output_no_lora, output_lora_0_scale, atol=1e-3, rtol=1e-3),
-                "Lora + 0 scale should lead to same result as no LoRA",
-            )
-
-            if "text_encoder" in self.pipeline_class._lora_loadable_modules:
-                self.assertTrue(
-                    pipe.text_encoder.text_model.encoder.layers[0].self_attn.q_proj.scaling["default"] == 1.0,
-                    "The scaling parameter has not been correctly restored!",
-                )
-
     def test_simple_inference_with_text_lora_denoiser_fused(self):
         """
         Tests a simple inference with lora attached into text encoder + fuses the lora weights into base model
         and makes sure it works as expected - with unet
         """
-        for scheduler_cls in self.scheduler_classes:
-            components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls)
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
-            _, _, inputs = self.get_dummy_inputs(with_generator=False)
+        components, text_lora_config, denoiser_lora_config = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)

-            output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(output_no_lora.shape == self.output_shape)
+        output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(output_no_lora.shape == self.output_shape)

-            pipe, denoiser = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)
+        pipe, denoiser = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)

-            pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules)
+        pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules)

-            # Fusing should still keep the LoRA layers
-            if "text_encoder" in self.pipeline_class._lora_loadable_modules:
+        # Fusing should still keep the LoRA layers
+        if "text_encoder" in self.pipeline_class._lora_loadable_modules:
+            self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
+
+        self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser")
+
+        if self.has_two_text_encoders or self.has_three_text_encoders:
+            if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
                 self.assertTrue(
-                    check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
+                    check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
                 )

-            self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser")
-
-            if self.has_two_text_encoders or self.has_three_text_encoders:
-                if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
-                    self.assertTrue(
-                        check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
-                    )
-
-            output_fused = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertFalse(
-                np.allclose(output_fused, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output"
-            )
+        output_fused = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertFalse(
+            np.allclose(output_fused, output_no_lora, atol=1e-3, rtol=1e-3), "Fused lora should change the output"
+        )

     def test_simple_inference_with_text_denoiser_lora_unloaded(self):
         """
         Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights
         and makes sure it works as expected
         """
-        for scheduler_cls in self.scheduler_classes:
-            components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls)
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
-            _, _, inputs = self.get_dummy_inputs(with_generator=False)
+        components, text_lora_config, denoiser_lora_config = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)

-            output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(output_no_lora.shape == self.output_shape)
+        output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(output_no_lora.shape == self.output_shape)

-            pipe, denoiser = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)
+        pipe, denoiser = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)

-            pipe.unload_lora_weights()
-            # unloading should remove the LoRA layers
-            self.assertFalse(
-                check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly unloaded in text encoder"
-            )
-            self.assertFalse(check_if_lora_correctly_set(denoiser), "Lora not correctly unloaded in denoiser")
+        pipe.unload_lora_weights()
+        # unloading should remove the LoRA layers
+        self.assertFalse(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly unloaded in text encoder")
+        self.assertFalse(check_if_lora_correctly_set(denoiser), "Lora not correctly unloaded in denoiser")

-            if self.has_two_text_encoders or self.has_three_text_encoders:
-                if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
-                    self.assertFalse(
-                        check_if_lora_correctly_set(pipe.text_encoder_2),
-                        "Lora not correctly unloaded in text encoder 2",
-                    )
+        if self.has_two_text_encoders or self.has_three_text_encoders:
+            if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
+                self.assertFalse(
+                    check_if_lora_correctly_set(pipe.text_encoder_2),
+                    "Lora not correctly unloaded in text encoder 2",
+                )

-            output_unloaded = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertTrue(
-                np.allclose(output_unloaded, output_no_lora, atol=1e-3, rtol=1e-3),
-                "Fused lora should change the output",
-            )
+        output_unloaded = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertTrue(
+            np.allclose(output_unloaded, output_no_lora, atol=1e-3, rtol=1e-3),
+            "Unloading LoRA should restore the original output",
+        )

     def test_simple_inference_with_text_denoiser_lora_unfused(
         self, expected_atol: float = 1e-3, expected_rtol: float = 1e-3
@@ -885,125 +855,120 @@ class PeftLoraLoaderMixinTests:
         Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights
         and makes sure it works as expected
         """
-        for scheduler_cls in self.scheduler_classes:
-            components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls)
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
-            _, _, inputs = self.get_dummy_inputs(with_generator=False)
+        components, text_lora_config, denoiser_lora_config = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)

-            pipe, denoiser = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)
+        pipe, denoiser = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config)

-            pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules)
-            self.assertTrue(pipe.num_fused_loras == 1, f"{pipe.num_fused_loras=}, {pipe.fused_loras=}")
-            output_fused_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules)
+        self.assertTrue(pipe.num_fused_loras == 1, f"{pipe.num_fused_loras=}, {pipe.fused_loras=}")
+        output_fused_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]

-            pipe.unfuse_lora(components=self.pipeline_class._lora_loadable_modules)
-            self.assertTrue(pipe.num_fused_loras == 0, f"{pipe.num_fused_loras=}, {pipe.fused_loras=}")
-            output_unfused_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        pipe.unfuse_lora(components=self.pipeline_class._lora_loadable_modules)
+        self.assertTrue(pipe.num_fused_loras == 0, f"{pipe.num_fused_loras=}, {pipe.fused_loras=}")
+        output_unfused_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]

-            # unloading should remove the LoRA layers
-            if "text_encoder" in self.pipeline_class._lora_loadable_modules:
-                self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Unfuse should still keep LoRA layers")
+        # unfusing should still keep the LoRA layers
+        if "text_encoder" in self.pipeline_class._lora_loadable_modules:
+            self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Unfuse should still keep LoRA layers")

-            self.assertTrue(check_if_lora_correctly_set(denoiser), "Unfuse should still keep LoRA layers")
+        self.assertTrue(check_if_lora_correctly_set(denoiser), "Unfuse should still keep LoRA layers")

-            if self.has_two_text_encoders or self.has_three_text_encoders:
-                if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
-                    self.assertTrue(
-                        check_if_lora_correctly_set(pipe.text_encoder_2), "Unfuse should still keep LoRA layers"
-                    )
+        if self.has_two_text_encoders or self.has_three_text_encoders:
+            if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
+                self.assertTrue(
+                    check_if_lora_correctly_set(pipe.text_encoder_2), "Unfuse should still keep LoRA layers"
+                )

-            # Fuse and unfuse should lead to the same results
-            self.assertTrue(
-                np.allclose(output_fused_lora, output_unfused_lora, atol=expected_atol, rtol=expected_rtol),
-                "Fused lora should not change the output",
-            )
+        # Fuse and unfuse should lead to the same results
+        self.assertTrue(
+            np.allclose(output_fused_lora, output_unfused_lora, atol=expected_atol, rtol=expected_rtol),
+            "Fused lora should not change the output",
+        )

     def test_simple_inference_with_text_denoiser_multi_adapter(self):
         """
         Tests a simple inference with lora attached to text encoder and unet, attaches
         multiple adapters and set them
         """
-        for scheduler_cls in self.scheduler_classes:
-            components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls)
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
-            _, _, inputs = self.get_dummy_inputs(with_generator=False)
+        components, text_lora_config, denoiser_lora_config = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)

-            output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]

-            if "text_encoder" in self.pipeline_class._lora_loadable_modules:
-                pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
-                pipe.text_encoder.add_adapter(text_lora_config, "adapter-2")
+        if "text_encoder" in self.pipeline_class._lora_loadable_modules:
+            pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
+            pipe.text_encoder.add_adapter(text_lora_config, "adapter-2")
+            self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
+
+        denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet
+        denoiser.add_adapter(denoiser_lora_config, "adapter-1")
+        denoiser.add_adapter(denoiser_lora_config, "adapter-2")
+        self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.")
+
+        if self.has_two_text_encoders or self.has_three_text_encoders:
+            if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
+                pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
+                pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2")
                 self.assertTrue(
-                    check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
+                    check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
                 )

-            denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet
-            denoiser.add_adapter(denoiser_lora_config, "adapter-1")
-            denoiser.add_adapter(denoiser_lora_config, "adapter-2")
-            self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.")
+        pipe.set_adapters("adapter-1")
+        output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertFalse(
+            np.allclose(output_no_lora, output_adapter_1, atol=1e-3, rtol=1e-3),
+            "Adapter outputs should be different.",
+        )

-            if self.has_two_text_encoders or self.has_three_text_encoders:
-                if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
-                    pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
-                    pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2")
-                    self.assertTrue(
-                        check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
-                    )
+        pipe.set_adapters("adapter-2")
+        output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertFalse(
+            np.allclose(output_no_lora, output_adapter_2, atol=1e-3, rtol=1e-3),
+            "Adapter outputs should be different.",
+        )

-            pipe.set_adapters("adapter-1")
-            output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertFalse(
-                np.allclose(output_no_lora, output_adapter_1, atol=1e-3, rtol=1e-3),
-                "Adapter outputs should be different.",
-            )
+        pipe.set_adapters(["adapter-1", "adapter-2"])
+        output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        self.assertFalse(
+            np.allclose(output_no_lora, output_adapter_mixed, atol=1e-3, rtol=1e-3),
+            "Adapter outputs should be different.",
+        )

-            pipe.set_adapters("adapter-2")
-            output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertFalse(
-                np.allclose(output_no_lora, output_adapter_2, atol=1e-3, rtol=1e-3),
-                "Adapter outputs should be different.",
-            )
+        # Different adapters should give different results
+        self.assertFalse(
+            np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3),
+            "Adapter 1 and 2 should give different results",
+        )

-            pipe.set_adapters(["adapter-1", "adapter-2"])
-            output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0))[0]
-            self.assertFalse(
-                np.allclose(output_no_lora, output_adapter_mixed, atol=1e-3, rtol=1e-3),
-                "Adapter outputs should be different.",
-            )
+        self.assertFalse(
+            np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3),
+            "Adapter 1 and mixed adapters should give different results",
+        )

-            # Fuse and unfuse should lead to the same results
-            self.assertFalse(
-                np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3),
-                "Adapter 1 and 2 should give different results",
-            )
+        self.assertFalse(
+            np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3),
+            "Adapter 2 and mixed adapters should give different results",
+        )

-            self.assertFalse(
-                np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3),
-                "Adapter 1 and mixed adapters should give different results",
-            )
+        pipe.disable_lora()
+        output_disabled = pipe(**inputs, generator=torch.manual_seed(0))[0]

-            self.assertFalse(
-                np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3),
-                "Adapter 2 and mixed adapters should give different results",
-            )
-
-            pipe.disable_lora()
-            output_disabled = pipe(**inputs, generator=torch.manual_seed(0))[0]
-
-            self.assertTrue(
-                np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3),
-                "output with no lora and output with lora disabled should give same results",
-            )
+        self.assertTrue(
+            np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3),
+            "output with no lora and output with lora disabled should give same results",
+        )

     def test_wrong_adapter_name_raises_error(self):
         adapter_name = "adapter-1"
-        scheduler_cls = self.scheduler_classes[0]
-        components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls)
+        components, text_lora_config, denoiser_lora_config = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
         pipe = pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
@@ -1024,8 +989,7 @@ class PeftLoraLoaderMixinTests:

     def test_multiple_wrong_adapter_name_raises_error(self):
         adapter_name = "adapter-1"
-        scheduler_cls = self.scheduler_classes[0]
-        components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls)
+        components, text_lora_config, denoiser_lora_config = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
         pipe = pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
@@ -1054,131 +1018,127 @@ class PeftLoraLoaderMixinTests:
         Tests a simple inference with lora attached to text encoder and unet, attaches
         one adapter and set different weights for different blocks (i.e. block lora)
         """
-        for scheduler_cls in self.scheduler_classes:
-            components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls)
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
-            _, _, inputs = self.get_dummy_inputs(with_generator=False)
+        components, text_lora_config, denoiser_lora_config = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)

-            output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]

-            pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
-            self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
+        pipe.text_encoder.add_adapter(text_lora_config, "adapter-1")
+        self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")

-            denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet
-            denoiser.add_adapter(denoiser_lora_config)
-            self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.")
+        denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet
+        denoiser.add_adapter(denoiser_lora_config)
+        self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.")

-            if self.has_two_text_encoders or self.has_three_text_encoders:
-                if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
-                    pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
-                    self.assertTrue(
-                        check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
-                    )
+        if self.has_two_text_encoders or self.has_three_text_encoders:
+            if "text_encoder_2" in self.pipeline_class._lora_loadable_modules:
+                pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1")
+                self.assertTrue(
+                    check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
+                )

-            weights_1 = {"text_encoder": 2, "unet": {"down": 5}}
-            pipe.set_adapters("adapter-1", weights_1)
-            output_weights_1 = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        weights_1 = {"text_encoder": 2, "unet": {"down": 5}}
+        pipe.set_adapters("adapter-1", weights_1)
+        output_weights_1 = pipe(**inputs, generator=torch.manual_seed(0))[0]

-            weights_2 = {"unet": {"up": 5}}
-            pipe.set_adapters("adapter-1", weights_2)
-            output_weights_2 = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        weights_2 = {"unet": {"up": 5}}
+        pipe.set_adapters("adapter-1", weights_2)
+        output_weights_2 = pipe(**inputs, generator=torch.manual_seed(0))[0]

-            self.assertFalse(
-                np.allclose(output_weights_1, output_weights_2, atol=1e-3, rtol=1e-3),
-                "LoRA weights 1 and 2 should give different results",
-            )
-            self.assertFalse(
-                np.allclose(output_no_lora, output_weights_1, atol=1e-3, rtol=1e-3),
-                "No adapter and LoRA weights 1 should give different results",
-            )
-            self.assertFalse(
-                np.allclose(output_no_lora, output_weights_2, atol=1e-3, rtol=1e-3),
-                "No adapter and LoRA weights 2 should give different results",
-            )
+        self.assertFalse(
+            np.allclose(output_weights_1, output_weights_2, atol=1e-3, rtol=1e-3),
+            "LoRA weights 1 and 2 should give different results",
+        )
+        self.assertFalse(
+            np.allclose(output_no_lora, output_weights_1, atol=1e-3, rtol=1e-3),
+            "No adapter and LoRA weights 1 should give different
results", + ) + self.assertFalse( + np.allclose(output_no_lora, output_weights_2, atol=1e-3, rtol=1e-3), + "No adapter and LoRA weights 2 should give different results", + ) - pipe.disable_lora() - output_disabled = pipe(**inputs, generator=torch.manual_seed(0))[0] + pipe.disable_lora() + output_disabled = pipe(**inputs, generator=torch.manual_seed(0))[0] - self.assertTrue( - np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3), - "output with no lora and output with lora disabled should give same results", - ) + self.assertTrue( + np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3), + "output with no lora and output with lora disabled should give same results", + ) def test_simple_inference_with_text_denoiser_multi_adapter_block_lora(self): """ Tests a simple inference with lora attached to text encoder and unet, attaches multiple adapters and set different weights for different blocks (i.e. block lora) """ - for scheduler_cls in self.scheduler_classes: - components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) - pipe = self.pipeline_class(**components) - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) + components, text_lora_config, denoiser_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) - output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] - if "text_encoder" in self.pipeline_class._lora_loadable_modules: - pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") - pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") + if "text_encoder" in self.pipeline_class._lora_loadable_modules: + pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") + pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") + self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + + denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet + denoiser.add_adapter(denoiser_lora_config, "adapter-1") + denoiser.add_adapter(denoiser_lora_config, "adapter-2") + self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") + + if self.has_two_text_encoders or self.has_three_text_encoders: + if "text_encoder_2" in self.pipeline_class._lora_loadable_modules: + pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") + pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") self.assertTrue( - check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" + check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" ) - denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet - denoiser.add_adapter(denoiser_lora_config, "adapter-1") - denoiser.add_adapter(denoiser_lora_config, "adapter-2") - self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") + scales_1 = {"text_encoder": 2, "unet": {"down": 5}} + scales_2 = {"unet": {"down": 5, "mid": 5}} - if self.has_two_text_encoders or self.has_three_text_encoders: - if "text_encoder_2" in self.pipeline_class._lora_loadable_modules: - pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") - 
pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") - self.assertTrue( - check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) + pipe.set_adapters("adapter-1", scales_1) + output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0))[0] - scales_1 = {"text_encoder": 2, "unet": {"down": 5}} - scales_2 = {"unet": {"down": 5, "mid": 5}} + pipe.set_adapters("adapter-2", scales_2) + output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0))[0] - pipe.set_adapters("adapter-1", scales_1) - output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0))[0] + pipe.set_adapters(["adapter-1", "adapter-2"], [scales_1, scales_2]) + output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0))[0] - pipe.set_adapters("adapter-2", scales_2) - output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0))[0] + # Fuse and unfuse should lead to the same results + self.assertFalse( + np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3), + "Adapter 1 and 2 should give different results", + ) - pipe.set_adapters(["adapter-1", "adapter-2"], [scales_1, scales_2]) - output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0))[0] + self.assertFalse( + np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3), + "Adapter 1 and mixed adapters should give different results", + ) - # Fuse and unfuse should lead to the same results - self.assertFalse( - np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3), - "Adapter 1 and 2 should give different results", - ) + self.assertFalse( + np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3), + "Adapter 2 and mixed adapters should give different results", + ) - self.assertFalse( - np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3), - "Adapter 1 and mixed adapters should give different results", - ) + pipe.disable_lora() + output_disabled = pipe(**inputs, generator=torch.manual_seed(0))[0] - self.assertFalse( - np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3), - "Adapter 2 and mixed adapters should give different results", - ) + self.assertTrue( + np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3), + "output with no lora and output with lora disabled should give same results", + ) - pipe.disable_lora() - output_disabled = pipe(**inputs, generator=torch.manual_seed(0))[0] - - self.assertTrue( - np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3), - "output with no lora and output with lora disabled should give same results", - ) - - # a mismatching number of adapter_names and adapter_weights should raise an error - with self.assertRaises(ValueError): - pipe.set_adapters(["adapter-1", "adapter-2"], [scales_1]) + # a mismatching number of adapter_names and adapter_weights should raise an error + with self.assertRaises(ValueError): + pipe.set_adapters(["adapter-1", "adapter-2"], [scales_1]) def test_simple_inference_with_text_denoiser_block_scale_for_all_dict_options(self): """Tests that any valid combination of lora block scales can be used in pipe.set_adapter""" @@ -1274,170 +1234,164 @@ class PeftLoraLoaderMixinTests: Tests a simple inference with lora attached to text encoder and unet, attaches multiple adapters and set/delete them """ - for scheduler_cls in self.scheduler_classes: - components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) - pipe = self.pipeline_class(**components) - pipe = 
pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) + components, text_lora_config, denoiser_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) - output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] - if "text_encoder" in self.pipeline_class._lora_loadable_modules: - pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") - pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") + if "text_encoder" in self.pipeline_class._lora_loadable_modules: + pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") + pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") + self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + + denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet + denoiser.add_adapter(denoiser_lora_config, "adapter-1") + denoiser.add_adapter(denoiser_lora_config, "adapter-2") + self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") + + if self.has_two_text_encoders or self.has_three_text_encoders: + lora_loadable_components = self.pipeline_class._lora_loadable_modules + if "text_encoder_2" in lora_loadable_components: + pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") + pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") self.assertTrue( - check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" + check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" ) - denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet - denoiser.add_adapter(denoiser_lora_config, "adapter-1") - denoiser.add_adapter(denoiser_lora_config, "adapter-2") - self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") + pipe.set_adapters("adapter-1") + output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0))[0] - if self.has_two_text_encoders or self.has_three_text_encoders: - lora_loadable_components = self.pipeline_class._lora_loadable_modules - if "text_encoder_2" in lora_loadable_components: - pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") - pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") - self.assertTrue( - check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) + pipe.set_adapters("adapter-2") + output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0))[0] - pipe.set_adapters("adapter-1") - output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0))[0] + pipe.set_adapters(["adapter-1", "adapter-2"]) + output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0))[0] - pipe.set_adapters("adapter-2") - output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0))[0] + self.assertFalse( + np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3), + "Adapter 1 and 2 should give different results", + ) - pipe.set_adapters(["adapter-1", "adapter-2"]) - output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0))[0] + self.assertFalse( + np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3), + "Adapter 1 and mixed adapters should give different results", + ) - self.assertFalse( - 
np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3), - "Adapter 1 and 2 should give different results", - ) + self.assertFalse( + np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3), + "Adapter 2 and mixed adapters should give different results", + ) - self.assertFalse( - np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3), - "Adapter 1 and mixed adapters should give different results", - ) + pipe.delete_adapters("adapter-1") + output_deleted_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0))[0] - self.assertFalse( - np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3), - "Adapter 2 and mixed adapters should give different results", - ) + self.assertTrue( + np.allclose(output_deleted_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3), + "Adapter 1 and 2 should give different results", + ) - pipe.delete_adapters("adapter-1") - output_deleted_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0))[0] + pipe.delete_adapters("adapter-2") + output_deleted_adapters = pipe(**inputs, generator=torch.manual_seed(0))[0] - self.assertTrue( - np.allclose(output_deleted_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3), - "Adapter 1 and 2 should give different results", - ) + self.assertTrue( + np.allclose(output_no_lora, output_deleted_adapters, atol=1e-3, rtol=1e-3), + "output with no lora and output with lora disabled should give same results", + ) - pipe.delete_adapters("adapter-2") - output_deleted_adapters = pipe(**inputs, generator=torch.manual_seed(0))[0] + if "text_encoder" in self.pipeline_class._lora_loadable_modules: + pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") + pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") - self.assertTrue( - np.allclose(output_no_lora, output_deleted_adapters, atol=1e-3, rtol=1e-3), - "output with no lora and output with lora disabled should give same results", - ) + denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet + denoiser.add_adapter(denoiser_lora_config, "adapter-1") + denoiser.add_adapter(denoiser_lora_config, "adapter-2") + self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") - if "text_encoder" in self.pipeline_class._lora_loadable_modules: - pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") - pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") + pipe.set_adapters(["adapter-1", "adapter-2"]) + pipe.delete_adapters(["adapter-1", "adapter-2"]) - denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet - denoiser.add_adapter(denoiser_lora_config, "adapter-1") - denoiser.add_adapter(denoiser_lora_config, "adapter-2") - self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") + output_deleted_adapters = pipe(**inputs, generator=torch.manual_seed(0))[0] - pipe.set_adapters(["adapter-1", "adapter-2"]) - pipe.delete_adapters(["adapter-1", "adapter-2"]) - - output_deleted_adapters = pipe(**inputs, generator=torch.manual_seed(0))[0] - - self.assertTrue( - np.allclose(output_no_lora, output_deleted_adapters, atol=1e-3, rtol=1e-3), - "output with no lora and output with lora disabled should give same results", - ) + self.assertTrue( + np.allclose(output_no_lora, output_deleted_adapters, atol=1e-3, rtol=1e-3), + "output with no lora and output with lora disabled should give same results", + ) def test_simple_inference_with_text_denoiser_multi_adapter_weighted(self): """ Tests a simple inference with lora attached to 
text encoder and unet, attaches multiple adapters and set them """ - for scheduler_cls in self.scheduler_classes: - components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) - pipe = self.pipeline_class(**components) - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) + components, text_lora_config, denoiser_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) - output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] - if "text_encoder" in self.pipeline_class._lora_loadable_modules: - pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") - pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") + if "text_encoder" in self.pipeline_class._lora_loadable_modules: + pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") + pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") + self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + + denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet + denoiser.add_adapter(denoiser_lora_config, "adapter-1") + denoiser.add_adapter(denoiser_lora_config, "adapter-2") + self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") + + if self.has_two_text_encoders or self.has_three_text_encoders: + lora_loadable_components = self.pipeline_class._lora_loadable_modules + if "text_encoder_2" in lora_loadable_components: + pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") + pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") self.assertTrue( - check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" + check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" ) - denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet - denoiser.add_adapter(denoiser_lora_config, "adapter-1") - denoiser.add_adapter(denoiser_lora_config, "adapter-2") - self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") + pipe.set_adapters("adapter-1") + output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0))[0] - if self.has_two_text_encoders or self.has_three_text_encoders: - lora_loadable_components = self.pipeline_class._lora_loadable_modules - if "text_encoder_2" in lora_loadable_components: - pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") - pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") - self.assertTrue( - check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" - ) + pipe.set_adapters("adapter-2") + output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0))[0] - pipe.set_adapters("adapter-1") - output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0))[0] + pipe.set_adapters(["adapter-1", "adapter-2"]) + output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0))[0] - pipe.set_adapters("adapter-2") - output_adapter_2 = pipe(**inputs, generator=torch.manual_seed(0))[0] + # Fuse and unfuse should lead to the same results + self.assertFalse( + np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3), + "Adapter 1 and 2 should give different results", + 
) - pipe.set_adapters(["adapter-1", "adapter-2"]) - output_adapter_mixed = pipe(**inputs, generator=torch.manual_seed(0))[0] + self.assertFalse( + np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3), + "Adapter 1 and mixed adapters should give different results", + ) - # Fuse and unfuse should lead to the same results - self.assertFalse( - np.allclose(output_adapter_1, output_adapter_2, atol=1e-3, rtol=1e-3), - "Adapter 1 and 2 should give different results", - ) + self.assertFalse( + np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3), + "Adapter 2 and mixed adapters should give different results", + ) - self.assertFalse( - np.allclose(output_adapter_1, output_adapter_mixed, atol=1e-3, rtol=1e-3), - "Adapter 1 and mixed adapters should give different results", - ) + pipe.set_adapters(["adapter-1", "adapter-2"], [0.5, 0.6]) + output_adapter_mixed_weighted = pipe(**inputs, generator=torch.manual_seed(0))[0] - self.assertFalse( - np.allclose(output_adapter_2, output_adapter_mixed, atol=1e-3, rtol=1e-3), - "Adapter 2 and mixed adapters should give different results", - ) + self.assertFalse( + np.allclose(output_adapter_mixed_weighted, output_adapter_mixed, atol=1e-3, rtol=1e-3), + "Weighted adapter and mixed adapter should give different results", + ) - pipe.set_adapters(["adapter-1", "adapter-2"], [0.5, 0.6]) - output_adapter_mixed_weighted = pipe(**inputs, generator=torch.manual_seed(0))[0] + pipe.disable_lora() + output_disabled = pipe(**inputs, generator=torch.manual_seed(0))[0] - self.assertFalse( - np.allclose(output_adapter_mixed_weighted, output_adapter_mixed, atol=1e-3, rtol=1e-3), - "Weighted adapter and mixed adapter should give different results", - ) - - pipe.disable_lora() - output_disabled = pipe(**inputs, generator=torch.manual_seed(0))[0] - - self.assertTrue( - np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3), - "output with no lora and output with lora disabled should give same results", - ) + self.assertTrue( + np.allclose(output_no_lora, output_disabled, atol=1e-3, rtol=1e-3), + "output with no lora and output with lora disabled should give same results", + ) @skip_mps @pytest.mark.xfail( @@ -1446,163 +1400,156 @@ class PeftLoraLoaderMixinTests: strict=False, ) def test_lora_fuse_nan(self): - for scheduler_cls in self.scheduler_classes: - components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) - pipe = self.pipeline_class(**components) - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) + components, text_lora_config, denoiser_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) - if "text_encoder" in self.pipeline_class._lora_loadable_modules: - pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") - self.assertTrue( - check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" + if "text_encoder" in self.pipeline_class._lora_loadable_modules: + pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") + self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + + denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet + denoiser.add_adapter(denoiser_lora_config, "adapter-1") + 
self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") + + # corrupt one LoRA weight with `inf` values + with torch.no_grad(): + if self.unet_kwargs: + pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_A["adapter-1"].weight += float( + "inf" ) + else: + named_modules = [name for name, _ in pipe.transformer.named_modules()] + possible_tower_names = [ + "transformer_blocks", + "blocks", + "joint_transformer_blocks", + "single_transformer_blocks", + ] + filtered_tower_names = [ + tower_name for tower_name in possible_tower_names if hasattr(pipe.transformer, tower_name) + ] + if len(filtered_tower_names) == 0: + reason = f"`pipe.transformer` didn't have any of the following attributes: {possible_tower_names}." + raise ValueError(reason) + for tower_name in filtered_tower_names: + transformer_tower = getattr(pipe.transformer, tower_name) + has_attn1 = any("attn1" in name for name in named_modules) + if has_attn1: + transformer_tower[0].attn1.to_q.lora_A["adapter-1"].weight += float("inf") + else: + transformer_tower[0].attn.to_q.lora_A["adapter-1"].weight += float("inf") - denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet - denoiser.add_adapter(denoiser_lora_config, "adapter-1") - self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") + # with `safe_fusing=True` we should see an Error + with self.assertRaises(ValueError): + pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules, safe_fusing=True) - # corrupt one LoRA weight with `inf` values - with torch.no_grad(): - if self.unet_kwargs: - pipe.unet.mid_block.attentions[0].transformer_blocks[0].attn1.to_q.lora_A[ - "adapter-1" - ].weight += float("inf") - else: - named_modules = [name for name, _ in pipe.transformer.named_modules()] - possible_tower_names = [ - "transformer_blocks", - "blocks", - "joint_transformer_blocks", - "single_transformer_blocks", - ] - filtered_tower_names = [ - tower_name for tower_name in possible_tower_names if hasattr(pipe.transformer, tower_name) - ] - if len(filtered_tower_names) == 0: - reason = ( - f"`pipe.transformer` didn't have any of the following attributes: {possible_tower_names}." 
- ) - raise ValueError(reason) - for tower_name in filtered_tower_names: - transformer_tower = getattr(pipe.transformer, tower_name) - has_attn1 = any("attn1" in name for name in named_modules) - if has_attn1: - transformer_tower[0].attn1.to_q.lora_A["adapter-1"].weight += float("inf") - else: - transformer_tower[0].attn.to_q.lora_A["adapter-1"].weight += float("inf") + # without we should not see an error, but every image will be black + pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules, safe_fusing=False) + out = pipe(**inputs)[0] - # with `safe_fusing=True` we should see an Error - with self.assertRaises(ValueError): - pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules, safe_fusing=True) - - # without we should not see an error, but every image will be black - pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules, safe_fusing=False) - out = pipe(**inputs)[0] - - self.assertTrue(np.isnan(out).all()) + self.assertTrue(np.isnan(out).all()) def test_get_adapters(self): """ Tests a simple usecase where we attach multiple adapters and check if the results are the expected results """ - for scheduler_cls in self.scheduler_classes: - components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) - pipe = self.pipeline_class(**components) - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) + components, text_lora_config, denoiser_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) - pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") + pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") - denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet - denoiser.add_adapter(denoiser_lora_config, "adapter-1") + denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet + denoiser.add_adapter(denoiser_lora_config, "adapter-1") - adapter_names = pipe.get_active_adapters() - self.assertListEqual(adapter_names, ["adapter-1"]) + adapter_names = pipe.get_active_adapters() + self.assertListEqual(adapter_names, ["adapter-1"]) - pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") - denoiser.add_adapter(denoiser_lora_config, "adapter-2") + pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") + denoiser.add_adapter(denoiser_lora_config, "adapter-2") - adapter_names = pipe.get_active_adapters() - self.assertListEqual(adapter_names, ["adapter-2"]) + adapter_names = pipe.get_active_adapters() + self.assertListEqual(adapter_names, ["adapter-2"]) - pipe.set_adapters(["adapter-1", "adapter-2"]) - self.assertListEqual(pipe.get_active_adapters(), ["adapter-1", "adapter-2"]) + pipe.set_adapters(["adapter-1", "adapter-2"]) + self.assertListEqual(pipe.get_active_adapters(), ["adapter-1", "adapter-2"]) def test_get_list_adapters(self): """ Tests a simple usecase where we attach multiple adapters and check if the results are the expected results """ - for scheduler_cls in self.scheduler_classes: - components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) - pipe = self.pipeline_class(**components) - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) + components, text_lora_config, denoiser_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + 
pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) - # 1. - dicts_to_be_checked = {} - if "text_encoder" in self.pipeline_class._lora_loadable_modules: - pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") - dicts_to_be_checked = {"text_encoder": ["adapter-1"]} + # 1. + dicts_to_be_checked = {} + if "text_encoder" in self.pipeline_class._lora_loadable_modules: + pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") + dicts_to_be_checked = {"text_encoder": ["adapter-1"]} - if self.unet_kwargs is not None: - pipe.unet.add_adapter(denoiser_lora_config, "adapter-1") - dicts_to_be_checked.update({"unet": ["adapter-1"]}) - else: - pipe.transformer.add_adapter(denoiser_lora_config, "adapter-1") - dicts_to_be_checked.update({"transformer": ["adapter-1"]}) + if self.unet_kwargs is not None: + pipe.unet.add_adapter(denoiser_lora_config, "adapter-1") + dicts_to_be_checked.update({"unet": ["adapter-1"]}) + else: + pipe.transformer.add_adapter(denoiser_lora_config, "adapter-1") + dicts_to_be_checked.update({"transformer": ["adapter-1"]}) - self.assertDictEqual(pipe.get_list_adapters(), dicts_to_be_checked) + self.assertDictEqual(pipe.get_list_adapters(), dicts_to_be_checked) - # 2. - dicts_to_be_checked = {} - if "text_encoder" in self.pipeline_class._lora_loadable_modules: - pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") - dicts_to_be_checked = {"text_encoder": ["adapter-1", "adapter-2"]} + # 2. + dicts_to_be_checked = {} + if "text_encoder" in self.pipeline_class._lora_loadable_modules: + pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") + dicts_to_be_checked = {"text_encoder": ["adapter-1", "adapter-2"]} - if self.unet_kwargs is not None: - pipe.unet.add_adapter(denoiser_lora_config, "adapter-2") - dicts_to_be_checked.update({"unet": ["adapter-1", "adapter-2"]}) - else: - pipe.transformer.add_adapter(denoiser_lora_config, "adapter-2") - dicts_to_be_checked.update({"transformer": ["adapter-1", "adapter-2"]}) + if self.unet_kwargs is not None: + pipe.unet.add_adapter(denoiser_lora_config, "adapter-2") + dicts_to_be_checked.update({"unet": ["adapter-1", "adapter-2"]}) + else: + pipe.transformer.add_adapter(denoiser_lora_config, "adapter-2") + dicts_to_be_checked.update({"transformer": ["adapter-1", "adapter-2"]}) - self.assertDictEqual(pipe.get_list_adapters(), dicts_to_be_checked) + self.assertDictEqual(pipe.get_list_adapters(), dicts_to_be_checked) - # 3. - pipe.set_adapters(["adapter-1", "adapter-2"]) + # 3. + pipe.set_adapters(["adapter-1", "adapter-2"]) - dicts_to_be_checked = {} - if "text_encoder" in self.pipeline_class._lora_loadable_modules: - dicts_to_be_checked = {"text_encoder": ["adapter-1", "adapter-2"]} + dicts_to_be_checked = {} + if "text_encoder" in self.pipeline_class._lora_loadable_modules: + dicts_to_be_checked = {"text_encoder": ["adapter-1", "adapter-2"]} - if self.unet_kwargs is not None: - dicts_to_be_checked.update({"unet": ["adapter-1", "adapter-2"]}) - else: - dicts_to_be_checked.update({"transformer": ["adapter-1", "adapter-2"]}) + if self.unet_kwargs is not None: + dicts_to_be_checked.update({"unet": ["adapter-1", "adapter-2"]}) + else: + dicts_to_be_checked.update({"transformer": ["adapter-1", "adapter-2"]}) - self.assertDictEqual( - pipe.get_list_adapters(), - dicts_to_be_checked, - ) + self.assertDictEqual( + pipe.get_list_adapters(), + dicts_to_be_checked, + ) - # 4. 
- dicts_to_be_checked = {} - if "text_encoder" in self.pipeline_class._lora_loadable_modules: - dicts_to_be_checked = {"text_encoder": ["adapter-1", "adapter-2"]} + # 4. + dicts_to_be_checked = {} + if "text_encoder" in self.pipeline_class._lora_loadable_modules: + dicts_to_be_checked = {"text_encoder": ["adapter-1", "adapter-2"]} - if self.unet_kwargs is not None: - pipe.unet.add_adapter(denoiser_lora_config, "adapter-3") - dicts_to_be_checked.update({"unet": ["adapter-1", "adapter-2", "adapter-3"]}) - else: - pipe.transformer.add_adapter(denoiser_lora_config, "adapter-3") - dicts_to_be_checked.update({"transformer": ["adapter-1", "adapter-2", "adapter-3"]}) + if self.unet_kwargs is not None: + pipe.unet.add_adapter(denoiser_lora_config, "adapter-3") + dicts_to_be_checked.update({"unet": ["adapter-1", "adapter-2", "adapter-3"]}) + else: + pipe.transformer.add_adapter(denoiser_lora_config, "adapter-3") + dicts_to_be_checked.update({"transformer": ["adapter-1", "adapter-2", "adapter-3"]}) - self.assertDictEqual(pipe.get_list_adapters(), dicts_to_be_checked) + self.assertDictEqual(pipe.get_list_adapters(), dicts_to_be_checked) @require_peft_version_greater(peft_version="0.6.2") def test_simple_inference_with_text_lora_denoiser_fused_multi( @@ -1612,8 +1559,83 @@ class PeftLoraLoaderMixinTests: Tests a simple inference with lora attached into text encoder + fuses the lora weights into base model and makes sure it works as expected - with unet and multi-adapter case """ - for scheduler_cls in self.scheduler_classes: - components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) + components, text_lora_config, denoiser_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) + + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] + self.assertTrue(output_no_lora.shape == self.output_shape) + + if "text_encoder" in self.pipeline_class._lora_loadable_modules: + pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") + self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder") + pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") + + denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet + denoiser.add_adapter(denoiser_lora_config, "adapter-1") + self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") + denoiser.add_adapter(denoiser_lora_config, "adapter-2") + + if self.has_two_text_encoders or self.has_three_text_encoders: + lora_loadable_components = self.pipeline_class._lora_loadable_modules + if "text_encoder_2" in lora_loadable_components: + pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") + self.assertTrue( + check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + ) + pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") + + # set them to multi-adapter inference mode + pipe.set_adapters(["adapter-1", "adapter-2"]) + outputs_all_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] + + pipe.set_adapters(["adapter-1"]) + outputs_lora_1 = pipe(**inputs, generator=torch.manual_seed(0))[0] + + pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules, adapter_names=["adapter-1"]) + self.assertTrue(pipe.num_fused_loras == 1, f"{pipe.num_fused_loras=}, {pipe.fused_loras=}") + + # Fusing should still 
keep the LoRA layers so output should remain the same + outputs_lora_1_fused = pipe(**inputs, generator=torch.manual_seed(0))[0] + + self.assertTrue( + np.allclose(outputs_lora_1, outputs_lora_1_fused, atol=expected_atol, rtol=expected_rtol), + "Fused lora should not change the output", + ) + + pipe.unfuse_lora(components=self.pipeline_class._lora_loadable_modules) + self.assertTrue(pipe.num_fused_loras == 0, f"{pipe.num_fused_loras=}, {pipe.fused_loras=}") + + if "text_encoder" in self.pipeline_class._lora_loadable_modules: + self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Unfuse should still keep LoRA layers") + + self.assertTrue(check_if_lora_correctly_set(denoiser), "Unfuse should still keep LoRA layers") + + if self.has_two_text_encoders or self.has_three_text_encoders: + if "text_encoder_2" in self.pipeline_class._lora_loadable_modules: + self.assertTrue( + check_if_lora_correctly_set(pipe.text_encoder_2), "Unfuse should still keep LoRA layers" + ) + + pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules, adapter_names=["adapter-2", "adapter-1"]) + self.assertTrue(pipe.num_fused_loras == 2, f"{pipe.num_fused_loras=}, {pipe.fused_loras=}") + + # Fusing should still keep the LoRA layers + output_all_lora_fused = pipe(**inputs, generator=torch.manual_seed(0))[0] + self.assertTrue( + np.allclose(output_all_lora_fused, outputs_all_lora, atol=expected_atol, rtol=expected_rtol), + "Fused lora should not change the output", + ) + pipe.unfuse_lora(components=self.pipeline_class._lora_loadable_modules) + self.assertTrue(pipe.num_fused_loras == 0, f"{pipe.num_fused_loras=}, {pipe.fused_loras=}") + + def test_lora_scale_kwargs_match_fusion(self, expected_atol: float = 1e-3, expected_rtol: float = 1e-3): + attention_kwargs_name = determine_attention_kwargs_name(self.pipeline_class) + + for lora_scale in [1.0, 0.8]: + components, text_lora_config, denoiser_lora_config = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) @@ -1627,150 +1649,65 @@ class PeftLoraLoaderMixinTests: self.assertTrue( check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" ) - pipe.text_encoder.add_adapter(text_lora_config, "adapter-2") denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet denoiser.add_adapter(denoiser_lora_config, "adapter-1") self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") - denoiser.add_adapter(denoiser_lora_config, "adapter-2") if self.has_two_text_encoders or self.has_three_text_encoders: lora_loadable_components = self.pipeline_class._lora_loadable_modules if "text_encoder_2" in lora_loadable_components: pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") self.assertTrue( - check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2" + check_if_lora_correctly_set(pipe.text_encoder_2), + "Lora not correctly set in text encoder 2", ) - pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-2") - - # set them to multi-adapter inference mode - pipe.set_adapters(["adapter-1", "adapter-2"]) - outputs_all_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] pipe.set_adapters(["adapter-1"]) - outputs_lora_1 = pipe(**inputs, generator=torch.manual_seed(0))[0] + attention_kwargs = {attention_kwargs_name: {"scale": lora_scale}} + outputs_lora_1 = pipe(**inputs, generator=torch.manual_seed(0), **attention_kwargs)[0] - 
pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules, adapter_names=["adapter-1"]) + pipe.fuse_lora( + components=self.pipeline_class._lora_loadable_modules, + adapter_names=["adapter-1"], + lora_scale=lora_scale, + ) self.assertTrue(pipe.num_fused_loras == 1, f"{pipe.num_fused_loras=}, {pipe.fused_loras=}") - # Fusing should still keep the LoRA layers so output should remain the same outputs_lora_1_fused = pipe(**inputs, generator=torch.manual_seed(0))[0] self.assertTrue( np.allclose(outputs_lora_1, outputs_lora_1_fused, atol=expected_atol, rtol=expected_rtol), "Fused lora should not change the output", ) - - pipe.unfuse_lora(components=self.pipeline_class._lora_loadable_modules) - self.assertTrue(pipe.num_fused_loras == 0, f"{pipe.num_fused_loras=}, {pipe.fused_loras=}") - - if "text_encoder" in self.pipeline_class._lora_loadable_modules: - self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Unfuse should still keep LoRA layers") - - self.assertTrue(check_if_lora_correctly_set(denoiser), "Unfuse should still keep LoRA layers") - - if self.has_two_text_encoders or self.has_three_text_encoders: - if "text_encoder_2" in self.pipeline_class._lora_loadable_modules: - self.assertTrue( - check_if_lora_correctly_set(pipe.text_encoder_2), "Unfuse should still keep LoRA layers" - ) - - pipe.fuse_lora( - components=self.pipeline_class._lora_loadable_modules, adapter_names=["adapter-2", "adapter-1"] + self.assertFalse( + np.allclose(output_no_lora, outputs_lora_1, atol=expected_atol, rtol=expected_rtol), + "LoRA should change the output", ) - self.assertTrue(pipe.num_fused_loras == 2, f"{pipe.num_fused_loras=}, {pipe.fused_loras=}") - - # Fusing should still keep the LoRA layers - output_all_lora_fused = pipe(**inputs, generator=torch.manual_seed(0))[0] - self.assertTrue( - np.allclose(output_all_lora_fused, outputs_all_lora, atol=expected_atol, rtol=expected_rtol), - "Fused lora should not change the output", - ) - pipe.unfuse_lora(components=self.pipeline_class._lora_loadable_modules) - self.assertTrue(pipe.num_fused_loras == 0, f"{pipe.num_fused_loras=}, {pipe.fused_loras=}") - - def test_lora_scale_kwargs_match_fusion(self, expected_atol: float = 1e-3, expected_rtol: float = 1e-3): - attention_kwargs_name = determine_attention_kwargs_name(self.pipeline_class) - - for lora_scale in [1.0, 0.8]: - for scheduler_cls in self.scheduler_classes: - components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) - pipe = self.pipeline_class(**components) - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) - - output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] - self.assertTrue(output_no_lora.shape == self.output_shape) - - if "text_encoder" in self.pipeline_class._lora_loadable_modules: - pipe.text_encoder.add_adapter(text_lora_config, "adapter-1") - self.assertTrue( - check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder" - ) - - denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet - denoiser.add_adapter(denoiser_lora_config, "adapter-1") - self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.") - - if self.has_two_text_encoders or self.has_three_text_encoders: - lora_loadable_components = self.pipeline_class._lora_loadable_modules - if "text_encoder_2" in lora_loadable_components: - pipe.text_encoder_2.add_adapter(text_lora_config, "adapter-1") - 
self.assertTrue( - check_if_lora_correctly_set(pipe.text_encoder_2), - "Lora not correctly set in text encoder 2", - ) - - pipe.set_adapters(["adapter-1"]) - attention_kwargs = {attention_kwargs_name: {"scale": lora_scale}} - outputs_lora_1 = pipe(**inputs, generator=torch.manual_seed(0), **attention_kwargs)[0] - - pipe.fuse_lora( - components=self.pipeline_class._lora_loadable_modules, - adapter_names=["adapter-1"], - lora_scale=lora_scale, - ) - self.assertTrue(pipe.num_fused_loras == 1, f"{pipe.num_fused_loras=}, {pipe.fused_loras=}") - - outputs_lora_1_fused = pipe(**inputs, generator=torch.manual_seed(0))[0] - - self.assertTrue( - np.allclose(outputs_lora_1, outputs_lora_1_fused, atol=expected_atol, rtol=expected_rtol), - "Fused lora should not change the output", - ) - self.assertFalse( - np.allclose(output_no_lora, outputs_lora_1, atol=expected_atol, rtol=expected_rtol), - "LoRA should change the output", - ) @require_peft_version_greater(peft_version="0.9.0") def test_simple_inference_with_dora(self): - for scheduler_cls in self.scheduler_classes: - components, text_lora_config, denoiser_lora_config = self.get_dummy_components( - scheduler_cls, use_dora=True - ) - pipe = self.pipeline_class(**components) - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) + components, text_lora_config, denoiser_lora_config = self.get_dummy_components(use_dora=True) + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) - output_no_dora_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] - self.assertTrue(output_no_dora_lora.shape == self.output_shape) + output_no_dora_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] + self.assertTrue(output_no_dora_lora.shape == self.output_shape) - pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config) + pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config) - output_dora_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] + output_dora_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] - self.assertFalse( - np.allclose(output_dora_lora, output_no_dora_lora, atol=1e-3, rtol=1e-3), - "DoRA lora should change the output", - ) + self.assertFalse( + np.allclose(output_dora_lora, output_no_dora_lora, atol=1e-3, rtol=1e-3), + "DoRA lora should change the output", + ) def test_missing_keys_warning(self): - scheduler_cls = self.scheduler_classes[0] # Skip text encoder check for now as that is handled with `transformers`. - components, _, denoiser_lora_config = self.get_dummy_components(scheduler_cls) + components, _, denoiser_lora_config = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) @@ -1805,9 +1742,8 @@ class PeftLoraLoaderMixinTests: self.assertTrue(missing_key.replace(f"{component}.", "") in cap_logger.out.replace("default_0.", "")) def test_unexpected_keys_warning(self): - scheduler_cls = self.scheduler_classes[0] # Skip text encoder check for now as that is handled with `transformers`. 
- components, _, denoiser_lora_config = self.get_dummy_components(scheduler_cls) + components, _, denoiser_lora_config = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) @@ -1842,23 +1778,22 @@ class PeftLoraLoaderMixinTests: Tests a simple inference with lora attached to text encoder and unet, then unloads the lora weights and makes sure it works as expected """ - for scheduler_cls in self.scheduler_classes: - components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) - pipe = self.pipeline_class(**components) - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) + components, text_lora_config, denoiser_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) - pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config) + pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config) - pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) - pipe.text_encoder = torch.compile(pipe.text_encoder, mode="reduce-overhead", fullgraph=True) + pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) + pipe.text_encoder = torch.compile(pipe.text_encoder, mode="reduce-overhead", fullgraph=True) - if self.has_two_text_encoders or self.has_three_text_encoders: - pipe.text_encoder_2 = torch.compile(pipe.text_encoder_2, mode="reduce-overhead", fullgraph=True) + if self.has_two_text_encoders or self.has_three_text_encoders: + pipe.text_encoder_2 = torch.compile(pipe.text_encoder_2, mode="reduce-overhead", fullgraph=True) - # Just makes sure it works.. - _ = pipe(**inputs, generator=torch.manual_seed(0))[0] + # Just makes sure it works. + _ = pipe(**inputs, generator=torch.manual_seed(0))[0] def test_modify_padding_mode(self): def set_pad_mode(network, mode="circular"): @@ -1866,22 +1801,20 @@ class PeftLoraLoaderMixinTests: if isinstance(module, torch.nn.Conv2d): module.padding_mode = mode - for scheduler_cls in self.scheduler_classes: - components, _, _ = self.get_dummy_components(scheduler_cls) - pipe = self.pipeline_class(**components) - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - _pad_mode = "circular" - set_pad_mode(pipe.vae, _pad_mode) - set_pad_mode(pipe.unet, _pad_mode) + components, _, _ = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + _pad_mode = "circular" + set_pad_mode(pipe.vae, _pad_mode) + set_pad_mode(pipe.unet, _pad_mode) - _, _, inputs = self.get_dummy_inputs() - _ = pipe(**inputs)[0] + _, _, inputs = self.get_dummy_inputs() + _ = pipe(**inputs)[0] def test_logs_info_when_no_lora_keys_found(self): - scheduler_cls = self.scheduler_classes[0] # Skip text encoder check for now as that is handled with `transformers`. 
- components, _, _ = self.get_dummy_components(scheduler_cls) + components, _, _ = self.get_dummy_components() pipe = self.pipeline_class(**components) pipe = pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) @@ -1925,73 +1858,71 @@ class PeftLoraLoaderMixinTests: def test_set_adapters_match_attention_kwargs(self): """Test to check if outputs after `set_adapters()` and attention kwargs match.""" attention_kwargs_name = determine_attention_kwargs_name(self.pipeline_class) + components, text_lora_config, denoiser_lora_config = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe = pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + _, _, inputs = self.get_dummy_inputs(with_generator=False) - for scheduler_cls in self.scheduler_classes: - components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) + output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] + self.assertTrue(output_no_lora.shape == self.output_shape) + + pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config) + + lora_scale = 0.5 + attention_kwargs = {attention_kwargs_name: {"scale": lora_scale}} + output_lora_scale = pipe(**inputs, generator=torch.manual_seed(0), **attention_kwargs)[0] + self.assertFalse( + np.allclose(output_no_lora, output_lora_scale, atol=1e-3, rtol=1e-3), + "Lora + scale should change the output", + ) + + pipe.set_adapters("default", lora_scale) + output_lora_scale_wo_kwargs = pipe(**inputs, generator=torch.manual_seed(0))[0] + self.assertTrue( + not np.allclose(output_no_lora, output_lora_scale_wo_kwargs, atol=1e-3, rtol=1e-3), + "Lora + scale should change the output", + ) + self.assertTrue( + np.allclose(output_lora_scale, output_lora_scale_wo_kwargs, atol=1e-3, rtol=1e-3), + "Lora + scale should match the output of `set_adapters()`.", + ) + + with tempfile.TemporaryDirectory() as tmpdirname: + modules_to_save = self._get_modules_to_save(pipe, has_denoiser=True) + lora_state_dicts = self._get_lora_state_dicts(modules_to_save) + self.pipeline_class.save_lora_weights( + save_directory=tmpdirname, safe_serialization=True, **lora_state_dicts + ) + + self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) pipe = self.pipeline_class(**components) pipe = pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) - _, _, inputs = self.get_dummy_inputs(with_generator=False) + pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors")) - output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0] - self.assertTrue(output_no_lora.shape == self.output_shape) + for module_name, module in modules_to_save.items(): + self.assertTrue(check_if_lora_correctly_set(module), f"Lora not correctly set in {module_name}") - pipe, _ = self.add_adapters_to_pipeline(pipe, text_lora_config, denoiser_lora_config) - - lora_scale = 0.5 - attention_kwargs = {attention_kwargs_name: {"scale": lora_scale}} - output_lora_scale = pipe(**inputs, generator=torch.manual_seed(0), **attention_kwargs)[0] - self.assertFalse( - np.allclose(output_no_lora, output_lora_scale, atol=1e-3, rtol=1e-3), - "Lora + scale should change the output", - ) - - pipe.set_adapters("default", lora_scale) - output_lora_scale_wo_kwargs = pipe(**inputs, generator=torch.manual_seed(0))[0] + output_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0), **attention_kwargs)[0] self.assertTrue( - not np.allclose(output_no_lora, output_lora_scale_wo_kwargs, 
atol=1e-3, rtol=1e-3), + not np.allclose(output_no_lora, output_lora_from_pretrained, atol=1e-3, rtol=1e-3), "Lora + scale should change the output", ) self.assertTrue( - np.allclose(output_lora_scale, output_lora_scale_wo_kwargs, atol=1e-3, rtol=1e-3), - "Lora + scale should match the output of `set_adapters()`.", + np.allclose(output_lora_scale, output_lora_from_pretrained, atol=1e-3, rtol=1e-3), + "Loading from saved checkpoints should give same results as attention_kwargs.", + ) + self.assertTrue( + np.allclose(output_lora_scale_wo_kwargs, output_lora_from_pretrained, atol=1e-3, rtol=1e-3), + "Loading from saved checkpoints should give same results as set_adapters().", ) - - with tempfile.TemporaryDirectory() as tmpdirname: - modules_to_save = self._get_modules_to_save(pipe, has_denoiser=True) - lora_state_dicts = self._get_lora_state_dicts(modules_to_save) - self.pipeline_class.save_lora_weights( - save_directory=tmpdirname, safe_serialization=True, **lora_state_dicts - ) - - self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors"))) - pipe = self.pipeline_class(**components) - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - pipe.load_lora_weights(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors")) - - for module_name, module in modules_to_save.items(): - self.assertTrue(check_if_lora_correctly_set(module), f"Lora not correctly set in {module_name}") - - output_lora_from_pretrained = pipe(**inputs, generator=torch.manual_seed(0), **attention_kwargs)[0] - self.assertTrue( - not np.allclose(output_no_lora, output_lora_from_pretrained, atol=1e-3, rtol=1e-3), - "Lora + scale should change the output", - ) - self.assertTrue( - np.allclose(output_lora_scale, output_lora_from_pretrained, atol=1e-3, rtol=1e-3), - "Loading from saved checkpoints should give same results as attention_kwargs.", - ) - self.assertTrue( - np.allclose(output_lora_scale_wo_kwargs, output_lora_from_pretrained, atol=1e-3, rtol=1e-3), - "Loading from saved checkpoints should give same results as set_adapters().", - ) @require_peft_version_greater("0.13.2") def test_lora_B_bias(self): # Currently, this test is only relevant for Flux Control LoRA as we are not # aware of any other LoRA checkpoint that has its `lora_B` biases trained. 
-        components, _, denoiser_lora_config = self.get_dummy_components(self.scheduler_classes[0])
+        components, _, denoiser_lora_config = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
         pipe = pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
@@ -2028,7 +1959,7 @@ class PeftLoraLoaderMixinTests:
         self.assertFalse(np.allclose(lora_bias_false_output, lora_bias_true_output, atol=1e-3, rtol=1e-3))

     def test_correct_lora_configs_with_different_ranks(self):
-        components, _, denoiser_lora_config = self.get_dummy_components(self.scheduler_classes[0])
+        components, _, denoiser_lora_config = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
         pipe = pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
@@ -2114,7 +2045,7 @@ class PeftLoraLoaderMixinTests:
                     self.assertEqual(submodule.bias.dtype, dtype_to_check)

         def initialize_pipeline(storage_dtype=None, compute_dtype=torch.float32):
-            components, text_lora_config, denoiser_lora_config = self.get_dummy_components(self.scheduler_classes[0])
+            components, text_lora_config, denoiser_lora_config = self.get_dummy_components()
             pipe = self.pipeline_class(**components)
             pipe = pipe.to(torch_device, dtype=compute_dtype)
             pipe.set_progress_bar_config(disable=None)
@@ -2181,7 +2112,7 @@ class PeftLoraLoaderMixinTests:
                 self.assertTrue(module._diffusers_hook.get_hook(_PEFT_AUTOCAST_DISABLE_HOOK) is not None)

         # 1. Test forward with add_adapter
-        components, _, denoiser_lora_config = self.get_dummy_components(self.scheduler_classes[0])
+        components, _, denoiser_lora_config = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
         pipe = pipe.to(torch_device, dtype=compute_dtype)
         pipe.set_progress_bar_config(disable=None)
@@ -2211,7 +2142,7 @@ class PeftLoraLoaderMixinTests:
             )
             self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors")))

-            components, _, _ = self.get_dummy_components(self.scheduler_classes[0])
+            components, _, _ = self.get_dummy_components()
             pipe = self.pipeline_class(**components)
             pipe = pipe.to(torch_device, dtype=compute_dtype)
             pipe.set_progress_bar_config(disable=None)
@@ -2231,10 +2162,7 @@ class PeftLoraLoaderMixinTests:

     @parameterized.expand([4, 8, 16])
     def test_lora_adapter_metadata_is_loaded_correctly(self, lora_alpha):
-        scheduler_cls = self.scheduler_classes[0]
-        components, text_lora_config, denoiser_lora_config = self.get_dummy_components(
-            scheduler_cls, lora_alpha=lora_alpha
-        )
+        components, text_lora_config, denoiser_lora_config = self.get_dummy_components(lora_alpha=lora_alpha)
         pipe = self.pipeline_class(**components)

         pipe, _ = self.add_adapters_to_pipeline(
@@ -2280,10 +2208,7 @@ class PeftLoraLoaderMixinTests:

     @parameterized.expand([4, 8, 16])
     def test_lora_adapter_metadata_save_load_inference(self, lora_alpha):
-        scheduler_cls = self.scheduler_classes[0]
-        components, text_lora_config, denoiser_lora_config = self.get_dummy_components(
-            scheduler_cls, lora_alpha=lora_alpha
-        )
+        components, text_lora_config, denoiser_lora_config = self.get_dummy_components(lora_alpha=lora_alpha)
         pipe = self.pipeline_class(**components).to(torch_device)
         _, _, inputs = self.get_dummy_inputs(with_generator=False)
@@ -2311,8 +2236,7 @@ class PeftLoraLoaderMixinTests:

     def test_lora_unload_add_adapter(self):
         """Tests if `unload_lora_weights()` -> `add_adapter()` works."""
-        scheduler_cls = self.scheduler_classes[0]
-        components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls)
+        components, text_lora_config, denoiser_lora_config = self.get_dummy_components()
         pipe = self.pipeline_class(**components).to(torch_device)
         _, _, inputs = self.get_dummy_inputs(with_generator=False)
@@ -2330,51 +2254,48 @@ class PeftLoraLoaderMixinTests:

     def test_inference_load_delete_load_adapters(self):
         "Tests if `load_lora_weights()` -> `delete_adapters()` -> `load_lora_weights()` works."
-        for scheduler_cls in self.scheduler_classes:
-            components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls)
-            pipe = self.pipeline_class(**components)
-            pipe = pipe.to(torch_device)
-            pipe.set_progress_bar_config(disable=None)
-            _, _, inputs = self.get_dummy_inputs(with_generator=False)
+        components, text_lora_config, denoiser_lora_config = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)
+        pipe = pipe.to(torch_device)
+        pipe.set_progress_bar_config(disable=None)
+        _, _, inputs = self.get_dummy_inputs(with_generator=False)

-            output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]
+        output_no_lora = pipe(**inputs, generator=torch.manual_seed(0))[0]

-            if "text_encoder" in self.pipeline_class._lora_loadable_modules:
-                pipe.text_encoder.add_adapter(text_lora_config)
+        if "text_encoder" in self.pipeline_class._lora_loadable_modules:
+            pipe.text_encoder.add_adapter(text_lora_config)
+            self.assertTrue(check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder")
+
+        denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet
+        denoiser.add_adapter(denoiser_lora_config)
+        self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.")
+
+        if self.has_two_text_encoders or self.has_three_text_encoders:
+            lora_loadable_components = self.pipeline_class._lora_loadable_modules
+            if "text_encoder_2" in lora_loadable_components:
+                pipe.text_encoder_2.add_adapter(text_lora_config)
                 self.assertTrue(
-                    check_if_lora_correctly_set(pipe.text_encoder), "Lora not correctly set in text encoder"
+                    check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
                 )

-            denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet
-            denoiser.add_adapter(denoiser_lora_config)
-            self.assertTrue(check_if_lora_correctly_set(denoiser), "Lora not correctly set in denoiser.")
+        output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0))[0]

-            if self.has_two_text_encoders or self.has_three_text_encoders:
-                lora_loadable_components = self.pipeline_class._lora_loadable_modules
-                if "text_encoder_2" in lora_loadable_components:
-                    pipe.text_encoder_2.add_adapter(text_lora_config)
-                    self.assertTrue(
-                        check_if_lora_correctly_set(pipe.text_encoder_2), "Lora not correctly set in text encoder 2"
-                    )
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            modules_to_save = self._get_modules_to_save(pipe, has_denoiser=True)
+            lora_state_dicts = self._get_lora_state_dicts(modules_to_save)
+            self.pipeline_class.save_lora_weights(save_directory=tmpdirname, **lora_state_dicts)
+            self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors")))

-            output_adapter_1 = pipe(**inputs, generator=torch.manual_seed(0))[0]
+            # First, delete adapter and compare.
+            pipe.delete_adapters(pipe.get_active_adapters()[0])
+            output_no_adapter = pipe(**inputs, generator=torch.manual_seed(0))[0]
+            self.assertFalse(np.allclose(output_adapter_1, output_no_adapter, atol=1e-3, rtol=1e-3))
+            self.assertTrue(np.allclose(output_no_lora, output_no_adapter, atol=1e-3, rtol=1e-3))

-            with tempfile.TemporaryDirectory() as tmpdirname:
-                modules_to_save = self._get_modules_to_save(pipe, has_denoiser=True)
-                lora_state_dicts = self._get_lora_state_dicts(modules_to_save)
-                self.pipeline_class.save_lora_weights(save_directory=tmpdirname, **lora_state_dicts)
-                self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors")))
-
-                # First, delete adapter and compare.
-                pipe.delete_adapters(pipe.get_active_adapters()[0])
-                output_no_adapter = pipe(**inputs, generator=torch.manual_seed(0))[0]
-                self.assertFalse(np.allclose(output_adapter_1, output_no_adapter, atol=1e-3, rtol=1e-3))
-                self.assertTrue(np.allclose(output_no_lora, output_no_adapter, atol=1e-3, rtol=1e-3))
-
-                # Then load adapter and compare.
-                pipe.load_lora_weights(tmpdirname)
-                output_lora_loaded = pipe(**inputs, generator=torch.manual_seed(0))[0]
-                self.assertTrue(np.allclose(output_adapter_1, output_lora_loaded, atol=1e-3, rtol=1e-3))
+            # Then load adapter and compare.
+            pipe.load_lora_weights(tmpdirname)
+            output_lora_loaded = pipe(**inputs, generator=torch.manual_seed(0))[0]
+            self.assertTrue(np.allclose(output_adapter_1, output_lora_loaded, atol=1e-3, rtol=1e-3))

     def _test_group_offloading_inference_denoiser(self, offload_type, use_stream):
         from diffusers.hooks.group_offloading import _get_top_level_group_offload_hook
@@ -2382,7 +2303,7 @@ class PeftLoraLoaderMixinTests:
         onload_device = torch_device
         offload_device = torch.device("cpu")

-        components, text_lora_config, denoiser_lora_config = self.get_dummy_components(self.scheduler_classes[0])
+        components, text_lora_config, denoiser_lora_config = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
         pipe = pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
@@ -2399,7 +2320,7 @@ class PeftLoraLoaderMixinTests:
             )
             self.assertTrue(os.path.isfile(os.path.join(tmpdirname, "pytorch_lora_weights.safetensors")))

-            components, _, _ = self.get_dummy_components(self.scheduler_classes[0])
+            components, _, _ = self.get_dummy_components()
             pipe = self.pipeline_class(**components)
             pipe.set_progress_bar_config(disable=None)
             denoiser = pipe.transformer if self.unet_kwargs is None else pipe.unet
@@ -2451,7 +2372,7 @@ class PeftLoraLoaderMixinTests:

     @require_torch_accelerator
     def test_lora_loading_model_cpu_offload(self):
-        components, _, denoiser_lora_config = self.get_dummy_components(self.scheduler_classes[0])
+        components, _, denoiser_lora_config = self.get_dummy_components()
         _, _, inputs = self.get_dummy_inputs(with_generator=False)
         pipe = self.pipeline_class(**components)
         pipe = pipe.to(torch_device)
@@ -2470,7 +2391,7 @@ class PeftLoraLoaderMixinTests:
                 save_directory=tmpdirname, safe_serialization=True, **lora_state_dicts
             )
             # reinitialize the pipeline to mimic the inference workflow.
-            components, _, denoiser_lora_config = self.get_dummy_components(self.scheduler_classes[0])
+            components, _, denoiser_lora_config = self.get_dummy_components()
             pipe = self.pipeline_class(**components)
             pipe.enable_model_cpu_offload(device=torch_device)
             pipe.load_lora_weights(tmpdirname)

From 7c54a7b38a9ce42db7c6d8ec86ec656cac9ee216 Mon Sep 17 00:00:00 2001
From: Dhruv Nair
Date: Wed, 24 Sep 2025 05:23:41 +0200
Subject: [PATCH 5/6] Fix Custom Code loading (#12378)

* update

* update

* update
---
 src/diffusers/models/auto_model.py                  | 2 +-
 src/diffusers/modular_pipelines/modular_pipeline.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/diffusers/models/auto_model.py b/src/diffusers/models/auto_model.py
index ada0d54e54..47f3a992b3 100644
--- a/src/diffusers/models/auto_model.py
+++ b/src/diffusers/models/auto_model.py
@@ -194,7 +194,7 @@ class AutoModel(ConfigMixin):
         has_remote_code = "auto_map" in config and cls.__name__ in config["auto_map"]
         trust_remote_code = resolve_trust_remote_code(trust_remote_code, pretrained_model_or_path, has_remote_code)

-        if not (has_remote_code and trust_remote_code):
+        if not has_remote_code and trust_remote_code:
             raise ValueError(
                 "Selected model repository does not happear to have any custom code or does not have a valid `config.json` file."
             )
diff --git a/src/diffusers/modular_pipelines/modular_pipeline.py b/src/diffusers/modular_pipelines/modular_pipeline.py
index 78226a49b1..74ffc62348 100644
--- a/src/diffusers/modular_pipelines/modular_pipeline.py
+++ b/src/diffusers/modular_pipelines/modular_pipeline.py
@@ -323,7 +323,7 @@ class ModularPipelineBlocks(ConfigMixin, PushToHubMixin):
             trust_remote_code = resolve_trust_remote_code(
                 trust_remote_code, pretrained_model_name_or_path, has_remote_code
            )
-            if not (has_remote_code and trust_remote_code):
+            if not has_remote_code and trust_remote_code:
                 raise ValueError(
                     "Selected model repository does not happear to have any custom code or does not have a valid `config.json` file."
                 )

From 9ef118509e1a2682bb6870533cc3552726284167 Mon Sep 17 00:00:00 2001
From: Sayak Paul
Date: Wed, 24 Sep 2025 09:02:25 +0530
Subject: [PATCH 6/6] [tests] disable xformers tests for pipelines where it
 isn't popular. (#12277)

disable xformers tests for pipelines where it isn't popular.
---
 tests/pipelines/easyanimate/test_easyanimate.py        | 1 +
 tests/pipelines/hidream_image/test_pipeline_hidream.py | 2 +-
 tests/pipelines/omnigen/test_pipeline_omnigen.py       | 2 +-
 tests/pipelines/qwenimage/test_qwenimage_controlnet.py | 3 +--
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/pipelines/easyanimate/test_easyanimate.py b/tests/pipelines/easyanimate/test_easyanimate.py
index 2dbb8639f1..5cb2a232bb 100644
--- a/tests/pipelines/easyanimate/test_easyanimate.py
+++ b/tests/pipelines/easyanimate/test_easyanimate.py
@@ -48,6 +48,7 @@ class EasyAnimatePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     batch_params = TEXT_TO_IMAGE_BATCH_PARAMS
     image_params = TEXT_TO_IMAGE_IMAGE_PARAMS
     image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS
+    test_xformers_attention = False
     required_optional_params = frozenset(
         [
             "num_inference_steps",
diff --git a/tests/pipelines/hidream_image/test_pipeline_hidream.py b/tests/pipelines/hidream_image/test_pipeline_hidream.py
index ec8d36e1d3..ddf39ba4c1 100644
--- a/tests/pipelines/hidream_image/test_pipeline_hidream.py
+++ b/tests/pipelines/hidream_image/test_pipeline_hidream.py
@@ -47,8 +47,8 @@ class HiDreamImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     batch_params = TEXT_TO_IMAGE_BATCH_PARAMS
     image_params = TEXT_TO_IMAGE_IMAGE_PARAMS
     image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS
-
     required_optional_params = PipelineTesterMixin.required_optional_params
+    test_xformers_attention = False
     test_layerwise_casting = True
     supports_dduf = False
diff --git a/tests/pipelines/omnigen/test_pipeline_omnigen.py b/tests/pipelines/omnigen/test_pipeline_omnigen.py
index 28648aa76f..1a758b7050 100644
--- a/tests/pipelines/omnigen/test_pipeline_omnigen.py
+++ b/tests/pipelines/omnigen/test_pipeline_omnigen.py
@@ -22,7 +22,7 @@ class OmniGenPipelineFastTests(unittest.TestCase, PipelineTesterMixin):
     pipeline_class = OmniGenPipeline
     params = frozenset(["prompt", "guidance_scale"])
     batch_params = frozenset(["prompt"])
-
+    test_xformers_attention = False
     test_layerwise_casting = True

     def get_dummy_components(self):
diff --git a/tests/pipelines/qwenimage/test_qwenimage_controlnet.py b/tests/pipelines/qwenimage/test_qwenimage_controlnet.py
index c78e5cb233..188106b49b 100644
--- a/tests/pipelines/qwenimage/test_qwenimage_controlnet.py
+++ b/tests/pipelines/qwenimage/test_qwenimage_controlnet.py
@@ -44,7 +44,6 @@ class QwenControlNetPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     batch_params = frozenset(["prompt", "negative_prompt", "control_image"])
     image_params = frozenset(["control_image"])
     image_latents_params = frozenset(["latents"])
-
     required_optional_params = frozenset(
         [
             "num_inference_steps",
@@ -59,7 +58,7 @@ class QwenControlNetPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     )

     supports_dduf = False
-    test_xformers_attention = True
+    test_xformers_attention = False
     test_layerwise_casting = True
     test_group_offloading = True