From 58519283e7fa143d3ae2bc086fcf53264cf2ece3 Mon Sep 17 00:00:00 2001 From: Yuqian Hong Date: Mon, 15 Dec 2025 18:22:42 +0800 Subject: [PATCH] Support for control-lora (#10686) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * run control-lora on diffusers * cannot load lora adapter * test * 1 * add control-lora * 1 * 1 * 1 * fix PeftAdapterMixin * fix module_to_save bug * delete json print * resolve conflits * merged but bug * change peft.py * 1 * delete state_dict print * fix alpha * Create control_lora.py * Add files via upload * rename * no need modify as peft updated * add doc * fix code style * styling isn't that hard 😉 * empty --------- Co-authored-by: Sayak Paul --- docs/source/en/api/models/controlnet.md | 15 ++ .../research_projects/control_lora/README.md | 41 ++++ .../control_lora/control_lora.py | 58 ++++++ src/diffusers/loaders/peft.py | 16 ++ .../models/controlnets/controlnet.py | 3 +- src/diffusers/utils/__init__.py | 1 + src/diffusers/utils/state_dict_utils.py | 179 ++++++++++++++++++ 7 files changed, 312 insertions(+), 1 deletion(-) create mode 100644 examples/research_projects/control_lora/README.md create mode 100644 examples/research_projects/control_lora/control_lora.py diff --git a/docs/source/en/api/models/controlnet.md b/docs/source/en/api/models/controlnet.md index f56b7383a0..0821d63fd1 100644 --- a/docs/source/en/api/models/controlnet.md +++ b/docs/source/en/api/models/controlnet.md @@ -33,6 +33,21 @@ url = "https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/blob/m pipe = StableDiffusionControlNetPipeline.from_single_file(url, controlnet=controlnet) ``` +## Loading from Control LoRA + +Control-LoRA is introduced by Stability AI in [stabilityai/control-lora](https://huggingface.co/stabilityai/control-lora) by adding low-rank parameter efficient fine tuning to ControlNet. 
This approach offers a more efficient and compact method to bring model control to a wider variety of consumer GPUs. + +```py +import torch +from diffusers import ControlNetModel, UNet2DConditionModel +lora_id = "stabilityai/control-lora" +lora_filename = "control-LoRAs-rank128/control-lora-canny-rank128.safetensors" + +unet = UNet2DConditionModel.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", torch_dtype=torch.bfloat16).to("cuda") +controlnet = ControlNetModel.from_unet(unet).to(device="cuda", dtype=torch.bfloat16) +controlnet.load_lora_adapter(lora_id, weight_name=lora_filename, prefix=None, controlnet_config=controlnet.config) +``` + ## ControlNetModel [[autodoc]] ControlNetModel diff --git a/examples/research_projects/control_lora/README.md b/examples/research_projects/control_lora/README.md new file mode 100644 index 0000000000..49aa848e3e --- /dev/null +++ b/examples/research_projects/control_lora/README.md @@ -0,0 +1,41 @@ +# Control-LoRA inference example + +Control-LoRA was introduced by Stability AI in [stabilityai/control-lora](https://huggingface.co/stabilityai/control-lora); it adds low-rank, parameter-efficient fine-tuning to ControlNet. This approach offers a more efficient and compact method to bring model control to a wider variety of consumer GPUs. + +## Installing the dependencies + +Before running the script, make sure to install the library's dependencies: + +**Important** + +To make sure you can successfully run the latest versions of the example scripts, we highly recommend **installing from source** and keeping the install up to date as we update the example scripts frequently and install some example-specific requirements. To do this, execute the following steps in a new virtual environment: +```bash +git clone https://github.com/huggingface/diffusers +cd diffusers +pip install . 
+``` + +Then `cd` into the example folder (`examples/research_projects/control_lora`) and install the packages the script needs (this example does not ship a `requirements.txt`): +```bash +pip install torch transformers accelerate peft opencv-python +``` + +And initialize an [🤗Accelerate](https://github.com/huggingface/accelerate/) environment with: + +```bash +accelerate config +``` + +## Inference on SDXL + +[stabilityai/control-lora](https://huggingface.co/stabilityai/control-lora) provides a set of Control-LoRA weights for SDXL. Here we use the `canny` condition to generate an image from a text prompt and a reference image. + +```bash +python control_lora.py +``` + +## Acknowledgements + +- [stabilityai/control-lora](https://huggingface.co/stabilityai/control-lora) +- [comfyanonymous/ControlNet-v1-1_fp16_safetensors](https://huggingface.co/comfyanonymous/ControlNet-v1-1_fp16_safetensors) +- [HighCWu/control-lora-v2](https://github.com/HighCWu/control-lora-v2) \ No newline at end of file diff --git a/examples/research_projects/control_lora/control_lora.py b/examples/research_projects/control_lora/control_lora.py new file mode 100644 index 0000000000..a0ad1981c7 --- /dev/null +++ b/examples/research_projects/control_lora/control_lora.py @@ -0,0 +1,58 @@ +import cv2 +import numpy as np +import torch +from PIL import Image + +from diffusers import ( + AutoencoderKL, + ControlNetModel, + StableDiffusionXLControlNetPipeline, + UNet2DConditionModel, +) +from diffusers.utils import load_image, make_image_grid + + +pipe_id = "stabilityai/stable-diffusion-xl-base-1.0" +lora_id = "stabilityai/control-lora" +lora_filename = "control-LoRAs-rank128/control-lora-canny-rank128.safetensors" + +unet = UNet2DConditionModel.from_pretrained(pipe_id, subfolder="unet", torch_dtype=torch.bfloat16).to("cuda") +controlnet = ControlNetModel.from_unet(unet).to(device="cuda", dtype=torch.bfloat16) +controlnet.load_lora_adapter(lora_id, weight_name=lora_filename, prefix=None, controlnet_config=controlnet.config) + +prompt = "aerial view, a futuristic research complex in a bright foggy jungle, hard lighting" +negative_prompt = "low quality, bad 
quality, sketches" + +image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/hf-logo.png" +) + +controlnet_conditioning_scale = 1.0 # recommended for good generalization + +vae = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", torch_dtype=torch.bfloat16) +pipe = StableDiffusionXLControlNetPipeline.from_pretrained( + pipe_id, + unet=unet, + controlnet=controlnet, + vae=vae, + torch_dtype=torch.bfloat16, + safety_checker=None, +).to("cuda") + +image = np.array(image) +image = cv2.Canny(image, 100, 200) +image = image[:, :, None] +image = np.concatenate([image, image, image], axis=2) +image = Image.fromarray(image) + +images = pipe( + prompt, + negative_prompt=negative_prompt, + image=image, + controlnet_conditioning_scale=controlnet_conditioning_scale, + num_images_per_prompt=4, +).images + +final_image = [image] + images +grid = make_image_grid(final_image, 1, 5) +grid.save("hf-logo_canny.png") diff --git a/src/diffusers/loaders/peft.py b/src/diffusers/loaders/peft.py index 3f8519bbfa..30a78f00b3 100644 --- a/src/diffusers/loaders/peft.py +++ b/src/diffusers/loaders/peft.py @@ -27,6 +27,7 @@ from ..utils import ( MIN_PEFT_VERSION, USE_PEFT_BACKEND, check_peft_version, + convert_sai_sd_control_lora_state_dict_to_peft, convert_unet_state_dict_to_peft, delete_adapter_layers, get_adapter_name, @@ -232,6 +233,13 @@ class PeftAdapterMixin: if "lora_A" not in first_key: state_dict = convert_unet_state_dict_to_peft(state_dict) + # Control LoRA from SAI is different from BFL Control LoRA + # https://huggingface.co/stabilityai/control-lora + # https://huggingface.co/comfyanonymous/ControlNet-v1-1_fp16_safetensors + is_sai_sd_control_lora = "lora_controlnet" in state_dict + if is_sai_sd_control_lora: + state_dict = convert_sai_sd_control_lora_state_dict_to_peft(state_dict) + rank = {} for key, val in state_dict.items(): # Cannot figure out rank from lora layers that don't have at least 2 dimensions. 
@@ -263,6 +271,14 @@ class PeftAdapterMixin: adapter_name=adapter_name, ) + # Adjust LoRA config for Control LoRA + if is_sai_sd_control_lora: + lora_config.lora_alpha = lora_config.r + lora_config.alpha_pattern = lora_config.rank_pattern + lora_config.bias = "all" + lora_config.modules_to_save = lora_config.exclude_modules + lora_config.exclude_modules = None + #