mirror of https://github.com/huggingface/diffusers.git
synced 2026-01-27 17:22:53 +03:00
[LoRA] feat: support loading loras into 4bit quantized Flux models. (#10578)
* feat: support loading loras into 4bit quantized models.
* updates
* update
* remove weight check.
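For context, a minimal sketch of the end-user workflow this change enables, mirroring the new test below. The base checkpoint ID, NF4 settings, device placement, and prompt are illustrative assumptions; only the load_lora_weights / set_adapters usage comes from the diff.

import torch
from huggingface_hub import hf_hub_download

from diffusers import BitsAndBytesConfig, DiffusionPipeline, FluxTransformer2DModel

# Assumed base checkpoint; any Flux checkpoint with a `transformer` subfolder works the same way.
model_id = "black-forest-labs/FLUX.1-dev"

# Quantize the Flux transformer to 4-bit NF4 via bitsandbytes.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
transformer_4bit = FluxTransformer2DModel.from_pretrained(
    model_id, subfolder="transformer", quantization_config=nf4_config, torch_dtype=torch.bfloat16
)
pipeline = DiffusionPipeline.from_pretrained(
    model_id, transformer=transformer_4bit, torch_dtype=torch.bfloat16
).to("cuda")

# What this commit enables: loading a LoRA directly into the 4-bit quantized model.
pipeline.load_lora_weights(
    hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors"),
    adapter_name="hyper-sd",
)
pipeline.set_adapters("hyper-sd", adapter_weights=0.125)

image = pipeline(
    "a photo of a puppy",  # illustrative prompt
    num_inference_steps=8,
    generator=torch.Generator().manual_seed(42),
).images[0]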
@@ -20,6 +20,7 @@ import unittest
 import numpy as np
 import pytest
 import safetensors.torch
+from huggingface_hub import hf_hub_download
 
 from diffusers import BitsAndBytesConfig, DiffusionPipeline, FluxTransformer2DModel, SD3Transformer2DModel
 from diffusers.utils import is_accelerate_version, logging
@@ -568,6 +569,27 @@ class SlowBnb4BitFluxTests(Base4bitTests):
         max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice)
         self.assertTrue(max_diff < 1e-3)
 
+    def test_lora_loading(self):
+        self.pipeline_4bit.load_lora_weights(
+            hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors"), adapter_name="hyper-sd"
+        )
+        self.pipeline_4bit.set_adapters("hyper-sd", adapter_weights=0.125)
+
+        output = self.pipeline_4bit(
+            prompt=self.prompt,
+            height=256,
+            width=256,
+            max_sequence_length=64,
+            output_type="np",
+            num_inference_steps=8,
+            generator=torch.Generator().manual_seed(42),
+        ).images
+        out_slice = output[0, -3:, -3:, -1].flatten()
+        expected_slice = np.array([0.5347, 0.5342, 0.5283, 0.5093, 0.4988, 0.5093, 0.5044, 0.5015, 0.4946])
+
+        max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice)
+        self.assertTrue(max_diff < 1e-3)
+
 
 @slow
 class BaseBnb4BitSerializationTests(Base4bitTests):
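The test asserts closeness by flattening a 3x3 corner slice of the generated image and comparing it against reference values with a cosine-distance helper. A sketch of that helper, as found in diffusers' test utilities (the exact body here is an approximation):

import numpy as np

def numpy_cosine_similarity_distance(a: np.ndarray, b: np.ndarray) -> float:
    # 1 - cosine similarity of the flattened arrays; 0.0 means the two
    # slices point in the same direction, so small values pass the 1e-3 bound.
    similarity = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
    return float(1.0 - similarity)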