mirror of
https://github.com/huggingface/diffusers.git
synced 2026-01-29 07:22:12 +03:00
* enable gguf test cases on XPU Signed-off-by: YAO Matrix <matrix.yao@intel.com> * make SD35LargeGGUFSingleFileTests::test_pipeline_inference pass Signed-off-by: root <root@a4bf01945cfe.jf.intel.com> * make FluxControlLoRAGGUFTests::test_lora_loading pass Signed-off-by: Yao Matrix <matrix.yao@intel.com> * polish code Signed-off-by: Yao Matrix <matrix.yao@intel.com> * Apply style fixes --------- Signed-off-by: YAO Matrix <matrix.yao@intel.com> Signed-off-by: root <root@a4bf01945cfe.jf.intel.com> Signed-off-by: Yao Matrix <matrix.yao@intel.com> Co-authored-by: root <root@a4bf01945cfe.jf.intel.com> Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
552 lines
20 KiB
Python
552 lines
20 KiB
Python
import gc
|
|
import unittest
|
|
|
|
import numpy as np
|
|
import torch
|
|
import torch.nn as nn
|
|
|
|
from diffusers import (
|
|
AuraFlowPipeline,
|
|
AuraFlowTransformer2DModel,
|
|
FluxControlPipeline,
|
|
FluxPipeline,
|
|
FluxTransformer2DModel,
|
|
GGUFQuantizationConfig,
|
|
SD3Transformer2DModel,
|
|
StableDiffusion3Pipeline,
|
|
)
|
|
from diffusers.utils import load_image
|
|
from diffusers.utils.testing_utils import (
|
|
Expectations,
|
|
backend_empty_cache,
|
|
backend_max_memory_allocated,
|
|
backend_reset_peak_memory_stats,
|
|
enable_full_determinism,
|
|
is_gguf_available,
|
|
nightly,
|
|
numpy_cosine_similarity_distance,
|
|
require_accelerate,
|
|
require_big_accelerator,
|
|
require_gguf_version_greater_or_equal,
|
|
require_peft_backend,
|
|
torch_device,
|
|
)
|
|
|
|
|
|
# The GGUF helper classes are imported only when `is_gguf_available()` is truthy,
# so importing this module does not fail when that check is falsy.
if is_gguf_available():
    from diffusers.quantizers.gguf.utils import GGUFLinear, GGUFParameter

# NOTE(review): presumably makes runs reproducible so the hard-coded expected
# slices in the tests below stay comparable — confirm against the helper.
enable_full_determinism()
|
|
|
|
|
|
@nightly
@require_big_accelerator
@require_accelerate
@require_gguf_version_greater_or_equal("0.10.0")
class GGUFSingleFileTesterMixin:
    """Shared checks for models loaded from a single GGUF checkpoint.

    Concrete test cases set ``ckpt_path`` and ``model_cls`` and provide a
    ``get_dummy_inputs()`` method whose result is unpacked as keyword arguments
    into the model's forward call. ``test_dtype_assignment`` uses
    ``self.assertRaises``, so the mixin is meant to be combined with
    ``unittest.TestCase``.
    """

    # Checkpoint location handed to `model_cls.from_single_file`.
    ckpt_path = None
    # Model class under test; must expose `from_single_file`.
    model_cls = None
    # Value passed as `compute_dtype` to `GGUFQuantizationConfig`.
    torch_dtype = torch.bfloat16
    # Upper bound (in GiB) for both the model footprint and the forward-pass peak.
    expected_memory_use_in_gb = 5

    def _load_quantized_model(self, **load_kwargs):
        """Load ``model_cls`` from ``ckpt_path`` with a GGUF quantization config.

        Args:
            **load_kwargs: Extra keyword arguments forwarded to ``from_single_file``.

        Returns:
            The object returned by ``model_cls.from_single_file``.
        """
        quantization_config = GGUFQuantizationConfig(compute_dtype=self.torch_dtype)
        return self.model_cls.from_single_file(
            self.ckpt_path, quantization_config=quantization_config, **load_kwargs
        )

    def test_gguf_parameters(self):
        """Every ``GGUFParameter`` has a ``quant_type`` and is stored as ``uint8``."""
        quant_storage_type = torch.uint8
        model = self._load_quantized_model()

        for _, param in model.named_parameters():
            if isinstance(param, GGUFParameter):
                assert hasattr(param, "quant_type")
                assert param.dtype == quant_storage_type

    def test_gguf_linear_layers(self):
        """Quantized linear layers keep ``uint8`` weights and ``torch_dtype`` biases."""
        model = self._load_quantized_model()

        for _, module in model.named_modules():
            if isinstance(module, torch.nn.Linear) and hasattr(module.weight, "quant_type"):
                assert module.weight.dtype == torch.uint8
                if module.bias is not None:
                    assert module.bias.dtype == self.torch_dtype

    def test_gguf_memory_usage(self):
        """Footprint and forward-pass peak memory stay below ``expected_memory_use_in_gb``."""
        model = self._load_quantized_model(torch_dtype=self.torch_dtype)
        model.to(torch_device)
        assert (model.get_memory_footprint() / 1024**3) < self.expected_memory_use_in_gb
        inputs = self.get_dummy_inputs()

        # Reset the counters after the model is on the device so that the peak
        # read below is taken relative to this point.
        backend_reset_peak_memory_stats(torch_device)
        backend_empty_cache(torch_device)
        with torch.no_grad():
            model(**inputs)
        max_memory = backend_max_memory_allocated(torch_device)
        assert (max_memory / 1024**3) < self.expected_memory_use_in_gb

    def test_keep_modules_in_fp32(self):
        r"""
        Check that linear modules listed in `_keep_in_fp32_modules` keep fp32 weights.

        The class attribute is overridden for the duration of the test and is always
        restored, even when loading or an assertion fails, so the override cannot leak
        into other tests that use the same model class.
        """
        original_keep_in_fp32 = self.model_cls._keep_in_fp32_modules
        self.model_cls._keep_in_fp32_modules = ["proj_out"]
        try:
            model = self._load_quantized_model()

            for name, module in model.named_modules():
                if isinstance(module, torch.nn.Linear) and name in model._keep_in_fp32_modules:
                    assert module.weight.dtype == torch.float32
        finally:
            self.model_cls._keep_in_fp32_modules = original_keep_in_fp32

    def test_dtype_assignment(self):
        """Dtype casts on a quantized model raise ``ValueError``; a device move works."""
        model = self._load_quantized_model()

        with self.assertRaises(ValueError):
            # Tries with a `dtype`
            model.to(torch.float16)

        with self.assertRaises(ValueError):
            # Tries with a `device` and `dtype`
            device_0 = f"{torch_device}:0"
            model.to(device=device_0, dtype=torch.float16)

        with self.assertRaises(ValueError):
            # Tries with a cast
            model.float()

        with self.assertRaises(ValueError):
            # Tries with a cast
            model.half()

        # This should work
        model.to(torch_device)

    def test_dequantize_model(self):
        """After ``dequantize()`` the inspected linear layers are no longer GGUF-backed."""
        model = self._load_quantized_model()
        model.dequantize()

        def _check_for_gguf_linear(model):
            # NOTE(review): only the direct children of `model` are inspected and the
            # helper does not recurse, so together with the loop below this covers the
            # linear layers exactly one level under each top-level child — confirm
            # whether a full traversal was intended.
            for name, module in model.named_children():
                if isinstance(module, nn.Linear):
                    assert not isinstance(module, GGUFLinear), f"{name} is still GGUFLinear"
                    assert not isinstance(module.weight, GGUFParameter), f"{name} weight is still GGUFParameter"

        for _, module in model.named_children():
            _check_for_gguf_linear(module)
|
|
|
|
|
|
class FluxGGUFSingleFileTests(GGUFSingleFileTesterMixin, unittest.TestCase):
    """GGUF single-file checks for the Q2_K FLUX.1-dev transformer checkpoint."""

    ckpt_path = "https://huggingface.co/city96/FLUX.1-dev-gguf/blob/main/flux1-dev-Q2_K.gguf"
    torch_dtype = torch.bfloat16
    model_cls = FluxTransformer2DModel
    expected_memory_use_in_gb = 5

    def setUp(self):
        # Start every test from a clean accelerator cache.
        gc.collect()
        backend_empty_cache(torch_device)

    def tearDown(self):
        # Drop whatever the test allocated before the next one runs.
        gc.collect()
        backend_empty_cache(torch_device)

    def get_dummy_inputs(self):
        """Return keyword arguments for one forward pass of the transformer."""

        def seeded_randn(shape):
            # Each tensor is drawn from its own CPU generator seeded with 0.
            noise = torch.randn(shape, generator=torch.Generator("cpu").manual_seed(0))
            return noise.to(torch_device, self.torch_dtype)

        def scalar(values):
            return torch.tensor(values).to(torch_device, self.torch_dtype)

        return {
            "hidden_states": seeded_randn((1, 4096, 64)),
            "encoder_hidden_states": seeded_randn((1, 512, 4096)),
            "pooled_projections": seeded_randn((1, 768)),
            "timestep": scalar([1]),
            "img_ids": seeded_randn((4096, 3)),
            "txt_ids": seeded_randn((512, 3)),
            "guidance": scalar([3.5]),
        }

    def test_pipeline_inference(self):
        """Run two denoising steps and compare an image slice with stored values."""
        gguf_config = GGUFQuantizationConfig(compute_dtype=self.torch_dtype)
        gguf_transformer = self.model_cls.from_single_file(
            self.ckpt_path, quantization_config=gguf_config, torch_dtype=self.torch_dtype
        )
        pipe = FluxPipeline.from_pretrained(
            "black-forest-labs/FLUX.1-dev", transformer=gguf_transformer, torch_dtype=self.torch_dtype
        )
        pipe.enable_model_cpu_offload()

        result = pipe(
            prompt="a cat holding a sign that says hello",
            num_inference_steps=2,
            generator=torch.Generator("cpu").manual_seed(0),
            output_type="np",
        )
        image = result.images[0]
        output_slice = image[:3, :3, :].flatten()

        # fmt: off
        expected_slice = np.array(
            [
                0.47265625, 0.43359375, 0.359375,
                0.47070312, 0.421875, 0.34375,
                0.46875, 0.421875, 0.34765625,
                0.46484375, 0.421875, 0.34179688,
                0.47070312, 0.42578125, 0.34570312,
                0.46875, 0.42578125, 0.3515625,
                0.45507812, 0.4140625, 0.33984375,
                0.4609375, 0.41796875, 0.34375,
                0.45898438, 0.41796875, 0.34375,
            ]
        )
        # fmt: on
        distance = numpy_cosine_similarity_distance(expected_slice, output_slice)
        assert distance < 1e-4
|
|
|
|
|
|
class SD35LargeGGUFSingleFileTests(GGUFSingleFileTesterMixin, unittest.TestCase):
    """GGUF single-file checks for the Q4_0 Stable Diffusion 3.5 Large checkpoint."""

    ckpt_path = "https://huggingface.co/city96/stable-diffusion-3.5-large-gguf/blob/main/sd3.5_large-Q4_0.gguf"
    torch_dtype = torch.bfloat16
    model_cls = SD3Transformer2DModel
    expected_memory_use_in_gb = 5

    def setUp(self):
        # Start every test from a clean accelerator cache.
        gc.collect()
        backend_empty_cache(torch_device)

    def tearDown(self):
        # Drop whatever the test allocated before the next one runs.
        gc.collect()
        backend_empty_cache(torch_device)

    def get_dummy_inputs(self):
        """Return keyword arguments for one forward pass of the transformer."""

        def seeded_randn(shape):
            # Each tensor is drawn from its own CPU generator seeded with 0.
            noise = torch.randn(shape, generator=torch.Generator("cpu").manual_seed(0))
            return noise.to(torch_device, self.torch_dtype)

        return {
            "hidden_states": seeded_randn((1, 16, 64, 64)),
            "encoder_hidden_states": seeded_randn((1, 512, 4096)),
            "pooled_projections": seeded_randn((1, 2048)),
            "timestep": torch.tensor([1]).to(torch_device, self.torch_dtype),
        }

    def test_pipeline_inference(self):
        """Run two denoising steps and compare an image slice with per-backend values."""
        gguf_config = GGUFQuantizationConfig(compute_dtype=self.torch_dtype)
        gguf_transformer = self.model_cls.from_single_file(
            self.ckpt_path, quantization_config=gguf_config, torch_dtype=self.torch_dtype
        )
        pipe = StableDiffusion3Pipeline.from_pretrained(
            "stabilityai/stable-diffusion-3.5-large", transformer=gguf_transformer, torch_dtype=self.torch_dtype
        )
        pipe.enable_model_cpu_offload()

        result = pipe(
            prompt="a cat holding a sign that says hello",
            num_inference_steps=2,
            generator=torch.Generator("cpu").manual_seed(0),
            output_type="np",
        )
        image = result.images[0]
        output_slice = image[:3, :3, :].flatten()

        # fmt: off
        xpu_values = np.array(
            [
                0.19335938, 0.3125, 0.3203125,
                0.1328125, 0.3046875, 0.296875,
                0.11914062, 0.2890625, 0.2890625,
                0.16796875, 0.30273438, 0.33203125,
                0.14648438, 0.31640625, 0.33007812,
                0.12890625, 0.3046875, 0.30859375,
                0.17773438, 0.33789062, 0.33203125,
                0.16796875, 0.34570312, 0.32421875,
                0.15625, 0.33203125, 0.31445312,
            ]
        )
        cuda_values = np.array(
            [
                0.17578125, 0.27539062, 0.27734375,
                0.11914062, 0.26953125, 0.25390625,
                0.109375, 0.25390625, 0.25,
                0.15039062, 0.26171875, 0.28515625,
                0.13671875, 0.27734375, 0.28515625,
                0.12109375, 0.26757812, 0.265625,
                0.16210938, 0.29882812, 0.28515625,
                0.15625, 0.30664062, 0.27734375,
                0.14648438, 0.29296875, 0.26953125,
            ]
        )
        # fmt: on
        # Stored values are keyed by (backend, version) tuples.
        expected_slices = Expectations({("xpu", 3): xpu_values, ("cuda", 7): cuda_values})
        expected_slice = expected_slices.get_expectation()
        distance = numpy_cosine_similarity_distance(expected_slice, output_slice)
        assert distance < 1e-4
|
|
|
|
|
|
class SD35MediumGGUFSingleFileTests(GGUFSingleFileTesterMixin, unittest.TestCase):
    """GGUF single-file checks for the Q3_K_M Stable Diffusion 3.5 Medium checkpoint."""

    ckpt_path = "https://huggingface.co/city96/stable-diffusion-3.5-medium-gguf/blob/main/sd3.5_medium-Q3_K_M.gguf"
    torch_dtype = torch.bfloat16
    model_cls = SD3Transformer2DModel
    expected_memory_use_in_gb = 2

    def setUp(self):
        # Start every test from a clean accelerator cache.
        gc.collect()
        backend_empty_cache(torch_device)

    def tearDown(self):
        # Drop whatever the test allocated before the next one runs.
        gc.collect()
        backend_empty_cache(torch_device)

    def get_dummy_inputs(self):
        """Return keyword arguments for one forward pass of the transformer."""

        def seeded_randn(shape):
            # Each tensor is drawn from its own CPU generator seeded with 0.
            noise = torch.randn(shape, generator=torch.Generator("cpu").manual_seed(0))
            return noise.to(torch_device, self.torch_dtype)

        return {
            "hidden_states": seeded_randn((1, 16, 64, 64)),
            "encoder_hidden_states": seeded_randn((1, 512, 4096)),
            "pooled_projections": seeded_randn((1, 2048)),
            "timestep": torch.tensor([1]).to(torch_device, self.torch_dtype),
        }

    def test_pipeline_inference(self):
        """Run two denoising steps and compare an image slice with stored values."""
        gguf_config = GGUFQuantizationConfig(compute_dtype=self.torch_dtype)
        gguf_transformer = self.model_cls.from_single_file(
            self.ckpt_path, quantization_config=gguf_config, torch_dtype=self.torch_dtype
        )
        pipe = StableDiffusion3Pipeline.from_pretrained(
            "stabilityai/stable-diffusion-3.5-medium", transformer=gguf_transformer, torch_dtype=self.torch_dtype
        )
        pipe.enable_model_cpu_offload()

        result = pipe(
            prompt="a cat holding a sign that says hello",
            num_inference_steps=2,
            generator=torch.Generator("cpu").manual_seed(0),
            output_type="np",
        )
        image = result.images[0]
        output_slice = image[:3, :3, :].flatten()

        # fmt: off
        expected_slice = np.array(
            [
                0.625, 0.6171875, 0.609375,
                0.65625, 0.65234375, 0.640625,
                0.6484375, 0.640625, 0.625,
                0.6484375, 0.63671875, 0.6484375,
                0.66796875, 0.65625, 0.65234375,
                0.6640625, 0.6484375, 0.6328125,
                0.6640625, 0.6484375, 0.640625,
                0.67578125, 0.66015625, 0.62109375,
                0.671875, 0.65625, 0.62109375,
            ]
        )
        # fmt: on
        distance = numpy_cosine_similarity_distance(expected_slice, output_slice)
        assert distance < 1e-4
|
|
|
|
|
|
class AuraFlowGGUFSingleFileTests(GGUFSingleFileTesterMixin, unittest.TestCase):
    """GGUF single-file checks for the Q2_K AuraFlow v0.3 transformer checkpoint."""

    ckpt_path = "https://huggingface.co/city96/AuraFlow-v0.3-gguf/blob/main/aura_flow_0.3-Q2_K.gguf"
    torch_dtype = torch.bfloat16
    model_cls = AuraFlowTransformer2DModel
    expected_memory_use_in_gb = 4

    def setUp(self):
        # Start every test from a clean accelerator cache.
        gc.collect()
        backend_empty_cache(torch_device)

    def tearDown(self):
        # Drop whatever the test allocated before the next one runs.
        gc.collect()
        backend_empty_cache(torch_device)

    def get_dummy_inputs(self):
        """Return keyword arguments for one forward pass of the transformer."""

        def seeded_randn(shape):
            # Each tensor is drawn from its own CPU generator seeded with 0.
            noise = torch.randn(shape, generator=torch.Generator("cpu").manual_seed(0))
            return noise.to(torch_device, self.torch_dtype)

        return {
            "hidden_states": seeded_randn((1, 4, 64, 64)),
            "encoder_hidden_states": seeded_randn((1, 512, 2048)),
            "timestep": torch.tensor([1]).to(torch_device, self.torch_dtype),
        }

    def test_pipeline_inference(self):
        """Run two denoising steps and compare an image slice with stored values."""
        gguf_config = GGUFQuantizationConfig(compute_dtype=self.torch_dtype)
        gguf_transformer = self.model_cls.from_single_file(
            self.ckpt_path, quantization_config=gguf_config, torch_dtype=self.torch_dtype
        )
        pipe = AuraFlowPipeline.from_pretrained(
            "fal/AuraFlow-v0.3", transformer=gguf_transformer, torch_dtype=self.torch_dtype
        )
        pipe.enable_model_cpu_offload()

        result = pipe(
            prompt="a pony holding a sign that says hello",
            num_inference_steps=2,
            generator=torch.Generator("cpu").manual_seed(0),
            output_type="np",
        )
        image = result.images[0]
        output_slice = image[:3, :3, :].flatten()

        # fmt: off
        expected_slice = np.array(
            [
                0.46484375, 0.546875, 0.64453125,
                0.48242188, 0.53515625, 0.59765625,
                0.47070312, 0.5078125, 0.5703125,
                0.42773438, 0.50390625, 0.5703125,
                0.47070312, 0.515625, 0.57421875,
                0.45898438, 0.48632812, 0.53515625,
                0.4453125, 0.5078125, 0.56640625,
                0.47851562, 0.5234375, 0.57421875,
                0.48632812, 0.5234375, 0.56640625,
            ]
        )
        # fmt: on
        distance = numpy_cosine_similarity_distance(expected_slice, output_slice)
        assert distance < 1e-4
|
|
|
|
|
|
@require_peft_backend
@nightly
@require_big_accelerator
@require_accelerate
@require_gguf_version_greater_or_equal("0.10.0")
class FluxControlLoRAGGUFTests(unittest.TestCase):
    """Checks that a control LoRA can be loaded on top of a GGUF-quantized Flux transformer."""

    def setUp(self):
        # Same cleanup as the other accelerator test classes in this file: the
        # pipeline below is moved to the accelerator as a whole.
        gc.collect()
        backend_empty_cache(torch_device)

    def tearDown(self):
        gc.collect()
        backend_empty_cache(torch_device)

    def test_lora_loading(self):
        """Load the Canny control LoRA, run ten steps and compare an image slice."""
        ckpt_path = "https://huggingface.co/city96/FLUX.1-dev-gguf/blob/main/flux1-dev-Q2_K.gguf"
        transformer = FluxTransformer2DModel.from_single_file(
            ckpt_path,
            quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
            torch_dtype=torch.bfloat16,
        )
        pipe = FluxControlPipeline.from_pretrained(
            "black-forest-labs/FLUX.1-dev",
            transformer=transformer,
            torch_dtype=torch.bfloat16,
        ).to(torch_device)
        pipe.load_lora_weights("black-forest-labs/FLUX.1-Canny-dev-lora")

        prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
        control_image = load_image(
            "https://huggingface.co/datasets/sayakpaul/sample-datasets/resolve/main/control_image_robot_canny.png"
        )

        output = pipe(
            prompt=prompt,
            control_image=control_image,
            height=256,
            width=256,
            num_inference_steps=10,
            guidance_scale=30.0,
            output_type="np",
            generator=torch.manual_seed(0),
        ).images

        # Bottom-right 3x3 patch of the last channel of the first image.
        out_slice = output[0, -3:, -3:, -1].flatten()
        expected_slice = np.array([0.8047, 0.8359, 0.8711, 0.6875, 0.7070, 0.7383, 0.5469, 0.5820, 0.6641])

        max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice)
        # assertLess reports both operands on failure, unlike assertTrue(a < b).
        self.assertLess(max_diff, 1e-3)
|