Mirror of https://github.com/huggingface/diffusers.git (synced 2026-01-27 17:22:53 +03:00)
[tests] make tests device-agnostic (part 4) (#10508)
* initial commit
* fix empty cache
* fix one more
* fix style
* update device functions
* update
* update
* Update src/diffusers/utils/testing_utils.py Co-authored-by: hlky <hlky@hlky.ac>
* Update src/diffusers/utils/testing_utils.py Co-authored-by: hlky <hlky@hlky.ac>
* Update src/diffusers/utils/testing_utils.py Co-authored-by: hlky <hlky@hlky.ac>
* Update tests/pipelines/controlnet/test_controlnet.py Co-authored-by: hlky <hlky@hlky.ac>
* Update src/diffusers/utils/testing_utils.py Co-authored-by: hlky <hlky@hlky.ac>
* Update src/diffusers/utils/testing_utils.py Co-authored-by: hlky <hlky@hlky.ac>
* Update tests/pipelines/controlnet/test_controlnet.py Co-authored-by: hlky <hlky@hlky.ac>
* with gc.collect
* update
* make style
* check_torch_dependencies
* add mps empty cache
* add changes
* bug fix
* enable on xpu
* update more cases
* revert
* revert back
* Update test_stable_diffusion_xl.py
* Update tests/pipelines/stable_diffusion/test_stable_diffusion.py Co-authored-by: hlky <hlky@hlky.ac>
* Update tests/pipelines/stable_diffusion/test_stable_diffusion.py Co-authored-by: hlky <hlky@hlky.ac>
* Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py Co-authored-by: hlky <hlky@hlky.ac>
* Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py Co-authored-by: hlky <hlky@hlky.ac>
* Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py Co-authored-by: hlky <hlky@hlky.ac>
* Apply suggestions from code review Co-authored-by: hlky <hlky@hlky.ac>
* add test marker

---------

Co-authored-by: hlky <hlky@hlky.ac>
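The changes below follow one pattern: CUDA-only calls and markers (torch.cuda.empty_cache(), @require_torch_gpu, hard-coded "cuda" devices) are replaced by device-agnostic helpers from src/diffusers/utils/testing_utils.py (backend_empty_cache, @require_torch_accelerator, torch_device) so the suites also run on XPU and MPS. As a rough sketch only, not the actual diffusers implementation, such a helper can dispatch on the device type:

    # Hedged sketch of the dispatch idea behind backend_empty_cache; the real
    # helper in diffusers.utils.testing_utils covers more backends and edge cases.
    import torch

    def backend_empty_cache(device):
        device_type = torch.device(device).type
        if device_type == "cuda":
            torch.cuda.empty_cache()
        elif device_type == "xpu":
            torch.xpu.empty_cache()
        elif device_type == "mps":
            torch.mps.empty_cache()
        # plain CPU has no caching allocator to flush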
@@ -33,11 +33,12 @@ from diffusers import (
 )
 from diffusers.utils.import_utils import is_accelerate_available
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     load_image,
     nightly,
     numpy_cosine_similarity_distance,
     require_peft_backend,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -101,7 +102,7 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
     # Keeping this test here makes sense because it doesn't look any integration
     # (value assertions on logits).
     @slow
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_integration_move_lora_cpu(self):
         path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
         lora_id = "takuma104/lora-test-text-encoder-lora-target"
@@ -158,7 +159,7 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
                 self.assertTrue(m.weight.device != torch.device("cpu"))

     @slow
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_integration_move_lora_dora_cpu(self):
         from peft import LoraConfig

@@ -209,18 +210,18 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):

 @slow
 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 @require_peft_backend
 class LoraIntegrationTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_integration_logits_with_scale(self):
         path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
@@ -378,7 +379,7 @@ class LoraIntegrationTests(unittest.TestCase):
         generator = torch.Generator().manual_seed(0)

         pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None)
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
         lora_filename = "light_and_shadow.safetensors"
         pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
@@ -400,7 +401,7 @@
         generator = torch.Generator().manual_seed(0)

         pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None)
-        pipe.enable_sequential_cpu_offload()
+        pipe.enable_sequential_cpu_offload(device=torch_device)
         lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
         lora_filename = "light_and_shadow.safetensors"
         pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
@@ -656,7 +657,7 @@
         See: https://github.com/huggingface/diffusers/issues/5606
         """
         pipeline = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
-        pipeline.enable_sequential_cpu_offload()
+        pipeline.enable_sequential_cpu_offload(device=torch_device)
         civitai_path = hf_hub_download("ybelkada/test-ahi-civitai", "ahi_lora_weights.safetensors")
         pipeline.load_lora_weights(civitai_path, adapter_name="ahri")
@@ -30,12 +30,13 @@ from diffusers import (
 from diffusers.utils import load_image
 from diffusers.utils.import_utils import is_accelerate_available
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     is_flaky,
     nightly,
     numpy_cosine_similarity_distance,
     require_big_gpu_with_torch_cuda,
     require_peft_backend,
-    require_torch_gpu,
+    require_torch_accelerator,
     torch_device,
 )

@@ -93,7 +94,7 @@ class SD3LoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
     def output_shape(self):
         return (1, 32, 32, 3)

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_sd3_lora(self):
         """
         Test loading the loras that are saved with the diffusers and peft formats.
@@ -135,7 +136,7 @@


 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 @require_peft_backend
 @require_big_gpu_with_torch_cuda
 @pytest.mark.big_gpu_with_torch_cuda
@@ -146,12 +147,12 @@ class SD3LoraIntegrationTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def get_inputs(self, device, seed=0):
         init_image = load_image(
@@ -36,6 +36,9 @@ from diffusers.utils import logging
 from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    backend_max_memory_allocated,
+    backend_reset_max_memory_allocated,
+    backend_reset_peak_memory_stats,
     enable_full_determinism,
     floats_tensor,
     is_peft_available,
@@ -1002,7 +1005,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.TestCase):
         assert loaded_model
         assert new_output.sample.shape == (4, 4, 16, 16)

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_load_sharded_checkpoint_from_hub_local(self):
         _, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy")
@@ -1013,7 +1016,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.TestCase):
         assert loaded_model
         assert new_output.sample.shape == (4, 4, 16, 16)

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_load_sharded_checkpoint_from_hub_local_subfolder(self):
         _, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy-subfolder")
@@ -1024,7 +1027,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.TestCase):
         assert loaded_model
         assert new_output.sample.shape == (4, 4, 16, 16)

-    @require_torch_gpu
+    @require_torch_accelerator
     @parameterized.expand(
         [
             ("hf-internal-testing/unet2d-sharded-dummy", None),
@@ -1039,7 +1042,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.TestCase):
         assert loaded_model
         assert new_output.sample.shape == (4, 4, 16, 16)

-    @require_torch_gpu
+    @require_torch_accelerator
     @parameterized.expand(
         [
             ("hf-internal-testing/unet2d-sharded-dummy-subfolder", None),
@@ -1054,7 +1057,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.TestCase):
         assert loaded_model
         assert new_output.sample.shape == (4, 4, 16, 16)

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_load_sharded_checkpoint_device_map_from_hub_local(self):
         _, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy")
@@ -1064,7 +1067,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.TestCase):
         assert loaded_model
         assert new_output.sample.shape == (4, 4, 16, 16)

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_load_sharded_checkpoint_device_map_from_hub_local_subfolder(self):
         _, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy-subfolder")
@@ -1164,11 +1167,11 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase):

         return model

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_set_attention_slice_auto(self):
-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        backend_empty_cache(torch_device)
+        backend_reset_max_memory_allocated(torch_device)
+        backend_reset_peak_memory_stats(torch_device)

         unet = self.get_unet_model()
         unet.set_attention_slice("auto")
@@ -1180,15 +1183,15 @@
         with torch.no_grad():
             _ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample

-        mem_bytes = torch.cuda.max_memory_allocated()
+        mem_bytes = backend_max_memory_allocated(torch_device)

         assert mem_bytes < 5 * 10**9

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_set_attention_slice_max(self):
-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        backend_empty_cache(torch_device)
+        backend_reset_max_memory_allocated(torch_device)
+        backend_reset_peak_memory_stats(torch_device)

         unet = self.get_unet_model()
         unet.set_attention_slice("max")
@@ -1200,15 +1203,15 @@
         with torch.no_grad():
             _ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample

-        mem_bytes = torch.cuda.max_memory_allocated()
+        mem_bytes = backend_max_memory_allocated(torch_device)

         assert mem_bytes < 5 * 10**9

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_set_attention_slice_int(self):
-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        backend_empty_cache(torch_device)
+        backend_reset_max_memory_allocated(torch_device)
+        backend_reset_peak_memory_stats(torch_device)

         unet = self.get_unet_model()
         unet.set_attention_slice(2)
@@ -1220,15 +1223,15 @@
         with torch.no_grad():
             _ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample

-        mem_bytes = torch.cuda.max_memory_allocated()
+        mem_bytes = backend_max_memory_allocated(torch_device)

         assert mem_bytes < 5 * 10**9

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_set_attention_slice_list(self):
-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        backend_empty_cache(torch_device)
+        backend_reset_max_memory_allocated(torch_device)
+        backend_reset_peak_memory_stats(torch_device)

         # there are 32 sliceable layers
         slice_list = 16 * [2, 3]
@@ -1242,7 +1245,7 @@
         with torch.no_grad():
             _ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample

-        mem_bytes = torch.cuda.max_memory_allocated()
+        mem_bytes = backend_max_memory_allocated(torch_device)

         assert mem_bytes < 5 * 10**9
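The attention-slicing tests above now read peak memory through backend_max_memory_allocated(torch_device) instead of torch.cuda.max_memory_allocated(). A minimal sketch of how such a wrapper can dispatch, assuming the same pattern as backend_empty_cache (the actual diffusers helper may differ, and XPU memory stats require a recent PyTorch build):

    # Hedged sketch; not the exact diffusers implementation.
    import torch

    def backend_max_memory_allocated(device):
        device_type = torch.device(device).type
        if device_type == "cuda":
            return torch.cuda.max_memory_allocated()
        if device_type == "xpu":
            return torch.xpu.max_memory_allocated()
        raise NotImplementedError(f"no peak-memory stats for {device_type!r}")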
@@ -79,7 +79,7 @@ def _test_stable_diffusion_compile(in_queue, out_queue, timeout):
         pipe = StableDiffusionControlNetPipeline.from_pretrained(
             "stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
         )
-        pipe.to("cuda")
+        pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)

         pipe.unet.to(memory_format=torch.channels_last)
@@ -40,7 +40,7 @@ from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
     floats_tensor,
-    require_torch_gpu,
+    require_torch_accelerator,
     torch_device,
 )

@@ -245,7 +245,7 @@ class ControlNetPipelineSDXLFastTests(
     def test_inference_batch_single_identical(self):
         self._test_inference_batch_single_identical(expected_max_diff=2e-3)

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_stable_diffusion_xl_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -254,12 +254,12 @@

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         image_slices = []
@@ -223,12 +223,12 @@ class StableDiffusionXLControlNetPipelineFastTests(

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         image_slices = []
@@ -31,6 +31,7 @@ from diffusers import (
 from diffusers.models import FluxControlNetModel
 from diffusers.utils import load_image
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     nightly,
     numpy_cosine_similarity_distance,
@@ -217,12 +218,12 @@ class FluxControlNetPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_canny(self):
         controlnet = FluxControlNetModel.from_pretrained(
@@ -239,7 +239,7 @@ class StableDiffusion3ControlNetPipelineSlowTests(unittest.TestCase):
         pipe = StableDiffusion3ControlNetPipeline.from_pretrained(
             "stabilityai/stable-diffusion-3-medium-diffusers", controlnet=controlnet, torch_dtype=torch.float16
         )
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         pipe.set_progress_bar_config(disable=None)

         generator = torch.Generator(device="cpu").manual_seed(0)
@@ -9,6 +9,7 @@ from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPTokenizer

 from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     nightly,
     numpy_cosine_similarity_distance,
     require_big_gpu_with_torch_cuda,
@@ -212,12 +213,12 @@ class FluxPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def get_inputs(self, device, seed=0):
         generator = torch.Generator(device="cpu").manual_seed(seed)
@@ -34,11 +34,12 @@ from diffusers import (
 from diffusers.image_processor import IPAdapterMaskProcessor
 from diffusers.utils import load_image
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     is_flaky,
     load_pt,
     numpy_cosine_similarity_distance,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -54,13 +55,13 @@ class IPAdapterNightlyTestsMixin(unittest.TestCase):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def get_image_encoder(self, repo_id, subfolder):
         image_encoder = CLIPVisionModelWithProjection.from_pretrained(
@@ -165,7 +166,7 @@


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
     def test_text_to_image(self):
         image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
@@ -280,7 +281,7 @@
         inputs = self.get_dummy_inputs()
         output_without_offload = pipeline(**inputs).images

-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         inputs = self.get_dummy_inputs()
         output_with_offload = pipeline(**inputs).images
         max_diff = np.abs(output_with_offload - output_without_offload).max()
@@ -391,7 +392,7 @@


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
     def test_text_to_image_sdxl(self):
         image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="sdxl_models/image_encoder")
@@ -403,7 +404,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
             feature_extractor=feature_extractor,
             torch_dtype=self.dtype,
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")

         inputs = self.get_dummy_inputs()
@@ -461,7 +462,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
             feature_extractor=feature_extractor,
             torch_dtype=self.dtype,
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")

         inputs = self.get_dummy_inputs(for_image_to_image=True)
@@ -530,7 +531,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
             feature_extractor=feature_extractor,
             torch_dtype=self.dtype,
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")

         inputs = self.get_dummy_inputs(for_inpainting=True)
@@ -578,7 +579,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
             image_encoder=image_encoder,
             torch_dtype=self.dtype,
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         pipeline.load_ip_adapter(
             "h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter-plus-face_sdxl_vit-h.safetensors"
         )
@@ -606,7 +607,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
             image_encoder=image_encoder,
             torch_dtype=self.dtype,
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         pipeline.load_ip_adapter(
             "h94/IP-Adapter", subfolder="sdxl_models", weight_name=["ip-adapter-plus-face_sdxl_vit-h.safetensors"] * 2
         )
@@ -633,7 +634,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
         pipeline = StableDiffusionXLPipeline.from_pretrained(
             "RunDiffusion/Juggernaut-XL-v9", torch_dtype=torch.float16, image_encoder=image_encoder, variant="fp16"
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)

         pipeline.load_ip_adapter(
             ["ostris/ip-composition-adapter", "h94/IP-Adapter"],
@@ -674,7 +675,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
             image_encoder=image_encoder,
             torch_dtype=self.dtype,
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         pipeline.load_ip_adapter(
             "h94/IP-Adapter", subfolder="sdxl_models", weight_name=["ip-adapter-plus-face_sdxl_vit-h.safetensors"]
         )
@@ -24,10 +24,11 @@ from transformers import XLMRobertaTokenizerFast
 from diffusers import DDIMScheduler, KandinskyPipeline, KandinskyPriorPipeline, UNet2DConditionModel, VQModel
 from diffusers.pipelines.kandinsky.text_encoder import MCLIPConfig, MultilingualCLIP
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     load_numpy,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -246,7 +247,7 @@ class KandinskyPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
         ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -255,12 +256,12 @@

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         image_slices = []
@@ -275,19 +276,19 @@


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class KandinskyPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_kandinsky_text2img(self):
         expected_image = load_numpy(
@@ -306,7 +307,7 @@ class KandinskyPipelineIntegrationTests(unittest.TestCase):

         prompt = "red cat, 4k photo"

-        generator = torch.Generator(device="cuda").manual_seed(0)
+        generator = torch.Generator(device=torch_device).manual_seed(0)
         image_emb, zero_image_emb = pipe_prior(
             prompt,
             generator=generator,
@@ -314,7 +315,7 @@
             negative_prompt="",
         ).to_tuple()

-        generator = torch.Generator(device="cuda").manual_seed(0)
+        generator = torch.Generator(device=torch_device).manual_seed(0)
         output = pipeline(
             prompt,
             image_embeds=image_emb,
@@ -18,7 +18,7 @@ import unittest
 import numpy as np

 from diffusers import KandinskyCombinedPipeline, KandinskyImg2ImgCombinedPipeline, KandinskyInpaintCombinedPipeline
-from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, torch_device
+from diffusers.utils.testing_utils import enable_full_determinism, require_torch_accelerator, torch_device

 from ..test_pipelines_common import PipelineTesterMixin
 from .test_kandinsky import Dummies
@@ -105,7 +105,7 @@ class KandinskyPipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
             np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
         ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -114,12 +114,12 @@

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         image_slices = []
@@ -213,7 +213,7 @@ class KandinskyPipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
             np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
         ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -222,12 +222,12 @@

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         image_slices = []
@@ -325,7 +325,7 @@ class KandinskyPipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
             np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
         ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -334,12 +334,12 @@

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         image_slices = []
@@ -32,12 +32,13 @@ from diffusers import (
 )
 from diffusers.pipelines.kandinsky.text_encoder import MCLIPConfig, MultilingualCLIP
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     load_image,
     load_numpy,
     nightly,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -267,7 +268,7 @@ class KandinskyImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
         ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -299,19 +300,19 @@


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class KandinskyImg2ImgPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_kandinsky_img2img(self):
         expected_image = load_numpy(
@@ -365,19 +366,19 @@ class KandinskyImg2ImgPipelineIntegrationTests(unittest.TestCase):


 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class KandinskyImg2ImgPipelineNightlyTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_kandinsky_img2img_ddpm(self):
         expected_image = load_numpy(
@@ -25,12 +25,13 @@ from transformers import XLMRobertaTokenizerFast
 from diffusers import DDIMScheduler, KandinskyInpaintPipeline, KandinskyPriorPipeline, UNet2DConditionModel, VQModel
 from diffusers.pipelines.kandinsky.text_encoder import MCLIPConfig, MultilingualCLIP
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     load_image,
     load_numpy,
     nightly,
-    require_torch_gpu,
+    require_torch_accelerator,
     torch_device,
 )

@@ -265,7 +266,7 @@ class KandinskyInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def test_inference_batch_single_identical(self):
         super().test_inference_batch_single_identical(expected_max_diff=3e-3)

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -274,12 +275,12 @@

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         image_slices = []
@@ -297,19 +298,19 @@


 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class KandinskyInpaintPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_kandinsky_inpaint(self):
         expected_image = load_numpy(
@@ -22,12 +22,14 @@ import torch

 from diffusers import DDIMScheduler, KandinskyV22Pipeline, KandinskyV22PriorPipeline, UNet2DConditionModel, VQModel
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     load_numpy,
     numpy_cosine_similarity_distance,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )

 from ..test_pipelines_common import PipelineTesterMixin
@@ -221,19 +223,19 @@ class KandinskyV22PipelineFastTests(PipelineTesterMixin, unittest.TestCase):


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class KandinskyV22PipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_kandinsky_text2img(self):
         expected_image = load_numpy(
@@ -244,12 +246,12 @@ class KandinskyV22PipelineIntegrationTests(unittest.TestCase):
         pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
         )
-        pipe_prior.enable_model_cpu_offload()
+        pipe_prior.enable_model_cpu_offload(device=torch_device)

         pipeline = KandinskyV22Pipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         pipeline.set_progress_bar_config(disable=None)

         prompt = "red cat, 4k photo"
@@ -22,7 +22,7 @@ from diffusers import (
     KandinskyV22Img2ImgCombinedPipeline,
     KandinskyV22InpaintCombinedPipeline,
 )
-from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, torch_device
+from diffusers.utils.testing_utils import enable_full_determinism, require_torch_accelerator, torch_device

 from ..test_pipelines_common import PipelineTesterMixin
 from .test_kandinsky import Dummies
@@ -110,7 +110,7 @@ class KandinskyV22PipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
             np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
         ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -119,12 +119,12 @@

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         image_slices = []
@@ -234,7 +234,7 @@ class KandinskyV22PipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
             np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
         ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -243,12 +243,12 @@

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         image_slices = []
@@ -357,7 +357,7 @@ class KandinskyV22PipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
             np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
         ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -366,12 +366,12 @@

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         image_slices = []
@@ -29,13 +29,15 @@ from diffusers import (
     VQModel,
 )
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     load_image,
     load_numpy,
     numpy_cosine_similarity_distance,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )

 from ..test_pipelines_common import PipelineTesterMixin
@@ -238,19 +240,19 @@ class KandinskyV22Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class KandinskyV22Img2ImgPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_kandinsky_img2img(self):
         expected_image = load_numpy(
@@ -266,12 +268,12 @@ class KandinskyV22Img2ImgPipelineIntegrationTests(unittest.TestCase):
         pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
         )
-        pipe_prior.enable_model_cpu_offload()
+        pipe_prior.enable_model_cpu_offload(device=torch_device)

         pipeline = KandinskyV22Img2ImgPipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         pipeline.set_progress_bar_config(disable=None)

         generator = torch.Generator(device="cpu").manual_seed(0)
@@ -29,13 +29,14 @@ from diffusers import (
     VQModel,
 )
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     is_flaky,
     load_image,
     load_numpy,
     numpy_cosine_similarity_distance,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -292,19 +293,19 @@ class KandinskyV22InpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class KandinskyV22InpaintPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_kandinsky_inpaint(self):
         expected_image = load_numpy(
@@ -31,10 +31,12 @@ from diffusers import (
 from diffusers.image_processor import VaeImageProcessor
 from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     load_image,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )

 from ..pipeline_params import (
@@ -167,25 +169,25 @@ class Kandinsky3PipelineFastTests(PipelineTesterMixin, unittest.TestCase):


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class Kandinsky3PipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_kandinskyV3(self):
         pipe = AutoPipelineForText2Image.from_pretrained(
             "kandinsky-community/kandinsky-3", variant="fp16", torch_dtype=torch.float16
         )
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         pipe.set_progress_bar_config(disable=None)

         prompt = "A photograph of the inside of a subway train. There are raccoons sitting on the seats. One of them is reading a newspaper. The window shows the city in the background."
@@ -211,7 +213,7 @@ class Kandinsky3PipelineIntegrationTests(unittest.TestCase):
         pipe = AutoPipelineForImage2Image.from_pretrained(
             "kandinsky-community/kandinsky-3", variant="fp16", torch_dtype=torch.float16
         )
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         pipe.set_progress_bar_config(disable=None)

         generator = torch.Generator(device="cpu").manual_seed(0)
@@ -31,10 +31,11 @@ from diffusers import (
 from diffusers.image_processor import VaeImageProcessor
 from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     load_image,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -192,25 +193,25 @@ class Kandinsky3Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class Kandinsky3Img2ImgPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_kandinskyV3_img2img(self):
         pipe = AutoPipelineForImage2Image.from_pretrained(
             "kandinsky-community/kandinsky-3", variant="fp16", torch_dtype=torch.float16
         )
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         pipe.set_progress_bar_config(disable=None)

         generator = torch.Generator(device="cpu").manual_seed(0)
@@ -13,8 +13,9 @@ from diffusers import (
     UNet2DConditionModel,
 )
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -222,11 +223,11 @@ class LatentConsistencyModelPipelineFastTests(


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class LatentConsistencyModelPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
         generator = torch.Generator(device=generator_device).manual_seed(seed)
@@ -14,10 +14,11 @@ from diffusers import (
     UNet2DConditionModel,
 )
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     load_image,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -229,11 +230,11 @@ class LatentConsistencyModelImg2ImgPipelineFastTests(


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class LatentConsistencyModelImg2ImgPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
         generator = torch.Generator(device=generator_device).manual_seed(seed)
@@ -30,9 +30,10 @@ from diffusers import (
 )
 from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     numpy_cosine_similarity_distance,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -218,25 +219,25 @@ class LattePipelineFastTests(PipelineTesterMixin, PyramidAttentionBroadcastTesterMixin, unittest.TestCase):


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class LattePipelineIntegrationTests(unittest.TestCase):
     prompt = "A painting of a squirrel eating a burger."

     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_latte(self):
         generator = torch.Generator("cpu").manual_seed(0)

         pipe = LattePipeline.from_pretrained("maxin-cn/Latte-1", torch_dtype=torch.float16)
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         prompt = self.prompt

         videos = pipe(
@@ -29,10 +29,11 @@ from diffusers import (
     UNet2DConditionModel,
 )
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     load_image,
-    require_torch_gpu,
+    require_torch_accelerator,
     skip_mps,
     slow,
     torch_device,
@@ -202,17 +203,17 @@ class LEditsPPPipelineStableDiffusionFastTests(unittest.TestCase):


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class LEditsPPPipelineStableDiffusionSlowTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     @classmethod
     def setUpClass(cls):
@@ -41,7 +41,7 @@ from diffusers.utils.testing_utils import (
     enable_full_determinism,
     floats_tensor,
     load_image,
-    require_torch_gpu,
+    require_torch_accelerator,
     skip_mps,
     slow,
     torch_device,
@@ -253,7 +253,7 @@ class LEditsPPPipelineStableDiffusionXLFastTests(unittest.TestCase):


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class LEditsPPPipelineStableDiffusionXLSlowTests(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
@@ -7,8 +7,9 @@ from transformers import AutoTokenizer, GemmaConfig, GemmaForCausalLM

 from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, LuminaNextDiT2DModel, LuminaText2ImgPipeline
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     numpy_cosine_similarity_distance,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -100,7 +101,7 @@ class LuminaText2ImgPipelinePipelineFastTests(unittest.TestCase, PipelineTesterMixin):


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class LuminaText2ImgPipelineSlowTests(unittest.TestCase):
     pipeline_class = LuminaText2ImgPipeline
     repo_id = "Alpha-VLLM/Lumina-Next-SFT-diffusers"
@@ -108,12 +109,12 @@ class LuminaText2ImgPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def get_inputs(self, device, seed=0):
         if str(device).startswith("mps"):
@@ -131,7 +132,7 @@ class LuminaText2ImgPipelineSlowTests(unittest.TestCase):

     def test_lumina_inference(self):
         pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.bfloat16)
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)

         inputs = self.get_inputs(torch_device)
@@ -32,12 +32,14 @@ from diffusers import (
     UNet2DConditionModel,
 )
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     is_flaky,
     load_image,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )

 from ..test_pipelines_common import PipelineTesterMixin
@@ -288,17 +290,17 @@ class MarigoldDepthPipelineFastTests(PipelineTesterMixin, unittest.TestCase):


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class MarigoldDepthPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def _test_marigold_depth(
         self,
@@ -317,8 +319,7 @@ class MarigoldDepthPipelineIntegrationTests(unittest.TestCase):
             from_pretrained_kwargs["torch_dtype"] = torch.float16

         pipe = MarigoldDepthPipeline.from_pretrained(model_id, **from_pretrained_kwargs)
-        if device == "cuda":
-            pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         pipe.set_progress_bar_config(disable=None)

         generator = torch.Generator(device=device).manual_seed(generator_seed)
@@ -358,7 +359,7 @@
     def test_marigold_depth_einstein_f32_cuda_G0_S1_P768_E1_B1_M1(self):
         self._test_marigold_depth(
             is_fp16=False,
-            device="cuda",
+            device=torch_device,
             generator_seed=0,
             expected_slice=np.array([0.1244, 0.1265, 0.1292, 0.1240, 0.1252, 0.1266, 0.1246, 0.1226, 0.1180]),
             num_inference_steps=1,
@@ -371,7 +372,7 @@
     def test_marigold_depth_einstein_f16_cuda_G0_S1_P768_E1_B1_M1(self):
         self._test_marigold_depth(
             is_fp16=True,
-            device="cuda",
+            device=torch_device,
             generator_seed=0,
             expected_slice=np.array([0.1241, 0.1262, 0.1290, 0.1238, 0.1250, 0.1265, 0.1244, 0.1225, 0.1179]),
             num_inference_steps=1,
@@ -384,7 +385,7 @@
     def test_marigold_depth_einstein_f16_cuda_G2024_S1_P768_E1_B1_M1(self):
         self._test_marigold_depth(
             is_fp16=True,
-            device="cuda",
+            device=torch_device,
             generator_seed=2024,
             expected_slice=np.array([0.1710, 0.1725, 0.1738, 0.1700, 0.1700, 0.1696, 0.1698, 0.1663, 0.1592]),
             num_inference_steps=1,
@@ -397,7 +398,7 @@
     def test_marigold_depth_einstein_f16_cuda_G0_S2_P768_E1_B1_M1(self):
         self._test_marigold_depth(
             is_fp16=True,
-            device="cuda",
+            device=torch_device,
             generator_seed=0,
             expected_slice=np.array([0.1085, 0.1098, 0.1110, 0.1081, 0.1085, 0.1082, 0.1085, 0.1057, 0.0996]),
             num_inference_steps=2,
@@ -410,7 +411,7 @@
     def test_marigold_depth_einstein_f16_cuda_G0_S1_P512_E1_B1_M1(self):
         self._test_marigold_depth(
             is_fp16=True,
-            device="cuda",
+            device=torch_device,
             generator_seed=0,
             expected_slice=np.array([0.2683, 0.2693, 0.2698, 0.2666, 0.2632, 0.2615, 0.2656, 0.2603, 0.2573]),
             num_inference_steps=1,
@@ -423,7 +424,7 @@
     def test_marigold_depth_einstein_f16_cuda_G0_S1_P768_E3_B1_M1(self):
         self._test_marigold_depth(
             is_fp16=True,
-            device="cuda",
+            device=torch_device,
             generator_seed=0,
             expected_slice=np.array([0.1200, 0.1215, 0.1237, 0.1193, 0.1197, 0.1202, 0.1196, 0.1166, 0.1109]),
             num_inference_steps=1,
@@ -437,7 +438,7 @@
     def test_marigold_depth_einstein_f16_cuda_G0_S1_P768_E4_B2_M1(self):
         self._test_marigold_depth(
             is_fp16=True,
-            device="cuda",
+            device=torch_device,
             generator_seed=0,
             expected_slice=np.array([0.1121, 0.1135, 0.1155, 0.1111, 0.1115, 0.1118, 0.1111, 0.1079, 0.1019]),
             num_inference_steps=1,
@@ -451,7 +452,7 @@
     def test_marigold_depth_einstein_f16_cuda_G0_S1_P512_E1_B1_M0(self):
         self._test_marigold_depth(
             is_fp16=True,
-            device="cuda",
+            device=torch_device,
             generator_seed=0,
             expected_slice=np.array([0.2671, 0.2690, 0.2720, 0.2659, 0.2676, 0.2739, 0.2664, 0.2686, 0.2573]),
             num_inference_steps=1,
@@ -32,11 +32,13 @@ from diffusers import (
UNet2DConditionModel,
)
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
load_image,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)

from ..test_pipelines_common import PipelineTesterMixin
@@ -285,17 +287,17 @@ class MarigoldNormalsPipelineFastTests(PipelineTesterMixin, unittest.TestCase):


@slow
@require_torch_gpu
@require_torch_accelerator
class MarigoldNormalsPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def _test_marigold_normals(
self,
@@ -314,8 +316,7 @@ class MarigoldNormalsPipelineIntegrationTests(unittest.TestCase):
from_pretrained_kwargs["torch_dtype"] = torch.float16

pipe = MarigoldNormalsPipeline.from_pretrained(model_id, **from_pretrained_kwargs)
if device == "cuda":
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)

generator = torch.Generator(device=device).manual_seed(generator_seed)
@@ -342,7 +343,7 @@ class MarigoldNormalsPipelineIntegrationTests(unittest.TestCase):
def test_marigold_normals_einstein_f32_cpu_G0_S1_P32_E1_B1_M1(self):
self._test_marigold_normals(
is_fp16=False,
device="cpu",
device=torch_device,
generator_seed=0,
expected_slice=np.array([0.8971, 0.8971, 0.8971, 0.8971, 0.8971, 0.8971, 0.8971, 0.8971, 0.8971]),
num_inference_steps=1,
@@ -355,7 +356,7 @@ class MarigoldNormalsPipelineIntegrationTests(unittest.TestCase):
def test_marigold_normals_einstein_f32_cuda_G0_S1_P768_E1_B1_M1(self):
self._test_marigold_normals(
is_fp16=False,
device="cuda",
device=torch_device,
generator_seed=0,
expected_slice=np.array([0.7980, 0.7952, 0.7914, 0.7931, 0.7871, 0.7816, 0.7844, 0.7710, 0.7601]),
num_inference_steps=1,
@@ -368,7 +369,7 @@ class MarigoldNormalsPipelineIntegrationTests(unittest.TestCase):
def test_marigold_normals_einstein_f16_cuda_G0_S1_P768_E1_B1_M1(self):
self._test_marigold_normals(
is_fp16=True,
device="cuda",
device=torch_device,
generator_seed=0,
expected_slice=np.array([0.7979, 0.7949, 0.7915, 0.7930, 0.7871, 0.7817, 0.7842, 0.7710, 0.7603]),
num_inference_steps=1,
@@ -381,7 +382,7 @@ class MarigoldNormalsPipelineIntegrationTests(unittest.TestCase):
def test_marigold_normals_einstein_f16_cuda_G2024_S1_P768_E1_B1_M1(self):
self._test_marigold_normals(
is_fp16=True,
device="cuda",
device=torch_device,
generator_seed=2024,
expected_slice=np.array([0.8428, 0.8428, 0.8433, 0.8369, 0.8325, 0.8315, 0.8271, 0.8135, 0.8057]),
num_inference_steps=1,
@@ -394,7 +395,7 @@ class MarigoldNormalsPipelineIntegrationTests(unittest.TestCase):
def test_marigold_normals_einstein_f16_cuda_G0_S2_P768_E1_B1_M1(self):
self._test_marigold_normals(
is_fp16=True,
device="cuda",
device=torch_device,
generator_seed=0,
expected_slice=np.array([0.7095, 0.7095, 0.7104, 0.7070, 0.7051, 0.7061, 0.7017, 0.6938, 0.6914]),
num_inference_steps=2,
@@ -407,7 +408,7 @@ class MarigoldNormalsPipelineIntegrationTests(unittest.TestCase):
def test_marigold_normals_einstein_f16_cuda_G0_S1_P512_E1_B1_M1(self):
self._test_marigold_normals(
is_fp16=True,
device="cuda",
device=torch_device,
generator_seed=0,
expected_slice=np.array([0.7168, 0.7163, 0.7163, 0.7080, 0.7061, 0.7046, 0.7031, 0.7007, 0.6987]),
num_inference_steps=1,
@@ -420,7 +421,7 @@ class MarigoldNormalsPipelineIntegrationTests(unittest.TestCase):
def test_marigold_normals_einstein_f16_cuda_G0_S1_P768_E3_B1_M1(self):
self._test_marigold_normals(
is_fp16=True,
device="cuda",
device=torch_device,
generator_seed=0,
expected_slice=np.array([0.7114, 0.7124, 0.7144, 0.7085, 0.7070, 0.7080, 0.7051, 0.6958, 0.6924]),
num_inference_steps=1,
@@ -434,7 +435,7 @@ class MarigoldNormalsPipelineIntegrationTests(unittest.TestCase):
def test_marigold_normals_einstein_f16_cuda_G0_S1_P768_E4_B2_M1(self):
self._test_marigold_normals(
is_fp16=True,
device="cuda",
device=torch_device,
generator_seed=0,
expected_slice=np.array([0.7412, 0.7441, 0.7490, 0.7383, 0.7388, 0.7437, 0.7329, 0.7271, 0.7300]),
num_inference_steps=1,
@@ -448,7 +449,7 @@ class MarigoldNormalsPipelineIntegrationTests(unittest.TestCase):
def test_marigold_normals_einstein_f16_cuda_G0_S1_P512_E1_B1_M0(self):
self._test_marigold_normals(
is_fp16=True,
device="cuda",
device=torch_device,
generator_seed=0,
expected_slice=np.array([0.7188, 0.7144, 0.7134, 0.7178, 0.7207, 0.7222, 0.7231, 0.7041, 0.6987]),
num_inference_steps=1,

@@ -23,6 +23,7 @@ from transformers import AutoTokenizer, T5EncoderModel

from diffusers import AutoencoderKLMochi, FlowMatchEulerDiscreteScheduler, MochiPipeline, MochiTransformer3DModel
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
nightly,
numpy_cosine_similarity_distance,
@@ -274,18 +275,18 @@ class MochiPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_mochi(self):
generator = torch.Generator("cpu").manual_seed(0)

pipe = MochiPipeline.from_pretrained("genmo/mochi-1-preview", torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
prompt = self.prompt

videos = pipe(

@@ -30,8 +30,9 @@ from diffusers import (
UNet2DConditionModel,
)
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -285,7 +286,7 @@ class StableDiffusionPAGPipelineFastTests(


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionPAGPipelineIntegrationTests(unittest.TestCase):
pipeline_class = StableDiffusionPAGPipeline
repo_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
@@ -293,12 +294,12 @@ class StableDiffusionPAGPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, generator_device="cpu", seed=1, guidance_scale=7.0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
@@ -315,7 +316,7 @@ class StableDiffusionPAGPipelineIntegrationTests(unittest.TestCase):

def test_pag_cfg(self):
pipeline = AutoPipelineForText2Image.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.set_progress_bar_config(disable=None)

inputs = self.get_inputs(torch_device)
@@ -333,7 +334,7 @@ class StableDiffusionPAGPipelineIntegrationTests(unittest.TestCase):

def test_pag_uncond(self):
pipeline = AutoPipelineForText2Image.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.set_progress_bar_config(disable=None)

inputs = self.get_inputs(torch_device, guidance_scale=0.0)

@@ -16,10 +16,11 @@ from diffusers import (
StableDiffusion3PAGImg2ImgPipeline,
)
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
load_image,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -193,7 +194,7 @@ class StableDiffusion3PAGImg2ImgPipelineFastTests(unittest.TestCase, PipelineTes


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusion3PAGImg2ImgPipelineIntegrationTests(unittest.TestCase):
pipeline_class = StableDiffusion3PAGImg2ImgPipeline
repo_id = "stabilityai/stable-diffusion-3-medium-diffusers"
@@ -201,12 +202,12 @@ class StableDiffusion3PAGImg2ImgPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(
self, device, generator_device="cpu", dtype=torch.float32, seed=0, guidance_scale=7.0, pag_scale=0.7
@@ -233,7 +234,7 @@ class StableDiffusion3PAGImg2ImgPipelineIntegrationTests(unittest.TestCase):
pipeline = AutoPipelineForImage2Image.from_pretrained(
self.repo_id, enable_pag=True, torch_dtype=torch.float16, pag_applied_layers=["blocks.17"]
)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.set_progress_bar_config(disable=None)

inputs = self.get_inputs(torch_device)

@@ -32,10 +32,11 @@ from diffusers import (
UNet2DConditionModel,
)
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
load_image,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -219,7 +220,7 @@ class StableDiffusionPAGImg2ImgPipelineFastTests(


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionPAGImg2ImgPipelineIntegrationTests(unittest.TestCase):
pipeline_class = StableDiffusionPAGImg2ImgPipeline
repo_id = "Jiali/stable-diffusion-1.5"
@@ -227,12 +228,12 @@ class StableDiffusionPAGImg2ImgPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
@@ -254,7 +255,7 @@ class StableDiffusionPAGImg2ImgPipelineIntegrationTests(unittest.TestCase):

def test_pag_cfg(self):
pipeline = AutoPipelineForImage2Image.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.set_progress_bar_config(disable=None)

inputs = self.get_inputs(torch_device)
@@ -272,7 +273,7 @@ class StableDiffusionPAGImg2ImgPipelineIntegrationTests(unittest.TestCase):

def test_pag_uncond(self):
pipeline = AutoPipelineForImage2Image.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.set_progress_bar_config(disable=None)

inputs = self.get_inputs(torch_device, guidance_scale=0.0)

@@ -30,10 +30,11 @@ from diffusers import (
UNet2DConditionModel,
)
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
load_image,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -251,7 +252,7 @@ class StableDiffusionPAGInpaintPipelineFastTests(


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionPAGPipelineIntegrationTests(unittest.TestCase):
pipeline_class = StableDiffusionPAGInpaintPipeline
repo_id = "runwayml/stable-diffusion-v1-5"
@@ -259,12 +260,12 @@ class StableDiffusionPAGPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, generator_device="cpu", seed=0, guidance_scale=7.0):
img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
@@ -289,7 +290,7 @@ class StableDiffusionPAGPipelineIntegrationTests(unittest.TestCase):

def test_pag_cfg(self):
pipeline = AutoPipelineForInpainting.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.set_progress_bar_config(disable=None)

inputs = self.get_inputs(torch_device)
@@ -307,7 +308,7 @@ class StableDiffusionPAGPipelineIntegrationTests(unittest.TestCase):

def test_pag_uncond(self):
pipeline = AutoPipelineForInpainting.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.set_progress_bar_config(disable=None)

inputs = self.get_inputs(torch_device, guidance_scale=0.0)

@@ -30,8 +30,9 @@ from diffusers import (
UNet2DConditionModel,
)
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -289,7 +290,7 @@ class StableDiffusionXLPAGPipelineFastTests(


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionXLPAGPipelineIntegrationTests(unittest.TestCase):
pipeline_class = StableDiffusionXLPAGPipeline
repo_id = "stabilityai/stable-diffusion-xl-base-1.0"
@@ -297,12 +298,12 @@ class StableDiffusionXLPAGPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, generator_device="cpu", seed=0, guidance_scale=7.0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
@@ -319,7 +320,7 @@ class StableDiffusionXLPAGPipelineIntegrationTests(unittest.TestCase):

def test_pag_cfg(self):
pipeline = AutoPipelineForText2Image.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.set_progress_bar_config(disable=None)

inputs = self.get_inputs(torch_device)
@@ -336,7 +337,7 @@ class StableDiffusionXLPAGPipelineIntegrationTests(unittest.TestCase):

def test_pag_uncond(self):
pipeline = AutoPipelineForText2Image.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.set_progress_bar_config(disable=None)

inputs = self.get_inputs(torch_device, guidance_scale=0.0)

@@ -39,10 +39,11 @@ from diffusers import (
UNet2DConditionModel,
)
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
load_image,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -268,19 +269,19 @@ class StableDiffusionXLPAGImg2ImgPipelineFastTests(


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionXLPAGImg2ImgPipelineIntegrationTests(unittest.TestCase):
repo_id = "stabilityai/stable-diffusion-xl-base-1.0"

def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, generator_device="cpu", seed=0, guidance_scale=7.0):
img_url = (
@@ -304,7 +305,7 @@ class StableDiffusionXLPAGImg2ImgPipelineIntegrationTests(unittest.TestCase):

def test_pag_cfg(self):
pipeline = AutoPipelineForImage2Image.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.set_progress_bar_config(disable=None)

inputs = self.get_inputs(torch_device)
@@ -321,7 +322,7 @@ class StableDiffusionXLPAGImg2ImgPipelineIntegrationTests(unittest.TestCase):

def test_pag_uncond(self):
pipeline = AutoPipelineForImage2Image.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.set_progress_bar_config(disable=None)

inputs = self.get_inputs(torch_device, guidance_scale=0.0)

@@ -40,10 +40,11 @@ from diffusers import (
UNet2DConditionModel,
)
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
load_image,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -273,19 +274,19 @@ class StableDiffusionXLPAGInpaintPipelineFastTests(


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionXLPAGInpaintPipelineIntegrationTests(unittest.TestCase):
repo_id = "stabilityai/stable-diffusion-xl-base-1.0"

def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, generator_device="cpu", seed=0, guidance_scale=7.0):
img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
@@ -310,7 +311,7 @@ class StableDiffusionXLPAGInpaintPipelineIntegrationTests(unittest.TestCase):

def test_pag_cfg(self):
pipeline = AutoPipelineForInpainting.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.set_progress_bar_config(disable=None)

inputs = self.get_inputs(torch_device)
@@ -327,7 +328,7 @@ class StableDiffusionXLPAGInpaintPipelineIntegrationTests(unittest.TestCase):

def test_pag_uncond(self):
pipeline = AutoPipelineForInpainting.from_pretrained(self.repo_id, enable_pag=True, torch_dtype=torch.float16)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.set_progress_bar_config(disable=None)

inputs = self.get_inputs(torch_device, guidance_scale=0.0)

@@ -28,9 +28,10 @@ from diffusers import (
PixArtTransformer2DModel,
)
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -254,7 +255,7 @@ class PixArtAlphaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):


@slow
@require_torch_gpu
@require_torch_accelerator
class PixArtAlphaPipelineIntegrationTests(unittest.TestCase):
ckpt_id_1024 = "PixArt-alpha/PixArt-XL-2-1024-MS"
ckpt_id_512 = "PixArt-alpha/PixArt-XL-2-512x512"
@@ -263,18 +264,18 @@ class PixArtAlphaPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_pixart_1024(self):
generator = torch.Generator("cpu").manual_seed(0)

pipe = PixArtAlphaPipeline.from_pretrained(self.ckpt_id_1024, torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
prompt = self.prompt

image = pipe(prompt, generator=generator, num_inference_steps=2, output_type="np").images
@@ -289,7 +290,7 @@ class PixArtAlphaPipelineIntegrationTests(unittest.TestCase):
generator = torch.Generator("cpu").manual_seed(0)

pipe = PixArtAlphaPipeline.from_pretrained(self.ckpt_id_512, torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)

prompt = self.prompt

@@ -305,7 +306,7 @@ class PixArtAlphaPipelineIntegrationTests(unittest.TestCase):
generator = torch.manual_seed(0)

pipe = PixArtAlphaPipeline.from_pretrained(self.ckpt_id_1024, torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)

prompt = self.prompt
height, width = 1024, 768
@@ -339,7 +340,7 @@ class PixArtAlphaPipelineIntegrationTests(unittest.TestCase):
generator = torch.manual_seed(0)

pipe = PixArtAlphaPipeline.from_pretrained(self.ckpt_id_512, torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)

prompt = self.prompt
height, width = 512, 768

@@ -28,9 +28,10 @@ from diffusers import (
PixArtTransformer2DModel,
)
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -283,7 +284,7 @@ class PixArtSigmaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):


@slow
@require_torch_gpu
@require_torch_accelerator
class PixArtSigmaPipelineIntegrationTests(unittest.TestCase):
ckpt_id_1024 = "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS"
ckpt_id_512 = "PixArt-alpha/PixArt-Sigma-XL-2-512-MS"
@@ -292,18 +293,18 @@ class PixArtSigmaPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_pixart_1024(self):
generator = torch.Generator("cpu").manual_seed(0)

pipe = PixArtSigmaPipeline.from_pretrained(self.ckpt_id_1024, torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
prompt = self.prompt

image = pipe(prompt, generator=generator, num_inference_steps=2, output_type="np").images
@@ -323,7 +324,7 @@ class PixArtSigmaPipelineIntegrationTests(unittest.TestCase):
pipe = PixArtSigmaPipeline.from_pretrained(
self.ckpt_id_1024, transformer=transformer, torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)

prompt = self.prompt

@@ -339,7 +340,7 @@ class PixArtSigmaPipelineIntegrationTests(unittest.TestCase):
generator = torch.manual_seed(0)

pipe = PixArtSigmaPipeline.from_pretrained(self.ckpt_id_1024, torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)

prompt = self.prompt
height, width = 1024, 768
@@ -378,7 +379,7 @@ class PixArtSigmaPipelineIntegrationTests(unittest.TestCase):
pipe = PixArtSigmaPipeline.from_pretrained(
self.ckpt_id_1024, transformer=transformer, torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)

prompt = self.prompt
height, width = 512, 768

@@ -22,8 +22,9 @@ from transformers import Gemma2Config, Gemma2Model, GemmaTokenizer

from diffusers import AutoencoderDC, FlowMatchEulerDiscreteScheduler, SanaPipeline, SanaTransformer2DModel
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -305,19 +306,19 @@ class SanaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):


@slow
@require_torch_gpu
@require_torch_accelerator
class SanaPipelineIntegrationTests(unittest.TestCase):
prompt = "A painting of a squirrel eating a burger."

def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_sana_1024(self):
generator = torch.Generator("cpu").manual_seed(0)
@@ -325,7 +326,7 @@ class SanaPipelineIntegrationTests(unittest.TestCase):
pipe = SanaPipeline.from_pretrained(
"Efficient-Large-Model/Sana_1600M_1024px_diffusers", torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)

image = pipe(
prompt=self.prompt,
@@ -351,7 +352,7 @@ class SanaPipelineIntegrationTests(unittest.TestCase):
pipe = SanaPipeline.from_pretrained(
"Efficient-Large-Model/Sana_1600M_512px_diffusers", torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)

image = pipe(
prompt=self.prompt,

@@ -22,7 +22,7 @@ from transformers import CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokeni
from diffusers import DDPMWuerstchenScheduler, StableCascadeCombinedPipeline
from diffusers.models import StableCascadeUNet
from diffusers.pipelines.wuerstchen import PaellaVQModel
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, torch_device
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_accelerator, torch_device

from ..test_pipelines_common import PipelineTesterMixin

@@ -205,7 +205,7 @@ class StableCascadeCombinedPipelineFastTests(PipelineTesterMixin, unittest.TestC
np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

@require_torch_gpu
@require_torch_accelerator
def test_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -214,12 +214,12 @@ class StableCascadeCombinedPipelineFastTests(PipelineTesterMixin, unittest.TestC

components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

image_slices = []

@@ -24,11 +24,12 @@ from diffusers import DDPMWuerstchenScheduler, StableCascadeDecoderPipeline
from diffusers.models import StableCascadeUNet
from diffusers.pipelines.wuerstchen import PaellaVQModel
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
load_numpy,
load_pt,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
skip_mps,
slow,
torch_device,
@@ -278,25 +279,25 @@ class StableCascadeDecoderPipelineFastTests(PipelineTesterMixin, unittest.TestCa


@slow
@require_torch_gpu
@require_torch_accelerator
class StableCascadeDecoderPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_stable_cascade_decoder(self):
pipe = StableCascadeDecoderPipeline.from_pretrained(
"stabilityai/stable-cascade", variant="bf16", torch_dtype=torch.bfloat16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)

prompt = "A photograph of the inside of a subway train. There are raccoons sitting on the seats. One of them is reading a newspaper. The window shows the city in the background."

@@ -24,11 +24,12 @@ from diffusers import DDPMWuerstchenScheduler, StableCascadePriorPipeline
from diffusers.models import StableCascadeUNet
from diffusers.utils.import_utils import is_peft_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
load_numpy,
numpy_cosine_similarity_distance,
require_peft_backend,
require_torch_gpu,
require_torch_accelerator,
skip_mps,
slow,
torch_device,
@@ -246,25 +247,25 @@ class StableCascadePriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase


@slow
@require_torch_gpu
@require_torch_accelerator
class StableCascadePriorPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_stable_cascade_prior(self):
pipe = StableCascadePriorPipeline.from_pretrained(
"stabilityai/stable-cascade-prior", variant="bf16", torch_dtype=torch.bfloat16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)

prompt = "A photograph of the inside of a subway train. There are raccoons sitting on the seats. One of them is reading a newspaper. The window shows the city in the background."

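The next file also migrates memory accounting: the backend_max_memory_allocated, backend_reset_max_memory_allocated, and backend_reset_peak_memory_stats imports mirror the torch.cuda memory-stat API on other backends. A rough sketch of the dispatch, assuming the backend module (e.g. torch.xpu) exposes the same function names as torch.cuda; the actual testing_utils helpers may differ:

import torch

def backend_max_memory_allocated(device):
    # Hypothetical sketch: torch.cuda and torch.xpu expose the same
    # memory-stat entry points, so dispatch on the device type.
    backend = getattr(torch, torch.device(device).type)
    return backend.max_memory_allocated()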
@@ -44,6 +44,10 @@ from diffusers import (
)
from diffusers.utils.testing_utils import (
CaptureLogger,
backend_empty_cache,
backend_max_memory_allocated,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
enable_full_determinism,
is_torch_compile,
load_image,
@@ -52,7 +56,7 @@ from diffusers.utils.testing_utils import (
numpy_cosine_similarity_distance,
require_accelerate_version_greater,
require_torch_2,
require_torch_gpu,
require_torch_accelerator,
require_torch_multi_gpu,
run_test_in_subprocess,
skip_mps,
@@ -781,11 +785,11 @@ class StableDiffusionPipelineFastTests(


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionPipelineSlowTests(unittest.TestCase):
def setUp(self):
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
@@ -887,7 +891,7 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
assert np.abs(image_slice - expected_slice).max() < 3e-3

def test_stable_diffusion_attention_slicing(self):
torch.cuda.reset_peak_memory_stats()
backend_reset_peak_memory_stats(torch_device)
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16)
pipe.unet.set_default_attn_processor()
pipe = pipe.to(torch_device)
@@ -898,8 +902,8 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
inputs = self.get_inputs(torch_device, dtype=torch.float16)
image_sliced = pipe(**inputs).images

mem_bytes = torch.cuda.max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
mem_bytes = backend_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)
# make sure that less than 3.75 GB is allocated
assert mem_bytes < 3.75 * 10**9

@@ -910,13 +914,13 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
image = pipe(**inputs).images

# make sure that more than 3.75 GB is allocated
mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
assert mem_bytes > 3.75 * 10**9
max_diff = numpy_cosine_similarity_distance(image_sliced.flatten(), image.flatten())
assert max_diff < 1e-3

def test_stable_diffusion_vae_slicing(self):
torch.cuda.reset_peak_memory_stats()
backend_reset_peak_memory_stats(torch_device)
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16)
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
@@ -929,8 +933,8 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
inputs["latents"] = torch.cat([inputs["latents"]] * 4)
image_sliced = pipe(**inputs).images

mem_bytes = torch.cuda.max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
mem_bytes = backend_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)
# make sure that less than 4 GB is allocated
assert mem_bytes < 4e9

@@ -942,14 +946,14 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
image = pipe(**inputs).images

# make sure that more than 4 GB is allocated
mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
assert mem_bytes > 4e9
# There is a small discrepancy at the image borders vs. a fully batched version.
max_diff = numpy_cosine_similarity_distance(image_sliced.flatten(), image.flatten())
assert max_diff < 1e-2

def test_stable_diffusion_vae_tiling(self):
torch.cuda.reset_peak_memory_stats()
backend_reset_peak_memory_stats(torch_device)
model_id = "CompVis/stable-diffusion-v1-4"
pipe = StableDiffusionPipeline.from_pretrained(
model_id, variant="fp16", torch_dtype=torch.float16, safety_checker=None
@@ -963,7 +967,7 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):

# enable vae tiling
pipe.enable_vae_tiling()
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
generator = torch.Generator(device="cpu").manual_seed(0)
output_chunked = pipe(
[prompt],
@@ -976,7 +980,7 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
)
image_chunked = output_chunked.images

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)

# disable vae tiling
pipe.disable_vae_tiling()
@@ -1069,26 +1073,25 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
assert 2 * low_cpu_mem_usage_time < normal_load_time

def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16)
pipe.set_progress_bar_config(disable=None)
pipe.enable_attention_slicing(1)
pipe.enable_sequential_cpu_offload()
pipe.enable_sequential_cpu_offload(device=torch_device)

inputs = self.get_inputs(torch_device, dtype=torch.float16)
_ = pipe(**inputs)

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
# make sure that less than 2.8 GB is allocated
assert mem_bytes < 2.8 * 10**9

def test_stable_diffusion_pipeline_with_model_offloading(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_peak_memory_stats(torch_device)

inputs = self.get_inputs(torch_device, dtype=torch.float16)

@@ -1102,7 +1105,7 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
outputs = pipe(**inputs)
mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)

# With model offloading

@@ -1113,16 +1116,16 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
)
pipe.unet.set_default_attn_processor()

torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
inputs = self.get_inputs(torch_device, dtype=torch.float16)

outputs_offloaded = pipe(**inputs)
mem_bytes_offloaded = torch.cuda.max_memory_allocated()
mem_bytes_offloaded = backend_max_memory_allocated(torch_device)

images = outputs.images
offloaded_images = outputs_offloaded.images
@@ -1135,13 +1138,13 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
assert module.device == torch.device("cpu")

# With attention slicing
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

pipe.enable_attention_slicing()
_ = pipe(**inputs)
mem_bytes_slicing = torch.cuda.max_memory_allocated()
mem_bytes_slicing = backend_max_memory_allocated(torch_device)

assert mem_bytes_slicing < mem_bytes_offloaded
assert mem_bytes_slicing < 3 * 10**9
@@ -1156,7 +1159,7 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
)
pipe.load_textual_inversion(a111_file)
pipe.load_textual_inversion(a111_file_neg)
pipe.to("cuda")
pipe.to(torch_device)

generator = torch.Generator(device="cpu").manual_seed(1)

@@ -1173,7 +1176,7 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):

def test_stable_diffusion_textual_inversion_with_model_cpu_offload(self):
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.load_textual_inversion("sd-concepts-library/low-poly-hd-logos-icons")

a111_file = hf_hub_download("hf-internal-testing/text_inv_embedding_a1111_format", "winter_style.pt")
@@ -1198,8 +1201,8 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):

def test_stable_diffusion_textual_inversion_with_sequential_cpu_offload(self):
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
pipe.enable_sequential_cpu_offload()
pipe.load_textual_inversion("sd-concepts-library/low-poly-hd-logos-icons")
pipe.enable_sequential_cpu_offload(device=torch_device)
pipe.load_textual_inversion("sd-concepts-library/low-poly-hd-logos-icons").to(torch_device)

a111_file = hf_hub_download("hf-internal-testing/text_inv_embedding_a1111_format", "winter_style.pt")
a111_file_neg = hf_hub_download(
@@ -1257,17 +1260,17 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionPipelineCkptTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_download_from_hub(self):
ckpt_paths = [
@@ -1278,7 +1281,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
for ckpt_path in ckpt_paths:
pipe = StableDiffusionPipeline.from_single_file(ckpt_path, torch_dtype=torch.float16)
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda")
pipe.to(torch_device)

image_out = pipe("test", num_inference_steps=1, output_type="np").images[0]

@@ -1294,7 +1297,7 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
ckpt_filename, config_files={"v1": config_filename}, torch_dtype=torch.float16
)
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda")
pipe.to(torch_device)

image_out = pipe("test", num_inference_steps=1, output_type="np").images[0]

@@ -1302,17 +1305,17 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):


@nightly
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionPipelineNightlyTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
@@ -1412,7 +1415,7 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, generator_device="cpu", seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)

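The offloading hunks above and below all pass the session device explicitly instead of relying on the default CUDA target. A short usage sketch (model id illustrative):

from diffusers import StableDiffusionImg2ImgPipeline
from diffusers.utils.testing_utils import torch_device

pipe = StableDiffusionImg2ImgPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
# Offload hooks now land on whatever accelerator torch_device resolves to
# (cuda, xpu, ...), not just cuda.
pipe.enable_model_cpu_offload(device=torch_device)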
@@ -35,6 +35,10 @@ from diffusers import (
UNet2DConditionModel,
)
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_max_memory_allocated,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
enable_full_determinism,
floats_tensor,
is_torch_compile,
@@ -42,7 +46,7 @@ from diffusers.utils.testing_utils import (
load_numpy,
nightly,
require_torch_2,
require_torch_gpu,
require_torch_accelerator,
run_test_in_subprocess,
skip_mps,
slow,
@@ -400,17 +404,17 @@ class StableDiffusionImg2ImgPipelineFastTests(


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
@@ -513,28 +517,28 @@ class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):
assert number_of_steps == 2

def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
"CompVis/stable-diffusion-v1-4", safety_checker=None, torch_dtype=torch.float16
)
pipe.set_progress_bar_config(disable=None)
pipe.enable_attention_slicing(1)
pipe.enable_sequential_cpu_offload()
pipe.enable_sequential_cpu_offload(device=torch_device)

inputs = self.get_inputs(torch_device, dtype=torch.float16)
_ = pipe(**inputs)

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
# make sure that less than 2.2 GB is allocated
assert mem_bytes < 2.2 * 10**9

def test_stable_diffusion_pipeline_with_model_offloading(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

inputs = self.get_inputs(torch_device, dtype=torch.float16)

@@ -548,7 +552,7 @@ class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
pipe(**inputs)
mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)

# With model offloading

@@ -559,14 +563,14 @@ class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):
torch_dtype=torch.float16,
)

torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
_ = pipe(**inputs)
mem_bytes_offloaded = torch.cuda.max_memory_allocated()
mem_bytes_offloaded = backend_max_memory_allocated(torch_device)

assert mem_bytes_offloaded < mem_bytes
for module in pipe.text_encoder, pipe.unet, pipe.vae:
@@ -663,17 +667,17 @@ class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):


@nightly
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)

@@ -37,6 +37,10 @@ from diffusers import (
|
||||
UNet2DConditionModel,
|
||||
)
|
||||
from diffusers.utils.testing_utils import (
|
||||
backend_empty_cache,
|
||||
backend_max_memory_allocated,
|
||||
backend_reset_max_memory_allocated,
|
||||
backend_reset_peak_memory_stats,
|
||||
enable_full_determinism,
|
||||
floats_tensor,
|
||||
is_torch_compile,
|
||||
@@ -44,7 +48,7 @@ from diffusers.utils.testing_utils import (
|
||||
load_numpy,
|
||||
nightly,
|
||||
require_torch_2,
|
||||
require_torch_gpu,
|
||||
require_torch_accelerator,
|
||||
run_test_in_subprocess,
|
||||
slow,
|
||||
torch_device,
|
||||
@@ -602,7 +606,7 @@ class StableDiffusionSimpleInpaintPipelineFastTests(StableDiffusionInpaintPipeli
|
||||
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
@@ -610,7 +614,7 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
|
||||
def tearDown(self):
|
||||
super().tearDown()
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
backend_empty_cache(torch_device)
|
||||
|
||||
def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
|
||||
generator = torch.Generator(device=generator_device).manual_seed(seed)
|
||||
@@ -704,21 +708,21 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
|
||||
assert np.abs(expected_slice - image_slice).max() < 6e-3
|
||||
|
||||
def test_stable_diffusion_inpaint_with_sequential_cpu_offloading(self):
|
||||
torch.cuda.empty_cache()
|
||||
torch.cuda.reset_max_memory_allocated()
|
||||
torch.cuda.reset_peak_memory_stats()
|
||||
backend_empty_cache(torch_device)
|
||||
backend_reset_max_memory_allocated(torch_device)
|
||||
backend_reset_peak_memory_stats(torch_device)
|
||||
|
||||
pipe = StableDiffusionInpaintPipeline.from_pretrained(
|
||||
"botp/stable-diffusion-v1-5-inpainting", safety_checker=None, torch_dtype=torch.float16
|
||||
)
|
||||
pipe.set_progress_bar_config(disable=None)
|
||||
pipe.enable_attention_slicing(1)
|
||||
pipe.enable_sequential_cpu_offload()
|
||||
pipe.enable_sequential_cpu_offload(device=torch_device)
|
||||
|
||||
inputs = self.get_inputs(torch_device, dtype=torch.float16)
|
||||
_ = pipe(**inputs)
|
||||
|
||||
mem_bytes = torch.cuda.max_memory_allocated()
|
||||
mem_bytes = backend_max_memory_allocated(torch_device)
|
||||
# make sure that less than 2.2 GB is allocated
|
||||
assert mem_bytes < 2.2 * 10**9
|
||||
|
||||
@@ -793,7 +797,7 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionInpaintPipelineAsymmetricAutoencoderKLSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
@@ -801,7 +805,7 @@ class StableDiffusionInpaintPipelineAsymmetricAutoencoderKLSlowTests(unittest.Te
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
@@ -907,9 +911,9 @@ class StableDiffusionInpaintPipelineAsymmetricAutoencoderKLSlowTests(unittest.Te
assert np.abs(expected_slice - image_slice).max() < 6e-3

def test_stable_diffusion_inpaint_with_sequential_cpu_offloading(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

vae = AsymmetricAutoencoderKL.from_pretrained(
"cross-attention/asymmetric-autoencoder-kl-x-1-5", torch_dtype=torch.float16
@@ -920,12 +924,12 @@ class StableDiffusionInpaintPipelineAsymmetricAutoencoderKLSlowTests(unittest.Te
pipe.vae = vae
pipe.set_progress_bar_config(disable=None)
pipe.enable_attention_slicing(1)
pipe.enable_sequential_cpu_offload()
pipe.enable_sequential_cpu_offload(device=torch_device)

inputs = self.get_inputs(torch_device, dtype=torch.float16)
_ = pipe(**inputs)

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
# make sure that less than 2.45 GB is allocated
assert mem_bytes < 2.45 * 10**9

@@ -1009,7 +1013,7 @@ class StableDiffusionInpaintPipelineAsymmetricAutoencoderKLSlowTests(unittest.Te
pipe = StableDiffusionInpaintPipeline.from_single_file(filename, torch_dtype=torch.float16)
pipe.vae = vae
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda")
pipe.to(torch_device)

inputs = self.get_inputs(torch_device)
inputs["num_inference_steps"] = 1
@@ -1019,17 +1023,17 @@ class StableDiffusionInpaintPipelineAsymmetricAutoencoderKLSlowTests(unittest.Te


@nightly
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionInpaintPipelineNightlyTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
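Hard-coded pipe.to("cuda") calls like the one above become pipe.to(torch_device), where torch_device is resolved once by the test utilities. A simplified stand-in for that resolution (the real logic also honors environment overrides and additional backends):

    import torch

    def resolve_torch_device():
        # Prefer CUDA, then XPU, then Apple MPS, else fall back to CPU.
        if torch.cuda.is_available():
            return "cuda"
        if hasattr(torch, "xpu") and torch.xpu.is_available():
            return "xpu"
        if torch.backends.mps.is_available():
            return "mps"
        return "cpu"

    torch_device = resolve_torch_device()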
@@ -33,10 +33,14 @@ from diffusers import (
)
from diffusers.image_processor import VaeImageProcessor
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_max_memory_allocated,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
enable_full_determinism,
floats_tensor,
load_image,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -266,17 +270,17 @@ class StableDiffusionInstructPix2PixPipelineFastTests(


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionInstructPix2PixPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, seed=0):
generator = torch.manual_seed(seed)
@@ -384,21 +388,21 @@ class StableDiffusionInstructPix2PixPipelineSlowTests(unittest.TestCase):
assert number_of_steps == 3

def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
"timbrooks/instruct-pix2pix", safety_checker=None, torch_dtype=torch.float16
)
pipe.set_progress_bar_config(disable=None)
pipe.enable_attention_slicing(1)
pipe.enable_sequential_cpu_offload()
pipe.enable_sequential_cpu_offload(device=torch_device)

inputs = self.get_inputs()
_ = pipe(**inputs)

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
# make sure that less than 2.2 GB is allocated
assert mem_bytes < 2.2 * 10**9
@@ -34,12 +34,13 @@ from diffusers import (
from diffusers.utils.testing_utils import (
CaptureLogger,
backend_empty_cache,
backend_max_memory_allocated,
backend_reset_peak_memory_stats,
enable_full_determinism,
load_numpy,
nightly,
numpy_cosine_similarity_distance,
require_torch_accelerator,
require_torch_gpu,
skip_mps,
slow,
torch_device,
@@ -330,9 +331,8 @@ class StableDiffusion2PipelineSlowTests(unittest.TestCase):
backend_empty_cache(torch_device)

def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
_generator_device = "cpu" if not generator_device.startswith("cuda") else "cuda"
if not str(device).startswith("mps"):
generator = torch.Generator(device=_generator_device).manual_seed(seed)
generator = torch.Generator(device=generator_device).manual_seed(seed)
else:
generator = torch.manual_seed(seed)
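The get_inputs rewrite above removes the CUDA-only _generator_device shim: torch.Generator(device=...) now receives the requested device directly, while MPS keeps the torch.manual_seed fallback (which returns the default CPU generator). The same pattern in isolation, as a sketch:

    import torch

    def make_generator(device, seed=0):
        # MPS does not reliably support device-local generators, so seed the
        # default generator there; every other backend gets a local one.
        if str(device).startswith("mps"):
            return torch.manual_seed(seed)
        return torch.Generator(device=device).manual_seed(seed)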
@@ -361,9 +361,9 @@ class StableDiffusion2PipelineSlowTests(unittest.TestCase):
expected_slice = np.array([0.49493, 0.47896, 0.40798, 0.54214, 0.53212, 0.48202, 0.47656, 0.46329, 0.48506])
assert np.abs(image_slice - expected_slice).max() < 7e-3

@require_torch_gpu
@require_torch_accelerator
def test_stable_diffusion_attention_slicing(self):
torch.cuda.reset_peak_memory_stats()
backend_reset_peak_memory_stats(torch_device)
pipe = StableDiffusionPipeline.from_pretrained(
"stabilityai/stable-diffusion-2-base", torch_dtype=torch.float16
)
@@ -376,8 +376,8 @@ class StableDiffusion2PipelineSlowTests(unittest.TestCase):
inputs = self.get_inputs(torch_device, dtype=torch.float16)
image_sliced = pipe(**inputs).images

mem_bytes = torch.cuda.max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
mem_bytes = backend_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)
# make sure that less than 3.3 GB is allocated
assert mem_bytes < 3.3 * 10**9

@@ -388,7 +388,7 @@ class StableDiffusion2PipelineSlowTests(unittest.TestCase):
image = pipe(**inputs).images

# make sure that more than 3.3 GB is allocated
mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
assert mem_bytes > 3.3 * 10**9
max_diff = numpy_cosine_similarity_distance(image.flatten(), image_sliced.flatten())
assert max_diff < 5e-3
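@require_torch_gpu -> @require_torch_accelerator is the other recurring swap; the test is skipped unless some supported accelerator is present rather than CUDA specifically. An illustrative approximation of such a decorator (the real one lives in diffusers.utils.testing_utils and checks additional backends):

    import unittest

    import torch

    def require_torch_accelerator(test_case):
        # Skip unless a torch accelerator backend is available.
        available = (
            torch.cuda.is_available()
            or (hasattr(torch, "xpu") and torch.xpu.is_available())
            or torch.backends.mps.is_available()
        )
        return unittest.skipUnless(available, "test requires a torch accelerator")(test_case)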
@@ -37,6 +37,7 @@ from diffusers import (
UNet2DConditionModel,
)
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
load_image,
@@ -44,7 +45,7 @@ from diffusers.utils.testing_utils import (
nightly,
require_accelerate_version_greater,
require_accelerator,
require_torch_gpu,
require_torch_accelerator,
skip_mps,
slow,
torch_device,
@@ -378,17 +379,17 @@ class StableDiffusionDepth2ImgPipelineFastTests(


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionDepth2ImgPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=device).manual_seed(seed)
@@ -425,17 +426,17 @@ class StableDiffusionDepth2ImgPipelineSlowTests(unittest.TestCase):


@nightly
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=device).manual_seed(seed)
@@ -33,12 +33,13 @@ from diffusers import (
UNet2DConditionModel,
)
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
load_image,
nightly,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
torch_device,
)

@@ -299,18 +300,18 @@ class StableDiffusionDiffEditPipelineFastTests(
return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict)


@require_torch_gpu
@require_torch_accelerator
@nightly
class StableDiffusionDiffEditPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

@classmethod
def setUpClass(cls):
@@ -331,7 +332,7 @@ class StableDiffusionDiffEditPipelineIntegrationTests(unittest.TestCase):
pipe.scheduler.clip_sample = True

pipe.inverse_scheduler = DDIMInverseScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)

source_prompt = "a bowl of fruit"
@@ -377,17 +378,17 @@ class StableDiffusionDiffEditPipelineIntegrationTests(unittest.TestCase):


@nightly
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionDiffEditPipelineNightlyTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

@classmethod
def setUpClass(cls):
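enable_model_cpu_offload() and enable_sequential_cpu_offload() previously assumed CUDA as the execution device; passing device=torch_device points the offload hooks at whichever accelerator the suite resolved. A hedged usage sketch (float16 assumes a real accelerator is present):

    import torch

    from diffusers import StableDiffusionPipeline
    from diffusers.utils.testing_utils import torch_device

    pipe = StableDiffusionPipeline.from_pretrained(
        "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
    )
    # Weights stay on CPU; each sub-model moves to torch_device only while it runs.
    pipe.enable_model_cpu_offload(device=torch_device)
    image = pipe("a bowl of fruit", num_inference_steps=2).images[0]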
@@ -24,11 +24,14 @@ from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer

from diffusers import AutoencoderKL, PNDMScheduler, StableDiffusionInpaintPipeline, UNet2DConditionModel
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
enable_full_determinism,
floats_tensor,
load_image,
load_numpy,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -161,19 +164,19 @@ class StableDiffusion2InpaintPipelineFastTests(


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_stable_diffusion_inpaint_pipeline(self):
init_image = load_image(
@@ -248,9 +251,9 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase):
assert np.abs(expected_image - image).max() < 5e-1

def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

init_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
@@ -270,7 +273,7 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase):
)
pipe.set_progress_bar_config(disable=None)
pipe.enable_attention_slicing(1)
pipe.enable_sequential_cpu_offload()
pipe.enable_sequential_cpu_offload(device=torch_device)

prompt = "Face of a yellow cat, high resolution, sitting on a park bench"
@@ -31,11 +31,12 @@ from diffusers import (
)
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
load_image,
load_numpy,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -284,29 +285,29 @@ class StableDiffusionLatentUpscalePipelineFastTests(
pass


@require_torch_gpu
@require_torch_accelerator
@slow
class StableDiffusionLatentUpscalePipelineIntegrationTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_latent_upscaler_fp16(self):
generator = torch.manual_seed(33)

pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16)
pipe.to("cuda")
pipe.to(torch_device)

upscaler = StableDiffusionLatentUpscalePipeline.from_pretrained(
"stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16
)
upscaler.to("cuda")
upscaler.to(torch_device)

prompt = "a photo of an astronaut high resolution, unreal engine, ultra realistic"

@@ -332,7 +333,7 @@ class StableDiffusionLatentUpscalePipelineIntegrationTests(unittest.TestCase):
upscaler = StableDiffusionLatentUpscalePipeline.from_pretrained(
"stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16
)
upscaler.to("cuda")
upscaler.to(torch_device)

prompt = "the temple of fire by Ross Tran and Gerardo Dottori, oil on canvas"
@@ -25,12 +25,16 @@ from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer

from diffusers import AutoencoderKL, DDIMScheduler, DDPMScheduler, StableDiffusionUpscalePipeline, UNet2DConditionModel
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_max_memory_allocated,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
enable_full_determinism,
floats_tensor,
load_image,
load_numpy,
require_accelerator,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -44,13 +48,13 @@ class StableDiffusionUpscalePipelineFastTests(unittest.TestCase):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

@property
def dummy_image(self):
@@ -381,19 +385,19 @@ class StableDiffusionUpscalePipelineFastTests(unittest.TestCase):


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionUpscalePipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_stable_diffusion_upscale_pipeline(self):
image = load_image(
@@ -459,9 +463,9 @@ class StableDiffusionUpscalePipelineIntegrationTests(unittest.TestCase):
assert np.abs(expected_image - image).max() < 5e-1

def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
@@ -475,7 +479,7 @@ class StableDiffusionUpscalePipelineIntegrationTests(unittest.TestCase):
)
pipe.set_progress_bar_config(disable=None)
pipe.enable_attention_slicing(1)
pipe.enable_sequential_cpu_offload()
pipe.enable_sequential_cpu_offload(device=torch_device)

prompt = "a cat sitting on a park bench"

@@ -488,6 +492,6 @@ class StableDiffusionUpscalePipelineIntegrationTests(unittest.TestCase):
output_type="np",
)

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
# make sure that less than 2.9 GB is allocated
assert mem_bytes < 2.9 * 10**9
@@ -31,11 +31,15 @@ from diffusers import (
UNet2DConditionModel,
)
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_max_memory_allocated,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
enable_full_determinism,
load_numpy,
numpy_cosine_similarity_distance,
require_accelerator,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -49,13 +53,13 @@ class StableDiffusion2VPredictionPipelineFastTests(unittest.TestCase):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

@property
def dummy_cond_unet(self):
@@ -258,19 +262,19 @@ class StableDiffusion2VPredictionPipelineFastTests(unittest.TestCase):


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_stable_diffusion_v_pred_default(self):
sd_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2")
@@ -357,7 +361,7 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

def test_stable_diffusion_attention_slicing_v_pred(self):
torch.cuda.reset_peak_memory_stats()
backend_reset_peak_memory_stats(torch_device)
model_id = "stabilityai/stable-diffusion-2"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe.to(torch_device)
@@ -373,8 +377,8 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
)
image_chunked = output_chunked.images

mem_bytes = torch.cuda.max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
mem_bytes = backend_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)
# make sure that less than 5.5 GB is allocated
assert mem_bytes < 5.5 * 10**9

@@ -385,7 +389,7 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
image = output.images

# make sure that more than 3.0 GB is allocated
mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
assert mem_bytes > 3 * 10**9
max_diff = numpy_cosine_similarity_distance(image.flatten(), image_chunked.flatten())
assert max_diff < 1e-3
@@ -421,7 +425,7 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
pipe.scheduler = DDIMScheduler.from_config(
pipe.scheduler.config, timestep_spacing="trailing", rescale_betas_zero_snr=True
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)

prompt = "A lion in galaxies, spirals, nebulae, stars, smoke, iridescent, intricate detail, octane render, 8k"
@@ -466,7 +470,7 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):

pipe = StableDiffusionPipeline.from_single_file(filename, torch_dtype=torch.float16)
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)

image_out = pipe("test", num_inference_steps=1, output_type="np").images[0]

@@ -530,20 +534,20 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
assert 2 * low_cpu_mem_usage_time < normal_load_time

def test_stable_diffusion_pipeline_with_sequential_cpu_offloading_v_pred(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

pipeline_id = "stabilityai/stable-diffusion-2"
prompt = "Andromeda galaxy in a bottle"

pipeline = StableDiffusionPipeline.from_pretrained(pipeline_id, torch_dtype=torch.float16)
pipeline.enable_attention_slicing(1)
pipeline.enable_sequential_cpu_offload()
pipeline.enable_sequential_cpu_offload(device=torch_device)

generator = torch.manual_seed(0)
_ = pipeline(prompt, generator=generator, num_inference_steps=5)

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
# make sure that less than 2.8 GB is allocated
assert mem_bytes < 2.8 * 10**9
@@ -8,6 +8,7 @@ from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModelWithProject

from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, SD3Transformer2DModel, StableDiffusion3Pipeline
from diffusers.utils.testing_utils import (
backend_empty_cache,
numpy_cosine_similarity_distance,
require_big_gpu_with_torch_cuda,
slow,
@@ -240,12 +241,12 @@ class StableDiffusion3PipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, seed=0):
if str(device).startswith("mps"):
@@ -263,7 +264,7 @@ class StableDiffusion3PipelineSlowTests(unittest.TestCase):

def test_sd3_inference(self):
pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)

inputs = self.get_inputs(torch_device)
@@ -15,6 +15,7 @@ from diffusers import (
)
from diffusers.utils import load_image
from diffusers.utils.testing_utils import (
backend_empty_cache,
floats_tensor,
numpy_cosine_similarity_distance,
require_big_gpu_with_torch_cuda,
@@ -174,12 +175,12 @@ class StableDiffusion3Img2ImgPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, seed=0):
init_image = load_image(
@@ -202,7 +203,7 @@ class StableDiffusion3Img2ImgPipelineSlowTests(unittest.TestCase):

def test_sd3_img2img_inference(self):
pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)

inputs = self.get_inputs(torch_device)
@@ -35,12 +35,13 @@ from diffusers import (
from diffusers.utils import logging
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
load_image,
load_numpy,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -604,17 +605,17 @@ class StableDiffusionMultiAdapterPipelineFastTests(AdapterTests, PipelineTesterM


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionAdapterPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_stable_diffusion_adapter_depth_sd_v15(self):
adapter_model = "TencentARC/t2iadapter_depth_sd15v2"
@@ -30,13 +30,17 @@ from diffusers import (
UNet2DConditionModel,
)
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_max_memory_allocated,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
enable_full_determinism,
floats_tensor,
load_image,
load_numpy,
nightly,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -164,17 +168,17 @@ class StableDiffusionImageVariationPipelineFastTests(


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionImageVariationPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
@@ -258,37 +262,37 @@ class StableDiffusionImageVariationPipelineSlowTests(unittest.TestCase):
assert number_of_steps == inputs["num_inference_steps"]

def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

pipe = StableDiffusionImageVariationPipeline.from_pretrained(
"lambdalabs/sd-image-variations-diffusers", safety_checker=None, torch_dtype=torch.float16
)
pipe.set_progress_bar_config(disable=None)
pipe.enable_attention_slicing(1)
pipe.enable_sequential_cpu_offload()
pipe.enable_sequential_cpu_offload(device=torch_device)

inputs = self.get_inputs(torch_device, dtype=torch.float16)
_ = pipe(**inputs)

mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
# make sure that less than 2.6 GB is allocated
assert mem_bytes < 2.6 * 10**9


@nightly
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionImageVariationPipelineNightlyTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
@@ -38,7 +38,7 @@ from diffusers.utils.testing_utils import (
enable_full_determinism,
load_image,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -265,7 +265,7 @@ class StableDiffusionXLPipelineFastTests(
def test_inference_batch_single_identical(self):
super().test_inference_batch_single_identical(expected_max_diff=3e-3)

@require_torch_gpu
@require_torch_accelerator
def test_stable_diffusion_xl_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -274,12 +274,12 @@ class StableDiffusionXLPipelineFastTests(

components = self.get_dummy_components()
sd_pipe = StableDiffusionXLPipeline(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

components = self.get_dummy_components()
sd_pipe = StableDiffusionXLPipeline(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

image_slices = []
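The various test_stable_diffusion_xl_offloads methods share one skeleton: build the same pipeline three times, place it with .to(torch_device), model offload, and sequential offload, then check the outputs agree. Sketched below with a hypothetical make_pipe factory; inputs is assumed to request output_type="np" so the images slice as numpy arrays:

    import numpy as np

    def compare_offload_modes(make_pipe, inputs, torch_device):
        pipes = []

        pipe = make_pipe().to(torch_device)
        pipes.append(pipe)

        pipe = make_pipe()
        pipe.enable_model_cpu_offload(device=torch_device)
        pipes.append(pipe)

        pipe = make_pipe()
        pipe.enable_sequential_cpu_offload(device=torch_device)
        pipes.append(pipe)

        # All three placements should produce numerically matching images.
        slices = [p(**inputs).images[0, -3:, -3:, -1] for p in pipes]
        assert np.abs(slices[0].flatten() - slices[1].flatten()).max() < 1e-3
        assert np.abs(slices[0].flatten() - slices[2].flatten()).max() < 1e-3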
@@ -42,7 +42,7 @@ from diffusers.utils.testing_utils import (
enable_full_determinism,
floats_tensor,
load_image,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -293,7 +293,7 @@ class StableDiffusionXLImg2ImgPipelineFastTests(

assert np.allclose(image_slice, expected_slice, atol=1e-4, rtol=1e-4)

@require_torch_gpu
@require_torch_accelerator
def test_stable_diffusion_xl_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -302,12 +302,12 @@ class StableDiffusionXLImg2ImgPipelineFastTests(

components = self.get_dummy_components()
sd_pipe = StableDiffusionXLImg2ImgPipeline(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

components = self.get_dummy_components()
sd_pipe = StableDiffusionXLImg2ImgPipeline(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

image_slices = []
@@ -596,7 +596,7 @@ class StableDiffusionXLImg2ImgRefinerOnlyPipelineFastTests(

assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

@require_torch_gpu
@require_torch_accelerator
def test_stable_diffusion_xl_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -605,12 +605,12 @@ class StableDiffusionXLImg2ImgRefinerOnlyPipelineFastTests(

components = self.get_dummy_components()
sd_pipe = StableDiffusionXLImg2ImgPipeline(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

components = self.get_dummy_components()
sd_pipe = StableDiffusionXLImg2ImgPipeline(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

image_slices = []
@@ -41,7 +41,13 @@ from diffusers import (
UNet2DConditionModel,
UniPCMultistepScheduler,
)
from diffusers.utils.testing_utils import enable_full_determinism, floats_tensor, require_torch_gpu, slow, torch_device
from diffusers.utils.testing_utils import (
enable_full_determinism,
floats_tensor,
require_torch_accelerator,
slow,
torch_device,
)

from ..pipeline_params import (
TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS,
@@ -305,7 +311,48 @@ class StableDiffusionXLInpaintPipelineFastTests(
def test_save_load_optional_components(self):
pass

@require_torch_gpu
@require_torch_accelerator
def test_stable_diffusion_xl_inpaint_negative_prompt_embeds(self):
components = self.get_dummy_components()
sd_pipe = StableDiffusionXLInpaintPipeline(**components)
sd_pipe = sd_pipe.to(torch_device)
sd_pipe = sd_pipe.to(torch_device)
sd_pipe.set_progress_bar_config(disable=None)

# forward without prompt embeds
inputs = self.get_dummy_inputs(torch_device)
negative_prompt = 3 * ["this is a negative prompt"]
inputs["negative_prompt"] = negative_prompt
inputs["prompt"] = 3 * [inputs["prompt"]]

output = sd_pipe(**inputs)
image_slice_1 = output.images[0, -3:, -3:, -1]

# forward with prompt embeds
inputs = self.get_dummy_inputs(torch_device)
negative_prompt = 3 * ["this is a negative prompt"]
prompt = 3 * [inputs.pop("prompt")]

(
prompt_embeds,
negative_prompt_embeds,
pooled_prompt_embeds,
negative_pooled_prompt_embeds,
) = sd_pipe.encode_prompt(prompt, negative_prompt=negative_prompt)

output = sd_pipe(
**inputs,
prompt_embeds=prompt_embeds,
negative_prompt_embeds=negative_prompt_embeds,
pooled_prompt_embeds=pooled_prompt_embeds,
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
)
image_slice_2 = output.images[0, -3:, -3:, -1]

# make sure that it's equal
assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4

@require_torch_accelerator
def test_stable_diffusion_xl_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -314,12 +361,12 @@ class StableDiffusionXLInpaintPipelineFastTests(

components = self.get_dummy_components()
sd_pipe = StableDiffusionXLInpaintPipeline(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

components = self.get_dummy_components()
sd_pipe = StableDiffusionXLInpaintPipeline(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

image_slices = []
@@ -20,14 +20,20 @@ import numpy as np
import torch

from diffusers import StableDiffusionXLKDiffusionPipeline
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, slow, torch_device
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
require_torch_accelerator,
slow,
torch_device,
)


enable_full_determinism()


@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionXLKPipelineIntegrationTests(unittest.TestCase):
dtype = torch.float16

@@ -35,13 +41,13 @@ class StableDiffusionXLKPipelineIntegrationTests(unittest.TestCase):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_stable_diffusion_xl(self):
sd_pipe = StableDiffusionXLKDiffusionPipeline.from_pretrained(
@@ -22,12 +22,13 @@ from diffusers.utils import load_image, logging
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
CaptureLogger,
backend_empty_cache,
enable_full_determinism,
floats_tensor,
numpy_cosine_similarity_distance,
require_accelerate_version_greater,
require_accelerator,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -515,19 +516,19 @@ class StableVideoDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCa


@slow
@require_torch_gpu
@require_torch_accelerator
class StableVideoDiffusionPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_sd_video(self):
pipe = StableVideoDiffusionPipeline.from_pretrained(
@@ -535,7 +536,7 @@ class StableVideoDiffusionPipelineSlowTests(unittest.TestCase):
variant="fp16",
torch_dtype=torch.float16,
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/pix2pix/cat_6.png?download=true"
@@ -1383,11 +1383,11 @@ class PipelineFastTests(unittest.TestCase):
feature_extractor=self.dummy_extractor,
)

sd.enable_model_cpu_offload()
sd.enable_model_cpu_offload(device=torch_device)

logger = logging.get_logger("diffusers.pipelines.pipeline_utils")
with CaptureLogger(logger) as cap_logger:
sd.to("cuda")
sd.to(torch_device)

assert "It is strongly recommended against doing so" in str(cap_logger)
@@ -23,10 +23,11 @@ from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer
from diffusers import AutoencoderKL, DDIMScheduler, TextToVideoSDPipeline, UNet3DConditionModel
from diffusers.utils import is_xformers_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
load_numpy,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
skip_mps,
slow,
torch_device,
@@ -184,19 +185,19 @@ class TextToVideoSDPipelineFastTests(PipelineTesterMixin, SDFunctionTesterMixin,

@slow
@skip_mps
@require_torch_gpu
@require_torch_accelerator
class TextToVideoSDPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)

def test_two_step_model(self):
expected_video = load_numpy(
@@ -27,6 +27,7 @@ from diffusers.utils.testing_utils import (
load_image,
nightly,
require_torch_2,
require_torch_accelerator,
require_torch_gpu,
run_test_in_subprocess,
torch_device,
@@ -501,20 +502,19 @@ class UniDiffuserPipelineFastTests(
def test_inference_batch_single_identical(self):
super().test_inference_batch_single_identical(expected_max_diff=2e-4)

@require_torch_gpu
def test_unidiffuser_default_joint_v1_cuda_fp16(self):
device = "cuda"
@require_torch_accelerator
def test_unidiffuser_default_joint_v1_fp16(self):
unidiffuser_pipe = UniDiffuserPipeline.from_pretrained(
"hf-internal-testing/unidiffuser-test-v1", torch_dtype=torch.float16
)
unidiffuser_pipe = unidiffuser_pipe.to(device)
unidiffuser_pipe = unidiffuser_pipe.to(torch_device)
unidiffuser_pipe.set_progress_bar_config(disable=None)

# Set mode to 'joint'
unidiffuser_pipe.set_joint_mode()
assert unidiffuser_pipe.mode == "joint"

inputs = self.get_dummy_inputs_with_latents(device)
inputs = self.get_dummy_inputs_with_latents(torch_device)
# Delete prompt and image for joint inference.
del inputs["prompt"]
del inputs["image"]
@@ -531,20 +531,19 @@ class UniDiffuserPipelineFastTests(
expected_text_prefix = '" This This'
assert text[0][: len(expected_text_prefix)] == expected_text_prefix

@require_torch_gpu
def test_unidiffuser_default_text2img_v1_cuda_fp16(self):
device = "cuda"
@require_torch_accelerator
def test_unidiffuser_default_text2img_v1_fp16(self):
unidiffuser_pipe = UniDiffuserPipeline.from_pretrained(
"hf-internal-testing/unidiffuser-test-v1", torch_dtype=torch.float16
)
unidiffuser_pipe = unidiffuser_pipe.to(device)
unidiffuser_pipe = unidiffuser_pipe.to(torch_device)
unidiffuser_pipe.set_progress_bar_config(disable=None)

# Set mode to 'text2img'
unidiffuser_pipe.set_text_to_image_mode()
assert unidiffuser_pipe.mode == "text2img"

inputs = self.get_dummy_inputs_with_latents(device)
inputs = self.get_dummy_inputs_with_latents(torch_device)
# Delete prompt and image for joint inference.
del inputs["image"]
inputs["data_type"] = 1
@@ -556,20 +555,19 @@ class UniDiffuserPipelineFastTests(
expected_img_slice = np.array([0.5054, 0.5498, 0.5854, 0.3052, 0.4458, 0.6489, 0.5122, 0.4810, 0.6138])
assert np.abs(image_slice.flatten() - expected_img_slice).max() < 1e-3

@require_torch_gpu
def test_unidiffuser_default_img2text_v1_cuda_fp16(self):
device = "cuda"
@require_torch_accelerator
def test_unidiffuser_default_img2text_v1_fp16(self):
unidiffuser_pipe = UniDiffuserPipeline.from_pretrained(
"hf-internal-testing/unidiffuser-test-v1", torch_dtype=torch.float16
)
unidiffuser_pipe = unidiffuser_pipe.to(device)
unidiffuser_pipe = unidiffuser_pipe.to(torch_device)
unidiffuser_pipe.set_progress_bar_config(disable=None)

# Set mode to 'img2text'
unidiffuser_pipe.set_image_to_text_mode()
assert unidiffuser_pipe.mode == "img2text"

inputs = self.get_dummy_inputs_with_latents(device)
inputs = self.get_dummy_inputs_with_latents(torch_device)
# Delete prompt and image for joint inference.
del inputs["prompt"]
inputs["data_type"] = 1
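The UniDiffuser tests also drop the _cuda_ infix from their names along with the pinned device = "cuda"; torch_device flows through instead, so the fp16 variants run on any accelerator. Reduced to a sketch (the repo id is the same test fixture used above):

    import torch

    from diffusers import UniDiffuserPipeline
    from diffusers.utils.testing_utils import torch_device

    pipe = UniDiffuserPipeline.from_pretrained(
        "hf-internal-testing/unidiffuser-test-v1", torch_dtype=torch.float16
    )
    pipe = pipe.to(torch_device)  # no literal "cuda" left in the test body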
@@ -21,7 +21,7 @@ from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer

from diffusers import DDPMWuerstchenScheduler, WuerstchenCombinedPipeline
from diffusers.pipelines.wuerstchen import PaellaVQModel, WuerstchenDiffNeXt, WuerstchenPrior
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, torch_device
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_accelerator, torch_device

from ..test_pipelines_common import PipelineTesterMixin

@@ -198,7 +198,7 @@ class WuerstchenCombinedPipelineFastTests(PipelineTesterMixin, unittest.TestCase
np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

@require_torch_gpu
@require_torch_accelerator
def test_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -207,12 +207,12 @@ class WuerstchenCombinedPipelineFastTests(PipelineTesterMixin, unittest.TestCase

components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)

image_slices = []