
[tests] make tests device-agnostic (part 3) (#10437)

* initial commit

* fix empty cache

* fix one more

* fix style

* update device functions

* update

* update

* Update src/diffusers/utils/testing_utils.py

Co-authored-by: hlky <hlky@hlky.ac>

* Update src/diffusers/utils/testing_utils.py

Co-authored-by: hlky <hlky@hlky.ac>

* Update src/diffusers/utils/testing_utils.py

Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/controlnet/test_controlnet.py

Co-authored-by: hlky <hlky@hlky.ac>

* Update src/diffusers/utils/testing_utils.py

Co-authored-by: hlky <hlky@hlky.ac>

* Update src/diffusers/utils/testing_utils.py

Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/controlnet/test_controlnet.py

Co-authored-by: hlky <hlky@hlky.ac>

* with gc.collect

* update

* make style

* check_torch_dependencies

* add mps empty cache

* bug fix

* Apply suggestions from code review

---------

Co-authored-by: hlky <hlky@hlky.ac>
Author: Fanli Lin
Date: 2025-01-21 20:15:45 +08:00
Committed by: GitHub
Parent commit: 158a5a87fb
Commit: ec37e20972
26 changed files with 275 additions and 170 deletions


@@ -86,7 +86,12 @@ if is_torch_available():
) from e
logger.info(f"torch_device overrode to {torch_device}")
else:
torch_device = "cuda" if torch.cuda.is_available() else "cpu"
if torch.cuda.is_available():
torch_device = "cuda"
elif torch.xpu.is_available():
torch_device = "xpu"
else:
torch_device = "cpu"
is_torch_higher_equal_than_1_12 = version.parse(
version.parse(torch.__version__).base_version
) >= version.parse("1.12")
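
Note that this auto-detection only runs when no explicit override is set; the branch visible at the top of the hunk reads the device from an environment variable. A minimal sketch of the combined resolution order (assuming the variable is named DIFFUSERS_TEST_DEVICE, as in the surrounding file, and guarding torch.xpu for older PyTorch builds):

import os

import torch

# an explicit override from the environment wins over auto-detection
if "DIFFUSERS_TEST_DEVICE" in os.environ:
    torch_device = os.environ["DIFFUSERS_TEST_DEVICE"]
elif torch.cuda.is_available():
    torch_device = "cuda"
elif hasattr(torch, "xpu") and torch.xpu.is_available():
    torch_device = "xpu"
else:
    torch_device = "cpu"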
@@ -1067,12 +1072,51 @@ def _is_torch_fp64_available(device):
# Guard these lookups for when Torch is not used - alternative accelerator support is for PyTorch
if is_torch_available():
# Behaviour flags
BACKEND_SUPPORTS_TRAINING = {"cuda": True, "cpu": True, "mps": False, "default": True}
BACKEND_SUPPORTS_TRAINING = {"cuda": True, "xpu": True, "cpu": True, "mps": False, "default": True}
# Function definitions
BACKEND_EMPTY_CACHE = {"cuda": torch.cuda.empty_cache, "cpu": None, "mps": None, "default": None}
BACKEND_DEVICE_COUNT = {"cuda": torch.cuda.device_count, "cpu": lambda: 0, "mps": lambda: 0, "default": 0}
BACKEND_MANUAL_SEED = {"cuda": torch.cuda.manual_seed, "cpu": torch.manual_seed, "default": torch.manual_seed}
BACKEND_EMPTY_CACHE = {
"cuda": torch.cuda.empty_cache,
"xpu": torch.xpu.empty_cache,
"cpu": None,
"mps": torch.mps.empty_cache,
"default": None,
}
BACKEND_DEVICE_COUNT = {
"cuda": torch.cuda.device_count,
"xpu": torch.xpu.device_count,
"cpu": lambda: 0,
"mps": lambda: 0,
"default": 0,
}
BACKEND_MANUAL_SEED = {
"cuda": torch.cuda.manual_seed,
"xpu": torch.xpu.manual_seed,
"cpu": torch.manual_seed,
"mps": torch.mps.manual_seed,
"default": torch.manual_seed,
}
BACKEND_RESET_PEAK_MEMORY_STATS = {
"cuda": torch.cuda.reset_peak_memory_stats,
"xpu": getattr(torch.xpu, "reset_peak_memory_stats", None),
"cpu": None,
"mps": None,
"default": None,
}
BACKEND_RESET_MAX_MEMORY_ALLOCATED = {
"cuda": torch.cuda.reset_max_memory_allocated,
"xpu": None,
"cpu": None,
"mps": None,
"default": None,
}
BACKEND_MAX_MEMORY_ALLOCATED = {
"cuda": torch.cuda.max_memory_allocated,
"xpu": getattr(torch.xpu, "max_memory_allocated", None),
"cpu": 0,
"mps": 0,
"default": 0,
}
# This dispatches a defined function according to the accelerator from the function definitions.
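
The dispatcher named in the comment is not shown in this hunk; a minimal sketch of how such a table-driven dispatch typically works (the name matches the calls below, but the body here is an assumption, not code from this diff):

from typing import Any, Callable, Dict, Union

def _device_agnostic_dispatch(device: str, dispatch_table: Dict[str, Union[Callable, Any]], *args, **kwargs) -> Any:
    # unknown accelerators fall back to the "default" entry
    fn = dispatch_table.get(device, dispatch_table["default"])
    # table entries may be plain values (0, None, True) rather than callables
    if not callable(fn):
        return fn
    return fn(*args, **kwargs)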
@@ -1103,6 +1147,18 @@ def backend_device_count(device: str):
    return _device_agnostic_dispatch(device, BACKEND_DEVICE_COUNT)


def backend_reset_peak_memory_stats(device: str):
    return _device_agnostic_dispatch(device, BACKEND_RESET_PEAK_MEMORY_STATS)


def backend_reset_max_memory_allocated(device: str):
    return _device_agnostic_dispatch(device, BACKEND_RESET_MAX_MEMORY_ALLOCATED)


def backend_max_memory_allocated(device: str):
    return _device_agnostic_dispatch(device, BACKEND_MAX_MEMORY_ALLOCATED)
# These are callables which return boolean behaviour flags and can be used to specify some
# device agnostic alternative where the feature is unsupported.
def backend_supports_training(device: str):
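
A typical consumer of the behaviour flag, sketched as a hypothetical test that skips itself on backends without training support:

import unittest

from diffusers.utils.testing_utils import backend_supports_training, torch_device

class ExampleModelTests(unittest.TestCase):
    def test_training_step(self):
        if not backend_supports_training(torch_device):
            self.skipTest(f"training is not supported on {torch_device}")
        # ... run a forward/backward pass on torch_device ...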
@@ -1159,3 +1215,6 @@ if is_torch_available():
update_mapping_from_spec(BACKEND_EMPTY_CACHE, "EMPTY_CACHE_FN")
update_mapping_from_spec(BACKEND_DEVICE_COUNT, "DEVICE_COUNT_FN")
update_mapping_from_spec(BACKEND_SUPPORTS_TRAINING, "SUPPORTS_TRAINING")
update_mapping_from_spec(BACKEND_RESET_PEAK_MEMORY_STATS, "RESET_PEAK_MEMORY_STATS_FN")
update_mapping_from_spec(BACKEND_RESET_MAX_MEMORY_ALLOCATED, "RESET_MAX_MEMORY_ALLOCATED_FN")
update_mapping_from_spec(BACKEND_MAX_MEMORY_ALLOCATED, "MAX_MEMORY_ALLOCATED_FN")
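
These registrations let an out-of-tree accelerator plug its own functions into the mappings above. A hypothetical spec module is sketched below; the *_FN attribute names follow the registrations in this hunk, while the loading mechanism (an environment variable pointing at the file) is an assumption, not shown in this diff:

# my_device_spec.py -- hypothetical spec for a custom torch backend
import torch

DEVICE_NAME = "privateuseone"

MANUAL_SEED_FN = torch.manual_seed
EMPTY_CACHE_FN = None  # nothing to clear on this backend
DEVICE_COUNT_FN = lambda: 1
SUPPORTS_TRAINING = True
RESET_PEAK_MEMORY_STATS_FN = None
RESET_MAX_MEMORY_ALLOCATED_FN = None
MAX_MEMORY_ALLOCATED_FN = 0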


@@ -57,8 +57,8 @@ from diffusers.utils.testing_utils import (
get_python_version,
is_torch_compile,
require_torch_2,
require_torch_accelerator,
require_torch_accelerator_with_training,
require_torch_gpu,
require_torch_multi_gpu,
run_test_in_subprocess,
torch_all_close,
@@ -543,7 +543,7 @@ class ModelTesterMixin:
assert torch.allclose(output, output_3, atol=self.base_precision)
assert torch.allclose(output_2, output_3, atol=self.base_precision)
@require_torch_gpu
@require_torch_accelerator
def test_set_attn_processor_for_determinism(self):
if self.uses_custom_attn_processor:
return
@@ -1068,7 +1068,7 @@ class ModelTesterMixin:
self.assertTrue(f"Adapter name {wrong_name} not found in the model." in str(err_context.exception))
@require_torch_gpu
@require_torch_accelerator
def test_cpu_offload(self):
config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
model = self.model_class(**config).eval()
@@ -1098,7 +1098,7 @@ class ModelTesterMixin:
self.assertTrue(torch.allclose(base_output[0], new_output[0], atol=1e-5))
@require_torch_gpu
@require_torch_accelerator
def test_disk_offload_without_safetensors(self):
config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
model = self.model_class(**config).eval()
@@ -1132,7 +1132,7 @@ class ModelTesterMixin:
self.assertTrue(torch.allclose(base_output[0], new_output[0], atol=1e-5))
@require_torch_gpu
@require_torch_accelerator
def test_disk_offload_with_safetensors(self):
config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
model = self.model_class(**config).eval()
@@ -1191,7 +1191,7 @@ class ModelTesterMixin:
self.assertTrue(torch.allclose(base_output[0], new_output[0], atol=1e-5))
@require_torch_gpu
@require_torch_accelerator
def test_sharded_checkpoints(self):
torch.manual_seed(0)
config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
@@ -1223,7 +1223,7 @@ class ModelTesterMixin:
self.assertTrue(torch.allclose(base_output[0], new_output[0], atol=1e-5))
@require_torch_gpu
@require_torch_accelerator
def test_sharded_checkpoints_with_variant(self):
torch.manual_seed(0)
config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
@@ -1261,7 +1261,7 @@ class ModelTesterMixin:
self.assertTrue(torch.allclose(base_output[0], new_output[0], atol=1e-5))
@require_torch_gpu
@require_torch_accelerator
def test_sharded_checkpoints_device_map(self):
config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
model = self.model_class(**config).eval()
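
The decorator swap above is the heart of this file's changes: tests that only need some accelerator no longer insist on CUDA. A rough sketch of what such a gate looks like (an assumption about its shape, not code from this diff):

import unittest

# assumes the module-level torch_device resolved by testing_utils
def require_torch_accelerator(test_case):
    return unittest.skipUnless(torch_device != "cpu", "test requires an accelerator")(test_case)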


@@ -27,7 +27,7 @@ from diffusers.utils.testing_utils import (
enable_full_determinism,
numpy_cosine_similarity_distance,
require_hf_hub_version_greater,
require_torch_gpu,
require_torch_accelerator,
require_transformers_version_greater,
slow,
torch_device,
@@ -332,7 +332,7 @@ class AllegroPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
@slow
@require_torch_gpu
@require_torch_accelerator
class AllegroPipelineIntegrationTests(unittest.TestCase):
prompt = "A painting of a squirrel eating a burger."
@@ -350,7 +350,7 @@ class AllegroPipelineIntegrationTests(unittest.TestCase):
generator = torch.Generator("cpu").manual_seed(0)
pipe = AllegroPipeline.from_pretrained("rhymes-ai/Allegro", torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
prompt = self.prompt
videos = pipe(
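
enable_model_cpu_offload targets "cuda" by default, so passing device=torch_device is what makes this integration test runnable on other accelerators. A minimal usage sketch (checkpoint and dtype illustrative only):

import torch

from diffusers import DiffusionPipeline
from diffusers.utils.testing_utils import torch_device

pipe = DiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
)
# weights stay on CPU; submodules move to the detected accelerator on demand
pipe.enable_model_cpu_offload(device=torch_device)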


@@ -20,9 +20,10 @@ from diffusers import (
from diffusers.models.attention import FreeNoiseTransformerBlock
from diffusers.utils import is_xformers_available, logging
from diffusers.utils.testing_utils import (
backend_empty_cache,
numpy_cosine_similarity_distance,
require_accelerator,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -547,19 +548,19 @@ class AnimateDiffPipelineFastTests(
@slow
@require_torch_gpu
@require_torch_accelerator
class AnimateDiffPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_animatediff(self):
adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2")
@@ -573,7 +574,7 @@ class AnimateDiffPipelineSlowTests(unittest.TestCase):
clip_sample=False,
)
pipe.enable_vae_slicing()
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
prompt = "night, b&w photo of old house, post apocalypse, forest, storm weather, wind, rocks, 8k uhd, dslr, soft lighting, high quality, film grain"


@@ -24,7 +24,7 @@ from diffusers import AutoencoderKLCogVideoX, CogVideoXPipeline, CogVideoXTransf
from diffusers.utils.testing_utils import (
enable_full_determinism,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -321,7 +321,7 @@ class CogVideoXPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
@slow
@require_torch_gpu
@require_torch_accelerator
class CogVideoXPipelineIntegrationTests(unittest.TestCase):
prompt = "A painting of a squirrel eating a burger."
@@ -339,7 +339,7 @@ class CogVideoXPipelineIntegrationTests(unittest.TestCase):
generator = torch.Generator("cpu").manual_seed(0)
pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
prompt = self.prompt
videos = pipe(


@@ -24,9 +24,10 @@ from transformers import AutoTokenizer, T5EncoderModel
from diffusers import AutoencoderKLCogVideoX, CogVideoXImageToVideoPipeline, CogVideoXTransformer3DModel, DDIMScheduler
from diffusers.utils import load_image
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -344,25 +345,25 @@ class CogVideoXImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestC
@slow
@require_torch_gpu
@require_torch_accelerator
class CogVideoXImageToVideoPipelineIntegrationTests(unittest.TestCase):
prompt = "A painting of a squirrel eating a burger."
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_cogvideox(self):
generator = torch.Generator("cpu").manual_seed(0)
pipe = CogVideoXImageToVideoPipeline.from_pretrained("THUDM/CogVideoX-5b-I2V", torch_dtype=torch.bfloat16)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
prompt = self.prompt
image = load_image(


@@ -24,7 +24,7 @@ from diffusers import AutoencoderKL, CogVideoXDDIMScheduler, CogView3PlusPipelin
from diffusers.utils.testing_utils import (
enable_full_determinism,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -232,7 +232,7 @@ class CogView3PlusPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
@slow
@require_torch_gpu
@require_torch_accelerator
class CogView3PlusPipelineIntegrationTests(unittest.TestCase):
prompt = "A painting of a squirrel eating a burger."
@@ -250,7 +250,7 @@ class CogView3PlusPipelineIntegrationTests(unittest.TestCase):
generator = torch.Generator("cpu").manual_seed(0)
pipe = CogView3PlusPipeline.from_pretrained("THUDM/CogView3Plus-3b", torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
prompt = self.prompt
images = pipe(


@@ -34,13 +34,17 @@ from diffusers import (
from diffusers.pipelines.controlnet.pipeline_controlnet import MultiControlNetModel
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_max_memory_allocated,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
enable_full_determinism,
get_python_version,
is_torch_compile,
load_image,
load_numpy,
require_torch_2,
require_torch_gpu,
require_torch_accelerator,
run_test_in_subprocess,
slow,
torch_device,
@@ -703,17 +707,17 @@ class StableDiffusionMultiControlNetOneModelPipelineFastTests(
@slow
@require_torch_gpu
@require_torch_accelerator
class ControlNetPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_canny(self):
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
@@ -721,7 +725,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -748,7 +752,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -775,7 +779,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -802,7 +806,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -829,7 +833,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -856,7 +860,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -883,7 +887,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(5)
@@ -910,7 +914,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(5)
@@ -932,9 +936,9 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
assert np.abs(expected_image - image).max() < 8e-2
def test_sequential_cpu_offloading(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-seg")
@@ -943,7 +947,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
)
pipe.set_progress_bar_config(disable=None)
pipe.enable_attention_slicing()
pipe.enable_sequential_cpu_offload()
pipe.enable_sequential_cpu_offload(device=torch_device)
prompt = "house"
image = load_image(
@@ -957,7 +961,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
output_type="np",
)
mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
# make sure that less than 4 GB is allocated
assert mem_bytes < 4 * 10**9
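
End to end, the backend-agnostic memory measurement looks like the sketch below (pipeline call elided; helper names as imported at the top of this file):

from diffusers.utils.testing_utils import (
    backend_empty_cache,
    backend_max_memory_allocated,
    backend_reset_max_memory_allocated,
    backend_reset_peak_memory_stats,
    torch_device,
)

# reset counters so the peak reflects only the run below
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)

# ... run the offloaded pipeline ...

mem_bytes = backend_max_memory_allocated(torch_device)
assert mem_bytes < 4 * 10**9  # same 4 GB budget as the assertion above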
@@ -967,7 +971,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -1000,7 +1004,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
)
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -1041,7 +1045,7 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -1068,17 +1072,17 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionMultiControlNetPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_pose_and_canny(self):
controlnet_canny = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
@@ -1089,7 +1093,7 @@ class StableDiffusionMultiControlNetPipelineSlowTests(unittest.TestCase):
safety_checker=None,
controlnet=[controlnet_pose, controlnet_canny],
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)


@@ -39,7 +39,7 @@ from diffusers.utils.testing_utils import (
enable_full_determinism,
floats_tensor,
load_numpy,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -393,7 +393,7 @@ class StableDiffusionMultiControlNetPipelineFastTests(
@slow
@require_torch_gpu
@require_torch_accelerator
class ControlNetImg2ImgPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
@@ -411,7 +411,7 @@ class ControlNetImg2ImgPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)


@@ -40,7 +40,7 @@ from diffusers.utils.testing_utils import (
floats_tensor,
load_numpy,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -445,7 +445,7 @@ class MultiControlNetInpaintPipelineFastTests(
@slow
@require_torch_gpu
@require_torch_accelerator
class ControlNetInpaintPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
@@ -463,7 +463,7 @@ class ControlNetInpaintPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
"botp/stable-diffusion-v1-5-inpainting", safety_checker=None, controlnet=controlnet
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -509,7 +509,7 @@ class ControlNetInpaintPipelineSlowTests(unittest.TestCase):
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
)
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(33)


@@ -35,9 +35,10 @@ from diffusers.models.unets.unet_2d_blocks import UNetMidBlock2D
from diffusers.pipelines.controlnet.pipeline_controlnet import MultiControlNetModel
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
load_image,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -212,7 +213,7 @@ class StableDiffusionXLControlNetPipelineFastTests(
def test_save_load_optional_components(self):
self._test_save_load_optional_components()
@require_torch_gpu
@require_torch_accelerator
def test_stable_diffusion_xl_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -893,17 +894,17 @@ class StableDiffusionXLMultiControlNetOneModelPipelineFastTests(
@slow
@require_torch_gpu
@require_torch_accelerator
class ControlNetSDXLPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_canny(self):
controlnet = ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0")
@@ -911,7 +912,7 @@ class ControlNetSDXLPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet
)
pipe.enable_sequential_cpu_offload()
pipe.enable_sequential_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -934,7 +935,7 @@ class ControlNetSDXLPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet
)
pipe.enable_sequential_cpu_offload()
pipe.enable_sequential_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)


@@ -28,7 +28,12 @@ from diffusers import (
UNet2DConditionModel,
)
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import enable_full_determinism, floats_tensor, require_torch_gpu, torch_device
from diffusers.utils.testing_utils import (
enable_full_determinism,
floats_tensor,
require_torch_accelerator,
torch_device,
)
from ..pipeline_params import (
IMAGE_TO_IMAGE_IMAGE_PARAMS,
@@ -241,7 +246,7 @@ class ControlNetPipelineSDXLImg2ImgFastTests(
def test_save_load_optional_components(self):
pass
@require_torch_gpu
@require_torch_accelerator
def test_stable_diffusion_xl_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -250,12 +255,12 @@ class ControlNetPipelineSDXLImg2ImgFastTests(
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
image_slices = []


@@ -29,8 +29,9 @@ from diffusers import (
from diffusers.models import HunyuanDiT2DControlNetModel, HunyuanDiT2DMultiControlNetModel
from diffusers.utils import load_image
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -178,19 +179,19 @@ class HunyuanDiTControlNetPipelineFastTests(unittest.TestCase, PipelineTesterMix
@slow
@require_torch_gpu
@require_torch_accelerator
class HunyuanDiTControlNetPipelineSlowTests(unittest.TestCase):
pipeline_class = HunyuanDiTControlNetPipeline
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_canny(self):
controlnet = HunyuanDiT2DControlNetModel.from_pretrained(
@@ -199,7 +200,7 @@ class HunyuanDiTControlNetPipelineSlowTests(unittest.TestCase):
pipe = HunyuanDiTControlNetPipeline.from_pretrained(
"Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers", controlnet=controlnet, torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -238,7 +239,7 @@ class HunyuanDiTControlNetPipelineSlowTests(unittest.TestCase):
pipe = HunyuanDiTControlNetPipeline.from_pretrained(
"Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers", controlnet=controlnet, torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -277,7 +278,7 @@ class HunyuanDiTControlNetPipelineSlowTests(unittest.TestCase):
pipe = HunyuanDiTControlNetPipeline.from_pretrained(
"Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers", controlnet=controlnet, torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -318,7 +319,7 @@ class HunyuanDiTControlNetPipelineSlowTests(unittest.TestCase):
pipe = HunyuanDiTControlNetPipeline.from_pretrained(
"Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers", controlnet=controlnet, torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)


@@ -34,13 +34,14 @@ from diffusers import (
)
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
is_torch_compile,
load_image,
load_numpy,
require_accelerator,
require_torch_2,
require_torch_gpu,
require_torch_accelerator,
run_test_in_subprocess,
slow,
torch_device,
@@ -92,7 +93,7 @@ def _test_stable_diffusion_compile(in_queue, out_queue, timeout):
safety_checker=None,
torch_dtype=torch.float16,
)
pipe.to("cuda")
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
pipe.unet.to(memory_format=torch.channels_last)
@@ -334,12 +335,12 @@ class ControlNetXSPipelineFastTests(
@slow
@require_torch_gpu
@require_torch_accelerator
class ControlNetXSPipelineSlowTests(unittest.TestCase):
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_canny(self):
controlnet = ControlNetXSAdapter.from_pretrained(
@@ -348,7 +349,7 @@ class ControlNetXSPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionControlNetXSPipeline.from_pretrained(
"stabilityai/stable-diffusion-2-1-base", controlnet=controlnet, torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -374,7 +375,7 @@ class ControlNetXSPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionControlNetXSPipeline.from_pretrained(
"stabilityai/stable-diffusion-2-1-base", controlnet=controlnet, torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)


@@ -31,7 +31,14 @@ from diffusers import (
UNet2DConditionModel,
)
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import enable_full_determinism, load_image, require_torch_gpu, slow, torch_device
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
load_image,
require_torch_accelerator,
slow,
torch_device,
)
from diffusers.utils.torch_utils import randn_tensor
from ...models.autoencoders.vae import (
@@ -192,7 +199,7 @@ class StableDiffusionXLControlNetXSPipelineFastTests(
def test_inference_batch_single_identical(self):
self._test_inference_batch_single_identical(expected_max_diff=2e-3)
@require_torch_gpu
@require_torch_accelerator
# Copied from test_controlnet_sdxl.py
def test_stable_diffusion_xl_offloads(self):
pipes = []
@@ -202,12 +209,12 @@ class StableDiffusionXLControlNetXSPipelineFastTests(
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
image_slices = []
@@ -369,12 +376,12 @@ class StableDiffusionXLControlNetXSPipelineFastTests(
@slow
@require_torch_gpu
@require_torch_accelerator
class StableDiffusionXLControlNetXSPipelineSlowTests(unittest.TestCase):
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_canny(self):
controlnet = ControlNetXSAdapter.from_pretrained(
@@ -383,7 +390,7 @@ class StableDiffusionXLControlNetXSPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionXLControlNetXSPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, torch_dtype=torch.float16
)
pipe.enable_sequential_cpu_offload()
pipe.enable_sequential_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -407,7 +414,7 @@ class StableDiffusionXLControlNetXSPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionXLControlNetXSPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, torch_dtype=torch.float16
)
pipe.enable_sequential_cpu_offload()
pipe.enable_sequential_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)


@@ -19,7 +19,7 @@ import numpy as np
import torch
from diffusers import DDIMPipeline, DDIMScheduler, UNet2DModel
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, slow, torch_device
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_accelerator, slow, torch_device
from ..pipeline_params import UNCONDITIONAL_IMAGE_GENERATION_BATCH_PARAMS, UNCONDITIONAL_IMAGE_GENERATION_PARAMS
from ..test_pipelines_common import PipelineTesterMixin
@@ -99,7 +99,7 @@ class DDIMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
@slow
@require_torch_gpu
@require_torch_accelerator
class DDIMPipelineIntegrationTests(unittest.TestCase):
def test_inference_cifar10(self):
model_id = "google/ddpm-cifar10-32"


@@ -19,7 +19,7 @@ import numpy as np
import torch
from diffusers import DDPMPipeline, DDPMScheduler, UNet2DModel
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, slow, torch_device
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_accelerator, slow, torch_device
enable_full_determinism()
@@ -88,7 +88,7 @@ class DDPMPipelineFastTests(unittest.TestCase):
@slow
@require_torch_gpu
@require_torch_accelerator
class DDPMPipelineIntegrationTests(unittest.TestCase):
def test_inference_cifar10(self):
model_id = "google/ddpm-cifar10-32"


@@ -24,10 +24,13 @@ from diffusers import (
from diffusers.models.attention_processor import AttnAddedKVProcessor
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
load_numpy,
require_accelerator,
require_hf_hub_version_greater,
require_torch_gpu,
require_torch_accelerator,
require_transformers_version_greater,
skip_mps,
slow,
@@ -98,28 +101,28 @@ class IFPipelineFastTests(PipelineTesterMixin, IFPipelineTesterMixin, unittest.T
@slow
@require_torch_gpu
@require_torch_accelerator
class IFPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_if_text_to_image(self):
pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
pipe.unet.set_attn_processor(AttnAddedKVProcessor())
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
torch.cuda.reset_max_memory_allocated()
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
backend_reset_max_memory_allocated(torch_device)
backend_empty_cache(torch_device)
backend_reset_peak_memory_stats(torch_device)
generator = torch.Generator(device="cpu").manual_seed(0)
output = pipe(


@@ -23,11 +23,14 @@ from diffusers import IFImg2ImgPipeline
from diffusers.models.attention_processor import AttnAddedKVProcessor
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
floats_tensor,
load_numpy,
require_accelerator,
require_hf_hub_version_greater,
require_torch_gpu,
require_torch_accelerator,
require_transformers_version_greater,
skip_mps,
slow,
@@ -109,19 +112,19 @@ class IFImg2ImgPipelineFastTests(PipelineTesterMixin, IFPipelineTesterMixin, uni
@slow
@require_torch_gpu
@require_torch_accelerator
class IFImg2ImgPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_if_img2img(self):
pipe = IFImg2ImgPipeline.from_pretrained(
@@ -130,11 +133,11 @@ class IFImg2ImgPipelineSlowTests(unittest.TestCase):
torch_dtype=torch.float16,
)
pipe.unet.set_attn_processor(AttnAddedKVProcessor())
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
torch.cuda.reset_max_memory_allocated()
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
backend_reset_max_memory_allocated(torch_device)
backend_empty_cache(torch_device)
backend_reset_peak_memory_stats(torch_device)
image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device)
generator = torch.Generator(device="cpu").manual_seed(0)


@@ -23,11 +23,15 @@ from diffusers import IFImg2ImgSuperResolutionPipeline
from diffusers.models.attention_processor import AttnAddedKVProcessor
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_max_memory_allocated,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
floats_tensor,
load_numpy,
require_accelerator,
require_hf_hub_version_greater,
require_torch_gpu,
require_torch_accelerator,
require_transformers_version_greater,
skip_mps,
slow,
@@ -106,19 +110,19 @@ class IFImg2ImgSuperResolutionPipelineFastTests(PipelineTesterMixin, IFPipelineT
@slow
@require_torch_gpu
@require_torch_accelerator
class IFImg2ImgSuperResolutionPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_if_img2img_superresolution(self):
pipe = IFImg2ImgSuperResolutionPipeline.from_pretrained(
@@ -127,11 +131,11 @@ class IFImg2ImgSuperResolutionPipelineSlowTests(unittest.TestCase):
torch_dtype=torch.float16,
)
pipe.unet.set_attn_processor(AttnAddedKVProcessor())
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
torch.cuda.reset_max_memory_allocated()
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
backend_reset_max_memory_allocated(torch_device)
backend_empty_cache(torch_device)
backend_reset_peak_memory_stats(torch_device)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -151,7 +155,8 @@ class IFImg2ImgSuperResolutionPipelineSlowTests(unittest.TestCase):
assert image.shape == (256, 256, 3)
mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
assert mem_bytes < 12 * 10**9
expected_image = load_numpy(


@@ -23,11 +23,15 @@ from diffusers import IFInpaintingPipeline
from diffusers.models.attention_processor import AttnAddedKVProcessor
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_max_memory_allocated,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
floats_tensor,
load_numpy,
require_accelerator,
require_hf_hub_version_greater,
require_torch_gpu,
require_torch_accelerator,
require_transformers_version_greater,
skip_mps,
slow,
@@ -106,30 +110,30 @@ class IFInpaintingPipelineFastTests(PipelineTesterMixin, IFPipelineTesterMixin,
@slow
@require_torch_gpu
@require_torch_accelerator
class IFInpaintingPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_if_inpainting(self):
pipe = IFInpaintingPipeline.from_pretrained(
"DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16
)
pipe.unet.set_attn_processor(AttnAddedKVProcessor())
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)
image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device)
mask_image = floats_tensor((1, 3, 64, 64), rng=random.Random(1)).to(torch_device)
@@ -145,7 +149,7 @@ class IFInpaintingPipelineSlowTests(unittest.TestCase):
)
image = output.images[0]
mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
assert mem_bytes < 12 * 10**9
expected_image = load_numpy(


@@ -23,11 +23,15 @@ from diffusers import IFInpaintingSuperResolutionPipeline
from diffusers.models.attention_processor import AttnAddedKVProcessor
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_max_memory_allocated,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
floats_tensor,
load_numpy,
require_accelerator,
require_hf_hub_version_greater,
require_torch_gpu,
require_torch_accelerator,
require_transformers_version_greater,
skip_mps,
slow,
@@ -108,31 +112,31 @@ class IFInpaintingSuperResolutionPipelineFastTests(PipelineTesterMixin, IFPipeli
@slow
@require_torch_gpu
@require_torch_accelerator
class IFInpaintingSuperResolutionPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_if_inpainting_superresolution(self):
pipe = IFInpaintingSuperResolutionPipeline.from_pretrained(
"DeepFloyd/IF-II-L-v1.0", variant="fp16", torch_dtype=torch.float16
)
pipe.unet.set_attn_processor(AttnAddedKVProcessor())
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
# Super resolution test
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -154,7 +158,7 @@ class IFInpaintingSuperResolutionPipelineSlowTests(unittest.TestCase):
assert image.shape == (256, 256, 3)
mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
assert mem_bytes < 12 * 10**9
expected_image = load_numpy(


@@ -23,11 +23,15 @@ from diffusers import IFSuperResolutionPipeline
from diffusers.models.attention_processor import AttnAddedKVProcessor
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_max_memory_allocated,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
floats_tensor,
load_numpy,
require_accelerator,
require_hf_hub_version_greater,
require_torch_gpu,
require_torch_accelerator,
require_transformers_version_greater,
skip_mps,
slow,
@@ -101,31 +105,31 @@ class IFSuperResolutionPipelineFastTests(PipelineTesterMixin, IFPipelineTesterMi
@slow
@require_torch_gpu
@require_torch_accelerator
class IFSuperResolutionPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_if_superresolution(self):
pipe = IFSuperResolutionPipeline.from_pretrained(
"DeepFloyd/IF-II-L-v1.0", variant="fp16", torch_dtype=torch.float16
)
pipe.unet.set_attn_processor(AttnAddedKVProcessor())
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
# Super resolution test
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)
image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -141,7 +145,7 @@ class IFSuperResolutionPipelineSlowTests(unittest.TestCase):
assert image.shape == (256, 256, 3)
mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
assert mem_bytes < 12 * 10**9
expected_image = load_numpy(


@@ -30,7 +30,7 @@ from diffusers import (
from diffusers.utils.testing_utils import (
enable_full_determinism,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -299,7 +299,7 @@ class HunyuanDiTPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
@slow
@require_torch_gpu
@require_torch_accelerator
class HunyuanDiTPipelineIntegrationTests(unittest.TestCase):
prompt = "一个宇航员在骑马"
@@ -319,7 +319,7 @@ class HunyuanDiTPipelineIntegrationTests(unittest.TestCase):
pipe = HunyuanDiTPipeline.from_pretrained(
"XCLiu/HunyuanDiT-0523", revision="refs/pr/2", torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
prompt = self.prompt
image = pipe(


@@ -36,10 +36,11 @@ from diffusers import (
from diffusers.models.unets import I2VGenXLUNet
from diffusers.utils import is_xformers_available, load_image
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
skip_mps,
slow,
torch_device,
@@ -228,23 +229,23 @@ class I2VGenXLPipelineFastTests(SDFunctionTesterMixin, PipelineTesterMixin, unit
@slow
@require_torch_gpu
@require_torch_accelerator
class I2VGenXLPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_i2vgen_xl(self):
pipe = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16")
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/pix2pix/cat_6.png?download=true"


@@ -66,6 +66,7 @@ from diffusers.utils import (
)
from diffusers.utils.testing_utils import (
CaptureLogger,
backend_empty_cache,
enable_full_determinism,
floats_tensor,
get_python_version,
@@ -78,7 +79,7 @@ from diffusers.utils.testing_utils import (
require_hf_hub_version_greater,
require_onnxruntime,
require_torch_2,
require_torch_gpu,
require_torch_accelerator,
require_transformers_version_greater,
run_test_in_subprocess,
slow,
@@ -1150,7 +1151,7 @@ class CustomPipelineTests(unittest.TestCase):
assert conf_1 == conf_2
@slow
@require_torch_gpu
@require_torch_accelerator
def test_download_from_git(self):
# Because adaptive_avg_pool2d_backward_cuda
# does not have a deterministic implementation.
@@ -1364,7 +1365,7 @@ class PipelineFastTests(unittest.TestCase):
assert image_img2img.shape == (1, 32, 32, 3)
assert image_text2img.shape == (1, 64, 64, 3)
@require_torch_gpu
@require_torch_accelerator
def test_pipe_false_offload_warn(self):
unet = self.dummy_cond_unet()
scheduler = PNDMScheduler(skip_prk_steps=True)
@@ -1898,19 +1899,19 @@ class PipelineFastTests(unittest.TestCase):
@slow
@require_torch_gpu
@require_torch_accelerator
class PipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_smart_download(self):
model_id = "hf-internal-testing/unet-pipeline-dummy"
@@ -2102,7 +2103,7 @@ class PipelineSlowTests(unittest.TestCase):
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.enable_attention_slicing()
compel = Compel(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder)
@@ -2129,19 +2130,19 @@ class PipelineSlowTests(unittest.TestCase):
@nightly
@require_torch_gpu
@require_torch_accelerator
class PipelineNightlyTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_ddpm_ddim_equality_batched(self):
seed = 0