From 7a587349943325e667866971a36996a56fcff143 Mon Sep 17 00:00:00 2001 From: Yao Matrix Date: Tue, 23 Sep 2025 21:01:45 -0700 Subject: [PATCH] xpu enabling for 4 cases (#12345) Signed-off-by: Yao, Matrix --- .../modular_pipelines/components_manager.py | 13 ++++++++++--- tests/lora/test_lora_layers_hunyuanvideo.py | 1 + 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/diffusers/modular_pipelines/components_manager.py b/src/diffusers/modular_pipelines/components_manager.py index f48a227e2e..ed847fa414 100644 --- a/src/diffusers/modular_pipelines/components_manager.py +++ b/src/diffusers/modular_pipelines/components_manager.py @@ -25,6 +25,7 @@ from ..utils import ( is_accelerate_available, logging, ) +from ..utils.torch_utils import get_device if is_accelerate_available(): @@ -161,7 +162,9 @@ class AutoOffloadStrategy: current_module_size = model.get_memory_footprint() - mem_on_device = torch.cuda.mem_get_info(execution_device.index)[0] + device_type = execution_device.type + device_module = getattr(torch, device_type, torch.cuda) + mem_on_device = device_module.mem_get_info(execution_device.index)[0] mem_on_device = mem_on_device - self.memory_reserve_margin if current_module_size < mem_on_device: return [] @@ -301,7 +304,7 @@ class ComponentsManager: cm.add("vae", vae_model, collection="sdxl") # Enable auto offloading - cm.enable_auto_cpu_offload(device="cuda") + cm.enable_auto_cpu_offload() # Retrieve components unet = cm.get_one(name="unet", collection="sdxl") @@ -490,6 +493,8 @@ class ComponentsManager: gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() + if torch.xpu.is_available(): + torch.xpu.empty_cache() # YiYi TODO: rename to search_components for now, may remove this method def search_components( @@ -678,7 +683,7 @@ class ComponentsManager: return get_return_dict(matches, return_dict_with_names) - def enable_auto_cpu_offload(self, device: Union[str, int, torch.device] = "cuda", memory_reserve_margin="3GB"): + def enable_auto_cpu_offload(self, device: Union[str, int, torch.device] = None, memory_reserve_margin="3GB"): """ Enable automatic CPU offloading for all components. @@ -704,6 +709,8 @@ class ComponentsManager: self.disable_auto_cpu_offload() offload_strategy = AutoOffloadStrategy(memory_reserve_margin=memory_reserve_margin) + if device is None: + device = get_device() device = torch.device(device) if device.index is None: device = torch.device(f"{device.type}:{0}") diff --git a/tests/lora/test_lora_layers_hunyuanvideo.py b/tests/lora/test_lora_layers_hunyuanvideo.py index 7ea0f1fcc9..cfd5d3146a 100644 --- a/tests/lora/test_lora_layers_hunyuanvideo.py +++ b/tests/lora/test_lora_layers_hunyuanvideo.py @@ -253,6 +253,7 @@ class HunyuanVideoLoRAIntegrationTests(unittest.TestCase): expected_slices = Expectations( { ("cuda", 7): np.array([0.1013, 0.1924, 0.0078, 0.1021, 0.1929, 0.0078, 0.1023, 0.1919, 0.7402, 0.104, 0.4482, 0.7354, 0.0925, 0.4382, 0.7275, 0.0815]), + ("xpu", 3): np.array([0.1013, 0.1924, 0.0078, 0.1021, 0.1929, 0.0078, 0.1023, 0.1919, 0.7402, 0.104, 0.4482, 0.7354, 0.0925, 0.4382, 0.7275, 0.0815]), } ) # fmt: on