enable semantic diffusion and stable diffusion panorama cases on XPU (#11459)
Signed-off-by: Yao Matrix <matrix.yao@intel.com>
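The diff below migrates two pipeline test suites off CUDA-only calls (torch.cuda.empty_cache(), @require_torch_gpu, hard-coded torch_device = "cuda") onto the device-agnostic helpers from diffusers.utils.testing_utils, so the same tests also run on Intel XPU. As a minimal sketch of the dispatch pattern behind a helper like backend_empty_cache (assuming a PyTorch build that exposes torch.xpu; the real helper is a dispatch table in diffusers.utils.testing_utils and covers more backends):

import torch

def backend_empty_cache_sketch(device: str) -> None:
    # Release cached allocator memory on whichever accelerator `device` names.
    if device == "cuda":
        torch.cuda.empty_cache()
    elif device == "xpu":
        torch.xpu.empty_cache()  # Intel GPU; assumes torch.xpu is available
    elif device == "mps":
        torch.mps.empty_cache()
    # "cpu" has no allocator cache, so there is nothing to release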
@@ -25,11 +25,11 @@ from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer
 from diffusers import AutoencoderKL, DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler, UNet2DConditionModel
 from diffusers.pipelines.semantic_stable_diffusion import SemanticStableDiffusionPipeline as StableDiffusionPipeline
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     nightly,
-    require_accelerator,
-    require_torch_gpu,
+    require_torch_accelerator,
     torch_device,
 )
 
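The @require_torch_gpu decorator replaced throughout this diff skips a test unless CUDA is available; @require_torch_accelerator admits any supported accelerator. A hedged sketch of that gating logic, assuming only CUDA and XPU need detecting (the real decorator in diffusers.utils.testing_utils recognizes more devices):

import unittest

import torch

def require_torch_accelerator_sketch(test_case):
    # Skip the test unless some non-CPU torch backend is usable.
    has_accelerator = torch.cuda.is_available() or (
        hasattr(torch, "xpu") and torch.xpu.is_available()
    )
    return unittest.skipUnless(has_accelerator, "test requires an accelerator")(test_case)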
@@ -42,13 +42,13 @@ class SafeDiffusionPipelineFastTests(unittest.TestCase):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     @property
     def dummy_image(self):
@@ -238,7 +238,7 @@ class SafeDiffusionPipelineFastTests(unittest.TestCase):
         image = pipe("example prompt", num_inference_steps=2).images[0]
         assert image is not None
 
-    @require_accelerator
+    @require_torch_accelerator
     def test_semantic_diffusion_fp16(self):
         """Test that stable diffusion works with fp16"""
         unet = self.dummy_cond_unet
@@ -272,22 +272,21 @@ class SafeDiffusionPipelineFastTests(unittest.TestCase):
 
 
 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class SemanticDiffusionPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def test_positive_guidance(self):
-        torch_device = "cuda"
         pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
         pipe = pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
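Deleting the hard-coded torch_device = "cuda" lines lets the module-level torch_device imported from diffusers.utils.testing_utils take effect, so each test targets whatever device the environment provides. A simplified sketch of that resolution order, assuming CUDA is preferred when present (the actual logic also honors environment overrides, which this sketch omits):

import torch

def resolve_torch_device_sketch() -> str:
    # Pick the best available device, mirroring how a module-level
    # torch_device constant might be initialized.
    if torch.cuda.is_available():
        return "cuda"
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return "xpu"  # Intel GPU
    if torch.backends.mps.is_available():
        return "mps"  # Apple Silicon
    return "cpu"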
@@ -370,7 +369,6 @@ class SemanticDiffusionPipelineIntegrationTests(unittest.TestCase):
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 
     def test_negative_guidance(self):
-        torch_device = "cuda"
         pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
         pipe = pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
@@ -453,7 +451,6 @@ class SemanticDiffusionPipelineIntegrationTests(unittest.TestCase):
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 
     def test_multi_cond_guidance(self):
-        torch_device = "cuda"
         pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
         pipe = pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
@@ -536,7 +533,6 @@ class SemanticDiffusionPipelineIntegrationTests(unittest.TestCase):
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 
     def test_guidance_fp16(self):
-        torch_device = "cuda"
         pipe = StableDiffusionPipeline.from_pretrained(
             "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
         )
 
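With the hard-coded device gone, test_guidance_fp16 loads fp16 weights and moves the pipeline to torch_device, so one code path covers CUDA and XPU. A hypothetical usage sketch of the same pattern (the prompt and step count are placeholders; assumes an XPU-enabled PyTorch build):

import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
)
pipe = pipe.to("xpu")  # or "cuda"; any accelerator torch_device resolves to
image = pipe("a photo of an astronaut", num_inference_steps=2).images[0]

The hunks that follow apply the same migration to the Stable Diffusion panorama test suite.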
@@ -29,7 +29,17 @@ from diffusers import (
     StableDiffusionPanoramaPipeline,
     UNet2DConditionModel,
 )
-from diffusers.utils.testing_utils import enable_full_determinism, nightly, require_torch_gpu, skip_mps, torch_device
+from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    backend_max_memory_allocated,
+    backend_reset_max_memory_allocated,
+    backend_reset_peak_memory_stats,
+    enable_full_determinism,
+    nightly,
+    require_torch_accelerator,
+    skip_mps,
+    torch_device,
+)
 
 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
 from ..test_pipelines_common import (
@@ -267,17 +277,17 @@ class StableDiffusionPanoramaPipelineFastTests(
 
 
 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class StableDiffusionPanoramaNightlyTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def get_inputs(self, seed=0):
         generator = torch.manual_seed(seed)
@@ -415,9 +425,9 @@ class StableDiffusionPanoramaNightlyTests(unittest.TestCase):
         assert number_of_steps == 3
 
     def test_stable_diffusion_panorama_pipeline_with_sequential_cpu_offloading(self):
-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        backend_empty_cache(torch_device)
+        backend_reset_max_memory_allocated(torch_device)
+        backend_reset_peak_memory_stats(torch_device)
 
         model_ckpt = "stabilityai/stable-diffusion-2-base"
         scheduler = DDIMScheduler.from_pretrained(model_ckpt, subfolder="scheduler")
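The three backend_* calls above mirror torch.cuda's memory-statistics API across accelerators. A hedged sketch of two of these helpers, assuming recent PyTorch XPU builds expose the matching torch.xpu functions (the real implementations are dispatch tables in diffusers.utils.testing_utils):

import torch

def backend_reset_peak_memory_stats_sketch(device: str) -> None:
    # Zero the peak-allocation counters before a measured run.
    if device == "cuda":
        torch.cuda.reset_peak_memory_stats()
    elif device == "xpu":
        torch.xpu.reset_peak_memory_stats()  # assumed present on XPU builds

def backend_max_memory_allocated_sketch(device: str) -> int:
    # Peak bytes allocated since the last reset on the given device.
    if device == "cuda":
        return torch.cuda.max_memory_allocated()
    if device == "xpu":
        return torch.xpu.max_memory_allocated()  # assumed present on XPU builds
    return 0  # no peak tracking for CPU in this sketch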
@@ -429,6 +439,6 @@ class StableDiffusionPanoramaNightlyTests(unittest.TestCase):
         inputs = self.get_inputs()
         _ = pipe(**inputs)
 
-        mem_bytes = torch.cuda.max_memory_allocated()
+        mem_bytes = backend_max_memory_allocated(torch_device)
         # make sure that less than 5.5 GB is allocated
         assert mem_bytes < 5.5 * 10**9