[tests] Add inference test slices for SD3 and remove unnecessary tests (#12106)

* update
* nuke LoC for inference slices
2026-01-27 17:22:53 +03:00 · 2025-08-11 18:36:09 +05:30
parent 4a9dbd56f6
commit 135df5be9d
3 changed files with 46 additions and 217 deletions
--- a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py
+++ b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py
@@ -124,37 +124,22 @@ class StableDiffusion3PipelineFastTests(unittest.TestCase, PipelineTesterMixin):
        }
        return inputs

-    def test_stable_diffusion_3_different_prompts(self):
-        pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)
+    def test_inference(self):
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)

        inputs = self.get_dummy_inputs(torch_device)
-        output_same_prompt = pipe(**inputs).images[0]
+        image = pipe(**inputs).images[0]
+        generated_slice = image.flatten()
+        generated_slice = np.concatenate([generated_slice[:8], generated_slice[-8:]])

-        inputs = self.get_dummy_inputs(torch_device)
-        inputs["prompt_2"] = "a different prompt"
-        inputs["prompt_3"] = "another different prompt"
-        output_different_prompts = pipe(**inputs).images[0]
+        # fmt: off
+        expected_slice = np.array([0.5112, 0.5228, 0.5235, 0.5524, 0.3188, 0.5017, 0.5574, 0.4899, 0.6812, 0.5991, 0.3908, 0.5213, 0.5582, 0.4457, 0.4204, 0.5616])
+        # fmt: on

-        max_diff = np.abs(output_same_prompt - output_different_prompts).max()
-
-        # Outputs should be different here
-        assert max_diff > 1e-2
-
-    def test_stable_diffusion_3_different_negative_prompts(self):
-        pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)
-
-        inputs = self.get_dummy_inputs(torch_device)
-        output_same_prompt = pipe(**inputs).images[0]
-
-        inputs = self.get_dummy_inputs(torch_device)
-        inputs["negative_prompt_2"] = "deformed"
-        inputs["negative_prompt_3"] = "blurry"
-        output_different_prompts = pipe(**inputs).images[0]
-
-        max_diff = np.abs(output_same_prompt - output_different_prompts).max()
-
-        # Outputs should be different here
-        assert max_diff > 1e-2
+        self.assertTrue(
+            np.allclose(generated_slice, expected_slice, atol=1e-3), "Output does not match expected slice."
+        )

    def test_fused_qkv_projections(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
@@ -268,40 +253,9 @@ class StableDiffusion3PipelineSlowTests(unittest.TestCase):

        image = pipe(**inputs).images[0]
        image_slice = image[0, :10, :10]
-        expected_slice = np.array(
-            [
-                0.4648,
-                0.4404,
-                0.4177,
-                0.5063,
-                0.4800,
-                0.4287,
-                0.5425,
-                0.5190,
-                0.4717,
-                0.5430,
-                0.5195,
-                0.4766,
-                0.5361,
-                0.5122,
-                0.4612,
-                0.4871,
-                0.4749,
-                0.4058,
-                0.4756,
-                0.4678,
-                0.3804,
-                0.4832,
-                0.4822,
-                0.3799,
-                0.5103,
-                0.5034,
-                0.3953,
-                0.5073,
-                0.4839,
-                0.3884,
-            ]
-        )
+        # fmt: off
+        expected_slice = np.array([0.4648, 0.4404, 0.4177, 0.5063, 0.4800, 0.4287, 0.5425, 0.5190, 0.4717, 0.5430, 0.5195, 0.4766, 0.5361, 0.5122, 0.4612, 0.4871, 0.4749, 0.4058, 0.4756, 0.4678, 0.3804, 0.4832, 0.4822, 0.3799, 0.5103, 0.5034, 0.3953, 0.5073, 0.4839, 0.3884])
+        # fmt: on

        max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), image_slice.flatten())

--- a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py
+++ b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py
@@ -128,37 +128,22 @@ class StableDiffusion3Img2ImgPipelineFastTests(PipelineLatentTesterMixin, unitte
        }
        return inputs

-    def test_stable_diffusion_3_img2img_different_prompts(self):
-        pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)
+    def test_inference(self):
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)

        inputs = self.get_dummy_inputs(torch_device)
-        output_same_prompt = pipe(**inputs).images[0]
+        image = pipe(**inputs).images[0]
+        generated_slice = image.flatten()
+        generated_slice = np.concatenate([generated_slice[:8], generated_slice[-8:]])

-        inputs = self.get_dummy_inputs(torch_device)
-        inputs["prompt_2"] = "a different prompt"
-        inputs["prompt_3"] = "another different prompt"
-        output_different_prompts = pipe(**inputs).images[0]
+        # fmt: off
+        expected_slice = np.array([0.4564, 0.5486, 0.4868, 0.5923, 0.3775, 0.5543, 0.4807, 0.4177, 0.3778, 0.5957, 0.5726, 0.4333, 0.6312, 0.5062, 0.4838, 0.5984])
+        # fmt: on

-        max_diff = np.abs(output_same_prompt - output_different_prompts).max()
-
-        # Outputs should be different here
-        assert max_diff > 1e-2
-
-    def test_stable_diffusion_3_img2img_different_negative_prompts(self):
-        pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)
-
-        inputs = self.get_dummy_inputs(torch_device)
-        output_same_prompt = pipe(**inputs).images[0]
-
-        inputs = self.get_dummy_inputs(torch_device)
-        inputs["negative_prompt_2"] = "deformed"
-        inputs["negative_prompt_3"] = "blurry"
-        output_different_prompts = pipe(**inputs).images[0]
-
-        max_diff = np.abs(output_same_prompt - output_different_prompts).max()
-
-        # Outputs should be different here
-        assert max_diff > 1e-2
+        self.assertTrue(
+            np.allclose(generated_slice, expected_slice, atol=1e-3), "Output does not match expected slice."
+        )

    @unittest.skip("Skip for now.")
    def test_multi_vae(self):
@@ -207,112 +192,16 @@ class StableDiffusion3Img2ImgPipelineSlowTests(unittest.TestCase):
        inputs = self.get_inputs(torch_device)
        image = pipe(**inputs).images[0]
        image_slice = image[0, :10, :10]
+
+        # fmt: off
        expected_slices = Expectations(
            {
-                ("xpu", 3): np.array(
-                    [
-                        0.5117,
-                        0.4421,
-                        0.3852,
-                        0.5044,
-                        0.4219,
-                        0.3262,
-                        0.5024,
-                        0.4329,
-                        0.3276,
-                        0.4978,
-                        0.4412,
-                        0.3355,
-                        0.4983,
-                        0.4338,
-                        0.3279,
-                        0.4893,
-                        0.4241,
-                        0.3129,
-                        0.4875,
-                        0.4253,
-                        0.3030,
-                        0.4961,
-                        0.4267,
-                        0.2988,
-                        0.5029,
-                        0.4255,
-                        0.3054,
-                        0.5132,
-                        0.4248,
-                        0.3222,
-                    ]
-                ),
-                ("cuda", 7): np.array(
-                    [
-                        0.5435,
-                        0.4673,
-                        0.5732,
-                        0.4438,
-                        0.3557,
-                        0.4912,
-                        0.4331,
-                        0.3491,
-                        0.4915,
-                        0.4287,
-                        0.347,
-                        0.4849,
-                        0.4355,
-                        0.3469,
-                        0.4871,
-                        0.4431,
-                        0.3538,
-                        0.4912,
-                        0.4521,
-                        0.3643,
-                        0.5059,
-                        0.4587,
-                        0.373,
-                        0.5166,
-                        0.4685,
-                        0.3845,
-                        0.5264,
-                        0.4746,
-                        0.3914,
-                        0.5342,
-                    ]
-                ),
-                ("cuda", 8): np.array(
-                    [
-                        0.5146,
-                        0.4385,
-                        0.3826,
-                        0.5098,
-                        0.4150,
-                        0.3218,
-                        0.5142,
-                        0.4312,
-                        0.3298,
-                        0.5127,
-                        0.4431,
-                        0.3411,
-                        0.5171,
-                        0.4424,
-                        0.3374,
-                        0.5088,
-                        0.4348,
-                        0.3242,
-                        0.5073,
-                        0.4380,
-                        0.3174,
-                        0.5132,
-                        0.4397,
-                        0.3115,
-                        0.5132,
-                        0.4343,
-                        0.3118,
-                        0.5219,
-                        0.4328,
-                        0.3256,
-                    ]
-                ),
+                ("xpu", 3): np.array([0.5117, 0.4421, 0.3852, 0.5044, 0.4219, 0.3262, 0.5024, 0.4329, 0.3276, 0.4978, 0.4412, 0.3355, 0.4983, 0.4338, 0.3279, 0.4893, 0.4241, 0.3129, 0.4875, 0.4253, 0.3030, 0.4961, 0.4267, 0.2988, 0.5029, 0.4255, 0.3054, 0.5132, 0.4248, 0.3222]),
+                ("cuda", 7): np.array([0.5435, 0.4673, 0.5732, 0.4438, 0.3557, 0.4912, 0.4331, 0.3491, 0.4915, 0.4287, 0.347, 0.4849, 0.4355, 0.3469, 0.4871, 0.4431, 0.3538, 0.4912, 0.4521, 0.3643, 0.5059, 0.4587, 0.373, 0.5166, 0.4685, 0.3845, 0.5264, 0.4746, 0.3914, 0.5342]),
+                ("cuda", 8): np.array([0.5146, 0.4385, 0.3826, 0.5098, 0.4150, 0.3218, 0.5142, 0.4312, 0.3298, 0.5127, 0.4431, 0.3411, 0.5171, 0.4424, 0.3374, 0.5088, 0.4348, 0.3242, 0.5073, 0.4380, 0.3174, 0.5132, 0.4397, 0.3115, 0.5132, 0.4343, 0.3118, 0.5219, 0.4328, 0.3256]),
            }
        )
+        # fmt: on

        expected_slice = expected_slices.get_expectation()

--- a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_inpaint.py
+++ b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_inpaint.py
@@ -132,37 +132,23 @@ class StableDiffusion3InpaintPipelineFastTests(PipelineLatentTesterMixin, unitte
        }
        return inputs

-    def test_stable_diffusion_3_inpaint_different_prompts(self):
-        pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)
+    def test_inference(self):
+        components = self.get_dummy_components()
+        pipe = self.pipeline_class(**components)

        inputs = self.get_dummy_inputs(torch_device)
-        output_same_prompt = pipe(**inputs).images[0]
+        image = pipe(**inputs).images[0]
+        generated_slice = image.flatten()
+        generated_slice = np.concatenate([generated_slice[:8], generated_slice[-8:]])

-        inputs = self.get_dummy_inputs(torch_device)
-        inputs["prompt_2"] = "a different prompt"
-        inputs["prompt_3"] = "another different prompt"
-        output_different_prompts = pipe(**inputs).images[0]
+        # fmt: off
+        expected_slice = np.array([0.5035, 0.6661, 0.5859, 0.413, 0.4224, 0.4234, 0.7181, 0.5062, 0.5183, 0.6877, 0.5074, 0.585, 0.6111, 0.5422, 0.5306, 0.5891])
+        # fmt: on

-        max_diff = np.abs(output_same_prompt - output_different_prompts).max()
-
-        # Outputs should be different here
-        assert max_diff > 1e-2
-
-    def test_stable_diffusion_3_inpaint_different_negative_prompts(self):
-        pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)
-
-        inputs = self.get_dummy_inputs(torch_device)
-        output_same_prompt = pipe(**inputs).images[0]
-
-        inputs = self.get_dummy_inputs(torch_device)
-        inputs["negative_prompt_2"] = "deformed"
-        inputs["negative_prompt_3"] = "blurry"
-        output_different_prompts = pipe(**inputs).images[0]
-
-        max_diff = np.abs(output_same_prompt - output_different_prompts).max()
-
-        # Outputs should be different here
-        assert max_diff > 1e-2
+        self.assertTrue(
+            np.allclose(generated_slice, expected_slice, atol=1e-3), "Output does not match expected slice."
+        )

+    @unittest.skip("Skip for now.")
    def test_multi_vae(self):
        pass