From a9dd86029e2f32bb528e25ee83c7f6f11898b456 Mon Sep 17 00:00:00 2001
From: Dhruv Nair <dhruv.nair@gmail.com>
Date: Mon, 22 Apr 2024 15:41:59 +0530
Subject: [PATCH] Fix Kandinsky V22 tests (#7699)

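Update the Kandinsky integration tests:

- Call `pipeline.to(...)` / `pipeline.enable_model_cpu_offload()` in
  place instead of rebinding `pipeline` to their return value.
- Seed with `torch.Generator(device="cpu")` instead of a CUDA
  generator, and create a freshly seeded generator before each decoder
  pipeline call.
- Compare outputs against the reference images with
  `numpy_cosine_similarity_distance` (threshold 1e-4) instead of
  `assert_mean_pixel_difference`.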
---
 tests/pipelines/kandinsky/test_kandinsky.py        |  2 +-
 tests/pipelines/kandinsky2_2/test_kandinsky.py     | 14 +++++++-------
 .../kandinsky2_2/test_kandinsky_controlnet.py      | 12 +++++++-----
 .../test_kandinsky_controlnet_img2img.py           |  9 ++++++---
 .../kandinsky2_2/test_kandinsky_img2img.py         | 10 ++++++----
 .../kandinsky2_2/test_kandinsky_inpaint.py         |  7 +++++--
 6 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/tests/pipelines/kandinsky/test_kandinsky.py b/tests/pipelines/kandinsky/test_kandinsky.py
index 28c7684044..8553ed96e9 100644
--- a/tests/pipelines/kandinsky/test_kandinsky.py
+++ b/tests/pipelines/kandinsky/test_kandinsky.py
@@ -299,7 +299,7 @@ class KandinskyPipelineIntegrationTests(unittest.TestCase):
         pipe_prior.to(torch_device)
 
         pipeline = KandinskyPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
-        pipeline = pipeline.to(torch_device)
+        pipeline.to(torch_device)
         pipeline.set_progress_bar_config(disable=None)
 
         prompt = "red cat, 4k photo"
diff --git a/tests/pipelines/kandinsky2_2/test_kandinsky.py b/tests/pipelines/kandinsky2_2/test_kandinsky.py
index 5b0e16c036..cbd9166efa 100644
--- a/tests/pipelines/kandinsky2_2/test_kandinsky.py
+++ b/tests/pipelines/kandinsky2_2/test_kandinsky.py
@@ -25,11 +25,12 @@ from diffusers.utils.testing_utils import (
     enable_full_determinism,
     floats_tensor,
     load_numpy,
+    numpy_cosine_similarity_distance,
     require_torch_gpu,
     slow,
 )
 
-from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
+from ..test_pipelines_common import PipelineTesterMixin
 
 
 enable_full_determinism()
@@ -248,12 +249,12 @@ class KandinskyV22PipelineIntegrationTests(unittest.TestCase):
         pipeline = KandinskyV22Pipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
         )
-        pipeline = pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload()
         pipeline.set_progress_bar_config(disable=None)
 
         prompt = "red cat, 4k photo"
 
-        generator = torch.Generator(device="cuda").manual_seed(0)
+        generator = torch.Generator(device="cpu").manual_seed(0)
         image_emb, zero_image_emb = pipe_prior(
             prompt,
             generator=generator,
@@ -261,7 +262,7 @@ class KandinskyV22PipelineIntegrationTests(unittest.TestCase):
             negative_prompt="",
         ).to_tuple()
 
-        generator = torch.Generator(device="cuda").manual_seed(0)
+        generator = torch.Generator(device="cpu").manual_seed(0)
         output = pipeline(
             image_embeds=image_emb,
             negative_image_embeds=zero_image_emb,
@@ -269,9 +270,8 @@ class KandinskyV22PipelineIntegrationTests(unittest.TestCase):
             num_inference_steps=3,
             output_type="np",
         )
-
         image = output.images[0]
-
         assert image.shape == (512, 512, 3)
 
-        assert_mean_pixel_difference(image, expected_image)
+        max_diff = numpy_cosine_similarity_distance(expected_image.flatten(), image.flatten())
+        assert max_diff < 1e-4
diff --git a/tests/pipelines/kandinsky2_2/test_kandinsky_controlnet.py b/tests/pipelines/kandinsky2_2/test_kandinsky_controlnet.py
index b849fa81f5..1f3219e0d6 100644
--- a/tests/pipelines/kandinsky2_2/test_kandinsky_controlnet.py
+++ b/tests/pipelines/kandinsky2_2/test_kandinsky_controlnet.py
@@ -33,10 +33,11 @@ from diffusers.utils.testing_utils import (
     load_image,
     load_numpy,
     nightly,
+    numpy_cosine_similarity_distance,
     require_torch_gpu,
 )
 
-from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
+from ..test_pipelines_common import PipelineTesterMixin
 
 
 enable_full_determinism()
@@ -260,12 +261,12 @@ class KandinskyV22ControlnetPipelineIntegrationTests(unittest.TestCase):
         pipeline = KandinskyV22ControlnetPipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-controlnet-depth", torch_dtype=torch.float16
         )
-        pipeline = pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload()
         pipeline.set_progress_bar_config(disable=None)
 
         prompt = "A robot, 4k photo"
 
-        generator = torch.Generator(device="cuda").manual_seed(0)
+        generator = torch.Generator(device="cpu").manual_seed(0)
         image_emb, zero_image_emb = pipe_prior(
             prompt,
             generator=generator,
@@ -273,7 +274,7 @@ class KandinskyV22ControlnetPipelineIntegrationTests(unittest.TestCase):
             negative_prompt="",
         ).to_tuple()
 
-        generator = torch.Generator(device="cuda").manual_seed(0)
+        generator = torch.Generator(device="cpu").manual_seed(0)
         output = pipeline(
             image_embeds=image_emb,
             negative_image_embeds=zero_image_emb,
@@ -287,4 +288,5 @@ class KandinskyV22ControlnetPipelineIntegrationTests(unittest.TestCase):
 
         assert image.shape == (512, 512, 3)
 
-        assert_mean_pixel_difference(image, expected_image)
+        max_diff = numpy_cosine_similarity_distance(expected_image.flatten(), image.flatten())
+        assert max_diff < 1e-4
diff --git a/tests/pipelines/kandinsky2_2/test_kandinsky_controlnet_img2img.py b/tests/pipelines/kandinsky2_2/test_kandinsky_controlnet_img2img.py
index 4f72649ac0..20944aa3d6 100644
--- a/tests/pipelines/kandinsky2_2/test_kandinsky_controlnet_img2img.py
+++ b/tests/pipelines/kandinsky2_2/test_kandinsky_controlnet_img2img.py
@@ -34,10 +34,11 @@ from diffusers.utils.testing_utils import (
     load_image,
     load_numpy,
     nightly,
+    numpy_cosine_similarity_distance,
     require_torch_gpu,
 )
 
-from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
+from ..test_pipelines_common import PipelineTesterMixin
 
 
 enable_full_determinism()
@@ -274,7 +275,7 @@ class KandinskyV22ControlnetImg2ImgPipelineIntegrationTests(unittest.TestCase):
         pipeline = KandinskyV22ControlnetImg2ImgPipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-controlnet-depth", torch_dtype=torch.float16
         )
-        pipeline = pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload()
 
         pipeline.set_progress_bar_config(disable=None)
 
@@ -289,6 +290,7 @@ class KandinskyV22ControlnetImg2ImgPipelineIntegrationTests(unittest.TestCase):
             num_inference_steps=5,
         ).to_tuple()
 
+        generator = torch.Generator(device="cpu").manual_seed(0)
         output = pipeline(
             image=init_image,
             image_embeds=image_emb,
@@ -306,4 +308,5 @@ class KandinskyV22ControlnetImg2ImgPipelineIntegrationTests(unittest.TestCase):
 
         assert image.shape == (512, 512, 3)
 
-        assert_mean_pixel_difference(image, expected_image)
+        max_diff = numpy_cosine_similarity_distance(expected_image.flatten(), image.flatten())
+        assert max_diff < 1e-4
diff --git a/tests/pipelines/kandinsky2_2/test_kandinsky_img2img.py b/tests/pipelines/kandinsky2_2/test_kandinsky_img2img.py
index dc35075d58..26d8b45cf9 100644
--- a/tests/pipelines/kandinsky2_2/test_kandinsky_img2img.py
+++ b/tests/pipelines/kandinsky2_2/test_kandinsky_img2img.py
@@ -33,11 +33,12 @@ from diffusers.utils.testing_utils import (
     floats_tensor,
     load_image,
     load_numpy,
+    numpy_cosine_similarity_distance,
     require_torch_gpu,
     slow,
 )
 
-from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
+from ..test_pipelines_common import PipelineTesterMixin
 
 
 enable_full_determinism()
@@ -270,8 +271,7 @@ class KandinskyV22Img2ImgPipelineIntegrationTests(unittest.TestCase):
         pipeline = KandinskyV22Img2ImgPipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
         )
-        pipeline = pipeline.enable_model_cpu_offload()
-
+        pipeline.enable_model_cpu_offload()
         pipeline.set_progress_bar_config(disable=None)
 
         generator = torch.Generator(device="cpu").manual_seed(0)
@@ -282,6 +282,7 @@ class KandinskyV22Img2ImgPipelineIntegrationTests(unittest.TestCase):
             negative_prompt="",
         ).to_tuple()
 
+        generator = torch.Generator(device="cpu").manual_seed(0)
         output = pipeline(
             image=init_image,
             image_embeds=image_emb,
@@ -298,4 +299,5 @@ class KandinskyV22Img2ImgPipelineIntegrationTests(unittest.TestCase):
 
         assert image.shape == (768, 768, 3)
 
-        assert_mean_pixel_difference(image, expected_image)
+        max_diff = numpy_cosine_similarity_distance(expected_image.flatten(), image.flatten())
+        assert max_diff < 1e-4
diff --git a/tests/pipelines/kandinsky2_2/test_kandinsky_inpaint.py b/tests/pipelines/kandinsky2_2/test_kandinsky_inpaint.py
index 54e85ae831..25cf4bbed4 100644
--- a/tests/pipelines/kandinsky2_2/test_kandinsky_inpaint.py
+++ b/tests/pipelines/kandinsky2_2/test_kandinsky_inpaint.py
@@ -34,12 +34,13 @@ from diffusers.utils.testing_utils import (
     is_flaky,
     load_image,
     load_numpy,
+    numpy_cosine_similarity_distance,
     require_torch_gpu,
     slow,
     torch_device,
 )
 
-from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
+from ..test_pipelines_common import PipelineTesterMixin
 
 
 enable_full_determinism()
@@ -338,6 +339,7 @@ class KandinskyV22InpaintPipelineIntegrationTests(unittest.TestCase):
             negative_prompt="",
         ).to_tuple()
 
+        generator = torch.Generator(device="cpu").manual_seed(0)
         output = pipeline(
             image=init_image,
             mask_image=mask,
@@ -354,4 +356,5 @@ class KandinskyV22InpaintPipelineIntegrationTests(unittest.TestCase):
 
         assert image.shape == (768, 768, 3)
 
-        assert_mean_pixel_difference(image, expected_image)
+        max_diff = numpy_cosine_similarity_distance(expected_image.flatten(), image.flatten())
+        assert max_diff < 1e-4