diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_instruction_pix2pix.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_instruction_pix2pix.py
index 13d8dfe218..8f5efb866a 100644
--- a/tests/pipelines/stable_diffusion/test_stable_diffusion_instruction_pix2pix.py
+++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_instruction_pix2pix.py
@@ -229,8 +229,8 @@ class StableDiffusionInstructPix2PixPipelineSlowTests(unittest.TestCase):
         gc.collect()
         torch.cuda.empty_cache()
 
-    def get_inputs(self, device, dtype=torch.float32, seed=0):
-        generator = torch.Generator(device=device).manual_seed(seed)
+    def get_inputs(self, seed=0):
+        generator = torch.manual_seed(seed)
         image = load_image(
             "https://huggingface.co/datasets/diffusers/test-arrays/resolve/main/stable_diffusion_pix2pix/example.jpg"
         )
@@ -253,12 +253,12 @@ class StableDiffusionInstructPix2PixPipelineSlowTests(unittest.TestCase):
         pipe.set_progress_bar_config(disable=None)
         pipe.enable_attention_slicing()
 
-        inputs = self.get_inputs(torch_device)
+        inputs = self.get_inputs()
         image = pipe(**inputs).images
         image_slice = image[0, -3:, -3:, -1].flatten()
 
         assert image.shape == (1, 512, 512, 3)
-        expected_slice = np.array([0.3214, 0.3252, 0.3313, 0.3261, 0.3332, 0.3351, 0.324, 0.3296, 0.3206])
+        expected_slice = np.array([0.5902, 0.6015, 0.6027, 0.5983, 0.6092, 0.6061, 0.5765, 0.5785, 0.5555])
         assert np.abs(expected_slice - image_slice).max() < 1e-3
 
     def test_stable_diffusion_pix2pix_k_lms(self):
@@ -270,12 +270,12 @@ class StableDiffusionInstructPix2PixPipelineSlowTests(unittest.TestCase):
         pipe.set_progress_bar_config(disable=None)
         pipe.enable_attention_slicing()
 
-        inputs = self.get_inputs(torch_device)
+        inputs = self.get_inputs()
         image = pipe(**inputs).images
         image_slice = image[0, -3:, -3:, -1].flatten()
 
         assert image.shape == (1, 512, 512, 3)
-        expected_slice = np.array([0.3893, 0.393, 0.3997, 0.4196, 0.4239, 0.4307, 0.4268, 0.4317, 0.419])
+        expected_slice = np.array([0.6578, 0.6817, 0.6972, 0.6761, 0.6856, 0.6916, 0.6428, 0.6516, 0.6301])
         assert np.abs(expected_slice - image_slice).max() < 1e-3
 
     def test_stable_diffusion_pix2pix_ddim(self):
@@ -287,12 +287,12 @@ class StableDiffusionInstructPix2PixPipelineSlowTests(unittest.TestCase):
         pipe.set_progress_bar_config(disable=None)
         pipe.enable_attention_slicing()
 
-        inputs = self.get_inputs(torch_device)
+        inputs = self.get_inputs()
         image = pipe(**inputs).images
         image_slice = image[0, -3:, -3:, -1].flatten()
 
         assert image.shape == (1, 512, 512, 3)
-        expected_slice = np.array([0.5151, 0.5186, 0.5133, 0.5176, 0.5147, 0.5198, 0.522, 0.5122, 0.5244])
+        expected_slice = np.array([0.3828, 0.3834, 0.3818, 0.3792, 0.3865, 0.3752, 0.3792, 0.3847, 0.3753])
         assert np.abs(expected_slice - image_slice).max() < 1e-3
 
     def test_stable_diffusion_pix2pix_intermediate_state(self):
@@ -306,13 +306,13 @@ class StableDiffusionInstructPix2PixPipelineSlowTests(unittest.TestCase):
                 latents = latents.detach().cpu().numpy()
                 assert latents.shape == (1, 4, 64, 64)
                 latents_slice = latents[0, -3:, -3:, -1]
-                expected_slice = np.array([-0.7178, -0.9165, -1.3906, 1.8174, 1.9482, 1.3652, 1.1533, 1.542, 1.2461])
+                expected_slice = np.array([-0.2388, -0.4673, -0.9775, 1.5127, 1.4414, 0.7778, 0.9907, 0.8472, 0.7788])
                 assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3
             elif step == 2:
                 latents = latents.detach().cpu().numpy()
                 assert latents.shape == (1, 4, 64, 64)
                 latents_slice = latents[0, -3:, -3:, -1]
-                expected_slice = np.array([-0.7183, -0.9253, -1.3857, 1.8174, 1.9766, 1.3574, 1.1533, 1.5244, 1.2539])
+                expected_slice = np.array([-0.2568, -0.4648, -0.9639, 1.5137, 1.4609, 0.7603, 0.9795, 0.8403, 0.7949])
                 assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3
 
         callback_fn.has_been_called = False
@@ -324,7 +324,7 @@ class StableDiffusionInstructPix2PixPipelineSlowTests(unittest.TestCase):
         pipe.set_progress_bar_config(disable=None)
         pipe.enable_attention_slicing()
 
-        inputs = self.get_inputs(torch_device, dtype=torch.float16)
+        inputs = self.get_inputs()
         pipe(**inputs, callback=callback_fn, callback_steps=1)
         assert callback_fn.has_been_called
         assert number_of_steps == 3
@@ -342,7 +342,7 @@ class StableDiffusionInstructPix2PixPipelineSlowTests(unittest.TestCase):
         pipe.enable_attention_slicing(1)
         pipe.enable_sequential_cpu_offload()
 
-        inputs = self.get_inputs(torch_device, dtype=torch.float16)
+        inputs = self.get_inputs()
         _ = pipe(**inputs)
 
         mem_bytes = torch.cuda.max_memory_allocated()
@@ -350,7 +350,7 @@ class StableDiffusionInstructPix2PixPipelineSlowTests(unittest.TestCase):
         assert mem_bytes < 2.2 * 10**9
 
     def test_stable_diffusion_pix2pix_pipeline_multiple_of_8(self):
-        inputs = self.get_inputs(torch_device)
+        inputs = self.get_inputs()
         # resize to resolution that is divisible by 8 but not 16 or 32
         inputs["image"] = inputs["image"].resize((504, 504))
 
@@ -369,5 +369,5 @@ class StableDiffusionInstructPix2PixPipelineSlowTests(unittest.TestCase):
         image_slice = image[255:258, 383:386, -1]
 
         assert image.shape == (504, 504, 3)
-        expected_slice = np.array([0.1834, 0.2046, 0.2429, 0.1825, 0.2201, 0.2576, 0.1968, 0.2185, 0.2487])
+        expected_slice = np.array([0.2726, 0.2529, 0.2664, 0.2655, 0.2641, 0.2642, 0.2591, 0.2649, 0.259])
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
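
The core of the change is that get_inputs() now seeds via torch.manual_seed(seed) instead of a device-bound torch.Generator, so the expected slices above are regenerated for the CPU random stream. Below is a minimal, illustrative sketch (not part of the diff) of why the default CPU generator yields the same noise on any machine; the make_latents helper is hypothetical and only stands in for how such test inputs could be produced.

# Illustrative only -- not part of the test file above.
import torch

def make_latents(seed: int, shape=(1, 4, 64, 64), device: str = "cpu") -> torch.Tensor:
    # torch.manual_seed() seeds and returns the default CPU torch.Generator,
    # so the random stream does not depend on the accelerator in use.
    generator = torch.manual_seed(seed)
    latents = torch.randn(shape, generator=generator)  # sampled on CPU
    return latents.to(device)  # moved to the execution device afterwards

# Same seed -> bit-identical noise, which is what makes fixed expected_slice
# values practical in these slow tests.
assert torch.equal(make_latents(0), make_latents(0))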