diff --git a/tests/pipelines/audioldm/test_audioldm.py b/tests/pipelines/audioldm/test_audioldm.py
index 516cea76b7..0a2a44bf48 100644
--- a/tests/pipelines/audioldm/test_audioldm.py
+++ b/tests/pipelines/audioldm/test_audioldm.py
@@ -359,7 +359,7 @@ class AudioLDMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         self._test_attention_slicing_forward_pass(test_mean_pixel_difference=False)

     def test_inference_batch_single_identical(self):
-        self._test_inference_batch_single_identical(test_mean_pixel_difference=False)
+        self._test_inference_batch_single_identical()

     @unittest.skipIf(
         torch_device != "cuda" or not is_xformers_available(),
diff --git a/tests/pipelines/audioldm2/test_audioldm2.py b/tests/pipelines/audioldm2/test_audioldm2.py
index 33343d86ab..6fc0d66d4b 100644
--- a/tests/pipelines/audioldm2/test_audioldm2.py
+++ b/tests/pipelines/audioldm2/test_audioldm2.py
@@ -459,7 +459,7 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):

     def test_inference_batch_single_identical(self):
         # increase tolerance from 1e-4 -> 2e-4 to account for large composite model
-        self._test_inference_batch_single_identical(test_mean_pixel_difference=False, expected_max_diff=2e-4)
+        self._test_inference_batch_single_identical(expected_max_diff=2e-4)

     def test_save_load_local(self):
         # increase tolerance from 1e-4 -> 2e-4 to account for large composite model
diff --git a/tests/pipelines/dit/test_dit.py b/tests/pipelines/dit/test_dit.py
index 8f4d11ec38..0edc8cf323 100644
--- a/tests/pipelines/dit/test_dit.py
+++ b/tests/pipelines/dit/test_dit.py
@@ -96,7 +96,7 @@ class DiTPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         self.assertLessEqual(max_diff, 1e-3)

     def test_inference_batch_single_identical(self):
-        self._test_inference_batch_single_identical(relax_max_difference=True, expected_max_diff=1e-3)
+        self._test_inference_batch_single_identical(expected_max_diff=1e-3)

     @unittest.skipIf(
         torch_device != "cuda" or not is_xformers_available(),
diff --git a/tests/pipelines/kandinsky/test_kandinsky_prior.py b/tests/pipelines/kandinsky/test_kandinsky_prior.py
index b9f78ee0e8..bdc584968a 100644
--- a/tests/pipelines/kandinsky/test_kandinsky_prior.py
+++ b/tests/pipelines/kandinsky/test_kandinsky_prior.py
@@ -224,15 +224,7 @@ class KandinskyPriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase):

     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-        test_mean_pixel_difference = False
-
-        self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            test_mean_pixel_difference=test_mean_pixel_difference,
-        )
+        self._test_inference_batch_single_identical(expected_max_diff=1e-2)

     @skip_mps
     def test_attention_slicing_forward_pass(self):
diff --git a/tests/pipelines/kandinsky_v22/test_kandinsky_prior.py b/tests/pipelines/kandinsky_v22/test_kandinsky_prior.py
index 317e822a46..a0de5cceeb 100644
--- a/tests/pipelines/kandinsky_v22/test_kandinsky_prior.py
+++ b/tests/pipelines/kandinsky_v22/test_kandinsky_prior.py
@@ -224,15 +224,7 @@ class KandinskyV22PriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase)

     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-        test_mean_pixel_difference = False
-
-        self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            test_mean_pixel_difference=test_mean_pixel_difference,
-        )
+        self._test_inference_batch_single_identical(expected_max_diff=1e-3)

     @skip_mps
     def test_attention_slicing_forward_pass(self):
diff --git a/tests/pipelines/kandinsky_v22/test_kandinsky_prior_emb2emb.py b/tests/pipelines/kandinsky_v22/test_kandinsky_prior_emb2emb.py
index f71cbfcd0b..89b603e9fc 100644
--- a/tests/pipelines/kandinsky_v22/test_kandinsky_prior_emb2emb.py
+++ b/tests/pipelines/kandinsky_v22/test_kandinsky_prior_emb2emb.py
@@ -234,15 +234,7 @@ class KandinskyV22PriorEmb2EmbPipelineFastTests(PipelineTesterMixin, unittest.Te

     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-        test_mean_pixel_difference = False
-
-        self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            test_mean_pixel_difference=test_mean_pixel_difference,
-        )
+        self._test_inference_batch_single_identical(expected_max_diff=1e-2)

     @skip_mps
     def test_attention_slicing_forward_pass(self):
diff --git a/tests/pipelines/musicldm/test_musicldm.py b/tests/pipelines/musicldm/test_musicldm.py
index ea4c52aee1..4bf03569bb 100644
--- a/tests/pipelines/musicldm/test_musicldm.py
+++ b/tests/pipelines/musicldm/test_musicldm.py
@@ -373,7 +373,7 @@ class MusicLDMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         self._test_attention_slicing_forward_pass(test_mean_pixel_difference=False)

     def test_inference_batch_single_identical(self):
-        self._test_inference_batch_single_identical(test_mean_pixel_difference=False)
+        self._test_inference_batch_single_identical()

     @unittest.skipIf(
         torch_device != "cuda" or not is_xformers_available(),
diff --git a/tests/pipelines/shap_e/test_shap_e.py b/tests/pipelines/shap_e/test_shap_e.py
index 27dfcb5df3..3e944eba42 100644
--- a/tests/pipelines/shap_e/test_shap_e.py
+++ b/tests/pipelines/shap_e/test_shap_e.py
@@ -44,11 +44,11 @@ class ShapEPipelineFastTests(PipelineTesterMixin, unittest.TestCase):

     @property
     def text_embedder_hidden_size(self):
-        return 32
+        return 16

     @property
     def time_input_dim(self):
-        return 32
+        return 16

     @property
     def time_embed_dim(self):
@@ -201,14 +201,7 @@ class ShapEPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         self._test_inference_batch_consistent(batch_sizes=[1, 2])

     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-
-        self._test_inference_batch_single_identical(
-            batch_size=2,
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-        )
+        self._test_inference_batch_single_identical(batch_size=2, expected_max_diff=6e-3)

     def test_num_images_per_prompt(self):
         components = self.get_dummy_components()
diff --git a/tests/pipelines/shap_e/test_shap_e_img2img.py b/tests/pipelines/shap_e/test_shap_e_img2img.py
index 2b1eccf196..35b6f594da 100644
--- a/tests/pipelines/shap_e/test_shap_e_img2img.py
+++ b/tests/pipelines/shap_e/test_shap_e_img2img.py
@@ -52,11 +52,11 @@ class ShapEImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):

     @property
     def text_embedder_hidden_size(self):
-        return 32
+        return 16

     @property
     def time_input_dim(self):
-        return 32
+        return 16

     @property
     def time_embed_dim(self):
@@ -71,10 +71,10 @@ class ShapEImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         torch.manual_seed(0)
         config = CLIPVisionConfig(
             hidden_size=self.text_embedder_hidden_size,
-            image_size=64,
+            image_size=32,
             projection_dim=self.text_embedder_hidden_size,
-            intermediate_size=37,
-            num_attention_heads=4,
+            intermediate_size=24,
+            num_attention_heads=2,
             num_channels=3,
             num_hidden_layers=5,
             patch_size=1,
@@ -170,7 +170,7 @@ class ShapEImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         return components

     def get_dummy_inputs(self, device, seed=0):
-        input_image = floats_tensor((1, 3, 64, 64), rng=random.Random(seed)).to(device)
+        input_image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device)

         if str(device).startswith("mps"):
             generator = torch.manual_seed(seed)
@@ -219,15 +219,12 @@ class ShapEImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):

     def test_inference_batch_consistent(self):
         # NOTE: Larger batch sizes cause this test to timeout, only test on smaller batches
-        self._test_inference_batch_consistent(batch_sizes=[1, 2])
+        self._test_inference_batch_consistent(batch_sizes=[2])

     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
         self._test_inference_batch_single_identical(
             batch_size=2,
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
+            expected_max_diff=5e-3,
         )

     def test_num_images_per_prompt(self):
diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
index 95762e3642..e7b9aa6391 100644
--- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py
+++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
@@ -499,14 +499,7 @@ class StableDiffusionPipelineFastTests(
         negative_prompt = None
         num_images_per_prompt = 1
         logger = logging.get_logger("diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion")
-
-        prompt = 25 * "@"
-        with CaptureLogger(logger) as cap_logger_3:
-            negative_text_embeddings_3, text_embeddings_3 = sd_pipe.encode_prompt(
-                prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt
-            )
-        if negative_text_embeddings_3 is not None:
-            text_embeddings_3 = torch.cat([negative_text_embeddings_3, text_embeddings_3])
+        logger.setLevel(logging.WARNING)

         prompt = 100 * "@"
         with CaptureLogger(logger) as cap_logger:
@@ -516,6 +509,9 @@ class StableDiffusionPipelineFastTests(
         if negative_text_embeddings is not None:
             text_embeddings = torch.cat([negative_text_embeddings, text_embeddings])

+        # 100 - 77 + 1 (BOS token) + 1 (EOS token) = 25
+        assert cap_logger.out.count("@") == 25
+
         negative_prompt = "Hello"
         with CaptureLogger(logger) as cap_logger_2:
             negative_text_embeddings_2, text_embeddings_2 = sd_pipe.encode_prompt(
@@ -524,12 +520,18 @@ class StableDiffusionPipelineFastTests(
         if negative_text_embeddings_2 is not None:
             text_embeddings_2 = torch.cat([negative_text_embeddings_2, text_embeddings_2])

+        assert cap_logger.out == cap_logger_2.out
+
+        prompt = 25 * "@"
+        with CaptureLogger(logger) as cap_logger_3:
+            negative_text_embeddings_3, text_embeddings_3 = sd_pipe.encode_prompt(
+                prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt
+            )
+        if negative_text_embeddings_3 is not None:
+            text_embeddings_3 = torch.cat([negative_text_embeddings_3, text_embeddings_3])
+
         assert text_embeddings_3.shape == text_embeddings_2.shape == text_embeddings.shape
         assert text_embeddings.shape[1] == 77
-
-        assert cap_logger.out == cap_logger_2.out
-        # 100 - 77 + 1 (BOS token) + 1 (EOS token) = 25
-        assert cap_logger.out.count("@") == 25
         assert cap_logger_3.out == ""

     def test_stable_diffusion_height_width_opt(self):
diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
index 3842dda2e5..2fa4605889 100644
--- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
+++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
@@ -250,6 +250,7 @@ class StableDiffusion2PipelineFastTests(
         negative_prompt = None
         num_images_per_prompt = 1
         logger = logging.get_logger("diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion")
+        logger.setLevel(logging.WARNING)

         prompt = 25 * "@"
         with CaptureLogger(logger) as cap_logger_3:
diff --git a/tests/pipelines/stable_unclip/test_stable_unclip.py b/tests/pipelines/stable_unclip/test_stable_unclip.py
index 8d5edda169..f7affbe997 100644
--- a/tests/pipelines/stable_unclip/test_stable_unclip.py
+++ b/tests/pipelines/stable_unclip/test_stable_unclip.py
@@ -182,9 +182,7 @@ class StableUnCLIPPipelineFastTests(
     # Overriding PipelineTesterMixin::test_inference_batch_single_identical
     # because UnCLIP undeterminism requires a looser check.
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device in ["cpu", "mps"]
-
-        self._test_inference_batch_single_identical(test_max_difference=test_max_difference)
+        self._test_inference_batch_single_identical(expected_max_diff=1e-3)


 @slow
diff --git a/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py b/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py
index 52581eb574..9bbde46e4d 100644
--- a/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py
+++ b/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py
@@ -196,9 +196,7 @@ class StableUnCLIPImg2ImgPipelineFastTests(
     # Overriding PipelineTesterMixin::test_inference_batch_single_identical
     # because undeterminism requires a looser check.
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device in ["cpu", "mps"]
-
-        self._test_inference_batch_single_identical(test_max_difference=test_max_difference)
+        self._test_inference_batch_single_identical(expected_max_diff=1e-3)

     @unittest.skipIf(
         torch_device != "cuda" or not is_xformers_available(),
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 157ff2b699..b1eebbe312 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -374,11 +374,11 @@ class PipelineTesterMixin:
             f"Required optional parameters not present: {remaining_required_optional_parameters}",
         )

-    def test_inference_batch_consistent(self, batch_sizes=[2, 4, 13]):
+    def test_inference_batch_consistent(self, batch_sizes=[2]):
         self._test_inference_batch_consistent(batch_sizes=batch_sizes)

     def _test_inference_batch_consistent(
-        self, batch_sizes=[2, 4, 13], additional_params_copy_to_batched_inputs=["num_inference_steps"]
+        self, batch_sizes=[2], additional_params_copy_to_batched_inputs=["num_inference_steps"]
     ):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
@@ -386,137 +386,103 @@ class PipelineTesterMixin:
         pipe.set_progress_bar_config(disable=None)

         inputs = self.get_dummy_inputs(torch_device)
+        inputs["generator"] = self.get_generator(0)

         logger = logging.get_logger(pipe.__module__)
         logger.setLevel(level=diffusers.logging.FATAL)

-        # batchify inputs
+        # prepare batched inputs
+        batched_inputs = []
         for batch_size in batch_sizes:
-            batched_inputs = {}
-            for name, value in inputs.items():
-                if name in self.batch_params:
-                    # prompt is string
-                    if name == "prompt":
-                        len_prompt = len(value)
-                        # make unequal batch sizes
-                        batched_inputs[name] = [value[: len_prompt // i] for i in range(1, batch_size + 1)]
+            batched_input = {}
+            batched_input.update(inputs)
+
+            for name in self.batch_params:
+                if name not in inputs:
+                    continue
+
+                value = inputs[name]
+                if name == "prompt":
+                    len_prompt = len(value)
+                    # make unequal batch sizes
+                    batched_input[name] = [value[: len_prompt // i] for i in range(1, batch_size + 1)]
+
+                    # make last batch super long
+                    batched_input[name][-1] = 100 * "very long"

-                        # make last batch super long
-                        batched_inputs[name][-1] = 100 * "very long"
-                    # or else we have images
-                    else:
-                        batched_inputs[name] = batch_size * [value]
-                elif name == "batch_size":
-                    batched_inputs[name] = batch_size
                 else:
-                    batched_inputs[name] = value
+                    batched_input[name] = batch_size * [value]

-            for arg in additional_params_copy_to_batched_inputs:
-                batched_inputs[arg] = inputs[arg]
+            if "generator" in inputs:
+                batched_input["generator"] = [self.get_generator(i) for i in range(batch_size)]

-            batched_inputs["output_type"] = "np"
+            if "batch_size" in inputs:
+                batched_input["batch_size"] = batch_size

-            if self.pipeline_class.__name__ == "DanceDiffusionPipeline":
-                batched_inputs.pop("output_type")
-
-            output = pipe(**batched_inputs)
-
-            assert len(output[0]) == batch_size
-
-            batched_inputs["output_type"] = "np"
-
-            if self.pipeline_class.__name__ == "DanceDiffusionPipeline":
-                batched_inputs.pop("output_type")
-
-            output = pipe(**batched_inputs)[0]
-
-            assert output.shape[0] == batch_size
+            batched_inputs.append(batched_input)

         logger.setLevel(level=diffusers.logging.WARNING)
+        for batch_size, batched_input in zip(batch_sizes, batched_inputs):
+            output = pipe(**batched_input)
+            assert len(output[0]) == batch_size

     def test_inference_batch_single_identical(self, batch_size=3, expected_max_diff=1e-4):
         self._test_inference_batch_single_identical(batch_size=batch_size, expected_max_diff=expected_max_diff)

     def _test_inference_batch_single_identical(
         self,
-        batch_size=3,
-        test_max_difference=None,
-        test_mean_pixel_difference=None,
-        relax_max_difference=False,
+        batch_size=2,
         expected_max_diff=1e-4,
         additional_params_copy_to_batched_inputs=["num_inference_steps"],
     ):
-        if test_max_difference is None:
-            # TODO(Pedro) - not sure why, but not at all reproducible at the moment it seems
-            # make sure that batched and non-batched is identical
-            test_max_difference = torch_device != "mps"
-
-        if test_mean_pixel_difference is None:
-            # TODO same as above
-            test_mean_pixel_difference = torch_device != "mps"
-
-        generator_device = "cpu"
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
+        for components in pipe.components.values():
+            if hasattr(components, "set_default_attn_processor"):
+                components.set_default_attn_processor()
+
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
-
-        inputs = self.get_dummy_inputs(generator_device)
+        inputs = self.get_dummy_inputs(torch_device)
+        # Reset generator in case it is has been used in self.get_dummy_inputs
+        inputs["generator"] = self.get_generator(0)

         logger = logging.get_logger(pipe.__module__)
         logger.setLevel(level=diffusers.logging.FATAL)

         # batchify inputs
         batched_inputs = {}
-        batch_size = batch_size
-        for name, value in inputs.items():
-            if name in self.batch_params:
-                # prompt is string
-                if name == "prompt":
-                    len_prompt = len(value)
-                    # make unequal batch sizes
-                    batched_inputs[name] = [value[: len_prompt // i] for i in range(1, batch_size + 1)]
+        batched_inputs.update(inputs)
+
+        for name in self.batch_params:
+            if name not in inputs:
+                continue
+
+            value = inputs[name]
+            if name == "prompt":
+                len_prompt = len(value)
+                batched_inputs[name] = [value[: len_prompt // i] for i in range(1, batch_size + 1)]
+                batched_inputs[name][-1] = 100 * "very long"

-                    # make last batch super long
-                    batched_inputs[name][-1] = 100 * "very long"
-                # or else we have images
-                else:
-                    batched_inputs[name] = batch_size * [value]
-            elif name == "batch_size":
-                batched_inputs[name] = batch_size
-            elif name == "generator":
-                batched_inputs[name] = [self.get_generator(i) for i in range(batch_size)]
             else:
-                batched_inputs[name] = value
+                batched_inputs[name] = batch_size * [value]
+
+        if "generator" in inputs:
+            batched_inputs["generator"] = [self.get_generator(i) for i in range(batch_size)]
+
+        if "batch_size" in inputs:
+            batched_inputs["batch_size"] = batch_size

         for arg in additional_params_copy_to_batched_inputs:
             batched_inputs[arg] = inputs[arg]

-        if self.pipeline_class.__name__ != "DanceDiffusionPipeline":
-            batched_inputs["output_type"] = "np"
-
+        output = pipe(**inputs)
         output_batch = pipe(**batched_inputs)
+        assert output_batch[0].shape[0] == batch_size

-        inputs["generator"] = self.get_generator(0)
-
-        output = pipe(**inputs)
-
-        logger.setLevel(level=diffusers.logging.WARNING)
-        if test_max_difference:
-            if relax_max_difference:
-                # Taking the median of the largest differences
-                # is resilient to outliers
-                diff = np.abs(output_batch[0][0] - output[0][0])
-                diff = diff.flatten()
-                diff.sort()
-                max_diff = np.median(diff[-5:])
-            else:
-                max_diff = np.abs(output_batch[0][0] - output[0][0]).max()
-            assert max_diff < expected_max_diff
-
-        if test_mean_pixel_difference:
-            assert_mean_pixel_difference(output_batch[0][0], output[0][0])
+        max_diff = np.abs(output_batch[0][0] - output[0][0]).max()
+        assert max_diff < expected_max_diff

     def test_dict_tuple_outputs_equivalent(self, expected_max_difference=1e-4):
         components = self.get_dummy_components()
@@ -528,8 +494,9 @@ class PipelineTesterMixin:
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)

-        output = pipe(**self.get_dummy_inputs(torch_device))[0]
-        output_tuple = pipe(**self.get_dummy_inputs(torch_device), return_dict=False)[0]
+        generator_device = "cpu"
+        output = pipe(**self.get_dummy_inputs(generator_device))[0]
+        output_tuple = pipe(**self.get_dummy_inputs(generator_device), return_dict=False)[0]

         max_diff = np.abs(to_np(output) - to_np(output_tuple)).max()
         self.assertLess(max_diff, expected_max_difference)
@@ -710,11 +677,12 @@ class PipelineTesterMixin:
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)

-        inputs = self.get_dummy_inputs(torch_device)
+        generator_device = "cpu"
+        inputs = self.get_dummy_inputs(generator_device)
         output_without_slicing = pipe(**inputs)[0]

         pipe.enable_attention_slicing(slice_size=1)
-        inputs = self.get_dummy_inputs(torch_device)
+        inputs = self.get_dummy_inputs(generator_device)
         output_with_slicing = pipe(**inputs)[0]

         if test_max_difference:
diff --git a/tests/pipelines/text_to_video/test_text_to_video.py b/tests/pipelines/text_to_video/test_text_to_video.py
index e03c8fc5df..2c47dc492d 100644
--- a/tests/pipelines/text_to_video/test_text_to_video.py
+++ b/tests/pipelines/text_to_video/test_text_to_video.py
@@ -62,14 +62,14 @@ class TextToVideoSDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def get_dummy_components(self):
         torch.manual_seed(0)
         unet = UNet3DConditionModel(
-            block_out_channels=(32, 64, 64, 64),
+            block_out_channels=(32, 32),
             layers_per_block=2,
             sample_size=32,
             in_channels=4,
             out_channels=4,
-            down_block_types=("CrossAttnDownBlock3D", "CrossAttnDownBlock3D", "CrossAttnDownBlock3D", "DownBlock3D"),
-            up_block_types=("UpBlock3D", "CrossAttnUpBlock3D", "CrossAttnUpBlock3D", "CrossAttnUpBlock3D"),
-            cross_attention_dim=32,
+            down_block_types=("CrossAttnDownBlock3D", "DownBlock3D"),
+            up_block_types=("UpBlock3D", "CrossAttnUpBlock3D"),
+            cross_attention_dim=4,
             attention_head_dim=4,
         )
         scheduler = DDIMScheduler(
@@ -81,27 +81,27 @@ class TextToVideoSDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         )
         torch.manual_seed(0)
         vae = AutoencoderKL(
-            block_out_channels=[32, 64],
+            block_out_channels=(32,),
             in_channels=3,
             out_channels=3,
-            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
-            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
+            down_block_types=["DownEncoderBlock2D"],
+            up_block_types=["UpDecoderBlock2D"],
             latent_channels=4,
-            sample_size=128,
+            sample_size=32,
         )
         torch.manual_seed(0)
         text_encoder_config = CLIPTextConfig(
             bos_token_id=0,
             eos_token_id=2,
-            hidden_size=32,
-            intermediate_size=37,
+            hidden_size=4,
+            intermediate_size=16,
             layer_norm_eps=1e-05,
-            num_attention_heads=4,
-            num_hidden_layers=5,
+            num_attention_heads=2,
+            num_hidden_layers=2,
             pad_token_id=1,
             vocab_size=1000,
             hidden_act="gelu",
-            projection_dim=512,
+            projection_dim=32,
         )
         text_encoder = CLIPTextModel(text_encoder_config)
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
@@ -141,8 +141,8 @@ class TextToVideoSDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         frames = sd_pipe(**inputs).frames
         image_slice = frames[0][-3:, -3:, -1]

-        assert frames[0].shape == (64, 64, 3)
-        expected_slice = np.array([158.0, 160.0, 153.0, 125.0, 100.0, 121.0, 111.0, 93.0, 113.0])
+        assert frames[0].shape == (32, 32, 3)
+        expected_slice = np.array([91.0, 152.0, 66.0, 192.0, 94.0, 126.0, 101.0, 123.0, 152.0])

         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

diff --git a/tests/pipelines/text_to_video/test_video_to_video.py b/tests/pipelines/text_to_video/test_video_to_video.py
index 6b1c44ceb0..f057eb3499 100644
--- a/tests/pipelines/text_to_video/test_video_to_video.py
+++ b/tests/pipelines/text_to_video/test_video_to_video.py
@@ -82,7 +82,7 @@ class VideoToVideoSDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             beta_start=0.00085,
             beta_end=0.012,
             beta_schedule="scaled_linear",
-            clip_sample=False,
+            clip_sample=True,
             set_alpha_to_one=False,
         )
         torch.manual_seed(0)
diff --git a/tests/pipelines/unclip/test_unclip.py b/tests/pipelines/unclip/test_unclip.py
index 2fe6033712..98e105bbb7 100644
--- a/tests/pipelines/unclip/test_unclip.py
+++ b/tests/pipelines/unclip/test_unclip.py
@@ -373,8 +373,6 @@ class UnCLIPPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     # because UnCLIP undeterminism requires a looser check.
     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
         additional_params_copy_to_batched_inputs = [
             "prior_num_inference_steps",
             "decoder_num_inference_steps",
@@ -382,9 +380,7 @@ class UnCLIPPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         ]

         self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            additional_params_copy_to_batched_inputs=additional_params_copy_to_batched_inputs,
+            additional_params_copy_to_batched_inputs=additional_params_copy_to_batched_inputs, expected_max_diff=5e-3
         )

     def test_inference_batch_consistent(self):
diff --git a/tests/pipelines/unclip/test_unclip_image_variation.py b/tests/pipelines/unclip/test_unclip_image_variation.py
index 6affa22d15..c2adba4a69 100644
--- a/tests/pipelines/unclip/test_unclip_image_variation.py
+++ b/tests/pipelines/unclip/test_unclip_image_variation.py
@@ -448,17 +448,12 @@ class UnCLIPImageVariationPipelineFastTests(PipelineTesterMixin, unittest.TestCa
     # because UnCLIP undeterminism requires a looser check.
     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
         additional_params_copy_to_batched_inputs = [
             "decoder_num_inference_steps",
             "super_res_num_inference_steps",
         ]
-
         self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            additional_params_copy_to_batched_inputs=additional_params_copy_to_batched_inputs,
+            additional_params_copy_to_batched_inputs=additional_params_copy_to_batched_inputs, expected_max_diff=5e-3
         )

     def test_inference_batch_consistent(self):
diff --git a/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py b/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py
index 7891056d10..1442196251 100644
--- a/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py
+++ b/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py
@@ -170,15 +170,7 @@ class WuerstchenDecoderPipelineFastTests(PipelineTesterMixin, unittest.TestCase)

     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-        test_mean_pixel_difference = False
-
-        self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            test_mean_pixel_difference=test_mean_pixel_difference,
-        )
+        self._test_inference_batch_single_identical(expected_max_diff=1e-5)

     @skip_mps
     def test_attention_slicing_forward_pass(self):
diff --git a/tests/pipelines/wuerstchen/test_wuerstchen_prior.py b/tests/pipelines/wuerstchen/test_wuerstchen_prior.py
index 045729b30b..b8f51c9584 100644
--- a/tests/pipelines/wuerstchen/test_wuerstchen_prior.py
+++ b/tests/pipelines/wuerstchen/test_wuerstchen_prior.py
@@ -166,14 +166,7 @@ class WuerstchenPriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase):

     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-        test_mean_pixel_difference = False
-
         self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            test_mean_pixel_difference=test_mean_pixel_difference,
             expected_max_diff=2e-1,
         )