diff --git a/tests/pipelines/audioldm/test_audioldm.py b/tests/pipelines/audioldm/test_audioldm.py
index 516cea76b7..0a2a44bf48 100644
--- a/tests/pipelines/audioldm/test_audioldm.py
+++ b/tests/pipelines/audioldm/test_audioldm.py
@@ -359,7 +359,7 @@ class AudioLDMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         self._test_attention_slicing_forward_pass(test_mean_pixel_difference=False)

     def test_inference_batch_single_identical(self):
-        self._test_inference_batch_single_identical(test_mean_pixel_difference=False)
+        self._test_inference_batch_single_identical()

     @unittest.skipIf(
         torch_device != "cuda" or not is_xformers_available(),
diff --git a/tests/pipelines/audioldm2/test_audioldm2.py b/tests/pipelines/audioldm2/test_audioldm2.py
index 33343d86ab..6fc0d66d4b 100644
--- a/tests/pipelines/audioldm2/test_audioldm2.py
+++ b/tests/pipelines/audioldm2/test_audioldm2.py
@@ -459,7 +459,7 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):

     def test_inference_batch_single_identical(self):
         # increase tolerance from 1e-4 -> 2e-4 to account for large composite model
-        self._test_inference_batch_single_identical(test_mean_pixel_difference=False, expected_max_diff=2e-4)
+        self._test_inference_batch_single_identical(expected_max_diff=2e-4)

     def test_save_load_local(self):
         # increase tolerance from 1e-4 -> 2e-4 to account for large composite model
diff --git a/tests/pipelines/dit/test_dit.py b/tests/pipelines/dit/test_dit.py
index 8f4d11ec38..0edc8cf323 100644
--- a/tests/pipelines/dit/test_dit.py
+++ b/tests/pipelines/dit/test_dit.py
@@ -96,7 +96,7 @@ class DiTPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         self.assertLessEqual(max_diff, 1e-3)

     def test_inference_batch_single_identical(self):
-        self._test_inference_batch_single_identical(relax_max_difference=True, expected_max_diff=1e-3)
+        self._test_inference_batch_single_identical(expected_max_diff=1e-3)

     @unittest.skipIf(
         torch_device != "cuda" or not is_xformers_available(),
diff --git a/tests/pipelines/kandinsky/test_kandinsky_prior.py b/tests/pipelines/kandinsky/test_kandinsky_prior.py
index b9f78ee0e8..bdc584968a 100644
--- a/tests/pipelines/kandinsky/test_kandinsky_prior.py
+++ b/tests/pipelines/kandinsky/test_kandinsky_prior.py
@@ -224,15 +224,7 @@ class KandinskyPriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase):

     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-        test_mean_pixel_difference = False
-
-        self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            test_mean_pixel_difference=test_mean_pixel_difference,
-        )
+        self._test_inference_batch_single_identical(expected_max_diff=1e-2)

     @skip_mps
     def test_attention_slicing_forward_pass(self):
diff --git a/tests/pipelines/kandinsky_v22/test_kandinsky_prior.py b/tests/pipelines/kandinsky_v22/test_kandinsky_prior.py
index 317e822a46..a0de5cceeb 100644
--- a/tests/pipelines/kandinsky_v22/test_kandinsky_prior.py
+++ b/tests/pipelines/kandinsky_v22/test_kandinsky_prior.py
@@ -224,15 +224,7 @@ class KandinskyV22PriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase)

     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-        test_mean_pixel_difference = False
-
-        self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            test_mean_pixel_difference=test_mean_pixel_difference,
-        )
+        self._test_inference_batch_single_identical(expected_max_diff=1e-3)

     @skip_mps
     def test_attention_slicing_forward_pass(self):
diff --git a/tests/pipelines/kandinsky_v22/test_kandinsky_prior_emb2emb.py b/tests/pipelines/kandinsky_v22/test_kandinsky_prior_emb2emb.py
index f71cbfcd0b..89b603e9fc 100644
--- a/tests/pipelines/kandinsky_v22/test_kandinsky_prior_emb2emb.py
+++ b/tests/pipelines/kandinsky_v22/test_kandinsky_prior_emb2emb.py
@@ -234,15 +234,7 @@ class KandinskyV22PriorEmb2EmbPipelineFastTests(PipelineTesterMixin, unittest.Te

     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-        test_mean_pixel_difference = False
-
-        self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            test_mean_pixel_difference=test_mean_pixel_difference,
-        )
+        self._test_inference_batch_single_identical(expected_max_diff=1e-2)

     @skip_mps
     def test_attention_slicing_forward_pass(self):
diff --git a/tests/pipelines/musicldm/test_musicldm.py b/tests/pipelines/musicldm/test_musicldm.py
index ea4c52aee1..4bf03569bb 100644
--- a/tests/pipelines/musicldm/test_musicldm.py
+++ b/tests/pipelines/musicldm/test_musicldm.py
@@ -373,7 +373,7 @@ class MusicLDMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         self._test_attention_slicing_forward_pass(test_mean_pixel_difference=False)

     def test_inference_batch_single_identical(self):
-        self._test_inference_batch_single_identical(test_mean_pixel_difference=False)
+        self._test_inference_batch_single_identical()

     @unittest.skipIf(
         torch_device != "cuda" or not is_xformers_available(),
diff --git a/tests/pipelines/shap_e/test_shap_e.py b/tests/pipelines/shap_e/test_shap_e.py
index 27dfcb5df3..3e944eba42 100644
--- a/tests/pipelines/shap_e/test_shap_e.py
+++ b/tests/pipelines/shap_e/test_shap_e.py
@@ -44,11 +44,11 @@ class ShapEPipelineFastTests(PipelineTesterMixin, unittest.TestCase):

     @property
     def text_embedder_hidden_size(self):
-        return 32
+        return 16

     @property
     def time_input_dim(self):
-        return 32
+        return 16

     @property
     def time_embed_dim(self):
@@ -201,14 +201,7 @@ class ShapEPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         self._test_inference_batch_consistent(batch_sizes=[1, 2])

     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-
-        self._test_inference_batch_single_identical(
-            batch_size=2,
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-        )
+        self._test_inference_batch_single_identical(batch_size=2, expected_max_diff=6e-3)

     def test_num_images_per_prompt(self):
         components = self.get_dummy_components()
diff --git a/tests/pipelines/shap_e/test_shap_e_img2img.py b/tests/pipelines/shap_e/test_shap_e_img2img.py
index 2b1eccf196..35b6f594da 100644
--- a/tests/pipelines/shap_e/test_shap_e_img2img.py
+++ b/tests/pipelines/shap_e/test_shap_e_img2img.py
@@ -52,11 +52,11 @@ class ShapEImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):

     @property
     def text_embedder_hidden_size(self):
-        return 32
+        return 16

     @property
     def time_input_dim(self):
-        return 32
+        return 16

     @property
     def time_embed_dim(self):
@@ -71,10 +71,10 @@ class ShapEImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         torch.manual_seed(0)
         config = CLIPVisionConfig(
             hidden_size=self.text_embedder_hidden_size,
-            image_size=64,
+            image_size=32,
             projection_dim=self.text_embedder_hidden_size,
-            intermediate_size=37,
-            num_attention_heads=4,
+            intermediate_size=24,
+            num_attention_heads=2,
             num_channels=3,
             num_hidden_layers=5,
             patch_size=1,
@@ -170,7 +170,7 @@ class ShapEImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         return components

     def get_dummy_inputs(self, device, seed=0):
-        input_image = floats_tensor((1, 3, 64, 64), rng=random.Random(seed)).to(device)
+        input_image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device)

         if str(device).startswith("mps"):
             generator = torch.manual_seed(seed)
@@ -219,15 +219,12 @@ class ShapEImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):

     def test_inference_batch_consistent(self):
         # NOTE: Larger batch sizes cause this test to timeout, only test on smaller batches
-        self._test_inference_batch_consistent(batch_sizes=[1, 2])
+        self._test_inference_batch_consistent(batch_sizes=[2])

     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
         self._test_inference_batch_single_identical(
             batch_size=2,
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
+            expected_max_diff=5e-3,
         )

     def test_num_images_per_prompt(self):
diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
index 95762e3642..e7b9aa6391 100644
--- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py
+++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
@@ -499,14 +499,7 @@ class StableDiffusionPipelineFastTests(
         negative_prompt = None
         num_images_per_prompt = 1
         logger = logging.get_logger("diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion")
-
-        prompt = 25 * "@"
-        with CaptureLogger(logger) as cap_logger_3:
-            negative_text_embeddings_3, text_embeddings_3 = sd_pipe.encode_prompt(
-                prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt
-            )
-        if negative_text_embeddings_3 is not None:
-            text_embeddings_3 = torch.cat([negative_text_embeddings_3, text_embeddings_3])
+        logger.setLevel(logging.WARNING)

         prompt = 100 * "@"
         with CaptureLogger(logger) as cap_logger:
@@ -516,6 +509,9 @@ class StableDiffusionPipelineFastTests(
         if negative_text_embeddings is not None:
             text_embeddings = torch.cat([negative_text_embeddings, text_embeddings])

+        # 100 - 77 + 1 (BOS token) + 1 (EOS token) = 25
+        assert cap_logger.out.count("@") == 25
+
         negative_prompt = "Hello"
         with CaptureLogger(logger) as cap_logger_2:
             negative_text_embeddings_2, text_embeddings_2 = sd_pipe.encode_prompt(
@@ -524,12 +520,18 @@ class StableDiffusionPipelineFastTests(
         if negative_text_embeddings_2 is not None:
             text_embeddings_2 = torch.cat([negative_text_embeddings_2, text_embeddings_2])

+        assert cap_logger.out == cap_logger_2.out
+
+        prompt = 25 * "@"
+        with CaptureLogger(logger) as cap_logger_3:
+            negative_text_embeddings_3, text_embeddings_3 = sd_pipe.encode_prompt(
+                prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt
+            )
+        if negative_text_embeddings_3 is not None:
+            text_embeddings_3 = torch.cat([negative_text_embeddings_3, text_embeddings_3])
+
         assert text_embeddings_3.shape == text_embeddings_2.shape == text_embeddings.shape
         assert text_embeddings.shape[1] == 77
-
-        assert cap_logger.out == cap_logger_2.out
-        # 100 - 77 + 1 (BOS token) + 1 (EOS token) = 25
-        assert cap_logger.out.count("@") == 25
         assert cap_logger_3.out == ""

     def test_stable_diffusion_height_width_opt(self):
diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
index 3842dda2e5..2fa4605889 100644
--- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
+++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
@@ -250,6 +250,7 @@ class StableDiffusion2PipelineFastTests(
         negative_prompt = None
         num_images_per_prompt = 1
         logger = logging.get_logger("diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion")
+        logger.setLevel(logging.WARNING)

         prompt = 25 * "@"
         with CaptureLogger(logger) as cap_logger_3:
diff --git a/tests/pipelines/stable_unclip/test_stable_unclip.py b/tests/pipelines/stable_unclip/test_stable_unclip.py
index 8d5edda169..f7affbe997 100644
--- a/tests/pipelines/stable_unclip/test_stable_unclip.py
+++ b/tests/pipelines/stable_unclip/test_stable_unclip.py
@@ -182,9 +182,7 @@ class StableUnCLIPPipelineFastTests(
     # Overriding PipelineTesterMixin::test_inference_batch_single_identical
     # because UnCLIP undeterminism requires a looser check.
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device in ["cpu", "mps"]
-
-        self._test_inference_batch_single_identical(test_max_difference=test_max_difference)
+        self._test_inference_batch_single_identical(expected_max_diff=1e-3)


 @slow
diff --git a/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py b/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py
index 52581eb574..9bbde46e4d 100644
--- a/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py
+++ b/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py
@@ -196,9 +196,7 @@ class StableUnCLIPImg2ImgPipelineFastTests(
     # Overriding PipelineTesterMixin::test_inference_batch_single_identical
     # because undeterminism requires a looser check.
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device in ["cpu", "mps"]
-
-        self._test_inference_batch_single_identical(test_max_difference=test_max_difference)
+        self._test_inference_batch_single_identical(expected_max_diff=1e-3)

     @unittest.skipIf(
         torch_device != "cuda" or not is_xformers_available(),
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 157ff2b699..b1eebbe312 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -374,11 +374,11 @@ class PipelineTesterMixin:
             f"Required optional parameters not present: {remaining_required_optional_parameters}",
         )

-    def test_inference_batch_consistent(self, batch_sizes=[2, 4, 13]):
+    def test_inference_batch_consistent(self, batch_sizes=[2]):
         self._test_inference_batch_consistent(batch_sizes=batch_sizes)

     def _test_inference_batch_consistent(
-        self, batch_sizes=[2, 4, 13], additional_params_copy_to_batched_inputs=["num_inference_steps"]
+        self, batch_sizes=[2], additional_params_copy_to_batched_inputs=["num_inference_steps"]
     ):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
@@ -386,137 +386,103 @@ class PipelineTesterMixin:
         pipe.set_progress_bar_config(disable=None)

         inputs = self.get_dummy_inputs(torch_device)
+        inputs["generator"] = self.get_generator(0)

         logger = logging.get_logger(pipe.__module__)
         logger.setLevel(level=diffusers.logging.FATAL)

-        # batchify inputs
+        # prepare batched inputs
+        batched_inputs = []
         for batch_size in batch_sizes:
-            batched_inputs = {}
-            for name, value in inputs.items():
-                if name in self.batch_params:
-                    # prompt is string
-                    if name == "prompt":
-                        len_prompt = len(value)
-                        # make unequal batch sizes
-                        batched_inputs[name] = [value[: len_prompt // i] for i in range(1, batch_size + 1)]
+            batched_input = {}
+            batched_input.update(inputs)
+
+            for name in self.batch_params:
+                if name not in inputs:
+                    continue
+
+                value = inputs[name]
+                if name == "prompt":
+                    len_prompt = len(value)
+                    # make unequal batch sizes
+                    batched_input[name] = [value[: len_prompt // i] for i in range(1, batch_size + 1)]
+
+                    # make last batch super long
+                    batched_input[name][-1] = 100 * "very long"

-                        # make last batch super long
-                        batched_inputs[name][-1] = 100 * "very long"
-                    # or else we have images
-                    else:
-                        batched_inputs[name] = batch_size * [value]
-                elif name == "batch_size":
-                    batched_inputs[name] = batch_size
                 else:
-                    batched_inputs[name] = value
+                    batched_input[name] = batch_size * [value]

-            for arg in additional_params_copy_to_batched_inputs:
-                batched_inputs[arg] = inputs[arg]
+            if "generator" in inputs:
+                batched_input["generator"] = [self.get_generator(i) for i in range(batch_size)]

-            batched_inputs["output_type"] = "np"
+            if "batch_size" in inputs:
+                batched_input["batch_size"] = batch_size

-            if self.pipeline_class.__name__ == "DanceDiffusionPipeline":
-                batched_inputs.pop("output_type")
-
-            output = pipe(**batched_inputs)
-
-            assert len(output[0]) == batch_size
-
-            batched_inputs["output_type"] = "np"
-
-            if self.pipeline_class.__name__ == "DanceDiffusionPipeline":
-                batched_inputs.pop("output_type")
-
-            output = pipe(**batched_inputs)[0]
-
-            assert output.shape[0] == batch_size
+            batched_inputs.append(batched_input)

         logger.setLevel(level=diffusers.logging.WARNING)
+        for batch_size, batched_input in zip(batch_sizes, batched_inputs):
+            output = pipe(**batched_input)
+            assert len(output[0]) == batch_size

     def test_inference_batch_single_identical(self, batch_size=3, expected_max_diff=1e-4):
         self._test_inference_batch_single_identical(batch_size=batch_size, expected_max_diff=expected_max_diff)

     def _test_inference_batch_single_identical(
         self,
-        batch_size=3,
-        test_max_difference=None,
-        test_mean_pixel_difference=None,
-        relax_max_difference=False,
+        batch_size=2,
         expected_max_diff=1e-4,
         additional_params_copy_to_batched_inputs=["num_inference_steps"],
     ):
-        if test_max_difference is None:
-            # TODO(Pedro) - not sure why, but not at all reproducible at the moment it seems
-            # make sure that batched and non-batched is identical
-            test_max_difference = torch_device != "mps"
-
-        if test_mean_pixel_difference is None:
-            # TODO same as above
-            test_mean_pixel_difference = torch_device != "mps"
-
-        generator_device = "cpu"
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
+        for components in pipe.components.values():
+            if hasattr(components, "set_default_attn_processor"):
+                components.set_default_attn_processor()
+
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
-
-        inputs = self.get_dummy_inputs(generator_device)
+        inputs = self.get_dummy_inputs(torch_device)
+        # Reset generator in case it is has been used in self.get_dummy_inputs
+        inputs["generator"] = self.get_generator(0)

         logger = logging.get_logger(pipe.__module__)
         logger.setLevel(level=diffusers.logging.FATAL)

         # batchify inputs
         batched_inputs = {}
-        batch_size = batch_size
-        for name, value in inputs.items():
-            if name in self.batch_params:
-                # prompt is string
-                if name == "prompt":
-                    len_prompt = len(value)
-                    # make unequal batch sizes
-                    batched_inputs[name] = [value[: len_prompt // i] for i in range(1, batch_size + 1)]
+        batched_inputs.update(inputs)
+
+        for name in self.batch_params:
+            if name not in inputs:
+                continue
+
+            value = inputs[name]
+            if name == "prompt":
+                len_prompt = len(value)
+                batched_inputs[name] = [value[: len_prompt // i] for i in range(1, batch_size + 1)]
+                batched_inputs[name][-1] = 100 * "very long"

-                    # make last batch super long
-                    batched_inputs[name][-1] = 100 * "very long"
-                # or else we have images
-                else:
-                    batched_inputs[name] = batch_size * [value]
-            elif name == "batch_size":
-                batched_inputs[name] = batch_size
-            elif name == "generator":
-                batched_inputs[name] = [self.get_generator(i) for i in range(batch_size)]
             else:
-                batched_inputs[name] = value
+                batched_inputs[name] = batch_size * [value]
+
+        if "generator" in inputs:
+            batched_inputs["generator"] = [self.get_generator(i) for i in range(batch_size)]
+
+        if "batch_size" in inputs:
+            batched_inputs["batch_size"] = batch_size

         for arg in additional_params_copy_to_batched_inputs:
             batched_inputs[arg] = inputs[arg]

-        if self.pipeline_class.__name__ != "DanceDiffusionPipeline":
-            batched_inputs["output_type"] = "np"
-
+        output = pipe(**inputs)
         output_batch = pipe(**batched_inputs)
+        assert output_batch[0].shape[0] == batch_size

-        inputs["generator"] = self.get_generator(0)
-
-        output = pipe(**inputs)
-
-        logger.setLevel(level=diffusers.logging.WARNING)
-        if test_max_difference:
-            if relax_max_difference:
-                # Taking the median of the largest differences
-                # is resilient to outliers
-                diff = np.abs(output_batch[0][0] - output[0][0])
-                diff = diff.flatten()
-                diff.sort()
-                max_diff = np.median(diff[-5:])
-            else:
-                max_diff = np.abs(output_batch[0][0] - output[0][0]).max()
-            assert max_diff < expected_max_diff
-
-        if test_mean_pixel_difference:
-            assert_mean_pixel_difference(output_batch[0][0], output[0][0])
+        max_diff = np.abs(output_batch[0][0] - output[0][0]).max()
+        assert max_diff < expected_max_diff

     def test_dict_tuple_outputs_equivalent(self, expected_max_difference=1e-4):
         components = self.get_dummy_components()
@@ -528,8 +494,9 @@ class PipelineTesterMixin:
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)

-        output = pipe(**self.get_dummy_inputs(torch_device))[0]
-        output_tuple = pipe(**self.get_dummy_inputs(torch_device), return_dict=False)[0]
+        generator_device = "cpu"
+        output = pipe(**self.get_dummy_inputs(generator_device))[0]
+        output_tuple = pipe(**self.get_dummy_inputs(generator_device), return_dict=False)[0]

         max_diff = np.abs(to_np(output) - to_np(output_tuple)).max()
         self.assertLess(max_diff, expected_max_difference)
@@ -710,11 +677,12 @@ class PipelineTesterMixin:
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)

-        inputs = self.get_dummy_inputs(torch_device)
+        generator_device = "cpu"
+        inputs = self.get_dummy_inputs(generator_device)
         output_without_slicing = pipe(**inputs)[0]

         pipe.enable_attention_slicing(slice_size=1)
-        inputs = self.get_dummy_inputs(torch_device)
+        inputs = self.get_dummy_inputs(generator_device)
         output_with_slicing = pipe(**inputs)[0]

         if test_max_difference:
diff --git a/tests/pipelines/text_to_video/test_text_to_video.py b/tests/pipelines/text_to_video/test_text_to_video.py
index e03c8fc5df..2c47dc492d 100644
--- a/tests/pipelines/text_to_video/test_text_to_video.py
+++ b/tests/pipelines/text_to_video/test_text_to_video.py
@@ -62,14 +62,14 @@ class TextToVideoSDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def get_dummy_components(self):
         torch.manual_seed(0)
         unet = UNet3DConditionModel(
-            block_out_channels=(32, 64, 64, 64),
+            block_out_channels=(32, 32),
             layers_per_block=2,
             sample_size=32,
             in_channels=4,
             out_channels=4,
-            down_block_types=("CrossAttnDownBlock3D", "CrossAttnDownBlock3D", "CrossAttnDownBlock3D", "DownBlock3D"),
-            up_block_types=("UpBlock3D", "CrossAttnUpBlock3D", "CrossAttnUpBlock3D", "CrossAttnUpBlock3D"),
-            cross_attention_dim=32,
+            down_block_types=("CrossAttnDownBlock3D", "DownBlock3D"),
+            up_block_types=("UpBlock3D", "CrossAttnUpBlock3D"),
+            cross_attention_dim=4,
             attention_head_dim=4,
         )
         scheduler = DDIMScheduler(
@@ -81,27 +81,27 @@ class TextToVideoSDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         )
         torch.manual_seed(0)
         vae = AutoencoderKL(
-            block_out_channels=[32, 64],
+            block_out_channels=(32,),
             in_channels=3,
             out_channels=3,
-            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
-            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
+            down_block_types=["DownEncoderBlock2D"],
+            up_block_types=["UpDecoderBlock2D"],
             latent_channels=4,
-            sample_size=128,
+            sample_size=32,
         )
         torch.manual_seed(0)
         text_encoder_config = CLIPTextConfig(
             bos_token_id=0,
             eos_token_id=2,
-            hidden_size=32,
-            intermediate_size=37,
+            hidden_size=4,
+            intermediate_size=16,
             layer_norm_eps=1e-05,
-            num_attention_heads=4,
-            num_hidden_layers=5,
+            num_attention_heads=2,
+            num_hidden_layers=2,
             pad_token_id=1,
             vocab_size=1000,
             hidden_act="gelu",
-            projection_dim=512,
+            projection_dim=32,
         )
         text_encoder = CLIPTextModel(text_encoder_config)
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
@@ -141,8 +141,8 @@ class TextToVideoSDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         frames = sd_pipe(**inputs).frames
         image_slice = frames[0][-3:, -3:, -1]

-        assert frames[0].shape == (64, 64, 3)
-        expected_slice = np.array([158.0, 160.0, 153.0, 125.0, 100.0, 121.0, 111.0, 93.0, 113.0])
+        assert frames[0].shape == (32, 32, 3)
+        expected_slice = np.array([91.0, 152.0, 66.0, 192.0, 94.0, 126.0, 101.0, 123.0, 152.0])

         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

diff --git a/tests/pipelines/text_to_video/test_video_to_video.py b/tests/pipelines/text_to_video/test_video_to_video.py
index 6b1c44ceb0..f057eb3499 100644
--- a/tests/pipelines/text_to_video/test_video_to_video.py
+++ b/tests/pipelines/text_to_video/test_video_to_video.py
@@ -82,7 +82,7 @@ class VideoToVideoSDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             beta_start=0.00085,
             beta_end=0.012,
             beta_schedule="scaled_linear",
-            clip_sample=False,
+            clip_sample=True,
             set_alpha_to_one=False,
         )
         torch.manual_seed(0)
diff --git a/tests/pipelines/unclip/test_unclip.py b/tests/pipelines/unclip/test_unclip.py
index 2fe6033712..98e105bbb7 100644
--- a/tests/pipelines/unclip/test_unclip.py
+++ b/tests/pipelines/unclip/test_unclip.py
@@ -373,8 +373,6 @@ class UnCLIPPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     # because UnCLIP undeterminism requires a looser check.
     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
         additional_params_copy_to_batched_inputs = [
             "prior_num_inference_steps",
             "decoder_num_inference_steps",
@@ -382,9 +380,7 @@ class UnCLIPPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         ]

         self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            additional_params_copy_to_batched_inputs=additional_params_copy_to_batched_inputs,
+            additional_params_copy_to_batched_inputs=additional_params_copy_to_batched_inputs, expected_max_diff=5e-3
         )

     def test_inference_batch_consistent(self):
diff --git a/tests/pipelines/unclip/test_unclip_image_variation.py b/tests/pipelines/unclip/test_unclip_image_variation.py
index 6affa22d15..c2adba4a69 100644
--- a/tests/pipelines/unclip/test_unclip_image_variation.py
+++ b/tests/pipelines/unclip/test_unclip_image_variation.py
@@ -448,17 +448,12 @@ class UnCLIPImageVariationPipelineFastTests(PipelineTesterMixin, unittest.TestCa
     # because UnCLIP undeterminism requires a looser check.
     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
         additional_params_copy_to_batched_inputs = [
             "decoder_num_inference_steps",
             "super_res_num_inference_steps",
         ]
-
         self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            additional_params_copy_to_batched_inputs=additional_params_copy_to_batched_inputs,
+            additional_params_copy_to_batched_inputs=additional_params_copy_to_batched_inputs, expected_max_diff=5e-3
         )

     def test_inference_batch_consistent(self):
diff --git a/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py b/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py
index 7891056d10..1442196251 100644
--- a/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py
+++ b/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py
@@ -170,15 +170,7 @@ class WuerstchenDecoderPipelineFastTests(PipelineTesterMixin, unittest.TestCase)

     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-        test_mean_pixel_difference = False
-
-        self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            test_mean_pixel_difference=test_mean_pixel_difference,
-        )
+        self._test_inference_batch_single_identical(expected_max_diff=1e-5)

     @skip_mps
     def test_attention_slicing_forward_pass(self):
diff --git a/tests/pipelines/wuerstchen/test_wuerstchen_prior.py b/tests/pipelines/wuerstchen/test_wuerstchen_prior.py
index 045729b30b..b8f51c9584 100644
--- a/tests/pipelines/wuerstchen/test_wuerstchen_prior.py
+++ b/tests/pipelines/wuerstchen/test_wuerstchen_prior.py
@@ -166,14 +166,7 @@ class WuerstchenPriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase):

     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-        test_mean_pixel_difference = False
-
         self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            test_mean_pixel_difference=test_mean_pixel_difference,
             expected_max_diff=2e-1,
         )