Mirror of https://github.com/huggingface/diffusers.git

[Tests] Speed up fast pipelines part II (#7521)

* start printing the tensors.

* print full throttle

* set static slices for 7 tests.

* remove printing.

* flatten

* disable test for controlnet

* what happens when things are seeded properly?

* set the right value

* style.

* make pia test fail to check things

* print.

* fix pia.

* checking for animatediff.

* fix: animatediff.

* video synthesis

* final piece.

* style.

* print guess.

* fix: assertion for control guess.

---------

Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>
Author: Sayak Paul
Committed by: GitHub
Date: 2024-04-02 13:24:56 +05:30
Parent: 000fa82a1e
Commit: 2b04ec2ff7
10 changed files with 76 additions and 11 deletions
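In practical terms, the change lets each fast pipeline test hand a precomputed expected_slice to the shared test_dict_tuple_outputs_equivalent check, so the dict-vs-tuple equivalence test runs the pipeline once instead of twice and compares the tuple output against a hard-coded 3x3 corner slice. A minimal sketch of that slicing convention, assuming NumPy-style (batch, height, width, channels) outputs; the helper name and the random input are illustrative, not part of the diff:

import numpy as np

def corner_slice(images: np.ndarray) -> np.ndarray:
    # Image outputs: flatten the 3x3 bottom-right corner of the last channel.
    if images.ndim != 5:
        return images[0, -3:, -3:, -1].flatten()
    # 5-dim (video) outputs take one extra trailing index, as in the mixin below.
    return images[0, -3:, -3:, -1, -1].flatten()

images = np.random.default_rng(0).random((1, 64, 64, 3))
assert corner_slice(images).shape == (9,)  # nine reference values per test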

View File

@@ -172,6 +172,12 @@ class AnimateDiffPipelineFastTests(
         )
         return super().test_ip_adapter_single(expected_pipe_slice=expected_pipe_slice)
 
+    def test_dict_tuple_outputs_equivalent(self):
+        expected_slice = None
+        if torch_device == "cpu":
+            expected_slice = np.array([0.4051, 0.4495, 0.4480, 0.5845, 0.4172, 0.6066, 0.4205, 0.3786, 0.5323])
+        return super().test_dict_tuple_outputs_equivalent(expected_slice=expected_slice)
+
     def test_inference_batch_single_identical(
         self,
         batch_size=2,

View File

@@ -28,7 +28,7 @@ from diffusers import (
     PNDMScheduler,
     UNet2DConditionModel,
 )
-from diffusers.utils.testing_utils import enable_full_determinism
+from diffusers.utils.testing_utils import enable_full_determinism, torch_device
 from src.diffusers.pipelines.blip_diffusion.blip_image_processing import BlipImageProcessor
 from src.diffusers.pipelines.blip_diffusion.modeling_blip2 import Blip2QFormerModel
 from src.diffusers.pipelines.blip_diffusion.modeling_ctx_clip import ContextCLIPTextModel
@@ -196,6 +196,12 @@ class BlipDiffusionControlNetPipelineFastTests(PipelineTesterMixin, unittest.Tes
         }
         return inputs
 
+    def test_dict_tuple_outputs_equivalent(self):
+        expected_slice = None
+        if torch_device == "cpu":
+            expected_slice = np.array([0.4803, 0.3865, 0.1422, 0.6119, 0.2283, 0.6365, 0.5453, 0.5205, 0.3581])
+        super().test_dict_tuple_outputs_equivalent(expected_slice=expected_slice)
+
     def test_blipdiffusion_controlnet(self):
         device = "cpu"
         components = self.get_dummy_components()

View File

@@ -57,6 +57,7 @@ class ControlNetPipelineSDXLFastTests(
     image_latents_params = TEXT_TO_IMAGE_IMAGE_PARAMS
 
     def get_dummy_components(self):
+        torch.manual_seed(0)
         unet = UNet2DConditionModel(
             block_out_channels=(32, 64),
             layers_per_block=2,
@@ -74,6 +75,7 @@ class ControlNetPipelineSDXLFastTests(
             projection_class_embeddings_input_dim=80,  # 6 * 8 + 32
             cross_attention_dim=64,
         )
+        torch.manual_seed(0)
         controlnet = ControlNetModel(
             block_out_channels=(32, 64),
             layers_per_block=2,
@@ -123,6 +125,7 @@ class ControlNetPipelineSDXLFastTests(
         text_encoder = CLIPTextModel(text_encoder_config)
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
 
+        torch.manual_seed(0)
         text_encoder_2 = CLIPTextModelWithProjection(text_encoder_config)
         tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
@@ -182,6 +185,12 @@ class ControlNetPipelineSDXLFastTests(
     def test_attention_slicing_forward_pass(self):
         return self._test_attention_slicing_forward_pass(expected_max_diff=2e-3)
 
+    def test_dict_tuple_outputs_equivalent(self):
+        expected_slice = None
+        if torch_device == "cpu":
+            expected_slice = np.array([0.5490, 0.5053, 0.4676, 0.5816, 0.5364, 0.4830, 0.5937, 0.5719, 0.4318])
+        super().test_dict_tuple_outputs_equivalent(expected_slice=expected_slice)
+
     @unittest.skipIf(
         torch_device != "cuda" or not is_xformers_available(),
         reason="XFormers attention is only available with CUDA and `xformers` installed",
@@ -289,9 +298,7 @@ class ControlNetPipelineSDXLFastTests(
         output = sd_pipe(**inputs)
         image_slice = output.images[0, -3:, -3:, -1]
 
-        expected_slice = np.array(
-            [0.5381963, 0.4836803, 0.45821992, 0.5577731, 0.51210403, 0.4794795, 0.59282357, 0.5647199, 0.43100584]
-        )
+        expected_slice = np.array([0.549, 0.5053, 0.4676, 0.5816, 0.5364, 0.483, 0.5937, 0.5719, 0.4318])
 
         # make sure that it's equal
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-4
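
The three added torch.manual_seed(0) calls pin each submodule's weight initialization so the hard-coded slices stay stable: building one module consumes the global RNG, which would otherwise shift the initial weights of every module created after it. A small self-contained illustration of that effect (not taken from the diff):

import torch

torch.manual_seed(0)
first = torch.nn.Linear(4, 4)   # init draws from the global RNG

torch.manual_seed(0)
second = torch.nn.Linear(4, 4)  # re-seeding makes init order-independent

assert torch.equal(first.weight, second.weight)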

View File

@@ -36,6 +36,7 @@ from diffusers.utils.testing_utils import (
     load_image,
     require_torch_gpu,
     slow,
+    torch_device,
 )
 
 from ..pipeline_params import (
@@ -152,6 +153,12 @@ class Kandinsky3Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
         }
         return inputs
 
+    def test_dict_tuple_outputs_equivalent(self):
+        expected_slice = None
+        if torch_device == "cpu":
+            expected_slice = np.array([0.5762, 0.6112, 0.4150, 0.6018, 0.6167, 0.4626, 0.5426, 0.5641, 0.6536])
+        super().test_dict_tuple_outputs_equivalent(expected_slice=expected_slice)
+
     def test_kandinsky3_img2img(self):
         device = "cpu"

View File

@@ -175,6 +175,12 @@ class PIAPipelineFastTests(IPAdapterTesterMixin, PipelineTesterMixin, PipelineFr
         )
         return super().test_ip_adapter_single(expected_pipe_slice=expected_pipe_slice)
 
+    def test_dict_tuple_outputs_equivalent(self):
+        expected_slice = None
+        if torch_device == "cpu":
+            expected_slice = np.array([0.3740, 0.4284, 0.4038, 0.5417, 0.4405, 0.5521, 0.4273, 0.4124, 0.4997])
+        return super().test_dict_tuple_outputs_equivalent(expected_slice=expected_slice)
+
     @unittest.skip("Attention slicing is not enabled in this pipeline")
     def test_attention_slicing_forward_pass(self):
         pass

View File

@@ -32,6 +32,7 @@ from diffusers.utils.testing_utils import (
     numpy_cosine_similarity_distance,
     require_torch_gpu,
     skip_mps,
+    torch_device,
 )
 
 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
@@ -153,6 +154,12 @@ class StableDiffusionAttendAndExcitePipelineFastTests(
         }
         return inputs
 
+    def test_dict_tuple_outputs_equivalent(self):
+        expected_slice = None
+        if torch_device == "cpu":
+            expected_slice = np.array([0.6391, 0.6290, 0.4860, 0.5134, 0.5550, 0.4577, 0.5033, 0.5023, 0.4538])
+        super().test_dict_tuple_outputs_equivalent(expected_slice=expected_slice, expected_max_difference=3e-3)
+
     def test_inference(self):
         device = "cpu"
@@ -182,9 +189,6 @@ class StableDiffusionAttendAndExcitePipelineFastTests(
     def test_inference_batch_single_identical(self):
         self._test_inference_batch_single_identical(batch_size=2, expected_max_diff=7e-4)
 
-    def test_dict_tuple_outputs_equivalent(self):
-        super().test_dict_tuple_outputs_equivalent(expected_max_difference=3e-3)
-
     def test_pt_np_pil_outputs_equivalent(self):
         super().test_pt_np_pil_outputs_equivalent(expected_max_diff=5e-4)

View File

@@ -35,7 +35,7 @@ from diffusers import (
 )
 from diffusers.pipelines.stable_diffusion import CLIPImageProjection
 from diffusers.utils import load_image
-from diffusers.utils.testing_utils import enable_full_determinism
+from diffusers.utils.testing_utils import enable_full_determinism, torch_device
 
 from ..pipeline_params import (
     TEXT_TO_IMAGE_BATCH_PARAMS,
@@ -160,6 +160,12 @@ class GligenTextImagePipelineFastTests(
         }
         return inputs
 
+    def test_dict_tuple_outputs_equivalent(self):
+        expected_slice = None
+        if torch_device == "cpu":
+            expected_slice = np.array([0.5052, 0.5546, 0.4567, 0.4770, 0.5195, 0.4085, 0.5026, 0.4909, 0.4495])
+        super().test_dict_tuple_outputs_equivalent(expected_slice=expected_slice)
+
     def test_stable_diffusion_gligen_text_image_default_case(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         components = self.get_dummy_components()

View File

@@ -1037,7 +1037,7 @@ class PipelineTesterMixin:
         max_diff = np.abs(to_np(output_batch[0][0]) - to_np(output[0][0])).max()
         assert max_diff < expected_max_diff
 
-    def test_dict_tuple_outputs_equivalent(self, expected_max_difference=1e-4):
+    def test_dict_tuple_outputs_equivalent(self, expected_slice=None, expected_max_difference=1e-4):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
         for component in pipe.components.values():
@@ -1048,10 +1048,21 @@ class PipelineTesterMixin:
         pipe.set_progress_bar_config(disable=None)
 
         generator_device = "cpu"
-        output = pipe(**self.get_dummy_inputs(generator_device))[0]
+        if expected_slice is None:
+            output = pipe(**self.get_dummy_inputs(generator_device))[0]
+        else:
+            output = expected_slice
+
         output_tuple = pipe(**self.get_dummy_inputs(generator_device), return_dict=False)[0]
 
-        max_diff = np.abs(to_np(output) - to_np(output_tuple)).max()
+        if expected_slice is None:
+            max_diff = np.abs(to_np(output) - to_np(output_tuple)).max()
+        else:
+            if output_tuple.ndim != 5:
+                max_diff = np.abs(to_np(output) - to_np(output_tuple)[0, -3:, -3:, -1].flatten()).max()
+            else:
+                max_diff = np.abs(to_np(output) - to_np(output_tuple)[0, -3:, -3:, -1, -1].flatten()).max()
+
         self.assertLess(max_diff, expected_max_difference)
 
     def test_components_function(self):
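
When one of these reference slices goes stale, it has to be regenerated on CPU, which is what the "start printing the tensors" steps in the commit message were doing. A hedged sketch of how that capture could look; format_expected_slice is a hypothetical helper, not part of this commit:

import numpy as np

def format_expected_slice(images: np.ndarray) -> str:
    # Run the pipeline once on CPU, print this, and paste it into the test.
    flat = images[0, -3:, -3:, -1].flatten()
    return "expected_slice = np.array([" + ", ".join(f"{v:.4f}" for v in flat) + "])"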

View File

@@ -133,6 +133,12 @@ class TextToVideoSDPipelineFastTests(PipelineTesterMixin, SDFunctionTesterMixin,
         }
         return inputs
 
+    def test_dict_tuple_outputs_equivalent(self):
+        expected_slice = None
+        if torch_device == "cpu":
+            expected_slice = np.array([0.4903, 0.5649, 0.5504, 0.5179, 0.4821, 0.5466, 0.4131, 0.5052, 0.5077])
+        return super().test_dict_tuple_outputs_equivalent(expected_slice=expected_slice)
+
     def test_text_to_video_default_case(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         components = self.get_dummy_components()

View File

@@ -206,6 +206,12 @@ class UniDiffuserPipelineFastTests(
         }
         return inputs
 
+    def test_dict_tuple_outputs_equivalent(self):
+        expected_slice = None
+        if torch_device == "cpu":
+            expected_slice = np.array([0.7489, 0.3722, 0.4475, 0.5630, 0.5923, 0.4992, 0.3936, 0.5844, 0.4975])
+        super().test_dict_tuple_outputs_equivalent(expected_slice=expected_slice)
+
     def test_unidiffuser_default_joint_v0(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         components = self.get_dummy_components()