mirror of https://github.com/huggingface/diffusers.git
synced 2026-01-27 17:22:53 +03:00
Fast Tests on PR improvements: Batch Tests fixes (#5080)
* fix test
* initial commit
* change test
* updates:
* fix tests
* test fix
* test fix
* fix tests
* make test faster
* clean up
* fix precision in test
* fix precision
* Fix tests
* Fix logging test
* fix test
* fix test

---------

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
@@ -359,7 +359,7 @@ class AudioLDMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         self._test_attention_slicing_forward_pass(test_mean_pixel_difference=False)
 
     def test_inference_batch_single_identical(self):
-        self._test_inference_batch_single_identical(test_mean_pixel_difference=False)
+        self._test_inference_batch_single_identical()
 
     @unittest.skipIf(
         torch_device != "cuda" or not is_xformers_available(),
@@ -459,7 +459,7 @@ class AudioLDM2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
 
     def test_inference_batch_single_identical(self):
         # increase tolerance from 1e-4 -> 2e-4 to account for large composite model
-        self._test_inference_batch_single_identical(test_mean_pixel_difference=False, expected_max_diff=2e-4)
+        self._test_inference_batch_single_identical(expected_max_diff=2e-4)
 
     def test_save_load_local(self):
         # increase tolerance from 1e-4 -> 2e-4 to account for large composite model
@@ -96,7 +96,7 @@ class DiTPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         self.assertLessEqual(max_diff, 1e-3)
 
     def test_inference_batch_single_identical(self):
-        self._test_inference_batch_single_identical(relax_max_difference=True, expected_max_diff=1e-3)
+        self._test_inference_batch_single_identical(expected_max_diff=1e-3)
 
     @unittest.skipIf(
         torch_device != "cuda" or not is_xformers_available(),
@@ -224,15 +224,7 @@ class KandinskyPriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
 
     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-        test_mean_pixel_difference = False
-
-        self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            test_mean_pixel_difference=test_mean_pixel_difference,
-        )
+        self._test_inference_batch_single_identical(expected_max_diff=1e-2)
 
     @skip_mps
     def test_attention_slicing_forward_pass(self):
@@ -224,15 +224,7 @@ class KandinskyV22PriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
 
     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-        test_mean_pixel_difference = False
-
-        self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            test_mean_pixel_difference=test_mean_pixel_difference,
-        )
+        self._test_inference_batch_single_identical(expected_max_diff=1e-3)
 
     @skip_mps
     def test_attention_slicing_forward_pass(self):
@@ -234,15 +234,7 @@ class KandinskyV22PriorEmb2EmbPipelineFastTests(PipelineTesterMixin, unittest.Te
 
     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-        test_mean_pixel_difference = False
-
-        self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            test_mean_pixel_difference=test_mean_pixel_difference,
-        )
+        self._test_inference_batch_single_identical(expected_max_diff=1e-2)
 
     @skip_mps
     def test_attention_slicing_forward_pass(self):
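Across these Kandinsky prior tests, the hand-rolled test_max_difference / relax_max_difference / test_mean_pixel_difference switches collapse into a single expected_max_diff tolerance. A minimal sketch of what the simplified comparison reduces to, assuming single_out and batched_out are numpy outputs of the unbatched and batched runs (the function and argument names here are illustrative, not part of the commit):

import numpy as np

def check_batch_vs_single(single_out, batched_out, expected_max_diff=1e-2):
    # compare the first sample of the batched run against the unbatched run
    max_diff = np.abs(batched_out[0] - single_out[0]).max()
    assert max_diff < expected_max_diff, f"max diff {max_diff:.2e} >= {expected_max_diff:.2e}"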
@@ -373,7 +373,7 @@ class MusicLDMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         self._test_attention_slicing_forward_pass(test_mean_pixel_difference=False)
 
     def test_inference_batch_single_identical(self):
-        self._test_inference_batch_single_identical(test_mean_pixel_difference=False)
+        self._test_inference_batch_single_identical()
 
     @unittest.skipIf(
         torch_device != "cuda" or not is_xformers_available(),
@@ -44,11 +44,11 @@ class ShapEPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
 
     @property
     def text_embedder_hidden_size(self):
-        return 32
+        return 16
 
     @property
     def time_input_dim(self):
-        return 32
+        return 16
 
     @property
     def time_embed_dim(self):
@@ -201,14 +201,7 @@ class ShapEPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         self._test_inference_batch_consistent(batch_sizes=[1, 2])
 
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-
-        self._test_inference_batch_single_identical(
-            batch_size=2,
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-        )
+        self._test_inference_batch_single_identical(batch_size=2, expected_max_diff=6e-3)
 
     def test_num_images_per_prompt(self):
         components = self.get_dummy_components()
@@ -52,11 +52,11 @@ class ShapEImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
 
     @property
     def text_embedder_hidden_size(self):
-        return 32
+        return 16
 
     @property
     def time_input_dim(self):
-        return 32
+        return 16
 
     @property
     def time_embed_dim(self):
@@ -71,10 +71,10 @@ class ShapEImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         torch.manual_seed(0)
         config = CLIPVisionConfig(
             hidden_size=self.text_embedder_hidden_size,
-            image_size=64,
+            image_size=32,
             projection_dim=self.text_embedder_hidden_size,
-            intermediate_size=37,
-            num_attention_heads=4,
+            intermediate_size=24,
+            num_attention_heads=2,
             num_channels=3,
             num_hidden_layers=5,
             patch_size=1,
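The shrunken CLIPVisionConfig above is what makes the ShapE img2img fast tests cheap: halving image_size and cutting intermediate_size and num_attention_heads leaves a much smaller dummy image encoder. A standalone sketch of building such a tiny encoder with the values from the diff (the print is just to illustrate the size reduction):

import torch
from transformers import CLIPVisionConfig, CLIPVisionModel

torch.manual_seed(0)
config = CLIPVisionConfig(
    hidden_size=16,          # matches the shrunken text_embedder_hidden_size
    projection_dim=16,
    intermediate_size=24,
    num_attention_heads=2,
    num_hidden_layers=5,
    image_size=32,           # dummy inputs shrink to (1, 3, 32, 32) to match
    patch_size=1,
    num_channels=3,
)
model = CLIPVisionModel(config)
print(sum(p.numel() for p in model.parameters()))  # far fewer parameters than the old config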
@@ -170,7 +170,7 @@ class ShapEImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         return components
 
     def get_dummy_inputs(self, device, seed=0):
-        input_image = floats_tensor((1, 3, 64, 64), rng=random.Random(seed)).to(device)
+        input_image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device)
 
         if str(device).startswith("mps"):
             generator = torch.manual_seed(seed)
@@ -219,15 +219,12 @@ class ShapEImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
 
     def test_inference_batch_consistent(self):
         # NOTE: Larger batch sizes cause this test to timeout, only test on smaller batches
-        self._test_inference_batch_consistent(batch_sizes=[1, 2])
+        self._test_inference_batch_consistent(batch_sizes=[2])
 
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
         self._test_inference_batch_single_identical(
             batch_size=2,
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
+            expected_max_diff=5e-3,
         )
 
     def test_num_images_per_prompt(self):
@@ -499,14 +499,7 @@ class StableDiffusionPipelineFastTests(
         negative_prompt = None
         num_images_per_prompt = 1
         logger = logging.get_logger("diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion")
-
-        prompt = 25 * "@"
-        with CaptureLogger(logger) as cap_logger_3:
-            negative_text_embeddings_3, text_embeddings_3 = sd_pipe.encode_prompt(
-                prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt
-            )
-        if negative_text_embeddings_3 is not None:
-            text_embeddings_3 = torch.cat([negative_text_embeddings_3, text_embeddings_3])
+        logger.setLevel(logging.WARNING)
 
         prompt = 100 * "@"
         with CaptureLogger(logger) as cap_logger:
@@ -516,6 +509,9 @@ class StableDiffusionPipelineFastTests(
         if negative_text_embeddings is not None:
             text_embeddings = torch.cat([negative_text_embeddings, text_embeddings])
 
+        # 100 - 77 + 1 (BOS token) + 1 (EOS token) = 25
+        assert cap_logger.out.count("@") == 25
+
         negative_prompt = "Hello"
         with CaptureLogger(logger) as cap_logger_2:
             negative_text_embeddings_2, text_embeddings_2 = sd_pipe.encode_prompt(
@@ -524,12 +520,18 @@ class StableDiffusionPipelineFastTests(
         if negative_text_embeddings_2 is not None:
             text_embeddings_2 = torch.cat([negative_text_embeddings_2, text_embeddings_2])
 
-        assert cap_logger.out == cap_logger_2.out
-
+        prompt = 25 * "@"
+        with CaptureLogger(logger) as cap_logger_3:
+            negative_text_embeddings_3, text_embeddings_3 = sd_pipe.encode_prompt(
+                prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt
+            )
+        if negative_text_embeddings_3 is not None:
+            text_embeddings_3 = torch.cat([negative_text_embeddings_3, text_embeddings_3])
+
         assert text_embeddings_3.shape == text_embeddings_2.shape == text_embeddings.shape
         assert text_embeddings.shape[1] == 77
 
+        assert cap_logger.out == cap_logger_2.out
+        # 100 - 77 + 1 (BOS token) + 1 (EOS token) = 25
+        assert cap_logger.out.count("@") == 25
         assert cap_logger_3.out == ""
 
     def test_stable_diffusion_height_width_opt(self):
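The "== 25" assertions above follow from CLIP's tokenizer limits: the maximum sequence length is 77, two slots of which go to the BOS and EOS tokens, and each "@" tokenizes to exactly one token. A worked check of the arithmetic:

max_len = 77          # CLIP text encoder sequence length
specials = 2          # BOS + EOS
prompt_tokens = 100   # 100 * "@" -> one token per "@"
truncated = prompt_tokens - (max_len - specials)
assert truncated == 25  # the 25 "@"s echoed back in the truncation warning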
@@ -250,6 +250,7 @@ class StableDiffusion2PipelineFastTests(
         negative_prompt = None
         num_images_per_prompt = 1
         logger = logging.get_logger("diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion")
+        logger.setLevel(logging.WARNING)
 
         prompt = 25 * "@"
         with CaptureLogger(logger) as cap_logger_3:
@@ -182,9 +182,7 @@ class StableUnCLIPPipelineFastTests(
     # Overriding PipelineTesterMixin::test_inference_batch_single_identical
     # because UnCLIP undeterminism requires a looser check.
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device in ["cpu", "mps"]
-
-        self._test_inference_batch_single_identical(test_max_difference=test_max_difference)
+        self._test_inference_batch_single_identical(expected_max_diff=1e-3)
 
 
 @slow
@@ -196,9 +196,7 @@ class StableUnCLIPImg2ImgPipelineFastTests(
     # Overriding PipelineTesterMixin::test_inference_batch_single_identical
     # because undeterminism requires a looser check.
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device in ["cpu", "mps"]
-
-        self._test_inference_batch_single_identical(test_max_difference=test_max_difference)
+        self._test_inference_batch_single_identical(expected_max_diff=1e-3)
 
     @unittest.skipIf(
         torch_device != "cuda" or not is_xformers_available(),
@@ -374,11 +374,11 @@ class PipelineTesterMixin:
             f"Required optional parameters not present: {remaining_required_optional_parameters}",
         )
 
-    def test_inference_batch_consistent(self, batch_sizes=[2, 4, 13]):
+    def test_inference_batch_consistent(self, batch_sizes=[2]):
         self._test_inference_batch_consistent(batch_sizes=batch_sizes)
 
     def _test_inference_batch_consistent(
-        self, batch_sizes=[2, 4, 13], additional_params_copy_to_batched_inputs=["num_inference_steps"]
+        self, batch_sizes=[2], additional_params_copy_to_batched_inputs=["num_inference_steps"]
     ):
        components = self.get_dummy_components()
        pipe = self.pipeline_class(**components)
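Shrinking the default sweep from [2, 4, 13] to [2] means the consistency test runs one batched forward pass instead of three, which is where much of the speedup comes from; pipelines with batch-dependent code paths can still widen it locally. A sketch of such an override (the class name is hypothetical, and the import path is assumed from the repo's test layout):

import unittest

from tests.pipelines.test_pipelines_common import PipelineTesterMixin  # path assumed

class MyPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    def test_inference_batch_consistent(self):
        # the default is now batch_sizes=[2]; widen the sweep only where it matters
        self._test_inference_batch_consistent(batch_sizes=[2, 4])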
@@ -386,137 +386,103 @@ class PipelineTesterMixin:
         pipe.set_progress_bar_config(disable=None)
 
         inputs = self.get_dummy_inputs(torch_device)
+        inputs["generator"] = self.get_generator(0)
 
         logger = logging.get_logger(pipe.__module__)
         logger.setLevel(level=diffusers.logging.FATAL)
 
-        # batchify inputs
+        # prepare batched inputs
+        batched_inputs = []
         for batch_size in batch_sizes:
-            batched_inputs = {}
-            for name, value in inputs.items():
-                if name in self.batch_params:
-                    # prompt is string
-                    if name == "prompt":
-                        len_prompt = len(value)
-                        # make unequal batch sizes
-                        batched_inputs[name] = [value[: len_prompt // i] for i in range(1, batch_size + 1)]
-
-                        # make last batch super long
-                        batched_inputs[name][-1] = 100 * "very long"
-                    # or else we have images
-                    else:
-                        batched_inputs[name] = batch_size * [value]
-                elif name == "batch_size":
-                    batched_inputs[name] = batch_size
-                else:
-                    batched_inputs[name] = value
-
-            for arg in additional_params_copy_to_batched_inputs:
-                batched_inputs[arg] = inputs[arg]
-
-            batched_inputs["output_type"] = "np"
-
-            if self.pipeline_class.__name__ == "DanceDiffusionPipeline":
-                batched_inputs.pop("output_type")
-
-            output = pipe(**batched_inputs)
-
-            assert len(output[0]) == batch_size
-
-            batched_inputs["output_type"] = "np"
-
-            if self.pipeline_class.__name__ == "DanceDiffusionPipeline":
-                batched_inputs.pop("output_type")
-
-            output = pipe(**batched_inputs)[0]
-
-            assert output.shape[0] == batch_size
+            batched_input = {}
+            batched_input.update(inputs)
+
+            for name in self.batch_params:
+                if name not in inputs:
+                    continue
+
+                value = inputs[name]
+                if name == "prompt":
+                    len_prompt = len(value)
+                    # make unequal batch sizes
+                    batched_input[name] = [value[: len_prompt // i] for i in range(1, batch_size + 1)]
+
+                    # make last batch super long
+                    batched_input[name][-1] = 100 * "very long"
+                else:
+                    batched_input[name] = batch_size * [value]
+
+            if "generator" in inputs:
+                batched_input["generator"] = [self.get_generator(i) for i in range(batch_size)]
+
+            if "batch_size" in inputs:
+                batched_input["batch_size"] = batch_size
+
+            batched_inputs.append(batched_input)
 
         logger.setLevel(level=diffusers.logging.WARNING)
+        for batch_size, batched_input in zip(batch_sizes, batched_inputs):
+            output = pipe(**batched_input)
+            assert len(output[0]) == batch_size
 
     def test_inference_batch_single_identical(self, batch_size=3, expected_max_diff=1e-4):
         self._test_inference_batch_single_identical(batch_size=batch_size, expected_max_diff=expected_max_diff)
 
     def _test_inference_batch_single_identical(
         self,
-        batch_size=3,
-        test_max_difference=None,
-        test_mean_pixel_difference=None,
-        relax_max_difference=False,
+        batch_size=2,
         expected_max_diff=1e-4,
         additional_params_copy_to_batched_inputs=["num_inference_steps"],
     ):
-        if test_max_difference is None:
-            # TODO(Pedro) - not sure why, but not at all reproducible at the moment it seems
-            # make sure that batched and non-batched is identical
-            test_max_difference = torch_device != "mps"
-
-        if test_mean_pixel_difference is None:
-            # TODO same as above
-            test_mean_pixel_difference = torch_device != "mps"
-
-        generator_device = "cpu"
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
+        for components in pipe.components.values():
+            if hasattr(components, "set_default_attn_processor"):
+                components.set_default_attn_processor()
+
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
 
-        inputs = self.get_dummy_inputs(generator_device)
+        inputs = self.get_dummy_inputs(torch_device)
+        # Reset generator in case it is has been used in self.get_dummy_inputs
+        inputs["generator"] = self.get_generator(0)
 
         logger = logging.get_logger(pipe.__module__)
         logger.setLevel(level=diffusers.logging.FATAL)
 
         # batchify inputs
         batched_inputs = {}
-        batch_size = batch_size
-        for name, value in inputs.items():
-            if name in self.batch_params:
-                # prompt is string
-                if name == "prompt":
-                    len_prompt = len(value)
-                    # make unequal batch sizes
-                    batched_inputs[name] = [value[: len_prompt // i] for i in range(1, batch_size + 1)]
-
-                    # make last batch super long
-                    batched_inputs[name][-1] = 100 * "very long"
-                # or else we have images
-                else:
-                    batched_inputs[name] = batch_size * [value]
-            elif name == "batch_size":
-                batched_inputs[name] = batch_size
-            elif name == "generator":
-                batched_inputs[name] = [self.get_generator(i) for i in range(batch_size)]
-            else:
-                batched_inputs[name] = value
+        batched_inputs.update(inputs)
+
+        for name in self.batch_params:
+            if name not in inputs:
+                continue
+
+            value = inputs[name]
+            if name == "prompt":
+                len_prompt = len(value)
+                batched_inputs[name] = [value[: len_prompt // i] for i in range(1, batch_size + 1)]
+                batched_inputs[name][-1] = 100 * "very long"
+            else:
+                batched_inputs[name] = batch_size * [value]
+
+        if "generator" in inputs:
+            batched_inputs["generator"] = [self.get_generator(i) for i in range(batch_size)]
+
+        if "batch_size" in inputs:
+            batched_inputs["batch_size"] = batch_size
 
         for arg in additional_params_copy_to_batched_inputs:
             batched_inputs[arg] = inputs[arg]
 
         if self.pipeline_class.__name__ != "DanceDiffusionPipeline":
             batched_inputs["output_type"] = "np"
 
+        output = pipe(**inputs)
         output_batch = pipe(**batched_inputs)
 
         assert output_batch[0].shape[0] == batch_size
 
-        inputs["generator"] = self.get_generator(0)
-
-        output = pipe(**inputs)
-
-        logger.setLevel(level=diffusers.logging.WARNING)
-        if test_max_difference:
-            if relax_max_difference:
-                # Taking the median of the largest <n> differences
-                # is resilient to outliers
-                diff = np.abs(output_batch[0][0] - output[0][0])
-                diff = diff.flatten()
-                diff.sort()
-                max_diff = np.median(diff[-5:])
-            else:
-                max_diff = np.abs(output_batch[0][0] - output[0][0]).max()
-            assert max_diff < expected_max_diff
-
-        if test_mean_pixel_difference:
-            assert_mean_pixel_difference(output_batch[0][0], output[0][0])
+        max_diff = np.abs(output_batch[0][0] - output[0][0]).max()
+        assert max_diff < expected_max_diff
 
     def test_dict_tuple_outputs_equivalent(self, expected_max_difference=1e-4):
         components = self.get_dummy_components()
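The deleted relax_max_difference branch compared the median of the five largest per-element differences rather than the strict maximum, so a handful of outlier pixels could not fail the test; the new code always takes the plain max against expected_max_diff. A standalone sketch of the old relaxed comparison, for reference (array names assumed):

import numpy as np

def relaxed_max_diff(a, b, n=5):
    # median of the n largest absolute differences is resilient to outliers
    diff = np.sort(np.abs(a - b).flatten())
    return np.median(diff[-n:])

a, b = np.zeros(100), np.zeros(100)
b[0] = 1.0                            # a single outlier pixel
assert np.abs(a - b).max() == 1.0     # the strict max trips on it
assert relaxed_max_diff(a, b) == 0.0  # the relaxed median ignores it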
@@ -528,8 +494,9 @@ class PipelineTesterMixin:
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
 
-        output = pipe(**self.get_dummy_inputs(torch_device))[0]
-        output_tuple = pipe(**self.get_dummy_inputs(torch_device), return_dict=False)[0]
+        generator_device = "cpu"
+        output = pipe(**self.get_dummy_inputs(generator_device))[0]
+        output_tuple = pipe(**self.get_dummy_inputs(generator_device), return_dict=False)[0]
 
         max_diff = np.abs(to_np(output) - to_np(output_tuple)).max()
         self.assertLess(max_diff, expected_max_difference)
@@ -710,11 +677,12 @@ class PipelineTesterMixin:
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
 
-        inputs = self.get_dummy_inputs(torch_device)
+        generator_device = "cpu"
+        inputs = self.get_dummy_inputs(generator_device)
         output_without_slicing = pipe(**inputs)[0]
 
         pipe.enable_attention_slicing(slice_size=1)
-        inputs = self.get_dummy_inputs(torch_device)
+        inputs = self.get_dummy_inputs(generator_device)
         output_with_slicing = pipe(**inputs)[0]
 
         if test_max_difference:
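Creating the dummy inputs with a CPU generator while the pipeline itself runs on torch_device keeps the initial latents bit-identical across CUDA, MPS, and CPU runs. A small sketch of the pattern (get_generator here mirrors the mixin helper, an assumption for illustration):

import torch

def get_generator(seed):
    # seeding on CPU yields the same random stream regardless of the test device
    return torch.Generator(device="cpu").manual_seed(seed)

latents_a = torch.randn(1, 4, 8, 8, generator=get_generator(0))
latents_b = torch.randn(1, 4, 8, 8, generator=get_generator(0))
assert torch.equal(latents_a, latents_b)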
@@ -62,14 +62,14 @@ class TextToVideoSDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def get_dummy_components(self):
         torch.manual_seed(0)
         unet = UNet3DConditionModel(
-            block_out_channels=(32, 64, 64, 64),
+            block_out_channels=(32, 32),
             layers_per_block=2,
             sample_size=32,
             in_channels=4,
             out_channels=4,
-            down_block_types=("CrossAttnDownBlock3D", "CrossAttnDownBlock3D", "CrossAttnDownBlock3D", "DownBlock3D"),
-            up_block_types=("UpBlock3D", "CrossAttnUpBlock3D", "CrossAttnUpBlock3D", "CrossAttnUpBlock3D"),
-            cross_attention_dim=32,
+            down_block_types=("CrossAttnDownBlock3D", "DownBlock3D"),
+            up_block_types=("UpBlock3D", "CrossAttnUpBlock3D"),
+            cross_attention_dim=4,
             attention_head_dim=4,
         )
         scheduler = DDIMScheduler(
@@ -81,27 +81,27 @@ class TextToVideoSDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         )
         torch.manual_seed(0)
         vae = AutoencoderKL(
-            block_out_channels=[32, 64],
+            block_out_channels=(32,),
             in_channels=3,
             out_channels=3,
-            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
-            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
+            down_block_types=["DownEncoderBlock2D"],
+            up_block_types=["UpDecoderBlock2D"],
             latent_channels=4,
-            sample_size=128,
+            sample_size=32,
         )
         torch.manual_seed(0)
         text_encoder_config = CLIPTextConfig(
             bos_token_id=0,
             eos_token_id=2,
-            hidden_size=32,
-            intermediate_size=37,
+            hidden_size=4,
+            intermediate_size=16,
             layer_norm_eps=1e-05,
-            num_attention_heads=4,
-            num_hidden_layers=5,
+            num_attention_heads=2,
+            num_hidden_layers=2,
             pad_token_id=1,
             vocab_size=1000,
             hidden_act="gelu",
-            projection_dim=512,
+            projection_dim=32,
         )
         text_encoder = CLIPTextModel(text_encoder_config)
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
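One constraint worth noticing in the shrunken components: the UNet's cross_attention_dim (4) has to match the text encoder's hidden_size (4), because the encoder's last hidden states feed the UNet's cross-attention layers, while projection_dim only affects the pooled output. A small hypothetical sanity check mirroring the dummy configs above:

# hypothetical check, not part of the commit
unet_kwargs = {"cross_attention_dim": 4}
text_encoder_kwargs = {"hidden_size": 4, "projection_dim": 32}
assert unet_kwargs["cross_attention_dim"] == text_encoder_kwargs["hidden_size"]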
@@ -141,8 +141,8 @@ class TextToVideoSDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         frames = sd_pipe(**inputs).frames
         image_slice = frames[0][-3:, -3:, -1]
 
-        assert frames[0].shape == (64, 64, 3)
-        expected_slice = np.array([158.0, 160.0, 153.0, 125.0, 100.0, 121.0, 111.0, 93.0, 113.0])
+        assert frames[0].shape == (32, 32, 3)
+        expected_slice = np.array([91.0, 152.0, 66.0, 192.0, 94.0, 126.0, 101.0, 123.0, 152.0])
 
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
@@ -82,7 +82,7 @@ class VideoToVideoSDPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             beta_start=0.00085,
             beta_end=0.012,
             beta_schedule="scaled_linear",
-            clip_sample=False,
+            clip_sample=True,
             set_alpha_to_one=False,
         )
         torch.manual_seed(0)
@@ -373,8 +373,6 @@ class UnCLIPPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     # because UnCLIP undeterminism requires a looser check.
     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
         additional_params_copy_to_batched_inputs = [
             "prior_num_inference_steps",
             "decoder_num_inference_steps",
@@ -382,9 +380,7 @@ class UnCLIPPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         ]
 
         self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            additional_params_copy_to_batched_inputs=additional_params_copy_to_batched_inputs,
+            additional_params_copy_to_batched_inputs=additional_params_copy_to_batched_inputs, expected_max_diff=5e-3
         )
 
     def test_inference_batch_consistent(self):
@@ -448,17 +448,12 @@ class UnCLIPImageVariationPipelineFastTests(PipelineTesterMixin, unittest.TestCa
     # because UnCLIP undeterminism requires a looser check.
     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
         additional_params_copy_to_batched_inputs = [
             "decoder_num_inference_steps",
             "super_res_num_inference_steps",
         ]
 
         self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            additional_params_copy_to_batched_inputs=additional_params_copy_to_batched_inputs,
+            additional_params_copy_to_batched_inputs=additional_params_copy_to_batched_inputs, expected_max_diff=5e-3
         )
 
     def test_inference_batch_consistent(self):
@@ -170,15 +170,7 @@ class WuerstchenDecoderPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
 
     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-        test_mean_pixel_difference = False
-
-        self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            test_mean_pixel_difference=test_mean_pixel_difference,
-        )
+        self._test_inference_batch_single_identical(expected_max_diff=1e-5)
 
     @skip_mps
     def test_attention_slicing_forward_pass(self):
@@ -166,14 +166,7 @@ class WuerstchenPriorPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
 
     @skip_mps
     def test_inference_batch_single_identical(self):
-        test_max_difference = torch_device == "cpu"
-        relax_max_difference = True
-        test_mean_pixel_difference = False
-
-        self._test_inference_batch_single_identical(
-            test_max_difference=test_max_difference,
-            relax_max_difference=relax_max_difference,
-            test_mean_pixel_difference=test_mean_pixel_difference,
-            expected_max_diff=2e-1,
-        )
+        self._test_inference_batch_single_identical(expected_max_diff=2e-1)