Mirror of https://github.com/huggingface/diffusers.git (synced 2026-01-27 17:22:53 +03:00)
@@ -71,7 +71,6 @@ class HunyuanVideoFramepackPipelineFastTests(
     )
     supports_dduf = False
     # there is no xformers processor for Flux
     test_xformers_attention = False
     test_layerwise_casting = True
     test_group_offloading = True
@@ -360,6 +359,30 @@ class HunyuanVideoFramepackPipelineFastTests(
             "VAE tiling should not affect the inference results",
         )
 
+    def test_float16_inference(self, expected_max_diff=0.2):
+        # NOTE: this test needs a higher tolerance because of the multiple forward passes through
+        # the model, which compound the overall fp32 vs fp16 numerical differences. It
+        # shouldn't be expected that the results are the same, so we bump the tolerance.
+        return super().test_float16_inference(expected_max_diff)
+
+    @unittest.skip("The image_encoder uses SiglipVisionModel, which does not support sequential CPU offloading.")
+    def test_sequential_cpu_offload_forward_pass(self):
+        # https://github.com/huggingface/transformers/blob/21cb353b7b4f77c6f5f5c3341d660f86ff416d04/src/transformers/models/siglip/modeling_siglip.py#L803
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered by a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanDiT, because of its AttentionPooling layer).
+        pass
+
+    @unittest.skip("The image_encoder uses SiglipVisionModel, which does not support sequential CPU offloading.")
+    def test_sequential_offload_forward_pass_twice(self):
+        # https://github.com/huggingface/transformers/blob/21cb353b7b4f77c6f5f5c3341d660f86ff416d04/src/transformers/models/siglip/modeling_siglip.py#L803
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered by a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanDiT, because of its AttentionPooling layer).
+        pass
+
     # TODO(aryan): Create a dummy gemma model with smol vocab size
     @unittest.skip(
         "A very small vocab size is used for fast tests. So, any prompt other than the empty default used in the other tests will lead to an embedding lookup error. This test uses a long prompt that causes the error."
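The float16 note above attributes the looser tolerance to low-precision error compounding over multiple forward passes. A minimal, hedged illustration of that compounding effect using plain tensor accumulation (not the actual Framepack pipeline math):

import torch

# Repeatedly accumulating a value in fp16 drifts much further from the fp32 result than a
# single operation does -- the same compounding effect the test comment describes for
# repeated transformer forwards. The step size and iteration count are illustrative only.
step_fp32 = torch.tensor(0.001, dtype=torch.float32)
step_fp16 = step_fp32.to(torch.float16)

acc_fp32 = torch.zeros((), dtype=torch.float32)
acc_fp16 = torch.zeros((), dtype=torch.float16)
for _ in range(1000):
    acc_fp32 = acc_fp32 + step_fp32
    acc_fp16 = acc_fp16 + step_fp16

print(acc_fp32.item())  # very close to 1.0
print(acc_fp16.item())  # visibly drifted after 1000 low-precision additions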
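The skip above refers to a dummy text encoder with a tiny vocabulary. A short sketch of the failure mode it describes; the vocab size and token ids here are made up, not the values used in the test:

import torch

# With a very small embedding table, token ids produced by a real tokenizer for a long
# prompt can exceed the table size and fail the lookup. Illustrative sizes only.
vocab_size = 16
text_embedding = torch.nn.Embedding(vocab_size, 8)

token_ids = torch.tensor([[3, 7, 42]])  # 42 >= vocab_size, as a long real prompt would easily produce
try:
    text_embedding(token_ids)
except IndexError as err:
    print(f"embedding lookup error: {err}")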
@@ -124,14 +124,22 @@ class HunyuanDiTPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         max_diff = np.abs(image_slice.flatten() - expected_slice).max()
         self.assertLessEqual(max_diff, 1e-3)
 
-    @unittest.skip("Not supported.")
+    @unittest.skip("The HunyuanDiT Attention pooling layer does not support sequential CPU offloading.")
     def test_sequential_cpu_offload_forward_pass(self):
-        # TODO(YiYi) need to fix later
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered by a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanVideo Framepack).
         pass
 
-    @unittest.skip("Not supported.")
+    @unittest.skip("The HunyuanDiT Attention pooling layer does not support sequential CPU offloading.")
     def test_sequential_offload_forward_pass_twice(self):
-        # TODO(YiYi) need to fix later
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered by a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanVideo Framepack).
         pass
 
     def test_inference_batch_single_identical(self):
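Both sets of skipped sequential-offload tests above (Framepack and HunyuanDiT) give the same reason: torch.nn.MultiheadAttention calls `torch.nn.functional.multi_head_attention_forward` with its projection weights and bias directly, so a hook attached to those submodules never runs. A minimal sketch of that behaviour, not the diffusers/accelerate offloading hook itself:

import torch

# nn.MultiheadAttention's forward passes out_proj.weight / out_proj.bias to the functional
# API instead of calling out_proj(...), so a hook registered on the out_proj submodule is
# never triggered -- which is why an offloading hook placed there never gets the chance to
# move the weights off the CPU.
mha = torch.nn.MultiheadAttention(embed_dim=8, num_heads=2, batch_first=True)

hook_calls = []
mha.out_proj.register_forward_pre_hook(lambda module, args: hook_calls.append("out_proj"))

x = torch.randn(1, 4, 8)
mha(x, x, x)

print(hook_calls)  # [] -- the attention forward ran, but the submodule hook never fired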
@@ -2270,9 +2270,10 @@ class PipelineTesterMixin:
                 if hasattr(module, "_diffusers_hook")
             )
         )
-        for component_name in ["vae", "vqvae"]:
-            if hasattr(pipe, component_name):
-                getattr(pipe, component_name).to(torch_device)
+        for component_name in ["vae", "vqvae", "image_encoder"]:
+            component = getattr(pipe, component_name, None)
+            if isinstance(component, torch.nn.Module):
+                component.to(torch_device)
 
         def run_forward(pipe):
             torch.manual_seed(0)
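The last hunk swaps a bare `hasattr` check for `getattr(..., None)` plus an `isinstance` check and adds `image_encoder` to the list. A hedged sketch of why the extra check matters: optional components can be registered as `None`, and calling `.to()` on those would fail. `DummyPipe` below is made up for illustration; it is not the real tester mixin or a diffusers pipeline:

import torch

torch_device = "cpu"  # stand-in for the device the test suite targets

class DummyPipe:
    # Made-up pipeline-like object: one real module, one optional component left as None.
    def __init__(self):
        self.vae = torch.nn.Identity()
        self.image_encoder = None

pipe = DummyPipe()

# Same pattern as the updated loop: missing attributes come back as None via getattr, and
# the isinstance check skips both missing components and components registered as None,
# so only real modules are moved to the target device.
for component_name in ["vae", "vqvae", "image_encoder"]:
    component = getattr(pipe, component_name, None)
    if isinstance(component, torch.nn.Module):
        component.to(torch_device)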