
[tests] Fix HunyuanVideo Framepack device tests (#11789)

update
Author: Aryan
Date: 2025-06-24 13:49:37 +05:30
Committed by: GitHub
Parent: 7bc0a07b19
Commit: 474a248f10

3 changed files with 38 additions and 6 deletions


@@ -71,7 +71,6 @@ class HunyuanVideoFramepackPipelineFastTests(
     )
     supports_dduf = False
     # there is no xformers processor for Flux
     test_xformers_attention = False
     test_layerwise_casting = True
     test_group_offloading = True
@@ -360,6 +359,30 @@ class HunyuanVideoFramepackPipelineFastTests(
             "VAE tiling should not affect the inference results",
         )
 
+    def test_float16_inference(self, expected_max_diff=0.2):
+        # NOTE: this test needs a higher tolerance because of multiple forwards through
+        # the model, which compounds the overall fp32 vs fp16 numerical differences. It
+        # shouldn't be expected that the results are the same, so we bump the tolerance.
+        return super().test_float16_inference(expected_max_diff)
+
+    @unittest.skip("The image_encoder uses SiglipVisionModel, which does not support sequential CPU offloading.")
+    def test_sequential_cpu_offload_forward_pass(self):
+        # https://github.com/huggingface/transformers/blob/21cb353b7b4f77c6f5f5c3341d660f86ff416d04/src/transformers/models/siglip/modeling_siglip.py#L803
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls into
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered with a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanDiT because of the AttentionPooling layer).
+        pass
+
+    @unittest.skip("The image_encoder uses SiglipVisionModel, which does not support sequential CPU offloading.")
+    def test_sequential_offload_forward_pass_twice(self):
+        # https://github.com/huggingface/transformers/blob/21cb353b7b4f77c6f5f5c3341d660f86ff416d04/src/transformers/models/siglip/modeling_siglip.py#L803
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls into
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered with a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanDiT because of the AttentionPooling layer).
+        pass
+
     # TODO(aryan): Create a dummy gemma model with smol vocab size
     @unittest.skip(
         "A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to an embedding lookup error. This test uses a long prompt that causes the error."


@@ -124,14 +124,22 @@ class HunyuanDiTPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         max_diff = np.abs(image_slice.flatten() - expected_slice).max()
         self.assertLessEqual(max_diff, 1e-3)
 
-    @unittest.skip("Not supported.")
+    @unittest.skip("The HunyuanDiT Attention pooling layer does not support sequential CPU offloading.")
     def test_sequential_cpu_offload_forward_pass(self):
-        # TODO(YiYi) need to fix later
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls into
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered with a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanVideo Framepack).
         pass
 
-    @unittest.skip("Not supported.")
+    @unittest.skip("The HunyuanDiT Attention pooling layer does not support sequential CPU offloading.")
     def test_sequential_offload_forward_pass_twice(self):
-        # TODO(YiYi) need to fix later
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls into
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered with a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanVideo Framepack).
         pass
 
     def test_inference_batch_single_identical(self):
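The skip comments in both files describe the same failure mode: torch.nn.MultiheadAttention passes out_proj.weight and out_proj.bias directly into torch.nn.functional.multi_head_attention_forward, so out_proj.forward() is never invoked and a pre-forward offloading hook attached to it never runs. A minimal stand-alone repro (the hook here is a stand-in for the real offloading hook, not diffusers' implementation):

import torch
import torch.nn as nn

mha = nn.MultiheadAttention(embed_dim=8, num_heads=2, batch_first=True)
fired = []

# Stand-in for a sequential-offload pre-forward hook that would move
# out_proj's weights onto the accelerator right before it runs.
mha.out_proj.register_forward_pre_hook(lambda module, args: fired.append(True))

x = torch.randn(1, 4, 8)
mha(x, x, x)

# MultiheadAttention.forward hands out_proj.weight and out_proj.bias straight
# to the functional API, bypassing out_proj.forward(), so the hook never
# fires; in the offloading case the weights would stay on the CPU.
print(fired)  # [] because the pre-forward hook was never triggered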


@@ -2270,9 +2270,10 @@ class PipelineTesterMixin:
                 if hasattr(module, "_diffusers_hook")
             )
         )
-        for component_name in ["vae", "vqvae"]:
-            if hasattr(pipe, component_name):
-                getattr(pipe, component_name).to(torch_device)
+        for component_name in ["vae", "vqvae", "image_encoder"]:
+            component = getattr(pipe, component_name, None)
+            if isinstance(component, torch.nn.Module):
+                component.to(torch_device)
 
         def run_forward(pipe):
             torch.manual_seed(0)
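The new loop is stricter than the old hasattr check on purpose: an optional component such as image_encoder can exist as an attribute while being None, and calling .to() on it would raise. A minimal sketch of the pattern (DummyPipe is hypothetical, for illustration only):

import torch

class DummyPipe:  # hypothetical stand-in for a pipeline object
    vae = torch.nn.Linear(4, 4)
    image_encoder = None  # optional component that was never loaded

pipe = DummyPipe()
for name in ["vae", "vqvae", "image_encoder"]:
    component = getattr(pipe, name, None)       # "vqvae" is simply absent
    if isinstance(component, torch.nn.Module):  # also filters out None
        component.to("cpu")                     # torch_device in the real test

With the old version, hasattr(pipe, "image_encoder") is True even though the attribute is None, so getattr(pipe, "image_encoder").to(torch_device) would fail with an AttributeError.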