From 474a248f10a9ec3d02fb9bd5f220d5bec158bfd3 Mon Sep 17 00:00:00 2001
From: Aryan
Date: Tue, 24 Jun 2025 13:49:37 +0530
Subject: [PATCH] [tests] Fix HunyuanVideo Framepack device tests (#11789)

update
---
 .../test_hunyuan_video_framepack.py          | 25 ++++++++++++++++++-
 .../pipelines/hunyuandit/test_hunyuan_dit.py | 12 +++++++--
 tests/pipelines/test_pipelines_common.py     |  7 +++---
 3 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/tests/pipelines/hunyuan_video/test_hunyuan_video_framepack.py b/tests/pipelines/hunyuan_video/test_hunyuan_video_framepack.py
index f4408e7cd5..9f685d34c9 100644
--- a/tests/pipelines/hunyuan_video/test_hunyuan_video_framepack.py
+++ b/tests/pipelines/hunyuan_video/test_hunyuan_video_framepack.py
@@ -71,7 +71,6 @@ class HunyuanVideoFramepackPipelineFastTests(
     )
 
     supports_dduf = False
-    # there is no xformers processor for Flux
     test_xformers_attention = False
     test_layerwise_casting = True
     test_group_offloading = True
@@ -360,6 +359,30 @@ class HunyuanVideoFramepackPipelineFastTests(
             "VAE tiling should not affect the inference results",
         )
 
+    def test_float16_inference(self, expected_max_diff=0.2):
+        # NOTE: this test needs a higher tolerance because of multiple forwards through
+        # the model, which compounds the overall fp32 vs fp16 numerical differences. It
+        # shouldn't be expected that the results are the same, so we bump the tolerance.
+        return super().test_float16_inference(expected_max_diff)
+
+    @unittest.skip("The image_encoder uses SiglipVisionModel, which does not support sequential CPU offloading.")
+    def test_sequential_cpu_offload_forward_pass(self):
+        # https://github.com/huggingface/transformers/blob/21cb353b7b4f77c6f5f5c3341d660f86ff416d04/src/transformers/models/siglip/modeling_siglip.py#L803
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls to
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered with a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanDiT because of AttentionPooling layer).
+        pass
+
+    @unittest.skip("The image_encoder uses SiglipVisionModel, which does not support sequential CPU offloading.")
+    def test_sequential_offload_forward_pass_twice(self):
+        # https://github.com/huggingface/transformers/blob/21cb353b7b4f77c6f5f5c3341d660f86ff416d04/src/transformers/models/siglip/modeling_siglip.py#L803
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls to
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered with a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanDiT because of AttentionPooling layer).
+        pass
+
     # TODO(aryan): Create a dummy gemma model with smol vocab size
     @unittest.skip(
         "A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
diff --git a/tests/pipelines/hunyuandit/test_hunyuan_dit.py b/tests/pipelines/hunyuandit/test_hunyuan_dit.py
index 5aa6372a89..7a5f807213 100644
--- a/tests/pipelines/hunyuandit/test_hunyuan_dit.py
+++ b/tests/pipelines/hunyuandit/test_hunyuan_dit.py
@@ -124,14 +124,22 @@ class HunyuanDiTPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         max_diff = np.abs(image_slice.flatten() - expected_slice).max()
         self.assertLessEqual(max_diff, 1e-3)
 
-    @unittest.skip("Not supported.")
+    @unittest.skip("The HunyuanDiT Attention pooling layer does not support sequential CPU offloading.")
     def test_sequential_cpu_offload_forward_pass(self):
         # TODO(YiYi) need to fix later
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls to
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered with a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanVideo Framepack)
         pass
 
-    @unittest.skip("Not supported.")
+    @unittest.skip("The HunyuanDiT Attention pooling layer does not support sequential CPU offloading.")
     def test_sequential_offload_forward_pass_twice(self):
         # TODO(YiYi) need to fix later
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls to
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered with a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanVideo Framepack)
         pass
 
     def test_inference_batch_single_identical(self):
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 687a28294c..207cff2a3c 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -2270,9 +2270,10 @@ class PipelineTesterMixin:
                 if hasattr(module, "_diffusers_hook")
             )
         )
-        for component_name in ["vae", "vqvae"]:
-            if hasattr(pipe, component_name):
-                getattr(pipe, component_name).to(torch_device)
+        for component_name in ["vae", "vqvae", "image_encoder"]:
+            component = getattr(pipe, component_name, None)
+            if isinstance(component, torch.nn.Module):
+                component.to(torch_device)
 
         def run_forward(pipe):
             torch.manual_seed(0)
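
The sequential-offload skips in this patch all trace back to the same behavior in torch.nn.MultiheadAttention. The sketch below is a minimal, stand-alone illustration (not the diffusers/accelerate hook code, only stock PyTorch): MultiheadAttention.forward hands its projection weights as plain tensors to torch.nn.functional.multi_head_attention_forward, so the out_proj submodule's own forward is never invoked and a device-placement pre-hook registered on it never runs, leaving its weights on the CPU.

    import torch
    import torch.nn as nn

    # Hypothetical stand-in for a per-module offloading hook: a real sequential CPU
    # offload hook would move the module's weights to the accelerator in a forward
    # pre-hook. Here we only record whether the hook ever runs.
    fired = []

    def make_hook(name):
        def hook(module, args):
            fired.append(name)
        return hook

    mha = nn.MultiheadAttention(embed_dim=8, num_heads=2, batch_first=True)
    for name, submodule in mha.named_modules():
        if name:  # register on every submodule (here only "out_proj"), not the root
            submodule.register_forward_pre_hook(make_hook(name))

    x = torch.randn(1, 4, 8)
    mha(x, x, x)
    print(fired)  # [] -- out_proj.forward is never called, so its hook never fires

    # A plain Linear, by contrast, triggers its pre-hook on every call, which is
    # exactly what hook-based sequential offloading relies on.
    linear = nn.Linear(8, 8)
    linear.register_forward_pre_hook(make_hook("linear"))
    linear(x)
    print(fired)  # ["linear"]

HunyuanDiT's AttentionPooling layer and SiglipVisionModel's attention both go through this code path, which is why the patch skips the sequential offload tests for these pipelines rather than loosening tolerances.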