
Revert "tighten compilation tests for quantization"

This reverts commit 8d431dc967.
This commit is contained in:
sayakpaul
2025-07-28 20:19:38 +05:30
parent d5c1772dc3
commit f38a64443f
2 changed files with 2 additions and 9 deletions

```diff
@@ -886,7 +886,6 @@ class Bnb4BitCompileTests(QuantCompileTests, unittest.TestCase):
             components_to_quantize=["transformer", "text_encoder_2"],
         )
 
     @require_bitsandbytes_version_greater("0.46.1")
     def test_torch_compile(self):
-        torch._dynamo.config.capture_dynamic_output_shape_ops = True
         super().test_torch_compile()
```
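
For context on the reverted line: with bitsandbytes 4-bit quantization, some ops end up with data-dependent output shapes, which Dynamo graph-breaks on by default; `capture_dynamic_output_shape_ops = True` opts into tracing them with unbacked symbolic shapes so `fullgraph=True` can still succeed. A minimal standalone sketch of the flag's effect (`keep_positive` is a hypothetical stand-in, not code from this repository):

```python
import torch

# A function whose output shape depends on tensor *values*: boolean-mask
# indexing lowers to nonzero(), whose result size is unknown at trace time.
def keep_positive(x: torch.Tensor) -> torch.Tensor:
    return x[x > 0]

# Without this flag, Dynamo graph-breaks on the data-dependent op, and
# fullgraph=True turns that graph break into a hard error.
torch._dynamo.config.capture_dynamic_output_shape_ops = True

compiled = torch.compile(keep_positive, fullgraph=True)
print(compiled(torch.randn(8)))  # traced with an unbacked symbolic size
```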

```diff
@@ -56,18 +56,12 @@ class QuantCompileTests:
         pipe.transformer.compile(fullgraph=True)
 
         # small resolutions to ensure speedy execution.
-        with torch._dynamo.config.patch(error_on_recompile=True):
-            pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
+        pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
 
     def _test_torch_compile_with_cpu_offload(self, torch_dtype=torch.bfloat16):
         pipe = self._init_pipeline(self.quantization_config, torch_dtype)
         pipe.enable_model_cpu_offload()
-        # regional compilation is better for offloading.
-        # see: https://pytorch.org/blog/torch-compile-and-diffusers-a-hands-on-guide-to-peak-performance/
-        if getattr(pipe.transformer, "_repeated_blocks"):
-            pipe.transformer.compile_repeated_blocks(fullgraph=True)
-        else:
-            pipe.transformer.compile()
+        pipe.transformer.compile()
         # small resolutions to ensure speedy execution.
         pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
 
```