From 5c4eee56e5807ce76ba71919168cd7dd39fcf44e Mon Sep 17 00:00:00 2001
From: DN6 <dhruv.nair@gmail.com>
Date: Mon, 4 Aug 2025 21:37:06 +0530
Subject: [PATCH] update

---
 .github/workflows/nightly_tests.yml    |  2 +-
 docs/source/en/quantization/gguf.md    |  2 +-
 src/diffusers/quantizers/gguf/utils.py |  2 +-
 src/diffusers/utils/testing_utils.py   | 13 +++++++++++++
 tests/quantization/gguf/test_gguf.py   |  2 ++
 5 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml
index 16e1a70b84..be3c59d08d 100644
--- a/.github/workflows/nightly_tests.yml
+++ b/.github/workflows/nightly_tests.yml
@@ -333,7 +333,7 @@ jobs:
             additional_deps: ["peft"]
           - backend: "gguf"
             test_location: "gguf"
-            additional_deps: ["peft"]
+            additional_deps: ["peft", "kernels"]
           - backend: "torchao"
             test_location: "torchao"
             additional_deps: []
diff --git a/docs/source/en/quantization/gguf.md b/docs/source/en/quantization/gguf.md
index cb4be67122..71321d5568 100644
--- a/docs/source/en/quantization/gguf.md
+++ b/docs/source/en/quantization/gguf.md
@@ -61,7 +61,7 @@ Optimized CUDA kernels can accelerate GGUF quantized model inference by approxim
 pip install -U kernels
 ```
 
-Once installed, GGUF inference automatically uses optimized kernels when available. Note that CUDA kernels may introduce minor numerical differences compared to the original GGUF implementation, potentially causing subtle visual variations in generated images. To disable CUDA kernel usage, set the environment variable `DIFFUSERS_GGUF_CUDA_KERNELS=false`.
+Once installed, set the environment variable `DIFFUSERS_GGUF_CUDA_KERNELS=true` to use optimized kernels when available. Note that CUDA kernels may introduce minor numerical differences compared to the original GGUF implementation, potentially causing subtle visual variations in generated images. CUDA kernel usage is disabled by default; to disable it explicitly, set the environment variable `DIFFUSERS_GGUF_CUDA_KERNELS=false`.
 
 ## Supported Quantization Types
 
diff --git a/src/diffusers/quantizers/gguf/utils.py b/src/diffusers/quantizers/gguf/utils.py
index aa6a2818d1..3dd00b2ce3 100644
--- a/src/diffusers/quantizers/gguf/utils.py
+++ b/src/diffusers/quantizers/gguf/utils.py
@@ -30,7 +30,7 @@ if is_accelerate_available():
 
 
 can_use_cuda_kernels = (
-    os.getenv("DIFFUSERS_GGUF_CUDA_KERNELS", "true").lower() in ["1", "true", "yes"]
+    os.getenv("DIFFUSERS_GGUF_CUDA_KERNELS", "false").lower() in ["1", "true", "yes"]
     and torch.cuda.is_available()
     and torch.cuda.get_device_capability()[0] >= 7
 )
diff --git a/src/diffusers/utils/testing_utils.py b/src/diffusers/utils/testing_utils.py
index e5da39c1d8..cfbfa5d0d8 100644
--- a/src/diffusers/utils/testing_utils.py
+++ b/src/diffusers/utils/testing_utils.py
@@ -35,6 +35,7 @@ from .import_utils import (
     is_compel_available,
     is_flax_available,
     is_gguf_available,
+    is_kernels_available,
     is_note_seq_available,
     is_onnx_available,
     is_opencv_available,
@@ -629,6 +630,18 @@ def require_torchao_version_greater_or_equal(torchao_version):
     return decorator
 
 
+def require_kernels_version_greater_or_equal(kernels_version):
+    """Skip the decorated test unless `kernels` is installed at `kernels_version` or newer."""
+    def decorator(test_case):
+        installed = is_kernels_available() and version.parse(importlib.metadata.version("kernels")).base_version
+        correct_kernels_version = installed and version.parse(installed) >= version.parse(kernels_version)
+        return unittest.skipUnless(
+            correct_kernels_version, f"Test requires kernels with version greater than or equal to {kernels_version}."
+        )(test_case)
+
+    return decorator
+
+
 def deprecate_after_peft_backend(test_case):
     """
     Decorator marking a test that will be skipped after PEFT backend
diff --git a/tests/quantization/gguf/test_gguf.py b/tests/quantization/gguf/test_gguf.py
index a03efdd2be..130f19e309 100644
--- a/tests/quantization/gguf/test_gguf.py
+++ b/tests/quantization/gguf/test_gguf.py
@@ -32,6 +32,7 @@ from diffusers.utils.testing_utils import (
     require_accelerator,
     require_big_accelerator,
     require_gguf_version_greater_or_equal,
+    require_kernels_version_greater_or_equal,
     require_peft_backend,
     torch_device,
 )
@@ -49,6 +50,7 @@ enable_full_determinism()
 @require_accelerate
 @require_accelerator
 @require_gguf_version_greater_or_equal("0.10.0")
+@require_kernels_version_greater_or_equal("0.9.0")
 class GGUFCudaKernelsTests(unittest.TestCase):
     def setUp(self):
         gc.collect()