[quant] allow components_to_quantize to be a non-list for single components (#12234)

* allow non list components_to_quantize.
* up
* Apply suggestions from code review
* Apply suggestions from code review
  Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
* [docs] components_to_quantize (#12287)
  init
  Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
---------
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2026-01-27 17:22:53 +03:00 · 2025-09-11 01:17:08 +05:30
parent e1b7f1f240
commit eb7ef26736
6 changed files with 29 additions and 7 deletions
--- a/docs/source/en/api/pipelines/cogvideox.md
+++ b/docs/source/en/api/pipelines/cogvideox.md
@@ -50,7 +50,7 @@ from diffusers.utils import export_to_video
 pipeline_quant_config = PipelineQuantizationConfig(
  quant_backend="torchao",
  quant_kwargs={"quant_type": "int8wo"},
-  components_to_quantize=["transformer"]
+  components_to_quantize="transformer"
 )

 # fp8 layerwise weight-casting
--- a/docs/source/en/api/pipelines/hunyuan_video.md
+++ b/docs/source/en/api/pipelines/hunyuan_video.md
@@ -54,7 +54,7 @@ pipeline_quant_config = PipelineQuantizationConfig(
      "bnb_4bit_quant_type": "nf4",
      "bnb_4bit_compute_dtype": torch.bfloat16
      },
-    components_to_quantize=["transformer"]
+    components_to_quantize="transformer"
 )

 pipeline = HunyuanVideoPipeline.from_pretrained(
@@ -91,7 +91,7 @@ pipeline_quant_config = PipelineQuantizationConfig(
      "bnb_4bit_quant_type": "nf4",
      "bnb_4bit_compute_dtype": torch.bfloat16
      },
-    components_to_quantize=["transformer"]
+    components_to_quantize="transformer"
 )

 pipeline = HunyuanVideoPipeline.from_pretrained(
@@ -139,7 +139,7 @@ export_to_video(video, "output.mp4", fps=15)
        "bnb_4bit_quant_type": "nf4",
        "bnb_4bit_compute_dtype": torch.bfloat16
        },
-      components_to_quantize=["transformer"]
+      components_to_quantize="transformer"
  )

  pipeline = HunyuanVideoPipeline.from_pretrained(
--- a/docs/source/en/quantization/overview.md
+++ b/docs/source/en/quantization/overview.md
@@ -34,7 +34,9 @@ Initialize [`~quantizers.PipelineQuantizationConfig`] with the following paramet
 > [!TIP]
 > These `quant_kwargs` arguments are different for each backend. Refer to the [Quantization API](../api/quantization) docs to view the arguments for each backend.

- `components_to_quantize` specifies which components of the pipeline to quantize. Typically, you should quantize the most compute intensive components like the transformer. The text encoder is another component to consider quantizing if a pipeline has more than one such as [`FluxPipeline`]. The example below quantizes the T5 text encoder in [`FluxPipeline`] while keeping the CLIP model intact.
+- `components_to_quantize` specifies which component(s) of the pipeline to quantize. Typically, you should quantize the most compute-intensive components like the transformer. The text encoder is another component to consider quantizing if a pipeline has more than one, such as [`FluxPipeline`]. The example below quantizes the T5 text encoder in [`FluxPipeline`] while keeping the CLIP model intact.
+
+   `components_to_quantize` accepts either a list for multiple models or a string for a single model.

 The example below loads the bitsandbytes backend with the following arguments from [`~quantizers.quantization_config.BitsAndBytesConfig`], `load_in_4bit`, `bnb_4bit_quant_type`, and `bnb_4bit_compute_dtype`.

@@ -62,6 +64,7 @@ pipe = DiffusionPipeline.from_pretrained(
 image = pipe("photo of a cute dog").images[0]
 ```

+
 ### Advanced quantization

 The `quant_mapping` argument provides more options for how to quantize each individual component in a pipeline, like combining different quantization backends.
--- a/docs/source/en/using-diffusers/text-img2vid.md
+++ b/docs/source/en/using-diffusers/text-img2vid.md
@@ -98,7 +98,7 @@ pipeline_quant_config = PipelineQuantizationConfig(
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": torch.bfloat16
    },
-  components_to_quantize=["transformer"]
+  components_to_quantize="transformer"
 )

 pipeline = HunyuanVideoPipeline.from_pretrained(
--- a/src/diffusers/quantizers/pipe_quant_config.py
+++ b/src/diffusers/quantizers/pipe_quant_config.py
@@ -48,12 +48,15 @@ class PipelineQuantizationConfig:
        self,
        quant_backend: str = None,
        quant_kwargs: Dict[str, Union[str, float, int, dict]] = None,
-        components_to_quantize: Optional[List[str]] = None,
+        components_to_quantize: Optional[Union[List[str], str]] = None,
        quant_mapping: Dict[str, Union[DiffQuantConfigMixin, "TransformersQuantConfigMixin"]] = None,
    ):
        self.quant_backend = quant_backend
        # Initialize kwargs to be {} to set to the defaults.
        self.quant_kwargs = quant_kwargs or {}
+        if components_to_quantize:
+            if isinstance(components_to_quantize, str):
+                components_to_quantize = [components_to_quantize]
        self.components_to_quantize = components_to_quantize
        self.quant_mapping = quant_mapping
        self.config_mapping = {}  # book-keeping Example: `{module_name: quant_config}`
--- a/tests/quantization/test_pipeline_level_quantization.py
+++ b/tests/quantization/test_pipeline_level_quantization.py
@@ -299,3 +299,19 @@ transformer BitsAndBytesConfig {
        data = json.loads(json_part)

        return data
+
+    def test_single_component_to_quantize(self):
+        component_to_quantize = "transformer"
+        quant_config = PipelineQuantizationConfig(
+            quant_backend="bitsandbytes_8bit",
+            quant_kwargs={"load_in_8bit": True},
+            components_to_quantize=component_to_quantize,
+        )
+        pipe = DiffusionPipeline.from_pretrained(
+            self.model_name,
+            quantization_config=quant_config,
+            torch_dtype=torch.bfloat16,
+        )
+        for name, component in pipe.components.items():
+            if name == component_to_quantize:
+                self.assertTrue(hasattr(component.config, "quantization_config"))