From a8fa52ba2a07feef2355d8422fd16e10a880a05a Mon Sep 17 00:00:00 2001
From: DavidBert
Date: Thu, 16 Oct 2025 09:23:43 +0000
Subject: [PATCH] quantization example

---
 docs/source/en/api/pipelines/photon.md | 73 +++++++++-------------------
 1 file changed, 23 insertions(+), 50 deletions(-)

diff --git a/docs/source/en/api/pipelines/photon.md b/docs/source/en/api/pipelines/photon.md
index a46e9a4fc5..737760aabd 100644
--- a/docs/source/en/api/pipelines/photon.md
+++ b/docs/source/en/api/pipelines/photon.md
@@ -54,36 +54,49 @@ image.save("photon_output.png")
 
 ### Manual Component Loading
 
-You can also load components individually:
+Load components individually to customize the pipeline, for instance to use quantized models.
 
 ```py
 import torch
-from diffusers import PhotonPipeline
+from diffusers.pipelines.photon import PhotonPipeline
 from diffusers.models import AutoencoderKL, AutoencoderDC
 from diffusers.models.transformers.transformer_photon import PhotonTransformer2DModel
 from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
 from transformers import T5GemmaModel, GemmaTokenizerFast
+from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
+from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
 
+# Diffusers and Transformers models each need their own BitsAndBytesConfig
+quant_config = DiffusersBitsAndBytesConfig(load_in_8bit=True)
+text_encoder_quant_config = TransformersBitsAndBytesConfig(load_in_8bit=True)
 # Load transformer
 transformer = PhotonTransformer2DModel.from_pretrained(
-    "Photoroom/photon-512-t2i-sft", subfolder="transformer"
-).to(dtype=torch.bfloat16)
+    "Photoroom/photon-512-t2i-sft",
+    subfolder="transformer",
+    quantization_config=quant_config,
+    torch_dtype=torch.bfloat16,
+)
 
 # Load scheduler
 scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
     "Photoroom/photon-512-t2i-sft", subfolder="scheduler"
 )
 
 # Load T5Gemma text encoder
-t5gemma_model = T5GemmaModel.from_pretrained("google/t5gemma-2b-2b-ul2")
+t5gemma_model = T5GemmaModel.from_pretrained("google/t5gemma-2b-2b-ul2",
+                                             quantization_config=text_encoder_quant_config,
+                                             torch_dtype=torch.bfloat16)
-text_encoder = t5gemma_model.encoder.to(dtype=torch.bfloat16)
+# .to(dtype=...) is not supported on 8-bit models; torch_dtype above handles non-quantized modules
+text_encoder = t5gemma_model.encoder
 tokenizer = GemmaTokenizerFast.from_pretrained("google/t5gemma-2b-2b-ul2")
 tokenizer.model_max_length = 256
+
 # Load VAE - choose either Flux VAE or DC-AE
-# Flux VAE (16 latent channels):
-vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae").to(dtype=torch.bfloat16)
-# Or DC-AE (32 latent channels):
-# vae = AutoencoderDC.from_pretrained("mit-han-lab/dc-ae-f32c32-sana-1.0-diffusers")
+# Flux VAE (16 latent channels); for DC-AE (32 latent channels), load AutoencoderDC instead
+vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev",
+                                    subfolder="vae",
+                                    quantization_config=quant_config,
+                                    torch_dtype=torch.bfloat16)
 
 pipe = PhotonPipeline(
     transformer=transformer,
@@ -95,46 +108,6 @@ pipe = PhotonPipeline(
 pipe.to("cuda")
 ```
 
-## VAE Variants
-
-Photon supports two VAE configurations:
-
-### Flux VAE (AutoencoderKL)
-- **Compression**: 8x spatial compression
-- **Latent channels**: 16
-- **Model**: `black-forest-labs/FLUX.1-dev` (subfolder: "vae")
-- **Use case**: Balanced quality and speed
-
-### DC-AE (AutoencoderDC)
-- **Compression**: 32x spatial compression
-- **Latent channels**: 32
-- **Model**: `mit-han-lab/dc-ae-f32c32-sana-1.0-diffusers`
-- **Use case**: Higher compression for faster processing
-
-The VAE type is automatically determined from the checkpoint's `model_index.json` configuration.
-
-## Generation Parameters
-
-Key parameters for image generation:
-
-- **num_inference_steps**: Number of denoising steps (default: 28). More steps generally improve quality at the cost of speed.
-- **guidance_scale**: Classifier-free guidance strength (default: 4.0). Higher values produce images more closely aligned with the prompt.
-- **height/width**: Output image dimensions (default: 512x512). Can be customized in the checkpoint configuration.
-
-```py
-# Example with custom parameters
-import torch
-from diffusers.pipelines.photon import PhotonPipeline
-pipe = PhotonPipeline.from_pretrained("Photoroom/photon-512-t2i-sft", torch_dtype=torch.bfloat16)
-pipe = pipe(
-    prompt = "A front-facing portrait of a lion the golden savanna at sunset."
-    num_inference_steps=28,
-    guidance_scale=4.0,
-    height=512,
-    width=512,
-    generator=torch.Generator("cuda").manual_seed(42)
-).images[0]
-```
 
 ## Memory Optimization
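
A quick usage sketch of the assembled 8-bit pipeline, for reference (not part of the diff above). It reuses the call pattern and the defaults quoted in the doc's removed "Generation Parameters" section; the prompt, seed, and output filename are arbitrary.

```py
# Sketch only: assumes `pipe` was assembled and moved to CUDA as in the patch above.
image = pipe(
    prompt="A front-facing portrait of a lion in the golden savanna at sunset.",
    num_inference_steps=28,  # default per the docs
    guidance_scale=4.0,      # default per the docs
    generator=torch.Generator("cuda").manual_seed(42),
).images[0]
image.save("photon_8bit.png")
```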