From 8da9ea7d4a49e65b4276c4dc168e02bbe74ce50b Mon Sep 17 00:00:00 2001
From: js1234567
Date: Wed, 7 Jan 2026 09:50:02 +0800
Subject: [PATCH] Add FSDP option for Flux2

---
 examples/dreambooth/README_flux2.md | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/examples/dreambooth/README_flux2.md b/examples/dreambooth/README_flux2.md
index 41a77c3bbc..69bffc9d7a 100644
--- a/examples/dreambooth/README_flux2.md
+++ b/examples/dreambooth/README_flux2.md
@@ -169,6 +169,37 @@ To better track our training experiments, we're using the following flags in the
 > [!NOTE]
 > If you want to train using long prompts with the T5 text encoder, you can use `--max_sequence_length` to set the token limit. The default is 77, but it can be increased to as high as 512. Note that this will use more resources and may slow down the training in some cases.
 
+### FSDP on the Transformer
+By configuring Accelerate with FSDP, the transformer blocks are wrapped automatically. For example, set the configuration to:
+
+```yaml
+distributed_type: FSDP
+fsdp_config:
+  fsdp_version: 2
+  fsdp_offload_params: false
+  fsdp_sharding_strategy: HYBRID_SHARD
+  fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
+  fsdp_transformer_layer_cls_to_wrap: Flux2TransformerBlock, Flux2SingleTransformerBlock
+  fsdp_forward_prefetch: true
+  fsdp_sync_module_states: false
+  fsdp_state_dict_type: FULL_STATE_DICT
+  fsdp_use_orig_params: false
+  fsdp_activation_checkpointing: true
+  fsdp_reshard_after_forward: true
+  fsdp_cpu_ram_efficient_loading: false
+```
+
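+To train with this configuration, save it as `fsdp_config.yaml` and pass it to `accelerate launch` via `--config_file` (the script name and flags below are placeholders; keep the rest of your usual training command):
+
+```shell
+# fsdp_config.yaml holds the FSDP settings shown above; replace the script
+# name and flags with the training command you are using elsewhere in this guide.
+accelerate launch --config_file fsdp_config.yaml \
+  train_dreambooth_flux2.py \
+  --pretrained_model_name_or_path=$MODEL_NAME \
+  --output_dir=$OUTPUT_DIR
+```
+
 ## LoRA + DreamBooth
 
 [LoRA](https://huggingface.co/docs/peft/conceptual_guides/adapter#low-rank-adaptation-lora) is a popular parameter-efficient fine-tuning technique that allows you to achieve full-finetuning like performance but with a fraction of learnable parameters.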