From 8ce37ab055372dedf4e9621ed63374a019d93f5d Mon Sep 17 00:00:00 2001
From: Sayak Paul
Date: Thu, 31 Oct 2024 15:51:42 +0530
Subject: [PATCH] [training] use the lr when using 8bit adam. (#9796)

* use the lr when using 8bit adam.

* remove lr as we pack it in params_to_optimize.

---------

Co-authored-by: Linoy Tsaban <57615435+linoytsaban@users.noreply.github.com>
---
 .../train_dreambooth_lora_flux_advanced.py         | 14 +++-----------
 .../train_dreambooth_lora_sd15_advanced.py         |  6 +-----
 .../train_dreambooth_lora_sdxl_advanced.py         |  1 -
 .../train_cogvideox_image_to_video_lora.py         |  1 -
 examples/cogvideo/train_cogvideox_lora.py          |  1 -
 examples/dreambooth/train_dreambooth_flux.py       |  6 +-----
 examples/dreambooth/train_dreambooth_lora_flux.py  |  6 +-----
 examples/dreambooth/train_dreambooth_lora_sd3.py   |  1 -
 examples/dreambooth/train_dreambooth_lora_sdxl.py  |  1 -
 examples/dreambooth/train_dreambooth_sd3.py        |  1 -
 .../dreambooth/train_dreambooth_lora_sdxl.py       |  1 -
 11 files changed, 6 insertions(+), 33 deletions(-)

diff --git a/examples/advanced_diffusion_training/train_dreambooth_lora_flux_advanced.py b/examples/advanced_diffusion_training/train_dreambooth_lora_flux_advanced.py
index 92d296c0f1..bf726e65c9 100644
--- a/examples/advanced_diffusion_training/train_dreambooth_lora_flux_advanced.py
+++ b/examples/advanced_diffusion_training/train_dreambooth_lora_flux_advanced.py
@@ -1778,15 +1778,10 @@ def main(args):
         if not args.enable_t5_ti:
             # pure textual inversion - only clip
             if pure_textual_inversion:
-                params_to_optimize = [
-                    text_parameters_one_with_lr,
-                ]
+                params_to_optimize = [text_parameters_one_with_lr]
                 te_idx = 0
             else:  # regular te training or regular pivotal for clip
-                params_to_optimize = [
-                    transformer_parameters_with_lr,
-                    text_parameters_one_with_lr,
-                ]
+                params_to_optimize = [transformer_parameters_with_lr, text_parameters_one_with_lr]
                 te_idx = 1
         elif args.enable_t5_ti:
             # pivotal tuning of clip & t5
@@ -1809,9 +1804,7 @@
                 ]
                 te_idx = 1
     else:
-        params_to_optimize = [
-            transformer_parameters_with_lr,
-        ]
+        params_to_optimize = [transformer_parameters_with_lr]
 
     # Optimizer creation
     if not (args.optimizer.lower() == "prodigy" or args.optimizer.lower() == "adamw"):
@@ -1871,7 +1864,6 @@
             params_to_optimize[-1]["lr"] = args.learning_rate
         optimizer = optimizer_class(
             params_to_optimize,
-            lr=args.learning_rate,
             betas=(args.adam_beta1, args.adam_beta2),
             beta3=args.prodigy_beta3,
             weight_decay=args.adam_weight_decay,
diff --git a/examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py b/examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py
index 024722536d..7fdea56dc5 100644
--- a/examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py
+++ b/examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py
@@ -1358,10 +1358,7 @@ def main(args):
             else args.adam_weight_decay,
             "lr": args.text_encoder_lr if args.text_encoder_lr else args.learning_rate,
         }
-        params_to_optimize = [
-            unet_lora_parameters_with_lr,
-            text_lora_parameters_one_with_lr,
-        ]
+        params_to_optimize = [unet_lora_parameters_with_lr, text_lora_parameters_one_with_lr]
     else:
         params_to_optimize = [unet_lora_parameters_with_lr]
 
@@ -1423,7 +1420,6 @@
 
         optimizer = optimizer_class(
             params_to_optimize,
-            lr=args.learning_rate,
             betas=(args.adam_beta1, args.adam_beta2),
             beta3=args.prodigy_beta3,
             weight_decay=args.adam_weight_decay,
diff --git a/examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py b/examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py
index bc06cc9213..74d52186dd 100644
--- a/examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py
+++ b/examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py
@@ -1794,7 +1794,6 @@ def main(args):
 
         optimizer = optimizer_class(
             params_to_optimize,
-            lr=args.learning_rate,
             betas=(args.adam_beta1, args.adam_beta2),
             beta3=args.prodigy_beta3,
             weight_decay=args.adam_weight_decay,
diff --git a/examples/cogvideo/train_cogvideox_image_to_video_lora.py b/examples/cogvideo/train_cogvideox_image_to_video_lora.py
index 4ef392baa2..1f055bcecb 100644
--- a/examples/cogvideo/train_cogvideox_image_to_video_lora.py
+++ b/examples/cogvideo/train_cogvideox_image_to_video_lora.py
@@ -947,7 +947,6 @@ def get_optimizer(args, params_to_optimize, use_deepspeed: bool = False):
 
         optimizer = optimizer_class(
             params_to_optimize,
-            lr=args.learning_rate,
             betas=(args.adam_beta1, args.adam_beta2),
             beta3=args.prodigy_beta3,
             weight_decay=args.adam_weight_decay,
diff --git a/examples/cogvideo/train_cogvideox_lora.py b/examples/cogvideo/train_cogvideox_lora.py
index 011466bc7d..e591e0ee59 100644
--- a/examples/cogvideo/train_cogvideox_lora.py
+++ b/examples/cogvideo/train_cogvideox_lora.py
@@ -969,7 +969,6 @@ def get_optimizer(args, params_to_optimize, use_deepspeed: bool = False):
 
         optimizer = optimizer_class(
             params_to_optimize,
-            lr=args.learning_rate,
             betas=(args.adam_beta1, args.adam_beta2),
             beta3=args.prodigy_beta3,
             weight_decay=args.adam_weight_decay,
diff --git a/examples/dreambooth/train_dreambooth_flux.py b/examples/dreambooth/train_dreambooth_flux.py
index f720afef65..d23d05f7e3 100644
--- a/examples/dreambooth/train_dreambooth_flux.py
+++ b/examples/dreambooth/train_dreambooth_flux.py
@@ -1226,10 +1226,7 @@ def main(args):
             "weight_decay": args.adam_weight_decay_text_encoder,
             "lr": args.text_encoder_lr if args.text_encoder_lr else args.learning_rate,
         }
-        params_to_optimize = [
-            transformer_parameters_with_lr,
-            text_parameters_one_with_lr,
-        ]
+        params_to_optimize = [transformer_parameters_with_lr, text_parameters_one_with_lr]
     else:
         params_to_optimize = [transformer_parameters_with_lr]
 
@@ -1291,7 +1288,6 @@
 
         optimizer = optimizer_class(
             params_to_optimize,
-            lr=args.learning_rate,
             betas=(args.adam_beta1, args.adam_beta2),
             beta3=args.prodigy_beta3,
             weight_decay=args.adam_weight_decay,
diff --git a/examples/dreambooth/train_dreambooth_lora_flux.py b/examples/dreambooth/train_dreambooth_lora_flux.py
index b6e6572348..a0a197b1b2 100644
--- a/examples/dreambooth/train_dreambooth_lora_flux.py
+++ b/examples/dreambooth/train_dreambooth_lora_flux.py
@@ -1335,10 +1335,7 @@ def main(args):
             "weight_decay": args.adam_weight_decay_text_encoder,
             "lr": args.text_encoder_lr if args.text_encoder_lr else args.learning_rate,
         }
-        params_to_optimize = [
-            transformer_parameters_with_lr,
-            text_parameters_one_with_lr,
-        ]
+        params_to_optimize = [transformer_parameters_with_lr, text_parameters_one_with_lr]
     else:
         params_to_optimize = [transformer_parameters_with_lr]
 
@@ -1400,7 +1397,6 @@
 
         optimizer = optimizer_class(
             params_to_optimize,
-            lr=args.learning_rate,
             betas=(args.adam_beta1, args.adam_beta2),
             beta3=args.prodigy_beta3,
             weight_decay=args.adam_weight_decay,
diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py
index fc3c69b890..dcf093a94c 100644
--- a/examples/dreambooth/train_dreambooth_lora_sd3.py
+++ b/examples/dreambooth/train_dreambooth_lora_sd3.py
@@ -1468,7 +1468,6 @@ def main(args):
 
         optimizer = optimizer_class(
             params_to_optimize,
-            lr=args.learning_rate,
             betas=(args.adam_beta1, args.adam_beta2),
             beta3=args.prodigy_beta3,
             weight_decay=args.adam_weight_decay,
diff --git a/examples/dreambooth/train_dreambooth_lora_sdxl.py b/examples/dreambooth/train_dreambooth_lora_sdxl.py
index bf8c8f7d05..6e621b3cae 100644
--- a/examples/dreambooth/train_dreambooth_lora_sdxl.py
+++ b/examples/dreambooth/train_dreambooth_lora_sdxl.py
@@ -1402,7 +1402,6 @@ def main(args):
 
         optimizer = optimizer_class(
             params_to_optimize,
-            lr=args.learning_rate,
             betas=(args.adam_beta1, args.adam_beta2),
             beta3=args.prodigy_beta3,
             weight_decay=args.adam_weight_decay,
diff --git a/examples/dreambooth/train_dreambooth_sd3.py b/examples/dreambooth/train_dreambooth_sd3.py
index 5d10345304..525a4cc906 100644
--- a/examples/dreambooth/train_dreambooth_sd3.py
+++ b/examples/dreambooth/train_dreambooth_sd3.py
@@ -1328,7 +1328,6 @@ def main(args):
 
         optimizer = optimizer_class(
             params_to_optimize,
-            lr=args.learning_rate,
             betas=(args.adam_beta1, args.adam_beta2),
             beta3=args.prodigy_beta3,
             weight_decay=args.adam_weight_decay,
diff --git a/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py b/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py
index d167801311..2a98010389 100644
--- a/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py
+++ b/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py
@@ -1475,7 +1475,6 @@ def main(args):
 
         optimizer = optimizer_class(
             params_to_optimize,
-            lr=args.learning_rate,
             betas=(args.adam_beta1, args.adam_beta2),
             beta3=args.prodigy_beta3,
             weight_decay=args.adam_weight_decay,
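
Note on the second commit ("remove lr as we pack it in params_to_optimize"):
the removal is safe because PyTorch-style optimizers (Prodigy and
bitsandbytes' 8-bit Adam included) use the constructor's lr only as a
default for param groups that do not carry their own "lr" key, and every
group packed into params_to_optimize above now sets "lr" explicitly. Below
is a minimal sketch of that behavior, not part of the patch;
torch.optim.AdamW and the tiny Linear module stand in for the training
scripts' optimizers and models:

    import torch

    net = torch.nn.Linear(4, 4)

    # Each param group packs its own learning rate, mirroring dicts like
    # transformer_parameters_with_lr / text_parameters_one_with_lr above.
    params_to_optimize = [
        {"params": [net.weight], "lr": 1e-4},
        {"params": [net.bias], "lr": 1e-5},
    ]

    # No global lr kwarg needed: a per-group "lr" always wins over the
    # constructor default, which applies only to groups that omit the key.
    optimizer = torch.optim.AdamW(params_to_optimize)
    print([group["lr"] for group in optimizer.param_groups])  # [0.0001, 1e-05]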