From faffe23627d1c19dd1c8d31ef6e7139287f2a99f Mon Sep 17 00:00:00 2001 From: Zhenhuan Liu Date: Fri, 23 Sep 2022 04:31:15 -0400 Subject: [PATCH] Update for multi-gpu training. --- examples/dreambooth/README.md | 4 ++-- examples/dreambooth/train_dreambooth.py | 13 +++++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/examples/dreambooth/README.md b/examples/dreambooth/README.md index 97a318b38a..ee936d419b 100644 --- a/examples/dreambooth/README.md +++ b/examples/dreambooth/README.md @@ -43,7 +43,7 @@ export MODEL_NAME="CompVis/stable-diffusion-v1-4" export INSTANCE_DIR="path-to-instance-images" export OUTPUT_DIR="path-to-save-model" -python train_dreambooth.py \ +accelerate launch train_dreambooth.py \ --pretrained_model_name_or_path=$MODEL_NAME --use_auth_token \ --instance_data_dir=$INSTANCE_DIR \ --output_dir=$OUTPUT_DIR \ @@ -64,7 +64,7 @@ export INSTANCE_DIR="path-to-instance-images" export CLASS_DIR="path-to-class-images" export OUTPUT_DIR="path-to-save-model" -python train_dreambooth.py \ +accelerate launch train_dreambooth.py \ --pretrained_model_name_or_path=$MODEL_NAME --use_auth_token \ --instance_data_dir=$INSTANCE_DIR \ --class_data_dir=$CLASS_DIR \ diff --git a/examples/dreambooth/train_dreambooth.py b/examples/dreambooth/train_dreambooth.py index 24be2418f4..e160e00c70 100644 --- a/examples/dreambooth/train_dreambooth.py +++ b/examples/dreambooth/train_dreambooth.py @@ -345,6 +345,7 @@ def main(): sd_model = StableDiffusionPipeline.from_pretrained( args.pretrained_model_name_or_path, use_auth_token=args.use_auth_token ) + sd_model.set_progress_bar_config(disable=True) num_new_images = args.num_class_images - cur_class_images logger.info(f"Number of class images to sample: {num_new_images}.") @@ -441,10 +442,14 @@ def main(): num_training_steps=args.max_train_steps * args.gradient_accumulation_steps, ) - text_encoder, vae, unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( - text_encoder, vae, unet, 
optimizer, train_dataloader, lr_scheduler + unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( + unet, optimizer, train_dataloader, lr_scheduler ) + # Move text_encoder and vae to gpu + text_encoder.to(accelerator.device) + vae.to(accelerator.device) + # Keep text_encoder and vae in eval model as we don't train it text_encoder.eval() vae.eval() @@ -536,8 +541,8 @@ def main(): # Create the pipeline using using the trained modules and save it. if accelerator.is_main_process: pipeline = StableDiffusionPipeline( - text_encoder=accelerator.unwrap_model(text_encoder), - vae=accelerator.unwrap_model(vae), + text_encoder=text_encoder, + vae=vae, unet=accelerator.unwrap_model(unet), tokenizer=tokenizer, scheduler=PNDMScheduler(