From 151998e1c27d0e4432b3d2c488e1cfce4acfc8f3 Mon Sep 17 00:00:00 2001 From: clarencechen Date: Wed, 1 Nov 2023 13:22:56 -0700 Subject: [PATCH] Update final CPU offloading code for more diffusion pipelines (#5589) * Update final model offload for more pipelines Add test to ensure all pipeline components are returned to CPU after execution with model offloading * Add comment to explain early UNet offload in Text-to-Video pipeline * Style --- .../controlnet/pipeline_controlnet_inpaint_sd_xl.py | 5 ++--- .../controlnet/pipeline_controlnet_sd_xl_img2img.py | 5 ++--- .../stable_diffusion/pipeline_stable_diffusion_gligen.py | 5 ++--- .../pipeline_stable_diffusion_gligen_text_image.py | 5 ++--- .../stable_diffusion/pipeline_stable_diffusion_upscale.py | 5 ++--- .../pipelines/stable_diffusion/pipeline_stable_unclip.py | 5 ++--- .../stable_diffusion/pipeline_stable_unclip_img2img.py | 5 ++--- .../t2i_adapter/pipeline_stable_diffusion_xl_adapter.py | 5 ++--- .../pipeline_text_to_video_synth_img2img.py | 1 + tests/pipelines/test_pipelines_common.py | 8 ++++++++ 10 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py index 46c9f25b6e..f29d3bd515 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py @@ -1604,9 +1604,8 @@ class StableDiffusionXLControlNetInpaintPipeline( image = self.image_processor.postprocess(image, output_type=output_type) - # Offload last model to CPU - if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: - self.final_offload_hook.offload() + # Offload all models + self.maybe_free_model_hooks() if not return_dict: return (image,) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py 
b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py index 5f9abb444f..78c7c1cd8d 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py @@ -1433,9 +1433,8 @@ class StableDiffusionXLControlNetImg2ImgPipeline( image = self.image_processor.postprocess(image, output_type=output_type) - # Offload last model to CPU - if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: - self.final_offload_hook.offload() + # Offload all models + self.maybe_free_model_hooks() if not return_dict: return (image,) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py index 90c3885168..ef88230b44 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py @@ -864,9 +864,8 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline): image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize) - # Offload last model to CPU - if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: - self.final_offload_hook.offload() + # Offload all models + self.maybe_free_model_hooks() if not return_dict: return (image, has_nsfw_concept) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py index eef5fbef58..c54114854a 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen_text_image.py @@ -1031,9 +1031,8 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline): image = 
self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize) - # Offload last model to CPU - if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: - self.final_offload_hook.offload() + # Offload all models + self.maybe_free_model_hooks() if not return_dict: return (image, has_nsfw_concept) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py index 00ed46ffc6..da89505017 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py @@ -820,9 +820,8 @@ class StableDiffusionUpscalePipeline( if output_type == "pil" and self.watermarker is not None: image = self.watermarker.apply_watermark(image) - # Offload last model to CPU - if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: - self.final_offload_hook.offload() + # Offload all models + self.maybe_free_model_hooks() if not return_dict: return (image, has_nsfw_concept) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py index 6539a4c629..c81dd85f0e 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py @@ -942,9 +942,8 @@ class StableUnCLIPPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL image = self.image_processor.postprocess(image, output_type=output_type) - # Offload last model to CPU - if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: - self.final_offload_hook.offload() + # Offload all models + self.maybe_free_model_hooks() if not return_dict: return (image,) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py 
b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py index 4441e643e2..73638fdd15 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py @@ -839,9 +839,8 @@ class StableUnCLIPImg2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin image = self.image_processor.postprocess(image, output_type=output_type) - # Offload last model to CPU - if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: - self.final_offload_hook.offload() + # Offload all models + self.maybe_free_model_hooks() if not return_dict: return (image,) diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py index 2a3fca7f46..a5d745ee69 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py @@ -1059,9 +1059,8 @@ class StableDiffusionXLAdapterPipeline( image = self.image_processor.postprocess(image, output_type=output_type) - # Offload last model to CPU - if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: - self.final_offload_hook.offload() + # Offload all models + self.maybe_free_model_hooks() if not return_dict: return (image,) diff --git a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py index 2f128aa448..45e0f5892d 100644 --- a/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +++ b/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py @@ -777,6 +777,7 @@ class VideoToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor if output_type == "latent": return 
TextToVideoSDPipelineOutput(frames=latents) + # manually for max memory savings if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: self.unet.to("cpu") diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index ae13d0d3e9..353add3b4d 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -742,6 +742,14 @@ class PipelineTesterMixin: max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max() self.assertLess(max_diff, expected_max_diff, "CPU offloading should not affect the inference results") + self.assertTrue( + all( + v.device.type == "cpu" + for k, v in pipe.components.items() + if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload + ), + "CPU offloading should leave all pipeline components on the CPU after inference", + ) @unittest.skipIf( torch_device != "cuda" or not is_xformers_available(),