From 0ab63ff6478b7cc6b5ae0d46c7c386d476cfa87f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tolga=20Cang=C3=B6z?= <46008593+standardAI@users.noreply.github.com>
Date: Fri, 24 May 2024 21:25:29 +0300
Subject: [PATCH] Fix CPU Offloading Usage & Typos (#8230)

* Fix typos

* Fix `pipe.enable_model_cpu_offload()` usage

* Fix cpu offloading

* Update numbers

---
 README.md                                  |  4 +-
 docs/source/en/optimization/tgate.md       | 24 ++++-----
 .../inference_with_tcd_lora.md             | 50 +++++++++----------
 docs/source/en/using-diffusers/inpaint.md  |  8 +--
 examples/community/README.md               | 20 ++++----
 .../train_text_to_image_decoder.py         |  1 -
 tests/lora/test_lora_layers_sd.py          |  2 +-
 tests/pipelines/i2vgen_xl/test_i2vgenxl.py |  1 -
 .../test_ip_adapter_stable_diffusion.py    |  4 +-
 .../test_stable_diffusion_v_pred.py        |  1 -
 .../test_stable_video_diffusion.py         |  1 -
 11 files changed, 56 insertions(+), 60 deletions(-)

diff --git a/README.md b/README.md
index a8a903e2c3..e598c023f4 100644
--- a/README.md
+++ b/README.md
@@ -77,7 +77,7 @@ Please refer to the [How to use Stable Diffusion in Apple Silicon](https://huggi
 
 ## Quickstart
 
-Generating outputs is super easy with 🤗 Diffusers. To generate an image from text, use the `from_pretrained` method to load any pretrained diffusion model (browse the [Hub](https://huggingface.co/models?library=diffusers&sort=downloads) for 22000+ checkpoints):
+Generating outputs is super easy with 🤗 Diffusers. To generate an image from text, use the `from_pretrained` method to load any pretrained diffusion model (browse the [Hub](https://huggingface.co/models?library=diffusers&sort=downloads) for 25,000+ checkpoints):
 
 ```python
 from diffusers import DiffusionPipeline
@@ -219,7 +219,7 @@ Also, say 👋 in our public Discord channel
-
+

diff --git a/docs/source/en/optimization/tgate.md b/docs/source/en/optimization/tgate.md
Accelerate `StableDiffusionXLPipeline` with T-GATE:
@@ -78,9 +78,9 @@ pipe = TgateSDXLLoader(
 ).to("cuda")
 
 image = pipe.tgate(
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
-    gate_step=gate_step,
-    num_inference_steps=inference_step
+    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
+    gate_step=gate_step,
+    num_inference_steps=inference_step
 ).images[0]
 ```
@@ -111,9 +111,9 @@ pipe = TgateSDXLDeepCacheLoader(
 ).to("cuda")
 
 image = pipe.tgate(
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
-    gate_step=gate_step,
-    num_inference_steps=inference_step
+    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
+    gate_step=gate_step,
+    num_inference_steps=inference_step
 ).images[0]
 ```
@@ -151,9 +151,9 @@ pipe = TgateSDXLLoader(
 ).to("cuda")
 
 image = pipe.tgate(
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
-    gate_step=gate_step,
-    num_inference_steps=inference_step
+    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.",
+    gate_step=gate_step,
+    num_inference_steps=inference_step
 ).images[0]
 ```

diff --git a/docs/source/en/using-diffusers/inference_with_tcd_lora.md b/docs/source/en/using-diffusers/inference_with_tcd_lora.md
index 10ad674e73..df49fc8475 100644
--- a/docs/source/en/using-diffusers/inference_with_tcd_lora.md
+++ b/docs/source/en/using-diffusers/inference_with_tcd_lora.md
@@ -78,7 +78,7 @@ image = pipe(
     prompt=prompt,
     num_inference_steps=4,
     guidance_scale=0,
-    eta=0.3, 
+    eta=0.3,
     generator=torch.Generator(device=device).manual_seed(0),
 ).images[0]
 ```
@@ -156,14 +156,14 @@ image = pipe(
     prompt=prompt,
     num_inference_steps=8,
     guidance_scale=0,
-    eta=0.3, 
+    eta=0.3,
     generator=torch.Generator(device=device).manual_seed(0),
 ).images[0]
 ```
 
 ![](https://github.com/jabir-zheng/TCD/raw/main/assets/animagine_xl.png)
 
-TCD-LoRA also supports other LoRAs trained on different styles. For example, let's load the [TheLastBen/Papercut_SDXL](https://huggingface.co/TheLastBen/Papercut_SDXL) LoRA and fuse it with the TCD-LoRA with the [`~loaders.UNet2DConditionLoadersMixin.set_adapters`] method. 
+TCD-LoRA also supports other LoRAs trained on different styles. For example, let's load the [TheLastBen/Papercut_SDXL](https://huggingface.co/TheLastBen/Papercut_SDXL) LoRA and fuse it with the TCD-LoRA using the [`~loaders.UNet2DConditionLoadersMixin.set_adapters`] method.
 
 > [!TIP]
 > Check out the [Merge LoRAs](merge_loras) guide to learn more about efficient merging methods.
 
@@ -171,7 +171,7 @@
 ```python
 import torch
 from diffusers import StableDiffusionXLPipeline
-from scheduling_tcd import TCDScheduler 
+from scheduling_tcd import TCDScheduler
 
 device = "cuda"
 base_model_id = "stabilityai/stable-diffusion-xl-base-1.0"
@@ -191,7 +191,7 @@ image = pipe(
     prompt=prompt,
     num_inference_steps=4,
     guidance_scale=0,
-    eta=0.3, 
+    eta=0.3,
     generator=torch.Generator(device=device).manual_seed(0),
 ).images[0]
 ```
@@ -215,7 +215,7 @@ from PIL import Image
 from transformers import DPTFeatureExtractor, DPTForDepthEstimation
 from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline
 from diffusers.utils import load_image, make_image_grid
-from scheduling_tcd import TCDScheduler 
+from scheduling_tcd import TCDScheduler
 
 device = "cuda"
 depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to(device)
@@ -249,13 +249,13 @@ controlnet = ControlNetModel.from_pretrained(
     controlnet_id,
     torch_dtype=torch.float16,
     variant="fp16",
-).to(device)
+)
 pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
     base_model_id,
     controlnet=controlnet,
     torch_dtype=torch.float16,
     variant="fp16",
-).to(device)
+)
 pipe.enable_model_cpu_offload()
 pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
 
@@ -271,9 +271,9 @@ depth_image = get_depth_map(image)
 
 controlnet_conditioning_scale = 0.5  # recommended for good generalization
 image = pipe(
-    prompt,
-    image=depth_image,
-    num_inference_steps=4,
+    prompt,
+    image=depth_image,
+    num_inference_steps=4,
     guidance_scale=0,
     eta=0.3,
     controlnet_conditioning_scale=controlnet_conditioning_scale,
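Every `-).to(device)` → `+)` hunk in this file is the same correction: `enable_model_cpu_offload()` must be called while the pipeline still lives on the CPU, because it installs accelerate hooks that move each sub-model to the GPU only for its forward pass and back again afterwards. A minimal sketch of the corrected pattern, outside the patch itself (the base model ID appears in the hunks above; the depth ControlNet checkpoint ID is an assumed example):

```python
import torch
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline

# Build everything on the CPU; no .to("cuda")/.to(device) anywhere.
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-depth-sdxl-1.0",  # assumed checkpoint, for illustration
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    torch_dtype=torch.float16,
    variant="fp16",
)

# Installs hooks that shuttle each sub-model (text encoders, UNet, ControlNet,
# VAE) onto the GPU only while it runs. Calling .to("cuda") first would leave
# nothing to offload and may conflict with the hooks' device bookkeeping.
pipe.enable_model_cpu_offload()
```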
@@ -290,7 +290,7 @@ grid_image = make_image_grid([depth_image, image], rows=1, cols=2)
 
 import torch
 from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline
 from diffusers.utils import load_image, make_image_grid
-from scheduling_tcd import TCDScheduler 
+from scheduling_tcd import TCDScheduler
 
 device = "cuda"
 base_model_id = "stabilityai/stable-diffusion-xl-base-1.0"
@@ -301,13 +301,13 @@ controlnet = ControlNetModel.from_pretrained(
     controlnet_id,
     torch_dtype=torch.float16,
     variant="fp16",
-).to(device)
+)
 pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
     base_model_id,
     controlnet=controlnet,
     torch_dtype=torch.float16,
     variant="fp16",
-).to(device)
+)
 pipe.enable_model_cpu_offload()
 pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
 
@@ -322,9 +322,9 @@ canny_image = load_image("https://huggingface.co/datasets/hf-internal-testing/di
 
 controlnet_conditioning_scale = 0.5  # recommended for good generalization
 image = pipe(
-    prompt,
-    image=canny_image,
-    num_inference_steps=4,
+    prompt,
+    image=canny_image,
+    num_inference_steps=4,
     guidance_scale=0,
     eta=0.3,
     controlnet_conditioning_scale=controlnet_conditioning_scale,
@@ -336,7 +336,7 @@ grid_image = make_image_grid([canny_image, image], rows=1, cols=2)
 
 ![](https://github.com/jabir-zheng/TCD/raw/main/assets/controlnet_canny_tcd.png)
 
-The inference parameters in this example might not work for all examples, so we recommend you to try different values for `num_inference_steps`, `guidance_scale`, `controlnet_conditioning_scale` and `cross_attention_kwargs` parameters and choose the best one. 
+The inference parameters in this example might not work for all examples, so we recommend trying different values for the `num_inference_steps`, `guidance_scale`, `controlnet_conditioning_scale`, and `cross_attention_kwargs` parameters and choosing the combination that works best.
 
@@ -350,7 +350,7 @@ from diffusers import StableDiffusionXLPipeline
 from diffusers.utils import load_image, make_image_grid
 from ip_adapter import IPAdapterXL
-from scheduling_tcd import TCDScheduler 
+from scheduling_tcd import TCDScheduler
 
 device = "cuda"
 base_model_path = "stabilityai/stable-diffusion-xl-base-1.0"
@@ -359,8 +359,8 @@ ip_ckpt = "sdxl_models/ip-adapter_sdxl.bin"
 tcd_lora_id = "h1t/TCD-SDXL-LoRA"
 
 pipe = StableDiffusionXLPipeline.from_pretrained(
-    base_model_path,
-    torch_dtype=torch.float16,
+    base_model_path,
+    torch_dtype=torch.float16,
     variant="fp16"
 )
 pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
@@ -375,13 +375,13 @@ ref_image = load_image("https://raw.githubusercontent.com/tencent-ailab/IP-Adapt
 
 prompt = "best quality, high quality, wearing sunglasses"
 
 image = ip_model.generate(
-    pil_image=ref_image, 
+    pil_image=ref_image,
     prompt=prompt,
     scale=0.5,
-    num_samples=1, 
-    num_inference_steps=4, 
+    num_samples=1,
+    num_inference_steps=4,
     guidance_scale=0,
-    eta=0.3, 
+    eta=0.3,
     seed=0,
 )[0]

diff --git a/docs/source/en/using-diffusers/inpaint.md b/docs/source/en/using-diffusers/inpaint.md
index 193f5a6d9f..ba43325f53 100644
--- a/docs/source/en/using-diffusers/inpaint.md
+++ b/docs/source/en/using-diffusers/inpaint.md
@@ -230,7 +230,7 @@ from diffusers.utils import load_image, make_image_grid
 
 pipeline = AutoPipelineForInpainting.from_pretrained(
     "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16, variant="fp16"
-).to("cuda")
+)
 pipeline.enable_model_cpu_offload()
 # remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed
 pipeline.enable_xformers_memory_efficient_attention()
@@ -255,7 +255,7 @@ from diffusers.utils import load_image, make_image_grid
 
 pipeline = AutoPipelineForInpainting.from_pretrained(
     "runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16, variant="fp16"
-).to("cuda")
+)
 pipeline.enable_model_cpu_offload()
 # remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed
 pipeline.enable_xformers_memory_efficient_attention()
@@ -296,7 +296,7 @@ from diffusers.utils import load_image, make_image_grid
 
 pipeline = AutoPipelineForInpainting.from_pretrained(
     "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16, variant="fp16"
-).to("cuda")
+)
 pipeline.enable_model_cpu_offload()
 # remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed
 pipeline.enable_xformers_memory_efficient_attention()
@@ -319,7 +319,7 @@ from diffusers.utils import load_image, make_image_grid
 
 pipeline = AutoPipelineForInpainting.from_pretrained(
     "runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16, variant="fp16"
-).to("cuda")
+)
 pipeline.enable_model_cpu_offload()
 # remove following line if xFormers is not installed or you have PyTorch 2.0 or higher installed
 pipeline.enable_xformers_memory_efficient_attention()
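The inpaint.md hunks keep repeating the comment advising to remove `enable_xformers_memory_efficient_attention()` when xFormers is not installed or PyTorch is 2.0 or higher. That advice can also be encoded directly with a guard; a sketch, assuming the `is_xformers_available` helper exposed in `diffusers.utils.import_utils` is acceptable here (the model ID is taken from the hunks above):

```python
import torch
from packaging import version
from diffusers import AutoPipelineForInpainting
from diffusers.utils.import_utils import is_xformers_available

pipeline = AutoPipelineForInpainting.from_pretrained(
    "runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16, variant="fp16"
)
pipeline.enable_model_cpu_offload()

# PyTorch 2.0+ ships fused scaled-dot-product attention, so xFormers only
# helps on older versions, and only if the package is actually installed.
if is_xformers_available() and version.parse(torch.__version__) < version.parse("2.0"):
    pipeline.enable_xformers_memory_efficient_attention()
```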
diff --git a/examples/community/README.md b/examples/community/README.md
index 6d7830621e..600761aae7 100755
--- a/examples/community/README.md
+++ b/examples/community/README.md
@@ -240,12 +240,12 @@ pipeline_output = pipe(
     # denoising_steps=10,  # (optional) Number of denoising steps of each inference pass. Default: 10.
     # ensemble_size=10,  # (optional) Number of inference passes in the ensemble. Default: 10.
     # ------------------------------------------------
-    
+
     # ----- recommended setting for LCM version ------
     # denoising_steps=4,
     # ensemble_size=5,
     # -------------------------------------------------
-    
+
     # processing_res=768,  # (optional) Maximum resolution of processing. If set to 0: will not resize at all. Defaults to 768.
     # match_input_res=True,  # (optional) Resize depth prediction to match input resolution.
     # batch_size=0,  # (optional) Inference batch size, no bigger than `num_ensemble`. If set to 0, the script will automatically decide the proper batch size. Defaults to 0.
@@ -1032,7 +1032,7 @@ image = pipe().images[0]
 
 Make sure you have @crowsonkb's `k-diffusion` installed:
 
-```
+```sh
 pip install k-diffusion
 ```
@@ -1854,13 +1854,13 @@ To use this pipeline, you need to:
 
 You can simply use pip to install IPEX with the latest version.
 
-```python
+```sh
 python -m pip install intel_extension_for_pytorch
 ```
 
 **Note:** To install a specific version, run with the following command:
 
-```
+```sh
 python -m pip install intel_extension_for_pytorch==<version_name> -f https://developer.intel.com/ipex-whl-stable-cpu
 ```
@@ -1958,13 +1958,13 @@ To use this pipeline, you need to:
 
 You can simply use pip to install IPEX with the latest version.
 
-```python
+```sh
 python -m pip install intel_extension_for_pytorch
 ```
 
 **Note:** To install a specific version, run with the following command:
 
-```
+```sh
 python -m pip install intel_extension_for_pytorch==<version_name> -f https://developer.intel.com/ipex-whl-stable-cpu
 ```
@@ -3010,8 +3010,8 @@ This code implements a pipeline for the Stable Diffusion model, enabling the div
 
 ### Sample Code
 
-```
-from from examples.community.regional_prompting_stable_diffusion import RegionalPromptingStableDiffusionPipeline
+```py
+from examples.community.regional_prompting_stable_diffusion import RegionalPromptingStableDiffusionPipeline
 
 pipe = RegionalPromptingStableDiffusionPipeline.from_single_file(model_path, vae=vae)
 
 rp_args = {
@@ -4131,7 +4131,7 @@ This implementation is based on [Diffusers](https://huggingface.co/docs/diffuser
 
 ## Example Usage
 
-```
+```py
 import os
 import torch

diff --git a/examples/kandinsky2_2/text_to_image/train_text_to_image_decoder.py b/examples/kandinsky2_2/text_to_image/train_text_to_image_decoder.py
index 78f9b7f18b..409978cb53 100644
--- a/examples/kandinsky2_2/text_to_image/train_text_to_image_decoder.py
+++ b/examples/kandinsky2_2/text_to_image/train_text_to_image_decoder.py
@@ -896,7 +896,6 @@ def main():
         images = []
         if args.validation_prompts is not None:
             logger.info("Running inference for collecting generated images...")
-            pipeline = pipeline.to(accelerator.device)
             pipeline.torch_dtype = weight_dtype
             pipeline.set_progress_bar_config(disable=True)
             pipeline.enable_model_cpu_offload()
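The LoRA test below switches to `enable_sequential_cpu_offload()`, the more aggressive sibling of `enable_model_cpu_offload()`; both expect a pipeline that has not been moved to the GPU, which is why the `.to(torch_device)` and `.to("cuda")` calls are dropped throughout these tests. A rough comparison as a sketch (model ID and prompt borrowed from elsewhere in this patch; actual savings depend on the model):

```python
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)

# Model offload: whole sub-models (text encoder, UNet, VAE) hop onto the GPU
# one at a time. Small speed penalty, moderate VRAM savings.
pipe.enable_model_cpu_offload()

# Sequential offload (use instead of, not on top of, the call above): weights
# are streamed to the GPU submodule by submodule. Much slower, but peak VRAM
# drops to roughly the size of the largest submodule.
# pipe.enable_sequential_cpu_offload()

image = pipe("Astronaut in a jungle, cold color palette, muted colors, detailed, 8k.").images[0]
```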
diff --git a/tests/lora/test_lora_layers_sd.py b/tests/lora/test_lora_layers_sd.py
index fc28d94c24..46b965ec33 100644
--- a/tests/lora/test_lora_layers_sd.py
+++ b/tests/lora/test_lora_layers_sd.py
@@ -642,7 +642,7 @@ class LoraIntegrationTests(unittest.TestCase):
         This test simply checks that loading a LoRA with an empty network alpha works fine.
         See: https://github.com/huggingface/diffusers/issues/5606
         """
-        pipeline = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5").to(torch_device)
+        pipeline = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
         pipeline.enable_sequential_cpu_offload()
         civitai_path = hf_hub_download("ybelkada/test-ahi-civitai", "ahi_lora_weights.safetensors")
         pipeline.load_lora_weights(civitai_path, adapter_name="ahri")

diff --git a/tests/pipelines/i2vgen_xl/test_i2vgenxl.py b/tests/pipelines/i2vgen_xl/test_i2vgenxl.py
index 0273e972a6..426e258122 100644
--- a/tests/pipelines/i2vgen_xl/test_i2vgenxl.py
+++ b/tests/pipelines/i2vgen_xl/test_i2vgenxl.py
@@ -243,7 +243,6 @@ class I2VGenXLPipelineSlowTests(unittest.TestCase):
     def test_i2vgen_xl(self):
         pipe = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16")
-        pipe = pipe.to(torch_device)
         pipe.enable_model_cpu_offload()
         pipe.set_progress_bar_config(disable=None)
 
         image = load_image(

diff --git a/tests/pipelines/ip_adapters/test_ip_adapter_stable_diffusion.py b/tests/pipelines/ip_adapters/test_ip_adapter_stable_diffusion.py
index 8c95fbc703..bf74b2f060 100644
--- a/tests/pipelines/ip_adapters/test_ip_adapter_stable_diffusion.py
+++ b/tests/pipelines/ip_adapters/test_ip_adapter_stable_diffusion.py
@@ -612,10 +612,10 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
     def test_instant_style_multiple_masks(self):
         image_encoder = CLIPVisionModelWithProjection.from_pretrained(
             "h94/IP-Adapter", subfolder="models/image_encoder", torch_dtype=torch.float16
-        ).to("cuda")
+        )
         pipeline = StableDiffusionXLPipeline.from_pretrained(
             "RunDiffusion/Juggernaut-XL-v9", torch_dtype=torch.float16, image_encoder=image_encoder, variant="fp16"
-        ).to("cuda")
+        )
         pipeline.enable_model_cpu_offload()
 
         pipeline.load_ip_adapter(

diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_v_pred.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_v_pred.py
index c3b1b9b854..923fba1272 100644
--- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_v_pred.py
+++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_v_pred.py
@@ -420,7 +420,6 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
         pipe.scheduler = DDIMScheduler.from_config(
             pipe.scheduler.config, timestep_spacing="trailing", rescale_betas_zero_snr=True
         )
-        pipe.to(torch_device)
         pipe.enable_model_cpu_offload()
         pipe.set_progress_bar_config(disable=None)

diff --git a/tests/pipelines/stable_video_diffusion/test_stable_video_diffusion.py b/tests/pipelines/stable_video_diffusion/test_stable_video_diffusion.py
index 199ed57bc2..60fc21e202 100644
--- a/tests/pipelines/stable_video_diffusion/test_stable_video_diffusion.py
+++ b/tests/pipelines/stable_video_diffusion/test_stable_video_diffusion.py
@@ -534,7 +534,6 @@ class StableVideoDiffusionPipelineSlowTests(unittest.TestCase):
             variant="fp16",
             torch_dtype=torch.float16,
         )
-        pipe = pipe.to(torch_device)
         pipe.enable_model_cpu_offload()
         pipe.set_progress_bar_config(disable=None)
 
         image = load_image(