From 62a151d8f46cf9cb66f7932b9d9fb31bef70d90a Mon Sep 17 00:00:00 2001
From: Patrick von Platen
Date: Sun, 25 Jun 2023 21:26:31 +0000
Subject: [PATCH] proof that it works in run local xl

---
 run_local_xl.py                              | 56 +++++++++++++++++++
 .../pipeline_stable_diffusion_upscale.py     |  3 +-
 .../pipeline_stable_diffusion_xl.py          | 26 +++++++--
 3 files changed, 80 insertions(+), 5 deletions(-)
 create mode 100755 run_local_xl.py

diff --git a/run_local_xl.py b/run_local_xl.py
new file mode 100755
index 0000000000..db41d2cdf9
--- /dev/null
+++ b/run_local_xl.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+from diffusers import DiffusionPipeline, EulerDiscreteScheduler, StableDiffusionPipeline, KDPM2DiscreteScheduler, StableDiffusionImg2ImgPipeline, HeunDiscreteScheduler, KDPM2AncestralDiscreteScheduler, DDIMScheduler
+import time
+import os
+from huggingface_hub import HfApi
+# from compel import Compel
+import torch
+import sys
+from pathlib import Path
+import requests
+from PIL import Image
+from io import BytesIO
+
+path = sys.argv[1]
+
+api = HfApi()
+start_time = time.time()
+pipe = DiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
+pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
+# pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
+# pipe = StableDiffusionImg2ImgPipeline.from_pretrained(path, torch_dtype=torch.float16, safety_checker=None)
+
+# compel = Compel(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder)
+
+
+pipe = pipe.to("cuda")
+
+prompt = "An astronaut riding a green horse on Mars"
+
+# prompts = ["a cat playing with a ball++ in the forest", "a cat playing with a ball++ in the forest", "a cat playing with a ball-- in the forest"]
+
+# prompt_embeds = torch.cat([compel.build_conditioning_tensor(prompt) for prompt in prompts])
+
+# generator = [torch.Generator(device="cuda").manual_seed(0) for _ in range(prompt_embeds.shape[0])]
+#
+# url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"
+#
+# response = requests.get(url)
+# image = Image.open(BytesIO(response.content)).convert("RGB")
+# image.thumbnail((768, 768))
+#
+
+# pipe.unet.set_default_attn_processor()
+image = pipe(prompt=prompt).images[0]
+
+file_name = "aaa"
+path = os.path.join(Path.home(), "images", f"{file_name}.png")
+image.save(path)
+
+api.upload_file(
+    path_or_fileobj=path,
+    path_in_repo=path.split("/")[-1],
+    repo_id="patrickvonplaten/images",
+    repo_type="dataset",
+)
+print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")
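
The new run_local_xl.py above is a quick smoke test carrying several commented-out leftovers (Compel weighting, img2img, manual generators). Reduced to the lines that actually execute, it amounts to the following sketch; the checkpoint path still comes from the command line, and the output filename here is a placeholder rather than the upload path used above:

    #!/usr/bin/env python3
    # Minimal sketch of the SDXL smoke test; the output filename is a placeholder.
    import sys

    import torch
    from diffusers import DiffusionPipeline, EulerDiscreteScheduler

    # Load the checkpoint passed on the command line in half precision.
    pipe = DiffusionPipeline.from_pretrained(sys.argv[1], torch_dtype=torch.float16)
    # Swap in the Euler scheduler, reusing the checkpoint's scheduler config.
    pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
    pipe = pipe.to("cuda")

    image = pipe(prompt="An astronaut riding a green horse on Mars").images[0]
    image.save("astronaut_sdxl.png")
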
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
index 0fda05ea5e..06b6628bd3 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
@@ -24,7 +24,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 from ...image_processor import VaeImageProcessor
 from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
-from ...models.attention_processor import AttnProcessor2_0, LoRAXFormersAttnProcessor, XFormersAttnProcessor
+from ...models.attention_processor import AttnProcessor2_0, LoRAXFormersAttnProcessor, XFormersAttnProcessor, LoRAAttnProcessor2_0
 from ...schedulers import DDPMScheduler, KarrasDiffusionSchedulers
 from ...utils import deprecate, is_accelerate_available, is_accelerate_version, logging, randn_tensor
 from ..pipeline_utils import DiffusionPipeline
@@ -747,6 +747,7 @@ class StableDiffusionUpscalePipeline(DiffusionPipeline, TextualInversionLoaderMixin
             AttnProcessor2_0,
             XFormersAttnProcessor,
             LoRAXFormersAttnProcessor,
+            LoRAAttnProcessor2_0,
         ]
         # if xformers or torch_2_0 is used attention block does not need
         # to be in float32 which can save lots of memory
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
index 85a8959f69..14e674ca86 100644
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py
@@ -25,6 +25,7 @@ from ...image_processor import VaeImageProcessor
 from ...loaders import FromCkptMixin, LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
+from ...models.attention_processor import AttnProcessor2_0, LoRAXFormersAttnProcessor, XFormersAttnProcessor, LoRAAttnProcessor2_0
 from ...utils import (
     deprecate,
     is_accelerate_available,
@@ -648,8 +649,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline):
         )
         prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds = self.encode_prompt(
             prompt,
-            "cpu",
-            # device,
+            device,
             num_images_per_prompt,
             do_classifier_free_guidance,
             negative_prompt,
@@ -727,10 +727,28 @@ class StableDiffusionXLPipeline(DiffusionPipeline):
                 if callback is not None and i % callback_steps == 0:
                     callback(i, t, latents)
 
+        # make sure the VAE is in float32 mode, as it overflows in float16
+        self.vae.to(dtype=torch.float32)
+
+        use_torch_2_0_or_xformers = self.vae.decoder.mid_block.attentions[0].processor in [
+            AttnProcessor2_0,
+            XFormersAttnProcessor,
+            LoRAXFormersAttnProcessor,
+            LoRAAttnProcessor2_0,
+        ]
+        # if xformers or torch_2_0 is used attention block does not need
+        # to be in float32 which can save lots of memory
+        if use_torch_2_0_or_xformers:
+            self.vae.post_quant_conv.to(latents.dtype)
+            self.vae.decoder.conv_in.to(latents.dtype)
+            self.vae.decoder.mid_block.to(latents.dtype)
+        else:
+            latents = latents.float()
+
+
         if not output_type == "latent":
             # CHECK there is problem here (PVP)
-            with torch.autocast("cuda", enabled=False):
-                image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
+            image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
             #image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
             has_nsfw_concept = None
         else:
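
The StableDiffusionXLPipeline change is the substantive one: the SDXL VAE overflows in float16, so the decoder is upcast to float32 before decoding, and when the attention implementation tolerates half precision (PyTorch 2.0 SDPA or xFormers) the input path of the decoder is cast back to float16 to save memory. A standalone sketch of the same idea, assuming a diffusers AutoencoderKL `vae` and float16 `latents`; the helper name is made up for illustration:

    import torch
    from diffusers.models.attention_processor import (
        AttnProcessor2_0,
        LoRAAttnProcessor2_0,
        LoRAXFormersAttnProcessor,
        XFormersAttnProcessor,
    )

    def decode_with_upcast_vae(vae, latents):
        # Hypothetical helper: the SDXL VAE overflows in float16, so run the
        # decoder in float32 by default.
        vae.to(dtype=torch.float32)
        use_torch_2_0_or_xformers = isinstance(
            vae.decoder.mid_block.attentions[0].processor,
            (AttnProcessor2_0, XFormersAttnProcessor, LoRAXFormersAttnProcessor, LoRAAttnProcessor2_0),
        )
        if use_torch_2_0_or_xformers:
            # SDPA/xFormers attention does not overflow in float16, so the
            # decoder's input path can stay in half precision to save memory.
            vae.post_quant_conv.to(latents.dtype)
            vae.decoder.conv_in.to(latents.dtype)
            vae.decoder.mid_block.to(latents.dtype)
        else:
            # Vanilla attention needs float32 activations to avoid overflow.
            latents = latents.float()
        return vae.decode(latents / vae.config.scaling_factor, return_dict=False)[0]

The sketch deviates from the hunk above in one spot: it uses isinstance() instead of the `processor in [...]` membership test, because an attention-processor instance never compares equal to a processor class, so the membership test always evaluates to False and the all-float32 fallback path would always be taken.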