
proof that it works in run_local_xl

Patrick von Platen
2023-06-25 21:26:31 +00:00
parent 277bc9d623
commit 62a151d8f4
3 changed files with 80 additions and 5 deletions

run_local_xl.py (Executable file, 56 lines)

@@ -0,0 +1,56 @@
#!/usr/bin/env python3
from diffusers import DiffusionPipeline, EulerDiscreteScheduler, StableDiffusionPipeline, KDPM2DiscreteScheduler, StableDiffusionImg2ImgPipeline, HeunDiscreteScheduler, KDPM2AncestralDiscreteScheduler, DDIMScheduler
import time
import os
from huggingface_hub import HfApi
# from compel import Compel
import torch
import sys
from pathlib import Path
import requests
from PIL import Image
from io import BytesIO

# Model path (local checkpoint directory or Hub repo id) is passed on the command line.
path = sys.argv[1]

api = HfApi()
start_time = time.time()
pipe = DiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
# Swap in the Euler scheduler, rebuilt from the checkpoint's saved scheduler config.
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
# pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
# pipe = StableDiffusionImg2ImgPipeline.from_pretrained(path, torch_dtype=torch.float16, safety_checker=None)
# compel = Compel(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder)
pipe = pipe.to("cuda")

prompt = "An astronaut riding a green horse on Mars"
# prompts = ["a cat playing with a ball++ in the forest", "a cat playing with a ball++ in the forest", "a cat playing with a ball-- in the forest"]
# prompt_embeds = torch.cat([compel.build_conditioning_tensor(prompt) for prompt in prompts])
# generator = [torch.Generator(device="cuda").manual_seed(0) for _ in range(prompt_embeds.shape[0])]
#
# url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"
#
# response = requests.get(url)
# image = Image.open(BytesIO(response.content)).convert("RGB")
# image.thumbnail((768, 768))
#
# pipe.unet.set_default_attn_processor()

image = pipe(prompt=prompt).images[0]

# Save locally, then upload to the Hub for inspection.
file_name = "aaa"
path = os.path.join(Path.home(), "images", f"{file_name}.png")
image.save(path)

api.upload_file(
    path_or_fileobj=path,
    path_in_repo=path.split("/")[-1],
    repo_id="patrickvonplaten/images",
    repo_type="dataset",
)
print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")

src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py

@@ -24,7 +24,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 from ...image_processor import VaeImageProcessor
 from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
-from ...models.attention_processor import AttnProcessor2_0, LoRAXFormersAttnProcessor, XFormersAttnProcessor
+from ...models.attention_processor import AttnProcessor2_0, LoRAXFormersAttnProcessor, XFormersAttnProcessor, LoRAAttnProcessor2_0
 from ...schedulers import DDPMScheduler, KarrasDiffusionSchedulers
 from ...utils import deprecate, is_accelerate_available, is_accelerate_version, logging, randn_tensor
 from ..pipeline_utils import DiffusionPipeline
@@ -747,6 +747,7 @@ class StableDiffusionUpscalePipeline(DiffusionPipeline, TextualInversionLoaderMixin
             AttnProcessor2_0,
             XFormersAttnProcessor,
             LoRAXFormersAttnProcessor,
+            LoRAAttnProcessor2_0,
         ]
         # if xformers or torch_2_0 is used attention block does not need
         # to be in float32 which can save lots of memory
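The added entry extends a check this pipeline already performs: the VAE is kept in float32 because it overflows in float16, but when the attention processor is torch 2.0's SDPA or xformers, the attention block is numerically safe in half precision, so most of the decoder can stay in the latents' dtype to save memory. Below is a minimal sketch of that decision, written with isinstance for readability (the pipeline code itself tests membership in a list of processor classes, and the helper name here is ours, not from the diff).

import torch
from diffusers import AutoencoderKL
from diffusers.models.attention_processor import (
    AttnProcessor2_0,
    LoRAAttnProcessor2_0,
    LoRAXFormersAttnProcessor,
    XFormersAttnProcessor,
)

def cast_vae_for_decode(vae: AutoencoderKL, latents: torch.Tensor) -> torch.Tensor:
    # The VAE overflows in float16, so run it in float32 by default.
    vae.to(dtype=torch.float32)
    processor = vae.decoder.mid_block.attentions[0].processor
    memory_safe_attention = isinstance(
        processor,
        (AttnProcessor2_0, XFormersAttnProcessor, LoRAXFormersAttnProcessor, LoRAAttnProcessor2_0),
    )
    if memory_safe_attention:
        # Attention is stable in half precision: keep the heavy decoder blocks
        # in the latents' dtype to save memory.
        vae.post_quant_conv.to(latents.dtype)
        vae.decoder.conv_in.to(latents.dtype)
        vae.decoder.mid_block.to(latents.dtype)
    else:
        # Otherwise match the latents to the float32 VAE.
        latents = latents.float()
    return latents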

src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py

@@ -25,6 +25,7 @@ from ...image_processor import VaeImageProcessor
 from ...loaders import FromCkptMixin, LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
+from ...models.attention_processor import AttnProcessor2_0, LoRAXFormersAttnProcessor, XFormersAttnProcessor, LoRAAttnProcessor2_0
 from ...utils import (
     deprecate,
     is_accelerate_available,
@@ -648,8 +649,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline):
         )
         prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds = self.encode_prompt(
             prompt,
-            "cpu",
-            # device,
+            device,
             num_images_per_prompt,
             do_classifier_free_guidance,
             negative_prompt,
@@ -727,10 +727,28 @@ class StableDiffusionXLPipeline(DiffusionPipeline):
                     if callback is not None and i % callback_steps == 0:
                         callback(i, t, latents)

         # make sure the VAE is in float32 mode, as it overflows in float16
         self.vae.to(dtype=torch.float32)

+        use_torch_2_0_or_xformers = self.vae.decoder.mid_block.attentions[0].processor in [
+            AttnProcessor2_0,
+            XFormersAttnProcessor,
+            LoRAXFormersAttnProcessor,
+            LoRAAttnProcessor2_0,
+        ]
+        # if xformers or torch_2_0 is used attention block does not need
+        # to be in float32 which can save lots of memory
+        if not use_torch_2_0_or_xformers:
+            self.vae.post_quant_conv.to(latents.dtype)
+            self.vae.decoder.conv_in.to(latents.dtype)
+            self.vae.decoder.mid_block.to(latents.dtype)
+        else:
+            latents = latents.float()
+
         if not output_type == "latent":
-            image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
-            image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
+            # CHECK there is problem here (PVP)
+            with torch.autocast("cuda", enabled=False):
+                image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
+            # image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
+            has_nsfw_concept = None
         else:
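The decode hunk above keeps the VAE in float32 and wraps the decode in torch.autocast("cuda", enabled=False), so that an outer autocast context cannot silently push the decode back into float16. Below is a minimal sketch of just that step (the helper name is ours; the vae attributes follow the pipeline code in the diff).

import torch

def decode_in_float32(vae, latents: torch.Tensor) -> torch.Tensor:
    vae.to(dtype=torch.float32)
    latents = latents.float()
    # enabled=False turns autocast off inside this block, so the decode really
    # runs in float32 even if the caller wrapped generation in torch.autocast.
    with torch.autocast("cuda", enabled=False):
        image = vae.decode(latents / vae.config.scaling_factor, return_dict=False)[0]
    return image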