
proof that it works in run_local_xl

Patrick von Platen
2023-06-25 21:26:31 +00:00
parent 277bc9d623
commit 62a151d8f4
3 changed files with 80 additions and 5 deletions

run_local_xl.py (Executable file, 56 lines)

@@ -0,0 +1,56 @@
#!/usr/bin/env python3
from diffusers import DiffusionPipeline, EulerDiscreteScheduler, StableDiffusionPipeline, KDPM2DiscreteScheduler, StableDiffusionImg2ImgPipeline, HeunDiscreteScheduler, KDPM2AncestralDiscreteScheduler, DDIMScheduler
import time
import os
from huggingface_hub import HfApi
# from compel import Compel
import torch
import sys
from pathlib import Path
import requests
from PIL import Image
from io import BytesIO

# Model path (local checkpoint directory or Hub repo id) is passed on the command line.
path = sys.argv[1]

api = HfApi()
start_time = time.time()
pipe = DiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
# Swap in the Euler scheduler, rebuilt from the checkpoint's saved scheduler config.
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
# pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
# pipe = StableDiffusionImg2ImgPipeline.from_pretrained(path, torch_dtype=torch.float16, safety_checker=None)
# compel = Compel(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder)
pipe = pipe.to("cuda")

prompt = "An astronaut riding a green horse on Mars"
# prompts = ["a cat playing with a ball++ in the forest", "a cat playing with a ball++ in the forest", "a cat playing with a ball-- in the forest"]
# prompt_embeds = torch.cat([compel.build_conditioning_tensor(prompt) for prompt in prompts])
# generator = [torch.Generator(device="cuda").manual_seed(0) for _ in range(prompt_embeds.shape[0])]
#
# url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"
#
# response = requests.get(url)
# image = Image.open(BytesIO(response.content)).convert("RGB")
# image.thumbnail((768, 768))
#
# pipe.unet.set_default_attn_processor()

image = pipe(prompt=prompt).images[0]

# Save locally, then upload to the Hub for inspection.
file_name = "aaa"
path = os.path.join(Path.home(), "images", f"{file_name}.png")
image.save(path)

api.upload_file(
    path_or_fileobj=path,
    path_in_repo=path.split("/")[-1],
    repo_id="patrickvonplaten/images",
    repo_type="dataset",
)
print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")

src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py

@@ -24,7 +24,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 from ...image_processor import VaeImageProcessor
 from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
-from ...models.attention_processor import AttnProcessor2_0, LoRAXFormersAttnProcessor, XFormersAttnProcessor
+from ...models.attention_processor import AttnProcessor2_0, LoRAXFormersAttnProcessor, XFormersAttnProcessor, LoRAAttnProcessor2_0
 from ...schedulers import DDPMScheduler, KarrasDiffusionSchedulers
 from ...utils import deprecate, is_accelerate_available, is_accelerate_version, logging, randn_tensor
 from ..pipeline_utils import DiffusionPipeline
@@ -747,6 +747,7 @@ class StableDiffusionUpscalePipeline(DiffusionPipeline, TextualInversionLoaderMixin
             AttnProcessor2_0,
             XFormersAttnProcessor,
             LoRAXFormersAttnProcessor,
+            LoRAAttnProcessor2_0,
         ]
         # if xformers or torch_2_0 is used attention block does not need
         # to be in float32 which can save lots of memory
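The added entry extends a check this pipeline already performs: the VAE is kept in float32 because it overflows in float16, but when the attention processor is torch 2.0's SDPA or xformers, the attention block is numerically safe in half precision, so most of the decoder can stay in the latents' dtype to save memory. Below is a minimal sketch of that decision, written with isinstance for readability (the pipeline code itself tests membership in a list of processor classes, and the helper name here is ours, not from the diff).

import torch
from diffusers import AutoencoderKL
from diffusers.models.attention_processor import (
    AttnProcessor2_0,
    LoRAAttnProcessor2_0,
    LoRAXFormersAttnProcessor,
    XFormersAttnProcessor,
)

def cast_vae_for_decode(vae: AutoencoderKL, latents: torch.Tensor) -> torch.Tensor:
    # The VAE overflows in float16, so run it in float32 by default.
    vae.to(dtype=torch.float32)
    processor = vae.decoder.mid_block.attentions[0].processor
    memory_safe_attention = isinstance(
        processor,
        (AttnProcessor2_0, XFormersAttnProcessor, LoRAXFormersAttnProcessor, LoRAAttnProcessor2_0),
    )
    if memory_safe_attention:
        # Attention is stable in half precision: keep the heavy decoder blocks
        # in the latents' dtype to save memory.
        vae.post_quant_conv.to(latents.dtype)
        vae.decoder.conv_in.to(latents.dtype)
        vae.decoder.mid_block.to(latents.dtype)
    else:
        # Otherwise match the latents to the float32 VAE.
        latents = latents.float()
    return latents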

src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py

@@ -25,6 +25,7 @@ from ...image_processor import VaeImageProcessor
 from ...loaders import FromCkptMixin, LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
+from ...models.attention_processor import AttnProcessor2_0, LoRAXFormersAttnProcessor, XFormersAttnProcessor, LoRAAttnProcessor2_0
 from ...utils import (
     deprecate,
     is_accelerate_available,
@@ -648,8 +649,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline):
         )
         prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds = self.encode_prompt(
             prompt,
-            "cpu",
-            # device,
+            device,
             num_images_per_prompt,
             do_classifier_free_guidance,
             negative_prompt,
@@ -727,10 +727,28 @@ class StableDiffusionXLPipeline(DiffusionPipeline):
                     if callback is not None and i % callback_steps == 0:
                         callback(i, t, latents)

         # make sure the VAE is in float32 mode, as it overflows in float16
         self.vae.to(dtype=torch.float32)

+        use_torch_2_0_or_xformers = self.vae.decoder.mid_block.attentions[0].processor in [
+            AttnProcessor2_0,
+            XFormersAttnProcessor,
+            LoRAXFormersAttnProcessor,
+            LoRAAttnProcessor2_0,
+        ]
+        # if xformers or torch_2_0 is used attention block does not need
+        # to be in float32 which can save lots of memory
+        if not use_torch_2_0_or_xformers:
+            self.vae.post_quant_conv.to(latents.dtype)
+            self.vae.decoder.conv_in.to(latents.dtype)
+            self.vae.decoder.mid_block.to(latents.dtype)
+        else:
+            latents = latents.float()
+
         if not output_type == "latent":
-            image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
-            image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
+            # CHECK there is problem here (PVP)
+            with torch.autocast("cuda", enabled=False):
+                image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
+            # image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
+            has_nsfw_concept = None
         else:
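The decode hunk above keeps the VAE in float32 and wraps the decode in torch.autocast("cuda", enabled=False), so that an outer autocast context cannot silently push the decode back into float16. Below is a minimal sketch of just that step (the helper name is ours; the vae attributes follow the pipeline code in the diff).

import torch

def decode_in_float32(vae, latents: torch.Tensor) -> torch.Tensor:
    vae.to(dtype=torch.float32)
    latents = latents.float()
    # enabled=False turns autocast off inside this block, so the decode really
    # runs in float32 even if the caller wrapped generation in torch.autocast.
    with torch.autocast("cuda", enabled=False):
        image = vae.decode(latents / vae.config.scaling_factor, return_dict=False)[0]
    return image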