From a793b1fe7e05adaa74f35193a6c3119265c6d889 Mon Sep 17 00:00:00 2001 From: MarkRich Date: Tue, 1 Nov 2022 03:17:51 -0700 Subject: [PATCH 01/88] Add imagic to community pipelines (#958) * initial commit to add imagic to stable diffusion community pipelines * remove some testing changes * comments from PR review for imagic stable diffusion * remove changes from pipeline_stable_diffusion as part of imagic pipeline * clean up example code and add line back in to pipeline_stable_diffusion for imagic pipeline * remove unused functions * small code quality changes for imagic pipeline * clean up readme * remove hardcoded logging values for imagic community example * undo change for DDIMScheduler --- examples/community/README.md | 46 +- examples/community/imagic_stable_diffusion.py | 476 ++++++++++++++++++ 2 files changed, 521 insertions(+), 1 deletion(-) create mode 100644 examples/community/imagic_stable_diffusion.py diff --git a/examples/community/README.md b/examples/community/README.md index 2ef84291fc..bb3964e1a7 100644 --- a/examples/community/README.md +++ b/examples/community/README.md @@ -18,6 +18,7 @@ If a community doesn't work as expected, please open an issue and ping the autho | Composable Stable Diffusion| Stable Diffusion Pipeline that supports prompts that contain "|" in prompts (as an AND condition) and weights (separated by "|" as well) to positively / negatively weight prompts. | [Composable Stable Diffusion](#composable-stable-diffusion) | - | [Mark Rich](https://github.com/MarkRich) | | Seed Resizing Stable Diffusion| Stable Diffusion Pipeline that supports resizing an image and retaining the concepts of the 512 by 512 generation. | [Seed Resizing](#seed-resizing) | - | [Mark Rich](https://github.com/MarkRich) | +| Imagic Stable Diffusion | Stable Diffusion Pipeline that enables writing a text prompt to edit an existing image| [Imagic Stable Diffusion](#imagic-stable-diffusion) | - | [Mark Rich](https://github.com/MarkRich) | To load a custom pipeline you just need to pass the `custom_pipeline` argument to `DiffusionPipeline`, as one of the files in `diffusers/examples/community`. Feel free to send a PR with your own pipelines, we will merge them quickly. @@ -373,6 +374,49 @@ for i in range(4): for i, img in enumerate(images): img.save(f"./composable_diffusion/image_{i}.png") ``` + +### Imagic Stable Diffusion +Allows you to edit an image using stable diffusion. 
+ +```python +import requests +from PIL import Image +from io import BytesIO +import torch +from diffusers import DiffusionPipeline, DDIMScheduler +has_cuda = torch.cuda.is_available() +device = torch.device('cpu' if not has_cuda else 'cuda') +pipe = DiffusionPipeline.from_pretrained( + "CompVis/stable-diffusion-v1-4", + safety_checker=None, + use_auth_token=True, + custom_pipeline="imagic_stable_diffusion", + scheduler = DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False) +).to(device) +generator = th.Generator("cuda").manual_seed(0) +seed = 0 +prompt = "A photo of Barack Obama smiling with a big grin" +url = 'https://www.dropbox.com/s/6tlwzr73jd1r9yk/obama.png?dl=1' +response = requests.get(url) +init_image = Image.open(BytesIO(response.content)).convert("RGB") +init_image = init_image.resize((512, 512)) +res = pipe.train( + prompt, + init_image, + guidance_scale=7.5, + num_inference_steps=50, + generator=generator) +res = pipe(alpha=1) +image = res.images[0] +image.save('./imagic/imagic_image_alpha_1.png') +res = pipe(alpha=1.5) +image = res.images[0] +image.save('./imagic/imagic_image_alpha_1_5.png') +res = pipe(alpha=2) +image = res.images[0] +image.save('./imagic/imagic_image_alpha_2.png') +``` + ### Seed Resizing Test seed resizing. Originally generate an image in 512 by 512, then generate image with same seed at 512 by 592 using seed resizing. Finally, generate 512 by 592 using original stable diffusion pipeline. @@ -456,4 +500,4 @@ res = pipe_compare( image = res.images[0] image.save('./seed_resize/seed_resize_{w}_{h}_image_compare.png'.format(w=width, h=height)) -``` \ No newline at end of file +``` diff --git a/examples/community/imagic_stable_diffusion.py b/examples/community/imagic_stable_diffusion.py new file mode 100644 index 0000000000..92aa677b46 --- /dev/null +++ b/examples/community/imagic_stable_diffusion.py @@ -0,0 +1,476 @@ +""" + modeled after the textual_inversion.py / train_dreambooth.py and the work + of justinpinkney here: https://github.com/justinpinkney/stable-diffusion/blob/main/notebooks/imagic.ipynb +""" +import inspect +import warnings +from typing import List, Optional, Union + +import numpy as np +import torch +import torch.nn.functional as F + +import PIL +from accelerate import Accelerator +from diffusers.models import AutoencoderKL, UNet2DConditionModel +from diffusers.pipeline_utils import DiffusionPipeline +from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput +from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker +from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler +from diffusers.utils import logging +from tqdm.auto import tqdm +from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def preprocess(image): + w, h = image.size + w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32 + image = image.resize((w, h), resample=PIL.Image.LANCZOS) + image = np.array(image).astype(np.float32) / 255.0 + image = image[None].transpose(0, 3, 1, 2) + image = torch.from_numpy(image) + return 2.0 * image - 1.0 + + +class ImagicStableDiffusionPipeline(DiffusionPipeline): + r""" + Pipeline for imagic image editing. + See paper here: https://arxiv.org/pdf/2210.09276.pdf + + This model inherits from [`DiffusionPipeline`]. 
Check the superclass documentation for the generic methods the + library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) + Args: + vae ([`AutoencoderKL`]): + Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations. + text_encoder ([`CLIPTextModel`]): + Frozen text-encoder. Stable Diffusion uses the text portion of + [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically + the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant. + tokenizer (`CLIPTokenizer`): + Tokenizer of class + [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). + unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. + scheduler ([`SchedulerMixin`]): + A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. + safety_checker ([`StableDiffusionSafetyChecker`]): + Classification module that estimates whether generated images could be considered offsensive or harmful. + Please, refer to the [model card](https://huggingface.co/CompVis/stable-diffusion-v1-4) for details. + feature_extractor ([`CLIPFeatureExtractor`]): + Model that extracts features from generated images to be used as inputs for the `safety_checker`. + """ + + def __init__( + self, + vae: AutoencoderKL, + text_encoder: CLIPTextModel, + tokenizer: CLIPTokenizer, + unet: UNet2DConditionModel, + scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler], + safety_checker: StableDiffusionSafetyChecker, + feature_extractor: CLIPFeatureExtractor, + ): + super().__init__() + self.register_modules( + vae=vae, + text_encoder=text_encoder, + tokenizer=tokenizer, + unet=unet, + scheduler=scheduler, + safety_checker=safety_checker, + feature_extractor=feature_extractor, + ) + + def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): + r""" + Enable sliced attention computation. + When this option is enabled, the attention module will split the input tensor in slices, to compute attention + in several steps. This is useful to save some memory in exchange for a small speed decrease. + Args: + slice_size (`str` or `int`, *optional*, defaults to `"auto"`): + When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If + a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case, + `attention_head_dim` must be a multiple of `slice_size`. + """ + if slice_size == "auto": + # half the attention head size is usually a good trade-off between + # speed and memory + slice_size = self.unet.config.attention_head_dim // 2 + self.unet.set_attention_slice(slice_size) + + def disable_attention_slicing(self): + r""" + Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go + back to computing attention in one step. 
+ """ + # set slice_size = `None` to disable `attention slicing` + self.enable_attention_slicing(None) + + def train( + self, + prompt: Union[str, List[str]], + init_image: Union[torch.FloatTensor, PIL.Image.Image], + height: Optional[int] = 512, + width: Optional[int] = 512, + generator: Optional[torch.Generator] = None, + embedding_learning_rate: float = 0.001, + diffusion_model_learning_rate: float = 2e-6, + text_embedding_optimization_steps: int = 500, + model_fine_tuning_optimization_steps: int = 1000, + **kwargs, + ): + r""" + Function invoked when calling the pipeline for generation. + Args: + prompt (`str` or `List[str]`): + The prompt or prompts to guide the image generation. + height (`int`, *optional*, defaults to 512): + The height in pixels of the generated image. + width (`int`, *optional*, defaults to 512): + The width in pixels of the generated image. + num_inference_steps (`int`, *optional*, defaults to 50): + The number of denoising steps. More denoising steps usually lead to a higher quality image at the + expense of slower inference. + guidance_scale (`float`, *optional*, defaults to 7.5): + Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). + `guidance_scale` is defined as `w` of equation 2. of [Imagen + Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > + 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, + usually at the expense of lower image quality. + eta (`float`, *optional*, defaults to 0.0): + Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to + [`schedulers.DDIMScheduler`], will be ignored for others. + generator (`torch.Generator`, *optional*): + A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation + deterministic. + latents (`torch.FloatTensor`, *optional*): + Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image + generation. Can be used to tweak the same generation with different prompts. If not provided, a latents + tensor will ge generated by sampling using the supplied random `generator`. + output_type (`str`, *optional*, defaults to `"pil"`): + The output format of the generate image. Choose between + [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `nd.array`. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a + plain tuple. + Returns: + [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: + [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple. + When returning a tuple, the first element is a list with the generated images, and the second element is a + list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" + (nsfw) content, according to the `safety_checker`. + """ + accelerator = Accelerator( + gradient_accumulation_steps=1, + mixed_precision="fp16", + ) + + if "torch_device" in kwargs: + device = kwargs.pop("torch_device") + warnings.warn( + "`torch_device` is deprecated as an input argument to `__call__` and will be removed in v0.3.0." + " Consider using `pipe.to(torch_device)` instead." 
+ ) + + if device is None: + device = "cuda" if torch.cuda.is_available() else "cpu" + self.to(device) + + if height % 8 != 0 or width % 8 != 0: + raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.") + + # Freeze vae and unet + self.vae.requires_grad_(False) + self.unet.requires_grad_(False) + self.text_encoder.requires_grad_(False) + self.unet.eval() + self.vae.eval() + self.text_encoder.eval() + + if accelerator.is_main_process: + accelerator.init_trackers( + "imagic", + config={ + "embedding_learning_rate": embedding_learning_rate, + "text_embedding_optimization_steps": text_embedding_optimization_steps, + }, + ) + + # get text embeddings for prompt + text_input = self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + truncaton=True, + return_tensors="pt", + ) + text_embeddings = torch.nn.Parameter( + self.text_encoder(text_input.input_ids.to(self.device))[0], requires_grad=True + ) + text_embeddings = text_embeddings.detach() + text_embeddings.requires_grad_() + text_embeddings_orig = text_embeddings.clone() + + # Initialize the optimizer + optimizer = torch.optim.Adam( + [text_embeddings], # only optimize the embeddings + lr=embedding_learning_rate, + ) + + if isinstance(init_image, PIL.Image.Image): + init_image = preprocess(init_image) + + latents_dtype = text_embeddings.dtype + init_image = init_image.to(device=self.device, dtype=latents_dtype) + init_latent_image_dist = self.vae.encode(init_image).latent_dist + init_image_latents = init_latent_image_dist.sample(generator=generator) + init_image_latents = 0.18215 * init_image_latents + + progress_bar = tqdm(range(text_embedding_optimization_steps), disable=not accelerator.is_local_main_process) + progress_bar.set_description("Steps") + + global_step = 0 + + logger.info("First optimizing the text embedding to better reconstruct the init image") + for _ in range(text_embedding_optimization_steps): + with accelerator.accumulate(text_embeddings): + # Sample noise that we'll add to the latents + noise = torch.randn(init_image_latents.shape).to(init_image_latents.device) + timesteps = torch.randint(1000, (1,), device=init_image_latents.device) + + # Add noise to the latents according to the noise magnitude at each timestep + # (this is the forward diffusion process) + noisy_latents = self.scheduler.add_noise(init_image_latents, noise, timesteps) + + # Predict the noise residual + noise_pred = self.unet(noisy_latents, timesteps, text_embeddings).sample + + loss = F.mse_loss(noise_pred, noise, reduction="none").mean([1, 2, 3]).mean() + accelerator.backward(loss) + + optimizer.step() + optimizer.zero_grad() + + # Checks if the accelerator has performed an optimization step behind the scenes + if accelerator.sync_gradients: + progress_bar.update(1) + global_step += 1 + + logs = {"loss": loss.detach().item()} # , "lr": lr_scheduler.get_last_lr()[0]} + progress_bar.set_postfix(**logs) + accelerator.log(logs, step=global_step) + + accelerator.wait_for_everyone() + + text_embeddings.requires_grad_(False) + + # Now we fine tune the unet to better reconstruct the image + self.unet.requires_grad_(True) + self.unet.train() + optimizer = torch.optim.Adam( + self.unet.parameters(), # only optimize unet + lr=diffusion_model_learning_rate, + ) + progress_bar = tqdm(range(model_fine_tuning_optimization_steps), disable=not accelerator.is_local_main_process) + + logger.info("Next fine tuning the entire model to better reconstruct the init image") + for _ in 
range(model_fine_tuning_optimization_steps): + with accelerator.accumulate(self.unet.parameters()): + # Sample noise that we'll add to the latents + noise = torch.randn(init_image_latents.shape).to(init_image_latents.device) + timesteps = torch.randint(1000, (1,), device=init_image_latents.device) + + # Add noise to the latents according to the noise magnitude at each timestep + # (this is the forward diffusion process) + noisy_latents = self.scheduler.add_noise(init_image_latents, noise, timesteps) + + # Predict the noise residual + noise_pred = self.unet(noisy_latents, timesteps, text_embeddings).sample + + loss = F.mse_loss(noise_pred, noise, reduction="none").mean([1, 2, 3]).mean() + accelerator.backward(loss) + + optimizer.step() + optimizer.zero_grad() + + # Checks if the accelerator has performed an optimization step behind the scenes + if accelerator.sync_gradients: + progress_bar.update(1) + global_step += 1 + + logs = {"loss": loss.detach().item()} # , "lr": lr_scheduler.get_last_lr()[0]} + progress_bar.set_postfix(**logs) + accelerator.log(logs, step=global_step) + + accelerator.wait_for_everyone() + self.text_embeddings_orig = text_embeddings_orig + self.text_embeddings = text_embeddings + + @torch.no_grad() + def __call__( + self, + alpha: float = 1.2, + height: Optional[int] = 512, + width: Optional[int] = 512, + num_inference_steps: Optional[int] = 50, + generator: Optional[torch.Generator] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + guidance_scale: float = 7.5, + eta: float = 0.0, + **kwargs, + ): + r""" + Function invoked when calling the pipeline for generation. + Args: + prompt (`str` or `List[str]`): + The prompt or prompts to guide the image generation. + height (`int`, *optional*, defaults to 512): + The height in pixels of the generated image. + width (`int`, *optional*, defaults to 512): + The width in pixels of the generated image. + num_inference_steps (`int`, *optional*, defaults to 50): + The number of denoising steps. More denoising steps usually lead to a higher quality image at the + expense of slower inference. + guidance_scale (`float`, *optional*, defaults to 7.5): + Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). + `guidance_scale` is defined as `w` of equation 2. of [Imagen + Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > + 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, + usually at the expense of lower image quality. + eta (`float`, *optional*, defaults to 0.0): + Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to + [`schedulers.DDIMScheduler`], will be ignored for others. + generator (`torch.Generator`, *optional*): + A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation + deterministic. + latents (`torch.FloatTensor`, *optional*): + Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image + generation. Can be used to tweak the same generation with different prompts. If not provided, a latents + tensor will ge generated by sampling using the supplied random `generator`. + output_type (`str`, *optional*, defaults to `"pil"`): + The output format of the generate image. Choose between + [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `nd.array`. 
+ return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a + plain tuple. + Returns: + [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: + [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple. + When returning a tuple, the first element is a list with the generated images, and the second element is a + list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" + (nsfw) content, according to the `safety_checker`. + """ + if height % 8 != 0 or width % 8 != 0: + raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.") + if self.text_embeddings is None: + raise ValueError("Please run the pipe.train() before trying to generate an image.") + if self.text_embeddings_orig is None: + raise ValueError("Please run the pipe.train() before trying to generate an image.") + + text_embeddings = alpha * self.text_embeddings_orig + (1 - alpha) * self.text_embeddings + + # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) + # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` + # corresponds to doing no classifier free guidance. + do_classifier_free_guidance = guidance_scale > 1.0 + # get unconditional embeddings for classifier free guidance + if do_classifier_free_guidance: + uncond_tokens = [""] + max_length = self.tokenizer.model_max_length + uncond_input = self.tokenizer( + uncond_tokens, + padding="max_length", + max_length=max_length, + truncation=True, + return_tensors="pt", + ) + uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0] + + # duplicate unconditional embeddings for each generation per prompt, using mps friendly method + seq_len = uncond_embeddings.shape[1] + uncond_embeddings = uncond_embeddings.view(1, seq_len, -1) + + # For classifier free guidance, we need to do two forward passes. + # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) + + # get the initial random noise unless the user supplied it + + # Unlike in other pipelines, latents need to be generated in the target device + # for 1-to-1 results reproducibility with the CompVis implementation. + # However this currently doesn't work in `mps`. + latents_shape = (1, self.unet.in_channels, height // 8, width // 8) + latents_dtype = text_embeddings.dtype + if self.device.type == "mps": + # randn does not exist on mps + latents = torch.randn(latents_shape, generator=generator, device="cpu", dtype=latents_dtype).to( + self.device + ) + else: + latents = torch.randn(latents_shape, generator=generator, device=self.device, dtype=latents_dtype) + + # set timesteps + self.scheduler.set_timesteps(num_inference_steps) + + # Some schedulers like PNDM have timesteps as arrays + # It's more optimized to move all timesteps to correct device beforehand + timesteps_tensor = self.scheduler.timesteps.to(self.device) + + # scale the initial noise by the standard deviation required by the scheduler + latents = latents * self.scheduler.init_noise_sigma + + # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature + # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. 
+ # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 + # and should be between [0, 1] + accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) + extra_step_kwargs = {} + if accepts_eta: + extra_step_kwargs["eta"] = eta + + for i, t in enumerate(self.progress_bar(timesteps_tensor)): + # expand the latents if we are doing classifier free guidance + latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + + # predict the noise residual + noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample + + # perform guidance + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample + + latents = 1 / 0.18215 * latents + image = self.vae.decode(latents).sample + + image = (image / 2 + 0.5).clamp(0, 1) + + # we always cast to float32 as this does not cause significant overhead and is compatible with bfloa16 + image = image.cpu().permute(0, 2, 3, 1).float().numpy() + + if self.safety_checker is not None: + safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to( + self.device + ) + image, has_nsfw_concept = self.safety_checker( + images=image, clip_input=safety_checker_input.pixel_values.to(text_embeddings.dtype) + ) + else: + has_nsfw_concept = None + + if output_type == "pil": + image = self.numpy_to_pil(image) + + if not return_dict: + return (image, has_nsfw_concept) + + return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept) From 98c42134a5615e1c26f2cca70ff9a4c142850f65 Mon Sep 17 00:00:00 2001 From: MatthieuTPHR <75613333+MatthieuTPHR@users.noreply.github.com> Date: Wed, 2 Nov 2022 10:29:06 +0100 Subject: [PATCH 02/88] Up to 2x speedup on GPUs using memory efficient attention (#532) * 2x speedup using memory efficient attention * remove einops dependency * Swap K, M in op instantiation * Simplify code, remove unnecessary maybe_init call and function, remove unused self.scale parameter * make xformers a soft dependency * remove one-liner functions * change one letter variable to appropriate names * Remove Env variable dependency, remove MemoryEfficientCrossAttention class and use enable_xformers_memory_efficient_attention method * Add memory efficient attention toggle to img2img and inpaint pipelines * Clearer management of xformers' availability * update optimizations markdown to add info about memory efficient attention * add benchmarks for TITAN RTX * More detailed explanation of how the mem eff benchmark were ran * Removing autocast from optimization markdown * import_utils: import torch only if is available Co-authored-by: Nouamane Tazi --- docs/source/optimization/fp16.mdx | 39 +++++++++++++ src/diffusers/models/attention.py | 55 +++++++++++++++++-- src/diffusers/models/unet_2d_blocks.py | 12 ++++ src/diffusers/models/unet_2d_condition.py | 11 ++++ .../pipeline_stable_diffusion.py | 18 ++++++ .../pipeline_stable_diffusion_img2img.py | 18 ++++++ .../pipeline_stable_diffusion_inpaint.py | 18 ++++++ src/diffusers/utils/import_utils.py | 16 ++++++ 8 files changed, 183 insertions(+), 4 deletions(-) diff --git a/docs/source/optimization/fp16.mdx 
b/docs/source/optimization/fp16.mdx index f12c067ba5..4371daacc9 100644 --- a/docs/source/optimization/fp16.mdx +++ b/docs/source/optimization/fp16.mdx @@ -22,6 +22,7 @@ We present some techniques and ideas to optimize 🤗 Diffusers _inference_ for | fp16 | 3.61s | x2.63 | | channels last | 3.30s | x2.88 | | traced UNet | 3.21s | x2.96 | +| memory efficient attention | 2.63s | x3.61 | obtained on NVIDIA TITAN RTX by generating a single image of size 512x512 from @@ -290,3 +291,41 @@ pipe.unet = TracedUNet() with torch.inference_mode(): image = pipe([prompt] * 1, num_inference_steps=50).images[0] ``` + + +## Memory Efficient Attention +Recent work on optimizing the bandwitdh in the attention block have generated huge speed ups and gains in GPU memory usage. The most recent being Flash Attention (from @tridao, [code](https://github.com/HazyResearch/flash-attention), [paper](https://arxiv.org/pdf/2205.14135.pdf)) . +Here are the speedups we obtain on a few Nvidia GPUs when running the inference at 512x512 with a batch size of 1 (one prompt): + +| GPU | Base Attention FP16 | Memory Efficient Attention FP16 | +|------------------ |--------------------- |--------------------------------- | +| NVIDIA Tesla T4 | 3.5it/s | 5.5it/s | +| NVIDIA 3060 RTX | 4.6it/s | 7.8it/s | +| NVIDIA A10G | 8.88it/s | 15.6it/s | +| NVIDIA RTX A6000 | 11.7it/s | 21.09it/s | +| NVIDIA TITAN RTX | 12.51it/s | 18.22it/s | +| A100-SXM4-40GB | 18.6it/s | 29.it/s | +| A100-SXM-80GB | 18.7it/s | 29.5it/s | + +To leverage it just make sure you have: + - PyTorch > 1.12 + - Cuda available + - Installed the [xformers](https://github.com/facebookresearch/xformers) library +```python +from diffusers import StableDiffusionPipeline +import torch + +pipe = StableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + revision="fp16", + torch_dtype=torch.float16, +).to("cuda") + +pipe.enable_xformers_memory_efficient_attention() + +with torch.inference_mode(): + sample = pipe("a small cat") + +# optional: You can disable it via +# pipe.disable_xformers_memory_efficient_attention() +``` \ No newline at end of file diff --git a/src/diffusers/models/attention.py b/src/diffusers/models/attention.py index af441ef861..1f9cf641c3 100644 --- a/src/diffusers/models/attention.py +++ b/src/diffusers/models/attention.py @@ -18,6 +18,15 @@ import torch import torch.nn.functional as F from torch import nn +from diffusers.utils.import_utils import is_xformers_available + + +if is_xformers_available(): + import xformers + import xformers.ops +else: + xformers = None + class AttentionBlock(nn.Module): """ @@ -150,6 +159,10 @@ class SpatialTransformer(nn.Module): for block in self.transformer_blocks: block._set_attention_slice(slice_size) + def _set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): + for block in self.transformer_blocks: + block._set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) + def forward(self, hidden_states, context=None): # note: if no context is given, cross-attention defaults to self-attention batch, channel, height, weight = hidden_states.shape @@ -206,6 +219,32 @@ class BasicTransformerBlock(nn.Module): self.attn1._slice_size = slice_size self.attn2._slice_size = slice_size + def _set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): + if not is_xformers_available(): + print("Here is how to install it") + raise ModuleNotFoundError( + "Refer to https://github.com/facebookresearch/xformers 
for more information on how to install" + " xformers", + name="xformers", + ) + elif not torch.cuda.is_available(): + raise ValueError( + "torch.cuda.is_available() should be True but is False. xformers' memory efficient attention is only" + " available for GPU " + ) + else: + try: + # Make sure we can run the memory efficient attention + _ = xformers.ops.memory_efficient_attention( + torch.randn((1, 2, 40), device="cuda"), + torch.randn((1, 2, 40), device="cuda"), + torch.randn((1, 2, 40), device="cuda"), + ) + except Exception as e: + raise e + self.attn1._use_memory_efficient_attention_xformers = use_memory_efficient_attention_xformers + self.attn2._use_memory_efficient_attention_xformers = use_memory_efficient_attention_xformers + def forward(self, hidden_states, context=None): hidden_states = self.attn1(self.norm1(hidden_states)) + hidden_states hidden_states = self.attn2(self.norm2(hidden_states), context=context) + hidden_states @@ -239,6 +278,7 @@ class CrossAttention(nn.Module): # is split across the batch axis to save memory # You can set slice_size with `set_attention_slice` self._slice_size = None + self._use_memory_efficient_attention_xformers = False self.to_q = nn.Linear(query_dim, inner_dim, bias=False) self.to_k = nn.Linear(context_dim, inner_dim, bias=False) @@ -279,11 +319,13 @@ class CrossAttention(nn.Module): # TODO(PVP) - mask is currently never used. Remember to re-implement when used # attention, what we cannot get enough of - - if self._slice_size is None or query.shape[0] // self._slice_size == 1: - hidden_states = self._attention(query, key, value) + if self._use_memory_efficient_attention_xformers: + hidden_states = self._memory_efficient_attention_xformers(query, key, value) else: - hidden_states = self._sliced_attention(query, key, value, sequence_length, dim) + if self._slice_size is None or query.shape[0] // self._slice_size == 1: + hidden_states = self._attention(query, key, value) + else: + hidden_states = self._sliced_attention(query, key, value, sequence_length, dim) # linear proj hidden_states = self.to_out[0](hidden_states) @@ -341,6 +383,11 @@ class CrossAttention(nn.Module): hidden_states = self.reshape_batch_dim_to_heads(hidden_states) return hidden_states + def _memory_efficient_attention_xformers(self, query, key, value): + hidden_states = xformers.ops.memory_efficient_attention(query, key, value, attn_bias=None) + hidden_states = self.reshape_batch_dim_to_heads(hidden_states) + return hidden_states + class FeedForward(nn.Module): r""" diff --git a/src/diffusers/models/unet_2d_blocks.py b/src/diffusers/models/unet_2d_blocks.py index f4081c5c1c..ae4fe2d8bb 100644 --- a/src/diffusers/models/unet_2d_blocks.py +++ b/src/diffusers/models/unet_2d_blocks.py @@ -367,6 +367,10 @@ class UNetMidBlock2DCrossAttn(nn.Module): for attn in self.attentions: attn._set_attention_slice(slice_size) + def set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): + for attn in self.attentions: + attn._set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) + def forward(self, hidden_states, temb=None, encoder_hidden_states=None): hidden_states = self.resnets[0](hidden_states, temb) for attn, resnet in zip(self.attentions, self.resnets[1:]): @@ -542,6 +546,10 @@ class CrossAttnDownBlock2D(nn.Module): for attn in self.attentions: attn._set_attention_slice(slice_size) + def set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): + for attn in self.attentions: + 
attn._set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) + def forward(self, hidden_states, temb=None, encoder_hidden_states=None): output_states = () @@ -1117,6 +1125,10 @@ class CrossAttnUpBlock2D(nn.Module): self.gradient_checkpointing = False + def set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): + for attn in self.attentions: + attn._set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) + def forward( self, hidden_states, diff --git a/src/diffusers/models/unet_2d_condition.py b/src/diffusers/models/unet_2d_condition.py index d271b78a65..7f7f3ecd44 100644 --- a/src/diffusers/models/unet_2d_condition.py +++ b/src/diffusers/models/unet_2d_condition.py @@ -225,6 +225,17 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin): if hasattr(block, "attentions") and block.attentions is not None: block.set_attention_slice(slice_size) + def set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): + for block in self.down_blocks: + if hasattr(block, "attentions") and block.attentions is not None: + block.set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) + + self.mid_block.set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) + + for block in self.up_blocks: + if hasattr(block, "attentions") and block.attentions is not None: + block.set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) + def _set_gradient_checkpointing(self, module, value=False): if isinstance(module, (CrossAttnDownBlock2D, DownBlock2D, CrossAttnUpBlock2D, UpBlock2D)): module.gradient_checkpointing = value diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 5927f36b12..3c1eb734a4 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -113,6 +113,24 @@ class StableDiffusionPipeline(DiffusionPipeline): feature_extractor=feature_extractor, ) + def enable_xformers_memory_efficient_attention(self): + r""" + Enable memory efficient attention as implemented in xformers. + + When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference + time. Speed up at training time is not guaranteed. + + Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention + is used. + """ + self.unet.set_use_memory_efficient_attention_xformers(True) + + def disable_xformers_memory_efficient_attention(self): + r""" + Disable memory efficient attention as implemented in xformers. + """ + self.unet.set_use_memory_efficient_attention_xformers(False) + def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): r""" Enable sliced attention computation. 
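For quick reference, the toggle added to `StableDiffusionPipeline` above can be exercised as in the following minimal sketch; it mirrors the example added to `docs/source/optimization/fp16.mdx` in this same patch and assumes a CUDA-enabled PyTorch >= 1.12 with the xformers library installed (the model id and prompt are placeholders).

```python
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    revision="fp16",
    torch_dtype=torch.float16,
).to("cuda")

# route the UNet's attention blocks through xformers' memory efficient kernel
pipe.enable_xformers_memory_efficient_attention()

with torch.inference_mode():
    image = pipe("a small cat").images[0]

# revert to the default (optionally sliced) attention implementation
pipe.disable_xformers_memory_efficient_attention()
```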
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 00c364f8e5..e61fb27acc 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -151,6 +151,24 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): # set slice_size = `None` to disable `set_attention_slice` self.enable_attention_slicing(None) + def enable_xformers_memory_efficient_attention(self): + r""" + Enable memory efficient attention as implemented in xformers. + + When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference + time. Speed up at training time is not guaranteed. + + Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention + is used. + """ + self.unet.set_use_memory_efficient_attention_xformers(True) + + def disable_xformers_memory_efficient_attention(self): + r""" + Disable memory efficient attention as implemented in xformers. + """ + self.unet.set_use_memory_efficient_attention_xformers(False) + @torch.no_grad() def __call__( self, diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 57f9b65716..bbe6ee6083 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -151,6 +151,24 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): # set slice_size = `None` to disable `attention slicing` self.enable_attention_slicing(None) + def enable_xformers_memory_efficient_attention(self): + r""" + Enable memory efficient attention as implemented in xformers. + + When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference + time. Speed up at training time is not guaranteed. + + Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention + is used. + """ + self.unet.set_use_memory_efficient_attention_xformers(True) + + def disable_xformers_memory_efficient_attention(self): + r""" + Disable memory efficient attention as implemented in xformers. 
+ """ + self.unet.set_use_memory_efficient_attention_xformers(False) + @torch.no_grad() def __call__( self, diff --git a/src/diffusers/utils/import_utils.py b/src/diffusers/utils/import_utils.py index 2a5f7f64dd..4ea02dcc94 100644 --- a/src/diffusers/utils/import_utils.py +++ b/src/diffusers/utils/import_utils.py @@ -168,6 +168,18 @@ try: except importlib_metadata.PackageNotFoundError: _accelerate_available = False +_xformers_available = importlib.util.find_spec("xformers") is not None +try: + _xformers_version = importlib_metadata.version("xformers") + if _torch_available: + import torch + + if torch.__version__ < version.Version("1.12"): + raise ValueError("PyTorch should be >= 1.12") + logger.debug(f"Successfully imported xformers version {_xformers_version}") +except importlib_metadata.PackageNotFoundError: + _xformers_available = False + def is_torch_available(): return _torch_available @@ -205,6 +217,10 @@ def is_scipy_available(): return _scipy_available +def is_xformers_available(): + return _xformers_available + + def is_accelerate_available(): return _accelerate_available From 86087957117649524c671563e3e051eadfdad5e8 Mon Sep 17 00:00:00 2001 From: Suraj Patil Date: Wed, 2 Nov 2022 11:32:46 +0100 Subject: [PATCH 03/88] [docs] add euler scheduler in docs, how to use differnet schedulers (#1089) * add euler scheduler in docs * add a section for how to use different scheds * address patrck's comments --- docs/source/api/pipelines/stable_diffusion.mdx | 15 +++++++++++++++ docs/source/api/schedulers.mdx | 16 ++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/docs/source/api/pipelines/stable_diffusion.mdx b/docs/source/api/pipelines/stable_diffusion.mdx index 46ffc3412e..26d6a210ad 100644 --- a/docs/source/api/pipelines/stable_diffusion.mdx +++ b/docs/source/api/pipelines/stable_diffusion.mdx @@ -31,6 +31,21 @@ For more details about how Stable Diffusion works and how it differs from the ba ## Tips +### How to load and use different schedulers. + +The stable diffusion pipeline uses [`PNDMScheduler`] scheduler by default. But `diffusers` provides many other schedulers that can be used with the stable diffusion pipeline such as [`DDIMScheduler`], [`LMSDiscreteScheduler`], [`EulerDiscreteScheduler`], [`EulerAncestralDiscreteScheduler`] etc. +To use a different scheduler, you can pass the `scheduler` argument to `from_pretrained` method of the pipeline. For example, to use the [`EulerDiscreteScheduler`], you can do the following: + +```python +from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler + +euler_scheduler = EulerDiscreteScheduler.from_config("CompVis/stable-diffusion-v1-4", subfolder="scheduler") +pipeline = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", scheduler=euler_scheduler) +``` + + +### How to conver all use cases with multiple or single pipeline + If you want to use all possible use cases in a single `DiffusionPipeline` you can either: - Make use of the [Stable Diffusion Mega Pipeline](https://github.com/huggingface/diffusers/tree/main/examples/community#stable-diffusion-mega) or - Make use of the `components` functionality to instantiate all components in the most memory-efficient way: diff --git a/docs/source/api/schedulers.mdx b/docs/source/api/schedulers.mdx index 3f88e563de..6616a3e515 100644 --- a/docs/source/api/schedulers.mdx +++ b/docs/source/api/schedulers.mdx @@ -112,3 +112,19 @@ Score SDE-VP is under construction. 
[[autodoc]] schedulers.scheduling_sde_vp.ScoreSdeVpScheduler + + +#### Euler scheduler + +Euler scheduler (Algorithm 2) from the paper [Elucidating the Design Space of Diffusion-Based Generative Models](https://arxiv.org/abs/2206.00364) by Karras et al. (2022). Based on the original [k-diffusion](https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L51) implementation by Katherine Crowson. +Fast scheduler which often times generates good outputs with 20-30 steps. + +[[autodoc]] EulerDiscreteScheduler + + +#### Euler Ancestral scheduler + +Ancestral sampling with Euler method steps. Based on the original (k-diffusion)[https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L72] implementation by Katherine Crowson. +Fast scheduler which often times generates good outputs with 20-30 steps. + +[[autodoc]] EulerAncestralDiscreteScheduler \ No newline at end of file From 8ee21915bfd5a81f1f6bb17dd8ac7c6ba5693ce4 Mon Sep 17 00:00:00 2001 From: Lewington-pitsos Date: Wed, 2 Nov 2022 21:47:26 +1100 Subject: [PATCH 04/88] Integration tests precision improvement for inpainting (#1052) * improve test precision get tests passing with greater precision using lewington images * make old numpy load function a wrapper around a more flexible numpy loading function * adhere to black formatting * add more black formatting * adhere to isort * loosen precision and replace path Co-authored-by: Patrick von Platen --- src/diffusers/utils/__init__.py | 1 + src/diffusers/utils/testing_utils.py | 31 +++++++++++++++---- tests/models/test_models_unet_2d.py | 14 +++++++-- tests/models/test_models_vae.py | 4 +-- .../test_stable_diffusion_inpaint.py | 27 +++++++--------- .../test_stable_diffusion_inpaint_legacy.py | 20 ++++++------ 6 files changed, 60 insertions(+), 37 deletions(-) diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py index 12d7311283..7395f4edfa 100644 --- a/src/diffusers/utils/__init__.py +++ b/src/diffusers/utils/__init__.py @@ -42,6 +42,7 @@ from .outputs import BaseOutput if is_torch_available(): from .testing_utils import ( floats_tensor, + load_hf_numpy, load_image, load_numpy, parse_flag_from_env, diff --git a/src/diffusers/utils/testing_utils.py b/src/diffusers/utils/testing_utils.py index bd3b08d54a..bf398e5b6f 100644 --- a/src/diffusers/utils/testing_utils.py +++ b/src/diffusers/utils/testing_utils.py @@ -139,6 +139,29 @@ def require_onnxruntime(test_case): return unittest.skipUnless(is_onnx_available(), "test requires onnxruntime")(test_case) +def load_numpy(arry: Union[str, np.ndarray]) -> np.ndarray: + if isinstance(arry, str): + if arry.startswith("http://") or arry.startswith("https://"): + response = requests.get(arry) + response.raise_for_status() + arry = np.load(BytesIO(response.content)) + elif os.path.isfile(arry): + arry = np.load(arry) + else: + raise ValueError( + f"Incorrect path or url, URLs must start with `http://` or `https://`, and {arry} is not a valid path" + ) + elif isinstance(arry, np.ndarray): + pass + else: + raise ValueError( + "Incorrect format used for numpy ndarray. Should be an url linking to an image, a local path, or a" + " ndarray." 
+ ) + + return arry + + def load_image(image: Union[str, PIL.Image.Image]) -> PIL.Image.Image: """ Args: @@ -168,17 +191,13 @@ def load_image(image: Union[str, PIL.Image.Image]) -> PIL.Image.Image: return image -def load_numpy(path) -> np.ndarray: +def load_hf_numpy(path) -> np.ndarray: if not path.startswith("http://") or path.startswith("https://"): path = os.path.join( "https://huggingface.co/datasets/fusing/diffusers-testing/resolve/main", urllib.parse.quote(path) ) - response = requests.get(path) - response.raise_for_status() - array = np.load(BytesIO(response.content)) - - return array + return load_numpy(path) # --- pytest conf functions --- # diff --git a/tests/models/test_models_unet_2d.py b/tests/models/test_models_unet_2d.py index 548588918c..20371708a4 100644 --- a/tests/models/test_models_unet_2d.py +++ b/tests/models/test_models_unet_2d.py @@ -21,7 +21,15 @@ import unittest import torch from diffusers import UNet2DConditionModel, UNet2DModel -from diffusers.utils import floats_tensor, load_numpy, logging, require_torch_gpu, slow, torch_all_close, torch_device +from diffusers.utils import ( + floats_tensor, + load_hf_numpy, + logging, + require_torch_gpu, + slow, + torch_all_close, + torch_device, +) from parameterized import parameterized from ..test_modeling_common import ModelTesterMixin @@ -423,7 +431,7 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase): def get_latents(self, seed=0, shape=(4, 4, 64, 64), fp16=False): dtype = torch.float16 if fp16 else torch.float32 - image = torch.from_numpy(load_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype) + image = torch.from_numpy(load_hf_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype) return image def get_unet_model(self, fp16=False, model_id="CompVis/stable-diffusion-v1-4"): @@ -439,7 +447,7 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase): def get_encoder_hidden_states(self, seed=0, shape=(4, 77, 768), fp16=False): dtype = torch.float16 if fp16 else torch.float32 - hidden_states = torch.from_numpy(load_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype) + hidden_states = torch.from_numpy(load_hf_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype) return hidden_states @parameterized.expand( diff --git a/tests/models/test_models_vae.py b/tests/models/test_models_vae.py index f6333d6cd9..3da7b50e34 100644 --- a/tests/models/test_models_vae.py +++ b/tests/models/test_models_vae.py @@ -20,7 +20,7 @@ import torch from diffusers import AutoencoderKL from diffusers.modeling_utils import ModelMixin -from diffusers.utils import floats_tensor, load_numpy, require_torch_gpu, slow, torch_all_close, torch_device +from diffusers.utils import floats_tensor, load_hf_numpy, require_torch_gpu, slow, torch_all_close, torch_device from parameterized import parameterized from ..test_modeling_common import ModelTesterMixin @@ -147,7 +147,7 @@ class AutoencoderKLIntegrationTests(unittest.TestCase): def get_sd_image(self, seed=0, shape=(4, 3, 512, 512), fp16=False): dtype = torch.float16 if fp16 else torch.float32 - image = torch.from_numpy(load_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype) + image = torch.from_numpy(load_hf_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype) return image def get_sd_vae_model(self, model_id="CompVis/stable-diffusion-v1-4", fp16=False): diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py 
b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py index 0a373ada68..f5a8b3cf9e 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py @@ -28,7 +28,7 @@ from diffusers import ( UNet2DModel, VQModel, ) -from diffusers.utils import floats_tensor, load_image, slow, torch_device +from diffusers.utils import floats_tensor, load_image, load_numpy, slow, torch_device from diffusers.utils.testing_utils import require_torch_gpu from PIL import Image from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer @@ -278,11 +278,10 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" ) - expected_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/in_paint/yellow_cat_sitting_on_a_park_bench.png" + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint" + "/yellow_cat_sitting_on_a_park_bench.npy" ) - expected_image = np.array(expected_image, dtype=np.float32) / 255.0 model_id = "runwayml/stable-diffusion-inpainting" pipe = StableDiffusionInpaintPipeline.from_pretrained( @@ -307,7 +306,7 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): image = output.images[0] assert image.shape == (512, 512, 3) - assert np.abs(expected_image - image).max() < 1e-2 + assert np.abs(expected_image - image).max() < 1e-3 def test_stable_diffusion_inpaint_pipeline_fp16(self): init_image = load_image( @@ -318,11 +317,10 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" ) - expected_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/in_paint/yellow_cat_sitting_on_a_park_bench_fp16.png" + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint" + "/yellow_cat_sitting_on_a_park_bench_fp16.npy" ) - expected_image = np.array(expected_image, dtype=np.float32) / 255.0 model_id = "runwayml/stable-diffusion-inpainting" pipe = StableDiffusionInpaintPipeline.from_pretrained( @@ -360,11 +358,10 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" ) - expected_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/in_paint/yellow_cat_sitting_on_a_park_bench_pndm.png" + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint" + "/yellow_cat_sitting_on_a_park_bench_pndm.npy" ) - expected_image = np.array(expected_image, dtype=np.float32) / 255.0 model_id = "runwayml/stable-diffusion-inpainting" pndm = PNDMScheduler.from_config(model_id, subfolder="scheduler") @@ -388,4 +385,4 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): image = output.images[0] assert image.shape == (512, 512, 3) - assert np.abs(expected_image - image).max() < 1e-2 + assert np.abs(expected_image - image).max() < 1e-3 diff --git 
a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint_legacy.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint_legacy.py index d25342a35a..81deba67f2 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint_legacy.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint_legacy.py @@ -31,7 +31,7 @@ from diffusers import ( VQModel, ) from diffusers.utils import floats_tensor, load_image, slow, torch_device -from diffusers.utils.testing_utils import require_torch_gpu +from diffusers.utils.testing_utils import load_numpy, require_torch_gpu from PIL import Image from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer @@ -358,11 +358,10 @@ class StableDiffusionInpaintLegacyPipelineIntegrationTests(unittest.TestCase): "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" ) - expected_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/in_paint/red_cat_sitting_on_a_park_bench.png" + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint" + "/red_cat_sitting_on_a_park_bench.npy" ) - expected_image = np.array(expected_image, dtype=np.float32) / 255.0 model_id = "CompVis/stable-diffusion-v1-4" pipe = StableDiffusionInpaintPipeline.from_pretrained( @@ -389,7 +388,7 @@ class StableDiffusionInpaintLegacyPipelineIntegrationTests(unittest.TestCase): image = output.images[0] assert image.shape == (512, 512, 3) - assert np.abs(expected_image - image).max() < 1e-2 + assert np.abs(expected_image - image).max() < 1e-3 def test_stable_diffusion_inpaint_legacy_pipeline_k_lms(self): # TODO(Anton, Patrick) - I think we can remove this test soon @@ -401,11 +400,10 @@ class StableDiffusionInpaintLegacyPipelineIntegrationTests(unittest.TestCase): "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" ) - expected_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/in_paint/red_cat_sitting_on_a_park_bench_k_lms.png" + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint" + "/red_cat_sitting_on_a_park_bench_k_lms.npy" ) - expected_image = np.array(expected_image, dtype=np.float32) / 255.0 model_id = "CompVis/stable-diffusion-v1-4" lms = LMSDiscreteScheduler.from_config(model_id, subfolder="scheduler") @@ -434,7 +432,7 @@ class StableDiffusionInpaintLegacyPipelineIntegrationTests(unittest.TestCase): image = output.images[0] assert image.shape == (512, 512, 3) - assert np.abs(expected_image - image).max() < 1e-2 + assert np.abs(expected_image - image).max() < 1e-3 def test_stable_diffusion_inpaint_legacy_intermediate_state(self): number_of_steps = 0 From bdbcaa9852565d3854100f6504266ffcc7405b05 Mon Sep 17 00:00:00 2001 From: rafael Date: Wed, 2 Nov 2022 06:51:18 -0400 Subject: [PATCH 05/88] lpw_stable_diffusion: Add is_cancelled_callback (#1053) * [Community Pipelines] lpw_stable_diffusion: Add is_cancelled_callback * [Community pipelines] lpw_stable_diffusion_onnx: Add is_cancelled_callback --- examples/community/lpw_stable_diffusion.py | 12 ++++++++++-- examples/community/lpw_stable_diffusion_onnx.py | 12 ++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/examples/community/lpw_stable_diffusion.py 
b/examples/community/lpw_stable_diffusion.py index 1b2279de72..74aed2fec8 100644 --- a/examples/community/lpw_stable_diffusion.py +++ b/examples/community/lpw_stable_diffusion.py @@ -498,6 +498,7 @@ class StableDiffusionLongPromptWeightingPipeline(DiffusionPipeline): output_type: Optional[str] = "pil", return_dict: bool = True, callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, + is_cancelled_callback: Optional[Callable[[], bool]] = None, callback_steps: Optional[int] = 1, **kwargs, ): @@ -560,11 +561,15 @@ class StableDiffusionLongPromptWeightingPipeline(DiffusionPipeline): callback (`Callable`, *optional*): A function that will be called every `callback_steps` steps during inference. The function will be called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. + is_cancelled_callback (`Callable`, *optional*): + A function that will be called every `callback_steps` steps during inference. If the function returns + `True`, the inference will be cancelled. callback_steps (`int`, *optional*, defaults to 1): The frequency at which the `callback` function will be called. If not specified, the callback will be called at every step. Returns: + `None` if cancelled by `is_cancelled_callback`, [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the generated images, and the second element is a @@ -757,8 +762,11 @@ class StableDiffusionLongPromptWeightingPipeline(DiffusionPipeline): latents = (init_latents_proper * mask) + (latents * (1 - mask)) # call the callback, if provided - if callback is not None and i % callback_steps == 0: - callback(i, t, latents) + if i % callback_steps == 0: + if callback is not None: + callback(i, t, latents) + if is_cancelled_callback is not None and is_cancelled_callback(): + return None latents = 1 / 0.18215 * latents image = self.vae.decode(latents).sample diff --git a/examples/community/lpw_stable_diffusion_onnx.py b/examples/community/lpw_stable_diffusion_onnx.py index 37f03c86f2..69b942f9ef 100644 --- a/examples/community/lpw_stable_diffusion_onnx.py +++ b/examples/community/lpw_stable_diffusion_onnx.py @@ -435,6 +435,7 @@ class OnnxStableDiffusionLongPromptWeightingPipeline(DiffusionPipeline): output_type: Optional[str] = "pil", return_dict: bool = True, callback: Optional[Callable[[int, int, np.ndarray], None]] = None, + is_cancelled_callback: Optional[Callable[[], bool]] = None, callback_steps: Optional[int] = 1, **kwargs, ): @@ -496,11 +497,15 @@ class OnnxStableDiffusionLongPromptWeightingPipeline(DiffusionPipeline): callback (`Callable`, *optional*): A function that will be called every `callback_steps` steps during inference. The function will be called with the following arguments: `callback(step: int, timestep: int, latents: np.ndarray)`. + is_cancelled_callback (`Callable`, *optional*): + A function that will be called every `callback_steps` steps during inference. If the function returns + `True`, the inference will be cancelled. callback_steps (`int`, *optional*, defaults to 1): The frequency at which the `callback` function will be called. If not specified, the callback will be called at every step. 
Returns: + `None` if cancelled by `is_cancelled_callback`, [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the generated images, and the second element is a @@ -668,8 +673,11 @@ class OnnxStableDiffusionLongPromptWeightingPipeline(DiffusionPipeline): latents = (init_latents_proper * mask) + (latents * (1 - mask)) # call the callback, if provided - if callback is not None and i % callback_steps == 0: - callback(i, t, latents) + if i % callback_steps == 0: + if callback is not None: + callback(i, t, latents) + if is_cancelled_callback is not None and is_cancelled_callback(): + return None latents = 1 / 0.18215 * latents # image = self.vae_decoder(latent_sample=latents)[0] From d53ffbbdf448eddb35d0e73868053be943b1e17a Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 2 Nov 2022 11:59:00 +0100 Subject: [PATCH 06/88] Rename latent (#1102) * Rename latent * uP --- docs/source/api/pipelines/overview.mdx | 2 +- src/diffusers/pipelines/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/api/pipelines/overview.mdx b/docs/source/api/pipelines/overview.mdx index af711a02d9..9bb351d93c 100644 --- a/docs/source/api/pipelines/overview.mdx +++ b/docs/source/api/pipelines/overview.mdx @@ -28,7 +28,7 @@ or created independently from each other. To that end, we strive to offer all open-sourced, state-of-the-art diffusion system under a unified API. More specifically, we strive to provide pipelines that -- 1. can load the officially published weights and yield 1-to-1 the same outputs as the original implementation according to the corresponding paper (*e.g.* [LatentDiffusionPipeline](https://github.com/huggingface/diffusers/tree/main/src/diffusers/pipelines/latent_diffusion), uses the officially released weights of [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752)), +- 1. can load the officially published weights and yield 1-to-1 the same outputs as the original implementation according to the corresponding paper (*e.g.* [LDMTextToImagePipeline](https://github.com/huggingface/diffusers/tree/main/src/diffusers/pipelines/latent_diffusion), uses the officially released weights of [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752)), - 2. have a simple user interface to run the model in inference (see the [Pipelines API](#pipelines-api) section), - 3. are easy to understand with code that is self-explanatory and can be read along-side the official paper (see [Pipelines summary](#pipelines-summary)), - 4. can easily be contributed by the community (see the [Contribution](#contribution) section). diff --git a/src/diffusers/pipelines/README.md b/src/diffusers/pipelines/README.md index 86048eb5a0..2941660fa2 100644 --- a/src/diffusers/pipelines/README.md +++ b/src/diffusers/pipelines/README.md @@ -16,7 +16,7 @@ or created independently from each other. To that end, we strive to offer all open-sourced, state-of-the-art diffusion system under a unified API. More specifically, we strive to provide pipelines that -- 1. 
can load the officially published weights and yield 1-to-1 the same outputs as the original implementation according to the corresponding paper (*e.g.* [LatentDiffusionPipeline](https://github.com/huggingface/diffusers/tree/main/src/diffusers/pipelines/latent_diffusion), uses the officially released weights of [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752)), +- 1. can load the officially published weights and yield 1-to-1 the same outputs as the original implementation according to the corresponding paper (*e.g.* [LDMTextToImagePipeline](https://github.com/huggingface/diffusers/tree/main/src/diffusers/pipelines/latent_diffusion), uses the officially released weights of [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752)), - 2. have a simple user interface to run the model in inference (see the [Pipelines API](#pipelines-api) section), - 3. are easy to understand with code that is self-explanatory and can be read along-side the official paper (see [Pipelines summary](#pipelines-summary)), - 4. can easily be contributed by the community (see the [Contribution](#contribution) section). From 0025626cd9feca260e05f295f272af4f7b9ce44a Mon Sep 17 00:00:00 2001 From: Jonathan Rahn Date: Wed, 2 Nov 2022 13:15:30 +0100 Subject: [PATCH 07/88] fix typo in examples dreambooth README.md (#1073) Update README.md fixed typo --- examples/dreambooth/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/dreambooth/README.md b/examples/dreambooth/README.md index 5b091aece3..3c9d04abc2 100644 --- a/examples/dreambooth/README.md +++ b/examples/dreambooth/README.md @@ -185,7 +185,7 @@ accelerate launch train_dreambooth.py \ --class_prompt="a photo of dog" \ --resolution=512 \ --train_batch_size=1 \ - --use_8bit_adam + --use_8bit_adam \ --gradient_checkpointing \ --learning_rate=2e-6 \ --lr_scheduler="constant" \ @@ -291,4 +291,4 @@ python train_dreambooth_flax.py \ --learning_rate=2e-6 \ --num_class_images=200 \ --max_train_steps=800 -``` \ No newline at end of file +``` From b1ec61ee45e14403f6f5e81a5097a50e8f5385a7 Mon Sep 17 00:00:00 2001 From: Suraj Patil Date: Wed, 2 Nov 2022 14:02:52 +0100 Subject: [PATCH 08/88] fix model card url in text inversion readme. (#1103) Update README.md --- examples/textual_inversion/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/textual_inversion/README.md b/examples/textual_inversion/README.md index 9f3aa67145..2edf34cb49 100644 --- a/examples/textual_inversion/README.md +++ b/examples/textual_inversion/README.md @@ -29,7 +29,7 @@ accelerate config ### Cat toy example -You need to accept the model license before downloading or using the weights. In this example we'll use model version `v1-4`, so you'll need to visit [its card](https://huggingface.co/CompVis/stable-diffusion-v1-4), read the license and tick the checkbox if you agree. +You need to accept the model license before downloading or using the weights. In this example we'll use model version `v1-5`, so you'll need to visit [its card](https://huggingface.co/runwayml/stable-diffusion-v1-5), read the license and tick the checkbox if you agree. You have to be a registered user in 🤗 Hugging Face Hub, and you'll also need to use an access token for the code to work. For more information on access tokens, please refer to [this section of the documentation](https://huggingface.co/docs/hub/security-tokens). 
@@ -111,4 +111,4 @@ python textual_inversion_flax.py \ --learning_rate=5.0e-04 --scale_lr \ --output_dir="textual_inversion_cat" ``` -It should be at least 70% faster than the PyTorch script with the same configuration. \ No newline at end of file +It should be at least 70% faster than the PyTorch script with the same configuration. From 4e59bcc680ba4f68bc8d45249db5fe7a078413db Mon Sep 17 00:00:00 2001 From: Anton Lozhkov Date: Wed, 2 Nov 2022 14:07:07 +0100 Subject: [PATCH 09/88] [CI] Framework and hardware-specific CI tests (#997) * [WIP][CI] Framework and hardware-specific docker images for CI tests * username * fix cpu * try out the image * push latest * update workspace * no root isolation for actions * add a flax image * flax and onnx matrix * fix runners * add reports * onnxruntime image * retry tpu * fix * fix * build onnxruntime * naming * onnxruntime-gpu image * onnxruntime-gpu image, slow tests * latest jax version * trigger flax * run flax tests in one thread * fast flax tests on cpu * fast flax tests on cpu * trigger slow tests * rebuild torch cuda * force cuda provider * fix onnxruntime tests * trigger slow * don't specify gpu for tpu * optimize * memory limit * fix flax tests * disable docker cache --- .github/workflows/build_docker_images.yml | 50 ++++++++++ .github/workflows/pr_tests.yml | 78 +++++++++++++--- .github/workflows/push_tests.yml | 91 ++++++++++++++----- docker/diffusers-flax-cpu/Dockerfile | 42 +++++++++ docker/diffusers-flax-tpu/Dockerfile | 44 +++++++++ docker/diffusers-onnxruntime-cpu/Dockerfile | 42 +++++++++ docker/diffusers-onnxruntime-cuda/Dockerfile | 42 +++++++++ docker/diffusers-pytorch-cpu/Dockerfile | 41 +++++++++ docker/diffusers-pytorch-cuda/Dockerfile | 41 +++++++++ setup.py | 6 +- src/diffusers/dependency_versions_table.py | 5 +- .../test_onnx_stable_diffusion.py | 23 ++++- .../test_onnx_stable_diffusion_img2img.py | 21 ++++- .../test_onnx_stable_diffusion_inpaint.py | 22 ++++- tests/test_pipelines_flax.py | 6 +- tests/test_scheduler_flax.py | 53 ++++++++--- 16 files changed, 540 insertions(+), 67 deletions(-) create mode 100644 .github/workflows/build_docker_images.yml create mode 100644 docker/diffusers-flax-cpu/Dockerfile create mode 100644 docker/diffusers-flax-tpu/Dockerfile create mode 100644 docker/diffusers-onnxruntime-cpu/Dockerfile create mode 100644 docker/diffusers-onnxruntime-cuda/Dockerfile create mode 100644 docker/diffusers-pytorch-cpu/Dockerfile create mode 100644 docker/diffusers-pytorch-cuda/Dockerfile diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml new file mode 100644 index 0000000000..ff4bd66fdd --- /dev/null +++ b/.github/workflows/build_docker_images.yml @@ -0,0 +1,50 @@ +name: Build Docker images (nightly) + +on: + workflow_dispatch: + schedule: + - cron: "0 0 * * *" # every day at midnight + +concurrency: + group: docker-image-builds + cancel-in-progress: false + +env: + REGISTRY: diffusers + +jobs: + build-docker-images: + runs-on: ubuntu-latest + + permissions: + contents: read + packages: write + + strategy: + fail-fast: false + matrix: + image-name: + - diffusers-pytorch-cpu + - diffusers-pytorch-cuda + - diffusers-flax-cpu + - diffusers-flax-tpu + - diffusers-onnxruntime-cpu + - diffusers-onnxruntime-cuda + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v2 + with: + username: ${{ env.REGISTRY }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Build and push + uses: 
docker/build-push-action@v3 + with: + no-cache: true + context: ./docker/${{ matrix.image-name }} + push: true + tags: ${{ env.REGISTRY }}/${{ matrix.image-name }}:latest diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml index 163e24136f..242e9552d9 100644 --- a/.github/workflows/pr_tests.yml +++ b/.github/workflows/pr_tests.yml @@ -11,19 +11,45 @@ concurrency: env: DIFFUSERS_IS_CI: yes - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 + OMP_NUM_THREADS: 4 + MKL_NUM_THREADS: 4 PYTEST_TIMEOUT: 60 MPS_TORCH_VERSION: 1.13.0 jobs: - run_tests_cpu: - name: CPU tests on Ubuntu - runs-on: [ self-hosted, docker-gpu ] + run_fast_tests: + strategy: + fail-fast: false + matrix: + config: + - name: Fast PyTorch CPU tests on Ubuntu + framework: pytorch + runner: docker-cpu + image: diffusers/diffusers-pytorch-cpu + report: torch_cpu + - name: Fast Flax CPU tests on Ubuntu + framework: flax + runner: docker-cpu + image: diffusers/diffusers-flax-cpu + report: flax_cpu + - name: Fast ONNXRuntime CPU tests on Ubuntu + framework: onnxruntime + runner: docker-cpu + image: diffusers/diffusers-onnxruntime-cpu + report: onnx_cpu + + name: ${{ matrix.config.name }} + + runs-on: ${{ matrix.config.runner }} + container: - image: python:3.7 + image: ${{ matrix.config.image }} options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ + defaults: + run: + shell: bash + steps: - name: Checkout diffusers uses: actions/checkout@v3 @@ -32,8 +58,6 @@ jobs: - name: Install dependencies run: | - python -m pip install --upgrade pip - python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cpu python -m pip install -e .[quality,test] python -m pip install git+https://github.com/huggingface/accelerate @@ -41,25 +65,49 @@ jobs: run: | python utils/print_env.py - - name: Run all fast tests on CPU + - name: Run fast PyTorch CPU tests + if: ${{ matrix.config.framework == 'pytorch' }} env: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} run: | - python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=tests_torch_cpu tests/ + python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile \ + -s -v -k "not Flax and not Onnx" \ + --make-reports=tests_${{ matrix.config.report }} \ + tests/ + + - name: Run fast Flax TPU tests + if: ${{ matrix.config.framework == 'flax' }} + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + run: | + python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile \ + -s -v -k "Flax" \ + --make-reports=tests_${{ matrix.config.report }} \ + tests/ + + - name: Run fast ONNXRuntime CPU tests + if: ${{ matrix.config.framework == 'onnxruntime' }} + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + run: | + python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile \ + -s -v -k "Onnx" \ + --make-reports=tests_${{ matrix.config.report }} \ + tests/ - name: Failure short reports if: ${{ failure() }} - run: cat reports/tests_torch_cpu_failures_short.txt + run: cat reports/tests_${{ matrix.config.report }}_failures_short.txt - name: Test suite reports artifacts if: ${{ always() }} uses: actions/upload-artifact@v2 with: - name: pr_torch_cpu_test_reports + name: pr_${{ matrix.config.report }}_test_reports path: reports - run_tests_apple_m1: - name: MPS tests on Apple M1 + run_fast_tests_apple_m1: + name: Fast PyTorch MPS tests on MacOS runs-on: [ self-hosted, apple-m1 ] steps: @@ -91,7 +139,7 @@ jobs: run: | ${CONDA_RUN} python utils/print_env.py - - name: Run all fast tests on MPS + - 
name: Run fast PyTorch tests on M1 (MPS) shell: arch -arch arm64 bash {0} env: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml index 397fecb560..2beb05e8ea 100644 --- a/.github/workflows/push_tests.yml +++ b/.github/workflows/push_tests.yml @@ -14,12 +14,38 @@ env: RUN_SLOW: yes jobs: - run_tests_single_gpu: - name: Diffusers tests - runs-on: [ self-hosted, docker-gpu, single-gpu ] + run_slow_tests: + strategy: + fail-fast: false + matrix: + config: + - name: Slow PyTorch CUDA tests on Ubuntu + framework: pytorch + runner: docker-gpu + image: diffusers/diffusers-pytorch-cuda + report: torch_cuda + - name: Slow Flax TPU tests on Ubuntu + framework: flax + runner: docker-tpu + image: diffusers/diffusers-flax-tpu + report: flax_tpu + - name: Slow ONNXRuntime CUDA tests on Ubuntu + framework: onnxruntime + runner: docker-gpu + image: diffusers/diffusers-onnxruntime-cuda + report: onnx_cuda + + name: ${{ matrix.config.name }} + + runs-on: ${{ matrix.config.runner }} + container: - image: nvcr.io/nvidia/pytorch:22.07-py3 - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache + image: ${{ matrix.config.image }} + options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ ${{ matrix.config.runner == 'docker-tpu' && '--privileged' || '--gpus 0'}} + + defaults: + run: + shell: bash steps: - name: Checkout diffusers @@ -28,14 +54,12 @@ jobs: fetch-depth: 2 - name: NVIDIA-SMI + if : ${{ matrix.config.runner == 'docker-gpu' }} run: | nvidia-smi - name: Install dependencies run: | - python -m pip install --upgrade pip - python -m pip uninstall -y torch torchvision torchtext - python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu117 python -m pip install -e .[quality,test] python -m pip install git+https://github.com/huggingface/accelerate @@ -43,29 +67,55 @@ jobs: run: | python utils/print_env.py - - name: Run all (incl. 
slow) tests on GPU + - name: Run slow PyTorch CUDA tests + if: ${{ matrix.config.framework == 'pytorch' }} env: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} run: | - python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=tests_torch_gpu tests/ + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ + -s -v -k "not Flax and not Onnx" \ + --make-reports=tests_${{ matrix.config.report }} \ + tests/ + + - name: Run slow Flax TPU tests + if: ${{ matrix.config.framework == 'flax' }} + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + run: | + python -m pytest -n 0 \ + -s -v -k "Flax" \ + --make-reports=tests_${{ matrix.config.report }} \ + tests/ + + - name: Run slow ONNXRuntime CUDA tests + if: ${{ matrix.config.framework == 'onnxruntime' }} + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + run: | + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ + -s -v -k "Onnx" \ + --make-reports=tests_${{ matrix.config.report }} \ + tests/ - name: Failure short reports if: ${{ failure() }} - run: cat reports/tests_torch_gpu_failures_short.txt + run: cat reports/tests_${{ matrix.config.report }}_failures_short.txt - name: Test suite reports artifacts if: ${{ always() }} uses: actions/upload-artifact@v2 with: - name: torch_test_reports + name: ${{ matrix.config.report }}_test_reports path: reports - run_examples_single_gpu: - name: Examples tests - runs-on: [ self-hosted, docker-gpu, single-gpu ] + run_examples_tests: + name: Examples PyTorch CUDA tests on Ubuntu + + runs-on: docker-gpu + container: - image: nvcr.io/nvidia/pytorch:22.07-py3 - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache + image: diffusers/diffusers-pytorch-cuda + options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ steps: - name: Checkout diffusers @@ -79,9 +129,6 @@ jobs: - name: Install dependencies run: | - python -m pip install --upgrade pip - python -m pip uninstall -y torch torchvision torchtext - python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu117 python -m pip install -e .[quality,test,training] python -m pip install git+https://github.com/huggingface/accelerate @@ -93,11 +140,11 @@ jobs: env: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} run: | - python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=examples_torch_gpu examples/ + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=examples_torch_cuda examples/ - name: Failure short reports if: ${{ failure() }} - run: cat reports/examples_torch_gpu_failures_short.txt + run: cat reports/examples_torch_cuda_failures_short.txt - name: Test suite reports artifacts if: ${{ always() }} diff --git a/docker/diffusers-flax-cpu/Dockerfile b/docker/diffusers-flax-cpu/Dockerfile new file mode 100644 index 0000000000..a4b4ccd65b --- /dev/null +++ b/docker/diffusers-flax-cpu/Dockerfile @@ -0,0 +1,42 @@ +FROM ubuntu:20.04 +LABEL maintainer="Hugging Face" +LABEL repository="diffusers" + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt update && \ + apt install -y bash \ + build-essential \ + git \ + git-lfs \ + curl \ + ca-certificates \ + python3.8 \ + python3-pip \ + python3.8-venv && \ + rm -rf /var/lib/apt/lists + +# make sure to use venv +RUN python3 -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) +# follow the instructions here: 
https://cloud.google.com/tpu/docs/run-in-container#train_a_jax_model_in_a_docker_container +RUN python3 -m pip install --no-cache-dir --upgrade pip && \ + python3 -m pip install --upgrade --no-cache-dir \ + clu \ + "jax[cpu]>=0.2.16,!=0.3.2" \ + "flax>=0.4.1" \ + "jaxlib>=0.1.65" && \ + python3 -m pip install --no-cache-dir \ + accelerate \ + datasets \ + hf-doc-builder \ + huggingface-hub \ + modelcards \ + numpy \ + scipy \ + tensorboard \ + transformers + +CMD ["/bin/bash"] \ No newline at end of file diff --git a/docker/diffusers-flax-tpu/Dockerfile b/docker/diffusers-flax-tpu/Dockerfile new file mode 100644 index 0000000000..5508af6622 --- /dev/null +++ b/docker/diffusers-flax-tpu/Dockerfile @@ -0,0 +1,44 @@ +FROM ubuntu:20.04 +LABEL maintainer="Hugging Face" +LABEL repository="diffusers" + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt update && \ + apt install -y bash \ + build-essential \ + git \ + git-lfs \ + curl \ + ca-certificates \ + python3.8 \ + python3-pip \ + python3.8-venv && \ + rm -rf /var/lib/apt/lists + +# make sure to use venv +RUN python3 -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) +# follow the instructions here: https://cloud.google.com/tpu/docs/run-in-container#train_a_jax_model_in_a_docker_container +RUN python3 -m pip install --no-cache-dir --upgrade pip && \ + python3 -m pip install --no-cache-dir \ + "jax[tpu]>=0.2.16,!=0.3.2" \ + -f https://storage.googleapis.com/jax-releases/libtpu_releases.html && \ + python3 -m pip install --upgrade --no-cache-dir \ + clu \ + "flax>=0.4.1" \ + "jaxlib>=0.1.65" && \ + python3 -m pip install --no-cache-dir \ + accelerate \ + datasets \ + hf-doc-builder \ + huggingface-hub \ + modelcards \ + numpy \ + scipy \ + tensorboard \ + transformers + +CMD ["/bin/bash"] \ No newline at end of file diff --git a/docker/diffusers-onnxruntime-cpu/Dockerfile b/docker/diffusers-onnxruntime-cpu/Dockerfile new file mode 100644 index 0000000000..c925715915 --- /dev/null +++ b/docker/diffusers-onnxruntime-cpu/Dockerfile @@ -0,0 +1,42 @@ +FROM ubuntu:20.04 +LABEL maintainer="Hugging Face" +LABEL repository="diffusers" + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt update && \ + apt install -y bash \ + build-essential \ + git \ + git-lfs \ + curl \ + ca-certificates \ + python3.8 \ + python3-pip \ + python3.8-venv && \ + rm -rf /var/lib/apt/lists + +# make sure to use venv +RUN python3 -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) +RUN python3 -m pip install --no-cache-dir --upgrade pip && \ + python3 -m pip install --no-cache-dir \ + torch \ + torchvision \ + torchaudio \ + onnxruntime \ + --extra-index-url https://download.pytorch.org/whl/cpu && \ + python3 -m pip install --no-cache-dir \ + accelerate \ + datasets \ + hf-doc-builder \ + huggingface-hub \ + modelcards \ + numpy \ + scipy \ + tensorboard \ + transformers + +CMD ["/bin/bash"] \ No newline at end of file diff --git a/docker/diffusers-onnxruntime-cuda/Dockerfile b/docker/diffusers-onnxruntime-cuda/Dockerfile new file mode 100644 index 0000000000..e51a5e0ba3 --- /dev/null +++ b/docker/diffusers-onnxruntime-cuda/Dockerfile @@ -0,0 +1,42 @@ +FROM nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04 +LABEL maintainer="Hugging Face" +LABEL repository="diffusers" + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt update && \ + apt install -y bash \ + build-essential \ + git \ + git-lfs \ + curl 
\ + ca-certificates \ + python3.8 \ + python3-pip \ + python3.8-venv && \ + rm -rf /var/lib/apt/lists + +# make sure to use venv +RUN python3 -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) +RUN python3 -m pip install --no-cache-dir --upgrade pip && \ + python3 -m pip install --no-cache-dir \ + torch \ + torchvision \ + torchaudio \ + "onnxruntime-gpu>=1.13.1" \ + --extra-index-url https://download.pytorch.org/whl/cu117 && \ + python3 -m pip install --no-cache-dir \ + accelerate \ + datasets \ + hf-doc-builder \ + huggingface-hub \ + modelcards \ + numpy \ + scipy \ + tensorboard \ + transformers + +CMD ["/bin/bash"] \ No newline at end of file diff --git a/docker/diffusers-pytorch-cpu/Dockerfile b/docker/diffusers-pytorch-cpu/Dockerfile new file mode 100644 index 0000000000..41d1672f60 --- /dev/null +++ b/docker/diffusers-pytorch-cpu/Dockerfile @@ -0,0 +1,41 @@ +FROM ubuntu:20.04 +LABEL maintainer="Hugging Face" +LABEL repository="diffusers" + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt update && \ + apt install -y bash \ + build-essential \ + git \ + git-lfs \ + curl \ + ca-certificates \ + python3.8 \ + python3-pip \ + python3.8-venv && \ + rm -rf /var/lib/apt/lists + +# make sure to use venv +RUN python3 -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) +RUN python3 -m pip install --no-cache-dir --upgrade pip && \ + python3 -m pip install --no-cache-dir \ + torch \ + torchvision \ + torchaudio \ + --extra-index-url https://download.pytorch.org/whl/cpu && \ + python3 -m pip install --no-cache-dir \ + accelerate \ + datasets \ + hf-doc-builder \ + huggingface-hub \ + modelcards \ + numpy \ + scipy \ + tensorboard \ + transformers + +CMD ["/bin/bash"] \ No newline at end of file diff --git a/docker/diffusers-pytorch-cuda/Dockerfile b/docker/diffusers-pytorch-cuda/Dockerfile new file mode 100644 index 0000000000..ba80395c89 --- /dev/null +++ b/docker/diffusers-pytorch-cuda/Dockerfile @@ -0,0 +1,41 @@ +FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu20.04 +LABEL maintainer="Hugging Face" +LABEL repository="diffusers" + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt update && \ + apt install -y bash \ + build-essential \ + git \ + git-lfs \ + curl \ + ca-certificates \ + python3.8 \ + python3-pip \ + python3.8-venv && \ + rm -rf /var/lib/apt/lists + +# make sure to use venv +RUN python3 -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +# pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) +RUN python3 -m pip install --no-cache-dir --upgrade pip && \ + python3 -m pip install --no-cache-dir \ + torch \ + torchvision \ + torchaudio \ + --extra-index-url https://download.pytorch.org/whl/cu117 && \ + python3 -m pip install --no-cache-dir \ + accelerate \ + datasets \ + hf-doc-builder \ + huggingface-hub \ + modelcards \ + numpy \ + scipy \ + tensorboard \ + transformers + +CMD ["/bin/bash"] \ No newline at end of file diff --git a/setup.py b/setup.py index 6f0742e83e..8904242a31 100644 --- a/setup.py +++ b/setup.py @@ -89,11 +89,10 @@ _deps = [ "huggingface-hub>=0.10.0", "importlib_metadata", "isort>=5.5.4", - "jax>=0.2.8,!=0.3.2,<=0.3.6", - "jaxlib>=0.1.65,<=0.3.6", + "jax>=0.2.8,!=0.3.2", + "jaxlib>=0.1.65", "modelcards>=0.1.4", "numpy", - "onnxruntime", "parameterized", "pytest", "pytest-timeout", @@ -181,7 +180,6 @@ extras["training"] = 
deps_list("accelerate", "datasets", "tensorboard", "modelca extras["test"] = deps_list( "accelerate", "datasets", - "onnxruntime", "parameterized", "pytest", "pytest-timeout", diff --git a/src/diffusers/dependency_versions_table.py b/src/diffusers/dependency_versions_table.py index 64e55e932c..59e13da0f2 100644 --- a/src/diffusers/dependency_versions_table.py +++ b/src/diffusers/dependency_versions_table.py @@ -13,11 +13,10 @@ deps = { "huggingface-hub": "huggingface-hub>=0.10.0", "importlib_metadata": "importlib_metadata", "isort": "isort>=5.5.4", - "jax": "jax>=0.2.8,!=0.3.2,<=0.3.6", - "jaxlib": "jaxlib>=0.1.65,<=0.3.6", + "jax": "jax>=0.2.8,!=0.3.2", + "jaxlib": "jaxlib>=0.1.65", "modelcards": "modelcards>=0.1.4", "numpy": "numpy", - "onnxruntime": "onnxruntime", "parameterized": "parameterized", "pytest": "pytest", "pytest-timeout": "pytest-timeout", diff --git a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion.py index 1275b7f980..d8356675e9 100644 --- a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion.py @@ -18,11 +18,15 @@ import unittest import numpy as np from diffusers import OnnxStableDiffusionPipeline -from diffusers.utils.testing_utils import require_onnxruntime, slow +from diffusers.utils.testing_utils import is_onnx_available, require_onnxruntime, require_torch_gpu, slow from ...test_pipelines_onnx_common import OnnxPipelineTesterMixin +if is_onnx_available(): + import onnxruntime as ort + + class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.TestCase): # FIXME: add fast tests pass @@ -30,10 +34,23 @@ class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.Tes @slow @require_onnxruntime +@require_torch_gpu class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase): def test_inference(self): + provider = ( + "CUDAExecutionProvider", + { + "gpu_mem_limit": "17179869184", # 16GB. 
+ "arena_extend_strategy": "kSameAsRequested", + }, + ) + options = ort.SessionOptions() + options.enable_mem_pattern = False sd_pipe = OnnxStableDiffusionPipeline.from_pretrained( - "CompVis/stable-diffusion-v1-4", revision="onnx", provider="CPUExecutionProvider" + "CompVis/stable-diffusion-v1-4", + revision="onnx", + provider=provider, + sess_options=options, ) prompt = "A painting of a squirrel eating a burger" @@ -72,7 +89,7 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase): test_callback_fn.has_been_called = False pipe = OnnxStableDiffusionPipeline.from_pretrained( - "CompVis/stable-diffusion-v1-4", revision="onnx", provider="CPUExecutionProvider" + "CompVis/stable-diffusion-v1-4", revision="onnx", provider="CUDAExecutionProvider" ) pipe.set_progress_bar_config(disable=None) diff --git a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_img2img.py b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_img2img.py index 25f1b75742..3ffbfc3d4f 100644 --- a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_img2img.py +++ b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_img2img.py @@ -18,11 +18,15 @@ import unittest import numpy as np from diffusers import OnnxStableDiffusionImg2ImgPipeline -from diffusers.utils.testing_utils import load_image, require_onnxruntime, slow +from diffusers.utils.testing_utils import is_onnx_available, load_image, require_onnxruntime, require_torch_gpu, slow from ...test_pipelines_onnx_common import OnnxPipelineTesterMixin +if is_onnx_available(): + import onnxruntime as ort + + class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.TestCase): # FIXME: add fast tests pass @@ -30,6 +34,7 @@ class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.Tes @slow @require_onnxruntime +@require_torch_gpu class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase): def test_inference(self): init_image = load_image( @@ -37,8 +42,20 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase): "/img2img/sketch-mountains-input.jpg" ) init_image = init_image.resize((768, 512)) + provider = ( + "CUDAExecutionProvider", + { + "gpu_mem_limit": "17179869184", # 16GB. 
+ "arena_extend_strategy": "kSameAsRequested", + }, + ) + options = ort.SessionOptions() + options.enable_mem_pattern = False pipe = OnnxStableDiffusionImg2ImgPipeline.from_pretrained( - "CompVis/stable-diffusion-v1-4", revision="onnx", provider="CPUExecutionProvider" + "CompVis/stable-diffusion-v1-4", + revision="onnx", + provider=provider, + sess_options=options, ) pipe.set_progress_bar_config(disable=None) diff --git a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_inpaint.py b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_inpaint.py index 3f33022c11..81cbed4e51 100644 --- a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_inpaint.py +++ b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_inpaint.py @@ -18,11 +18,15 @@ import unittest import numpy as np from diffusers import OnnxStableDiffusionInpaintPipeline -from diffusers.utils.testing_utils import load_image, require_onnxruntime, slow +from diffusers.utils.testing_utils import is_onnx_available, load_image, require_onnxruntime, require_torch_gpu, slow from ...test_pipelines_onnx_common import OnnxPipelineTesterMixin +if is_onnx_available(): + import onnxruntime as ort + + class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.TestCase): # FIXME: add fast tests pass @@ -30,6 +34,7 @@ class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.Tes @slow @require_onnxruntime +@require_torch_gpu class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase): def test_stable_diffusion_inpaint_onnx(self): init_image = load_image( @@ -40,9 +45,20 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase): "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" ) - + provider = ( + "CUDAExecutionProvider", + { + "gpu_mem_limit": "17179869184", # 16GB. 
+ "arena_extend_strategy": "kSameAsRequested", + }, + ) + options = ort.SessionOptions() + options.enable_mem_pattern = False pipe = OnnxStableDiffusionInpaintPipeline.from_pretrained( - "runwayml/stable-diffusion-inpainting", revision="onnx", provider="CPUExecutionProvider" + "runwayml/stable-diffusion-inpainting", + revision="onnx", + provider=provider, + sess_options=options, ) pipe.set_progress_bar_config(disable=None) diff --git a/tests/test_pipelines_flax.py b/tests/test_pipelines_flax.py index 436e139d91..ae52fa689b 100644 --- a/tests/test_pipelines_flax.py +++ b/tests/test_pipelines_flax.py @@ -59,9 +59,9 @@ class FlaxPipelineTests(unittest.TestCase): images = p_sample(prompt_ids, params, prng_seed, num_inference_steps).images - assert images.shape == (8, 1, 64, 64, 3) - assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 4.151474)) < 1e-3 - assert np.abs((np.abs(images, dtype=np.float32).sum() - 49947.875)) < 5e-1 + assert images.shape == (8, 1, 128, 128, 3) + assert np.abs(np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 3.1111548) < 1e-3 + assert np.abs(np.abs(images, dtype=np.float32).sum() - 199746.95) < 5e-1 images_pil = pipeline.numpy_to_pil(np.asarray(images.reshape((num_samples,) + images.shape[-3:]))) diff --git a/tests/test_scheduler_flax.py b/tests/test_scheduler_flax.py index d2feaa752a..d29a8bfcc2 100644 --- a/tests/test_scheduler_flax.py +++ b/tests/test_scheduler_flax.py @@ -22,9 +22,12 @@ from diffusers.utils.testing_utils import require_flax if is_flax_available(): + import jax import jax.numpy as jnp from jax import random + jax_device = jax.default_backend() + @require_flax class FlaxSchedulerCommonTest(unittest.TestCase): @@ -308,8 +311,12 @@ class FlaxDDPMSchedulerTest(FlaxSchedulerCommonTest): result_sum = jnp.sum(jnp.abs(sample)) result_mean = jnp.mean(jnp.abs(sample)) - assert abs(result_sum - 255.1113) < 1e-2 - assert abs(result_mean - 0.332176) < 1e-3 + if jax_device == "tpu": + assert abs(result_sum - 255.0714) < 1e-2 + assert abs(result_mean - 0.332124) < 1e-3 + else: + assert abs(result_sum - 255.1113) < 1e-2 + assert abs(result_mean - 0.332176) < 1e-3 @require_flax @@ -570,8 +577,12 @@ class FlaxDDIMSchedulerTest(FlaxSchedulerCommonTest): result_sum = jnp.sum(jnp.abs(sample)) result_mean = jnp.mean(jnp.abs(sample)) - assert abs(result_sum - 149.8295) < 1e-2 - assert abs(result_mean - 0.1951) < 1e-3 + if jax_device == "tpu": + assert abs(result_sum - 149.8409) < 1e-2 + assert abs(result_mean - 0.1951) < 1e-3 + else: + assert abs(result_sum - 149.8295) < 1e-2 + assert abs(result_mean - 0.1951) < 1e-3 def test_full_loop_with_no_set_alpha_to_one(self): # We specify different beta, so that the first alpha is 0.99 @@ -579,8 +590,14 @@ class FlaxDDIMSchedulerTest(FlaxSchedulerCommonTest): result_sum = jnp.sum(jnp.abs(sample)) result_mean = jnp.mean(jnp.abs(sample)) - assert abs(result_sum - 149.0784) < 1e-2 - assert abs(result_mean - 0.1941) < 1e-3 + if jax_device == "tpu": + pass + # FIXME: both result_sum and result_mean are nan on TPU + # assert jnp.isnan(result_sum) + # assert jnp.isnan(result_mean) + else: + assert abs(result_sum - 149.0784) < 1e-2 + assert abs(result_mean - 0.1941) < 1e-3 @require_flax @@ -841,8 +858,12 @@ class FlaxPNDMSchedulerTest(FlaxSchedulerCommonTest): result_sum = jnp.sum(jnp.abs(sample)) result_mean = jnp.mean(jnp.abs(sample)) - assert abs(result_sum - 198.1318) < 1e-2 - assert abs(result_mean - 0.2580) < 1e-3 + if jax_device == "tpu": + assert abs(result_sum - 198.1542) < 1e-2 + assert 
abs(result_mean - 0.2580) < 1e-3 + else: + assert abs(result_sum - 198.1318) < 1e-2 + assert abs(result_mean - 0.2580) < 1e-3 def test_full_loop_with_set_alpha_to_one(self): # We specify different beta, so that the first alpha is 0.99 @@ -850,8 +871,12 @@ class FlaxPNDMSchedulerTest(FlaxSchedulerCommonTest): result_sum = jnp.sum(jnp.abs(sample)) result_mean = jnp.mean(jnp.abs(sample)) - assert abs(result_sum - 186.9466) < 1e-2 - assert abs(result_mean - 0.24342) < 1e-3 + if jax_device == "tpu": + assert abs(result_sum - 185.4352) < 1e-2 + assert abs(result_mean - 0.24145) < 1e-3 + else: + assert abs(result_sum - 186.9466) < 1e-2 + assert abs(result_mean - 0.24342) < 1e-3 def test_full_loop_with_no_set_alpha_to_one(self): # We specify different beta, so that the first alpha is 0.99 @@ -859,5 +884,9 @@ class FlaxPNDMSchedulerTest(FlaxSchedulerCommonTest): result_sum = jnp.sum(jnp.abs(sample)) result_mean = jnp.mean(jnp.abs(sample)) - assert abs(result_sum - 186.9482) < 1e-2 - assert abs(result_mean - 0.2434) < 1e-3 + if jax_device == "tpu": + assert abs(result_sum - 185.4352) < 1e-2 + assert abs(result_mean - 0.2414) < 1e-3 + else: + assert abs(result_sum - 186.9482) < 1e-2 + assert abs(result_mean - 0.2434) < 1e-3 From 1216a3b122ba7f13062619c33254d2e737f379dc Mon Sep 17 00:00:00 2001 From: Omiita <77219025+omihub777@users.noreply.github.com> Date: Wed, 2 Nov 2022 22:46:52 +0900 Subject: [PATCH 10/88] Fix a small typo of a variable name (#1063) Fix a small typo fix a typo in `models/attention.py`. weight -> width --- src/diffusers/models/attention.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/diffusers/models/attention.py b/src/diffusers/models/attention.py index 1f9cf641c3..372c8492b4 100644 --- a/src/diffusers/models/attention.py +++ b/src/diffusers/models/attention.py @@ -165,15 +165,15 @@ class SpatialTransformer(nn.Module): def forward(self, hidden_states, context=None): # note: if no context is given, cross-attention defaults to self-attention - batch, channel, height, weight = hidden_states.shape + batch, channel, height, width = hidden_states.shape residual = hidden_states hidden_states = self.norm(hidden_states) hidden_states = self.proj_in(hidden_states) inner_dim = hidden_states.shape[1] - hidden_states = hidden_states.permute(0, 2, 3, 1).reshape(batch, height * weight, inner_dim) + hidden_states = hidden_states.permute(0, 2, 3, 1).reshape(batch, height * width, inner_dim) for block in self.transformer_blocks: hidden_states = block(hidden_states, context=context) - hidden_states = hidden_states.reshape(batch, height, weight, inner_dim).permute(0, 3, 1, 2) + hidden_states = hidden_states.reshape(batch, height, width, inner_dim).permute(0, 3, 1, 2) hidden_states = self.proj_out(hidden_states) return hidden_states + residual From 5cd29d623ac38ccb3bdd8a5f654b85d4765d9751 Mon Sep 17 00:00:00 2001 From: Grigory Sizov Date: Wed, 2 Nov 2022 14:50:32 +0100 Subject: [PATCH 11/88] Fix tests for equivalence of DDIM and DDPM pipelines (#1069) * Fix equality test for ddim and ddpm * add docs for use_clipped_model_output in DDIM * fix inline comment * reorder imports in test_pipelines.py * Ignore use_clipped_model_output if scheduler doesn't take it --- src/diffusers/pipelines/ddim/pipeline_ddim.py | 16 +++++++-- src/diffusers/schedulers/scheduling_ddim.py | 5 ++- tests/test_pipelines.py | 33 ++++++++++++++----- 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/src/diffusers/pipelines/ddim/pipeline_ddim.py 
b/src/diffusers/pipelines/ddim/pipeline_ddim.py index 74607fe87a..733a28c9f3 100644 --- a/src/diffusers/pipelines/ddim/pipeline_ddim.py +++ b/src/diffusers/pipelines/ddim/pipeline_ddim.py @@ -13,7 +13,7 @@ # limitations under the License. - +import inspect from typing import Optional, Tuple, Union import torch @@ -44,6 +44,7 @@ class DDIMPipeline(DiffusionPipeline): generator: Optional[torch.Generator] = None, eta: float = 0.0, num_inference_steps: int = 50, + use_clipped_model_output: Optional[bool] = None, output_type: Optional[str] = "pil", return_dict: bool = True, **kwargs, ) -> Union[ImagePipelineOutput, Tuple]: r""" @@ -60,6 +61,9 @@ class DDIMPipeline(DiffusionPipeline): num_inference_steps (`int`, *optional*, defaults to 50): The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. + use_clipped_model_output (`bool`, *optional*, defaults to `None`): + if `True` or `False`, see documentation for `DDIMScheduler.step`. If `None`, nothing is passed + downstream to the scheduler. So use `None` for schedulers which don't support this argument. output_type (`str`, *optional*, defaults to `"pil"`): The output format of the generate image. Choose between [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. @@ -82,6 +86,14 @@ class DDIMPipeline(DiffusionPipeline): # set step values self.scheduler.set_timesteps(num_inference_steps) + # Ignore use_clipped_model_output if the scheduler doesn't accept this argument + accepts_use_clipped_model_output = "use_clipped_model_output" in set( + inspect.signature(self.scheduler.step).parameters.keys() + ) + extra_kwargs = {} + if accepts_use_clipped_model_output: + extra_kwargs["use_clipped_model_output"] = use_clipped_model_output + for t in self.progress_bar(self.scheduler.timesteps): # 1. predict noise model_output model_output = self.unet(image, t).sample @@ -89,7 +101,7 @@ class DDIMPipeline(DiffusionPipeline): # 2. predict previous mean of image x_t-1 and add variance depending on eta # eta corresponds to η in paper and should be between [0, 1] # do x_t -> x_t-1 - image = self.scheduler.step(model_output, t, image, eta).prev_sample + image = self.scheduler.step(model_output, t, image, eta, **extra_kwargs).prev_sample image = (image / 2 + 0.5).clamp(0, 1) image = image.cpu().permute(0, 2, 3, 1).numpy() diff --git a/src/diffusers/schedulers/scheduling_ddim.py b/src/diffusers/schedulers/scheduling_ddim.py index f95c18d9fa..23648d1bc3 100644 --- a/src/diffusers/schedulers/scheduling_ddim.py +++ b/src/diffusers/schedulers/scheduling_ddim.py @@ -220,7 +220,10 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin): sample (`torch.FloatTensor`): current instance of sample being created by diffusion process. eta (`float`): weight of noise for added noise in diffusion step. - use_clipped_model_output (`bool`): TODO + use_clipped_model_output (`bool`): if `True`, compute "corrected" `model_output` from the clipped + predicted original sample. Necessary because predicted original sample is clipped to [-1, 1] when + `self.config.clip_sample` is `True`. If no clipping has happened, "corrected" `model_output` would + coincide with the one provided as input and `use_clipped_model_output` will have no effect. + generator: random number generator.
return_dict (`bool`): option for returning tuple rather than DDIMSchedulerOutput class diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index e355a19493..c11287339a 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -42,6 +42,7 @@ from diffusers.pipeline_utils import DiffusionPipeline from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from diffusers.utils import CONFIG_NAME, WEIGHTS_NAME, floats_tensor, slow, torch_device from diffusers.utils.testing_utils import CaptureLogger, get_tests_dir +from parameterized import parameterized from PIL import Image from transformers import CLIPFeatureExtractor, CLIPModel, CLIPTextConfig, CLIPTextModel, CLIPTokenizer @@ -445,7 +446,9 @@ class PipelineSlowTests(unittest.TestCase): assert isinstance(images, list) assert isinstance(images[0], PIL.Image.Image) - def test_ddpm_ddim_equality(self): + # Make sure the test passes for different values of random seed + @parameterized.expand([(0,), (4,)]) + def test_ddpm_ddim_equality(self, seed): model_id = "google/ddpm-cifar10-32" unet = UNet2DModel.from_pretrained(model_id, device_map="auto") @@ -459,17 +462,24 @@ class PipelineSlowTests(unittest.TestCase): ddim.to(torch_device) ddim.set_progress_bar_config(disable=None) - generator = torch.manual_seed(0) + generator = torch.manual_seed(seed) ddpm_image = ddpm(generator=generator, output_type="numpy").images - generator = torch.manual_seed(0) - ddim_image = ddim(generator=generator, num_inference_steps=1000, eta=1.0, output_type="numpy").images + generator = torch.manual_seed(seed) + ddim_image = ddim( + generator=generator, + num_inference_steps=1000, + eta=1.0, + output_type="numpy", + use_clipped_model_output=True, # Need this to make DDIM match DDPM + ).images # the values aren't exactly equal, but the images look the same visually assert np.abs(ddpm_image - ddim_image).max() < 1e-1 - @unittest.skip("(Anton) The test is failing for large batch sizes, needs investigation") - def test_ddpm_ddim_equality_batched(self): + # Make sure the test passes for different values of random seed + @parameterized.expand([(0,), (4,)]) + def test_ddpm_ddim_equality_batched(self, seed): model_id = "google/ddpm-cifar10-32" unet = UNet2DModel.from_pretrained(model_id, device_map="auto") @@ -484,12 +494,17 @@ class PipelineSlowTests(unittest.TestCase): ddim.to(torch_device) ddim.set_progress_bar_config(disable=None) - generator = torch.manual_seed(0) + generator = torch.manual_seed(seed) ddpm_images = ddpm(batch_size=4, generator=generator, output_type="numpy").images - generator = torch.manual_seed(0) + generator = torch.manual_seed(seed) ddim_images = ddim( - batch_size=4, generator=generator, num_inference_steps=1000, eta=1.0, output_type="numpy" + batch_size=4, + generator=generator, + num_inference_steps=1000, + eta=1.0, + output_type="numpy", + use_clipped_model_output=True, # Need this to make DDIM match DDPM ).images # the values aren't exactly equal, but the images look the same visually From 33c487455e21d17b02aaca8cee22cbeca83f74ed Mon Sep 17 00:00:00 2001 From: Yuta Hayashibe Date: Thu, 3 Nov 2022 00:37:05 +0900 Subject: [PATCH 12/88] Fix padding in dreambooth (#1030) --- examples/dreambooth/train_dreambooth.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/dreambooth/train_dreambooth.py b/examples/dreambooth/train_dreambooth.py index c37197fdc6..9c512ef571 100644 --- a/examples/dreambooth/train_dreambooth.py +++ b/examples/dreambooth/train_dreambooth.py @@ -494,7 +494,12 @@ def 
main(args): pixel_values = torch.stack(pixel_values) pixel_values = pixel_values.to(memory_format=torch.contiguous_format).float() - input_ids = tokenizer.pad({"input_ids": input_ids}, padding=True, return_tensors="pt").input_ids + input_ids = tokenizer.pad( + {"input_ids": input_ids}, + padding="max_length", + max_length=tokenizer.model_max_length, + return_tensors="pt", + ).input_ids batch = { "input_ids": input_ids, From 0b61cea347e9b464fb03506cb78a49d38e1c74ee Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Wed, 2 Nov 2022 18:54:30 +0300 Subject: [PATCH 13/88] [Flax] time embedding (#1081) * initial get_sinusoidal_embeddings * added asserts * better var name * fix docs --- src/diffusers/models/embeddings_flax.py | 50 +++++++++++++++++-------- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/src/diffusers/models/embeddings_flax.py b/src/diffusers/models/embeddings_flax.py index e2d607499c..1e2272c1fe 100644 --- a/src/diffusers/models/embeddings_flax.py +++ b/src/diffusers/models/embeddings_flax.py @@ -17,23 +17,41 @@ import flax.linen as nn import jax.numpy as jnp -# This is like models.embeddings.get_timestep_embedding (PyTorch) but -# less general (only handles the case we currently need). -def get_sinusoidal_embeddings(timesteps, embedding_dim, freq_shift: float = 1): +def get_sinusoidal_embeddings( + timesteps: jnp.ndarray, + embedding_dim: int, + freq_shift: float = 1, + min_timescale: float = 1, + max_timescale: float = 1.0e4, + flip_sin_to_cos: bool = False, + scale: float = 1.0, +) -> jnp.ndarray: + """Returns the positional encoding (same as Tensor2Tensor). + Args: + timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + embedding_dim: The number of output channels. + min_timescale: The smallest time unit (should probably be 0.0). + max_timescale: The largest time unit. + Returns: + a Tensor of timing signals [N, num_channels] """ - This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings. + assert timesteps.ndim == 1, "Timesteps should be a 1d-array" + assert embedding_dim % 2 == 0, f"Embedding dimension {embedding_dim} should be even" + num_timescales = float(embedding_dim // 2) + log_timescale_increment = math.log(max_timescale / min_timescale) / (num_timescales - freq_shift) + inv_timescales = min_timescale * jnp.exp(jnp.arange(num_timescales, dtype=jnp.float32) * -log_timescale_increment) + emb = jnp.expand_dims(timesteps, 1) * jnp.expand_dims(inv_timescales, 0) - :param timesteps: a 1-D tensor of N indices, one per batch element. - These may be fractional. - :param embedding_dim: the dimension of the output. :param max_period: controls the minimum frequency of the - embeddings. :return: an [N x dim] tensor of positional embeddings. 
- """ - half_dim = embedding_dim // 2 - emb = math.log(10000) / (half_dim - freq_shift) - emb = jnp.exp(jnp.arange(half_dim) * -emb) - emb = timesteps[:, None] * emb[None, :] - emb = jnp.concatenate([jnp.cos(emb), jnp.sin(emb)], -1) - return emb + # scale embeddings + scaled_time = scale * emb + + if flip_sin_to_cos: + signal = jnp.concatenate([jnp.cos(scaled_time), jnp.sin(scaled_time)], axis=1) + else: + signal = jnp.concatenate([jnp.sin(scaled_time), jnp.cos(scaled_time)], axis=1) + signal = jnp.reshape(signal, [jnp.shape(timesteps)[0], embedding_dim]) + return signal class FlaxTimestepEmbedding(nn.Module): @@ -70,4 +88,4 @@ class FlaxTimesteps(nn.Module): @nn.compact def __call__(self, timesteps): - return get_sinusoidal_embeddings(timesteps, self.dim, freq_shift=self.freq_shift) + return get_sinusoidal_embeddings(timesteps, embedding_dim=self.dim, freq_shift=self.freq_shift) From cbcd0512f04064bb25206ad844ebc4a9a022cad1 Mon Sep 17 00:00:00 2001 From: Denis Date: Wed, 2 Nov 2022 17:43:40 +0100 Subject: [PATCH 14/88] Training to predict x0 in training example (#1031) * changed training example to add option to train model that predicts x0 (instead of eps), changed DDPM pipeline accordingly * Revert "changed training example to add option to train model that predicts x0 (instead of eps), changed DDPM pipeline accordingly" This reverts commit c5efb525648885f2e7df71f4483a9f248515ad61. * changed training example to add option to train model that predicts x0 (instead of eps), changed DDPM pipeline accordingly * fixed code style Co-authored-by: lukovnikov --- .../train_unconditional.py | 55 +++++++++++++++++-- src/diffusers/pipelines/ddpm/pipeline_ddpm.py | 5 +- 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/examples/unconditional_image_generation/train_unconditional.py b/examples/unconditional_image_generation/train_unconditional.py index 2bc8114cac..3f9ffb11ef 100644 --- a/examples/unconditional_image_generation/train_unconditional.py +++ b/examples/unconditional_image_generation/train_unconditional.py @@ -29,6 +29,24 @@ from tqdm.auto import tqdm logger = get_logger(__name__) +def _extract_into_tensor(arr, timesteps, broadcast_shape): + """ + Extract values from a 1-D numpy array for a batch of indices. + + :param arr: the 1-D numpy array. + :param timesteps: a tensor of indices into the array to extract. + :param broadcast_shape: a larger shape of K dimensions with the batch + dimension equal to the length of timesteps. + :return: a tensor of shape [batch_size, 1, ...] where the shape has K dims. + """ + if not isinstance(arr, torch.Tensor): + arr = torch.from_numpy(arr) + res = arr[timesteps].float().to(timesteps.device) + while len(res.shape) < len(broadcast_shape): + res = res[..., None] + return res.expand(broadcast_shape) + + def parse_args(): parser = argparse.ArgumentParser(description="Simple example of a training script.") parser.add_argument( @@ -171,6 +189,16 @@ def parse_args(): ), ) + parser.add_argument( + "--predict_mode", + type=str, + default="eps", + help="What the model should predict. 
'eps' to predict error, 'x0' to directly predict reconstruction", + ) + + parser.add_argument("--ddpm_num_steps", type=int, default=1000) + parser.add_argument("--ddpm_beta_schedule", type=str, default="linear") + args = parser.parse_args() env_local_rank = int(os.environ.get("LOCAL_RANK", -1)) if env_local_rank != -1 and env_local_rank != args.local_rank: @@ -224,7 +252,7 @@ def main(args): "UpBlock2D", ), ) - noise_scheduler = DDPMScheduler(num_train_timesteps=1000) + noise_scheduler = DDPMScheduler(num_train_timesteps=args.ddpm_num_steps, beta_schedule=args.ddpm_beta_schedule) optimizer = torch.optim.AdamW( model.parameters(), lr=args.learning_rate, @@ -257,6 +285,8 @@ def main(args): images = [augmentations(image.convert("RGB")) for image in examples["image"]] return {"input": images} + logger.info(f"Dataset size: {len(dataset)}") + dataset.set_transform(transforms) train_dataloader = torch.utils.data.DataLoader( dataset, batch_size=args.train_batch_size, shuffle=True, num_workers=args.dataloader_num_workers @@ -319,8 +349,20 @@ def main(args): with accelerator.accumulate(model): # Predict the noise residual - noise_pred = model(noisy_images, timesteps).sample - loss = F.mse_loss(noise_pred, noise) + model_output = model(noisy_images, timesteps).sample + + if args.predict_mode == "eps": + loss = F.mse_loss(model_output, noise) # this could have different weights! + elif args.predict_mode == "x0": + alpha_t = _extract_into_tensor( + noise_scheduler.alphas_cumprod, timesteps, (clean_images.shape[0], 1, 1, 1) + ) + snr_weights = alpha_t / (1 - alpha_t) + loss = snr_weights * F.mse_loss( + model_output, clean_images, reduction="none" + ) # use SNR weighting from distillation paper + loss = loss.mean() + accelerator.backward(loss) if accelerator.sync_gradients: @@ -355,7 +397,12 @@ def main(args): generator = torch.manual_seed(0) # run pipeline in inference (sample random noise and denoise) - images = pipeline(generator=generator, batch_size=args.eval_batch_size, output_type="numpy").images + images = pipeline( + generator=generator, + batch_size=args.eval_batch_size, + output_type="numpy", + predict_epsilon=args.predict_mode == "eps", + ).images # denormalize the images and save to tensorboard images_processed = (images * 255).round().astype("uint8") diff --git a/src/diffusers/pipelines/ddpm/pipeline_ddpm.py b/src/diffusers/pipelines/ddpm/pipeline_ddpm.py index 85db691dc6..811614ecbd 100644 --- a/src/diffusers/pipelines/ddpm/pipeline_ddpm.py +++ b/src/diffusers/pipelines/ddpm/pipeline_ddpm.py @@ -45,6 +45,7 @@ class DDPMPipeline(DiffusionPipeline): num_inference_steps: int = 1000, output_type: Optional[str] = "pil", return_dict: bool = True, + predict_epsilon: bool = True, **kwargs, ) -> Union[ImagePipelineOutput, Tuple]: r""" @@ -84,7 +85,9 @@ class DDPMPipeline(DiffusionPipeline): model_output = self.unet(image, t).sample # 2. 
compute previous image: x_t -> x_t-1 - image = self.scheduler.step(model_output, t, image, generator=generator).prev_sample + image = self.scheduler.step( + model_output, t, image, generator=generator, predict_epsilon=predict_epsilon + ).prev_sample image = (image / 2 + 0.5).clamp(0, 1) image = image.cpu().permute(0, 2, 3, 1).numpy() From c39a511b5ff7d37e594aad22b8ac85d0fef20406 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 2 Nov 2022 19:20:42 +0100 Subject: [PATCH 15/88] [Loading] Ignore unneeded files (#1107) * [Loading] Ignore unneeded files * up --- src/diffusers/pipeline_flax_utils.py | 12 +++++++++++- src/diffusers/pipeline_utils.py | 4 ++++ tests/test_pipelines.py | 16 ++++++++++++++++ tests/test_pipelines_flax.py | 21 ++++++++++++++++++++- 4 files changed, 51 insertions(+), 2 deletions(-) diff --git a/src/diffusers/pipeline_flax_utils.py b/src/diffusers/pipeline_flax_utils.py index 80555f2785..c281c772db 100644 --- a/src/diffusers/pipeline_flax_utils.py +++ b/src/diffusers/pipeline_flax_utils.py @@ -302,10 +302,19 @@ class FlaxDiffusionPipeline(ConfigMixin): allow_patterns = [os.path.join(k, "*") for k in folder_names] allow_patterns += [FLAX_WEIGHTS_NAME, SCHEDULER_CONFIG_NAME, CONFIG_NAME, cls.config_name] + # make sure we don't download PyTorch weights + ignore_patterns = "*.bin" + if cls != FlaxDiffusionPipeline: requested_pipeline_class = cls.__name__ else: requested_pipeline_class = config_dict.get("_class_name", cls.__name__) + requested_pipeline_class = ( + requested_pipeline_class + if requested_pipeline_class.startswith("Flax") + else "Flax" + requested_pipeline_class + ) + user_agent = {"pipeline_class": requested_pipeline_class} user_agent = http_user_agent(user_agent) @@ -319,6 +328,7 @@ class FlaxDiffusionPipeline(ConfigMixin): use_auth_token=use_auth_token, revision=revision, allow_patterns=allow_patterns, + ignore_patterns=ignore_patterns, user_agent=user_agent, ) else: @@ -337,7 +347,7 @@ class FlaxDiffusionPipeline(ConfigMixin): if config_dict["_class_name"].startswith("Flax") else "Flax" + config_dict["_class_name"] ) - pipeline_class = getattr(diffusers_module, config_dict["_class_name"]) + pipeline_class = getattr(diffusers_module, class_name) # some modules can be passed directly to the init # in this case they are already instantiated in `kwargs` diff --git a/src/diffusers/pipeline_utils.py b/src/diffusers/pipeline_utils.py index 5c94df25cc..94c1e135ab 100644 --- a/src/diffusers/pipeline_utils.py +++ b/src/diffusers/pipeline_utils.py @@ -395,6 +395,9 @@ class DiffusionPipeline(ConfigMixin): allow_patterns = [os.path.join(k, "*") for k in folder_names] allow_patterns += [WEIGHTS_NAME, SCHEDULER_CONFIG_NAME, CONFIG_NAME, ONNX_WEIGHTS_NAME, cls.config_name] + # make sure we don't download flax weights + ignore_patterns = "*.msgpack" + if custom_pipeline is not None: allow_patterns += [CUSTOM_PIPELINE_FILE_NAME] @@ -417,6 +420,7 @@ class DiffusionPipeline(ConfigMixin): use_auth_token=use_auth_token, revision=revision, allow_patterns=allow_patterns, + ignore_patterns=ignore_patterns, user_agent=user_agent, ) else: diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index c11287339a..1654518f1e 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -73,6 +73,22 @@ def test_progress_bar(capsys): assert captured.err == "", "Progress bar should be disabled" +class DownloadTests(unittest.TestCase): + def test_download_only_pytorch(self): + with tempfile.TemporaryDirectory() as tmpdirname: + # pipeline has Flax weights + _ = 
DiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-pipe", safety_checker=None, cache_dir=tmpdirname + ) + + all_root_files = [t[-1] for t in os.walk(os.path.join(tmpdirname, os.listdir(tmpdirname)[0], "snapshots"))] + files = [item for sublist in all_root_files for item in sublist] + + # None of the downloaded files should be a flax file even if we have some here: + # https://huggingface.co/hf-internal-testing/tiny-stable-diffusion-pipe/blob/main/unet/diffusion_flax_model.msgpack + assert not any(f.endswith(".msgpack") for f in files) + + class CustomPipelineTests(unittest.TestCase): def test_load_custom_pipeline(self): pipeline = DiffusionPipeline.from_pretrained( diff --git a/tests/test_pipelines_flax.py b/tests/test_pipelines_flax.py index ae52fa689b..ac5e2621a5 100644 --- a/tests/test_pipelines_flax.py +++ b/tests/test_pipelines_flax.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import tempfile import unittest import numpy as np @@ -24,12 +26,29 @@ from diffusers.utils.testing_utils import require_flax, slow if is_flax_available(): import jax import jax.numpy as jnp - from diffusers import FlaxDDIMScheduler, FlaxStableDiffusionPipeline + from diffusers import FlaxDDIMScheduler, FlaxDiffusionPipeline, FlaxStableDiffusionPipeline from flax.jax_utils import replicate from flax.training.common_utils import shard from jax import pmap +@require_flax +class DownloadTests(unittest.TestCase): + def test_download_only_pytorch(self): + with tempfile.TemporaryDirectory() as tmpdirname: + # pipeline has Flax weights + _ = FlaxDiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-pipe", safety_checker=None, cache_dir=tmpdirname + ) + + all_root_files = [t[-1] for t in os.walk(os.path.join(tmpdirname, os.listdir(tmpdirname)[0], "snapshots"))] + files = [item for sublist in all_root_files for item in sublist] + + # None of the downloaded files should be a PyTorch file even if we have some here: + # https://huggingface.co/hf-internal-testing/tiny-stable-diffusion-pipe/blob/main/unet/diffusion_pytorch_model.bin + assert not any(f.endswith(".bin") for f in files) + + @slow @require_flax class FlaxPipelineTests(unittest.TestCase): From 0edf9ca082b0b405435767bc9e96b49b15390fd9 Mon Sep 17 00:00:00 2001 From: Anton Lozhkov Date: Thu, 3 Nov 2022 15:24:32 +0100 Subject: [PATCH 16/88] Fix hub-dependent tests for PRs (#1119) * Remove the hub token * replace repos * style --- .github/workflows/pr_tests.yml | 8 -------- tests/test_config.py | 14 ++++++++------ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml index 242e9552d9..c978efe3b7 100644 --- a/.github/workflows/pr_tests.yml +++ b/.github/workflows/pr_tests.yml @@ -67,8 +67,6 @@ jobs: - name: Run fast PyTorch CPU tests if: ${{ matrix.config.framework == 'pytorch' }} - env: - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} run: | python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile \ -s -v -k "not Flax and not Onnx" \ @@ -77,8 +75,6 @@ jobs: - name: Run fast Flax TPU tests if: ${{ matrix.config.framework == 'flax' }} - env: - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} run: | python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile \ -s -v -k "Flax" \ @@ -87,8 +83,6 @@ jobs: - name: Run fast ONNXRuntime CPU tests if: ${{ matrix.config.framework == 'onnxruntime' }} - env: - HUGGING_FACE_HUB_TOKEN: 
${{ secrets.HUGGING_FACE_HUB_TOKEN }} run: | python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile \ -s -v -k "Onnx" \ @@ -141,8 +135,6 @@ jobs: - name: Run fast PyTorch tests on M1 (MPS) shell: arch -arch arm64 bash {0} - env: - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} run: | ${CONDA_RUN} python -m pytest -n 1 -s -v --make-reports=tests_torch_mps tests/ diff --git a/tests/test_config.py b/tests/test_config.py index 1c6178d720..7a9f270af3 100755 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -244,28 +244,30 @@ class ConfigTester(unittest.TestCase): logger = logging.get_logger("diffusers.configuration_utils") with CaptureLogger(logger) as cap_logger: - ddim = DDIMScheduler.from_config("runwayml/stable-diffusion-v1-5", subfolder="scheduler") + ddim = DDIMScheduler.from_config("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="scheduler") assert ddim.__class__ == DDIMScheduler # no warning should be thrown assert cap_logger.out == "" - def test_load_ddim_from_euler(self): + def test_load_euler_from_pndm(self): logger = logging.get_logger("diffusers.configuration_utils") with CaptureLogger(logger) as cap_logger: - euler = EulerDiscreteScheduler.from_config("runwayml/stable-diffusion-v1-5", subfolder="scheduler") + euler = EulerDiscreteScheduler.from_config( + "hf-internal-testing/tiny-stable-diffusion-torch", subfolder="scheduler" + ) assert euler.__class__ == EulerDiscreteScheduler # no warning should be thrown assert cap_logger.out == "" - def test_load_ddim_from_euler_ancestral(self): + def test_load_euler_ancestral_from_pndm(self): logger = logging.get_logger("diffusers.configuration_utils") with CaptureLogger(logger) as cap_logger: euler = EulerAncestralDiscreteScheduler.from_config( - "runwayml/stable-diffusion-v1-5", subfolder="scheduler" + "hf-internal-testing/tiny-stable-diffusion-torch", subfolder="scheduler" ) assert euler.__class__ == EulerAncestralDiscreteScheduler @@ -276,7 +278,7 @@ class ConfigTester(unittest.TestCase): logger = logging.get_logger("diffusers.configuration_utils") with CaptureLogger(logger) as cap_logger: - pndm = PNDMScheduler.from_config("runwayml/stable-diffusion-v1-5", subfolder="scheduler") + pndm = PNDMScheduler.from_config("hf-internal-testing/tiny-stable-diffusion-torch", subfolder="scheduler") assert pndm.__class__ == PNDMScheduler # no warning should be thrown From 4a38166afeea498e8d333d40264e778ac5b16d81 Mon Sep 17 00:00:00 2001 From: Anton Lozhkov Date: Thu, 3 Nov 2022 15:41:33 +0100 Subject: [PATCH 17/88] Allow saving `None` pipeline components (#1118) * Allow saving `None` pipeline components * support flax as well * style --- src/diffusers/pipeline_flax_utils.py | 9 +++++++++ src/diffusers/pipeline_utils.py | 9 +++++++++ .../stable_diffusion/test_stable_diffusion.py | 11 +++++++++++ 3 files changed, 29 insertions(+) diff --git a/src/diffusers/pipeline_flax_utils.py b/src/diffusers/pipeline_flax_utils.py index c281c772db..e63009b49c 100644 --- a/src/diffusers/pipeline_flax_utils.py +++ b/src/diffusers/pipeline_flax_utils.py @@ -161,6 +161,10 @@ class FlaxDiffusionPipeline(ConfigMixin): for pipeline_component_name in model_index_dict.keys(): sub_model = getattr(self, pipeline_component_name) + if sub_model is None: + # edge case for saving a pipeline with safety_checker=None + continue + model_cls = sub_model.__class__ save_method_name = None @@ -367,6 +371,11 @@ class FlaxDiffusionPipeline(ConfigMixin): # 3. 
Load each module in the pipeline for name, (library_name, class_name) in init_dict.items(): + if class_name is None: + # edge case for when the pipeline was saved with safety_checker=None + init_kwargs[name] = None + continue + is_pipeline_module = hasattr(pipelines, library_name) loaded_sub_model = None sub_model_should_be_defined = True diff --git a/src/diffusers/pipeline_utils.py b/src/diffusers/pipeline_utils.py index 94c1e135ab..4ba8d2d930 100644 --- a/src/diffusers/pipeline_utils.py +++ b/src/diffusers/pipeline_utils.py @@ -176,6 +176,10 @@ class DiffusionPipeline(ConfigMixin): for pipeline_component_name in model_index_dict.keys(): sub_model = getattr(self, pipeline_component_name) + if sub_model is None: + # edge case for saving a pipeline with safety_checker=None + continue + model_cls = sub_model.__class__ save_method_name = None @@ -477,6 +481,11 @@ class DiffusionPipeline(ConfigMixin): # 3. Load each module in the pipeline for name, (library_name, class_name) in init_dict.items(): + if class_name is None: + # edge case for when the pipeline was saved with safety_checker=None + init_kwargs[name] = None + continue + # 3.1 - now that JAX/Flax is an official framework of the library, we might load from Flax names if class_name.startswith("Flax"): class_name = class_name[4:] diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py index 260d58e94b..ded2470cc2 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py @@ -15,6 +15,7 @@ import gc import random +import tempfile import time import unittest @@ -318,6 +319,16 @@ class StableDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase): image = pipe("example prompt", num_inference_steps=2).images[0] assert image is not None + # check that there's no error when saving a pipeline with one of the models being None + with tempfile.TemporaryDirectory() as tmpdirname: + pipe.save_pretrained(tmpdirname) + pipe = StableDiffusionPipeline.from_pretrained(tmpdirname) + + # sanity check that the pipeline still works + assert pipe.safety_checker is None + image = pipe("example prompt", num_inference_steps=2).images[0] + assert image is not None + def test_stable_diffusion_k_lms(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator unet = self.dummy_cond_unet From d38c804320192c3844ff0bc7deed83e8b8cb7856 Mon Sep 17 00:00:00 2001 From: Revist Date: Thu, 3 Nov 2022 15:42:46 +0100 Subject: [PATCH 18/88] feat: add repaint (#974) * feat: add repaint * fix: fix quality check with `make fix-copies` * fix: remove old unnecessary arg * chore: change default to DDPM (looks better in experiments) * ".to(device)" changed to "device=" Co-authored-by: Anton Lozhkov * make generator device-specific Co-authored-by: Anton Lozhkov * make generator device-specific and change shape Co-authored-by: Anton Lozhkov * fix: add preprocessing for image and mask Co-authored-by: Anton Lozhkov * fix: update test Co-authored-by: Anton Lozhkov * Update src/diffusers/pipelines/repaint/pipeline_repaint.py Co-authored-by: Patrick von Platen * Add docs and examples * Fix toctree Co-authored-by: fja Co-authored-by: Anton Lozhkov Co-authored-by: Patrick von Platen Co-authored-by: Anton Lozhkov --- docs/source/_toctree.yml | 2 + docs/source/api/pipelines/overview.mdx | 28 +- docs/source/api/pipelines/repaint.mdx | 77 +++++ docs/source/api/schedulers.mdx | 12 +- src/diffusers/__init__.py | 2 + 
src/diffusers/pipelines/__init__.py | 1 + src/diffusers/pipelines/repaint/__init__.py | 1 + .../pipelines/repaint/pipeline_repaint.py | 140 ++++++++ src/diffusers/schedulers/__init__.py | 1 + .../schedulers/scheduling_repaint.py | 322 ++++++++++++++++++ src/diffusers/utils/dummy_pt_objects.py | 30 ++ tests/pipelines/repaint/__init__.py | 0 tests/pipelines/repaint/test_repaint.py | 65 ++++ 13 files changed, 667 insertions(+), 14 deletions(-) create mode 100644 docs/source/api/pipelines/repaint.mdx create mode 100644 src/diffusers/pipelines/repaint/__init__.py create mode 100644 src/diffusers/pipelines/repaint/pipeline_repaint.py create mode 100644 src/diffusers/schedulers/scheduling_repaint.py create mode 100644 tests/pipelines/repaint/__init__.py create mode 100644 tests/pipelines/repaint/test_repaint.py diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index 7e46d95a46..331d4fff78 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -96,5 +96,7 @@ title: "Stochastic Karras VE" - local: api/pipelines/dance_diffusion title: "Dance Diffusion" + - local: api/pipelines/repaint + title: "RePaint" title: "Pipelines" title: "API" diff --git a/docs/source/api/pipelines/overview.mdx b/docs/source/api/pipelines/overview.mdx index 9bb351d93c..a53a2f8b41 100644 --- a/docs/source/api/pipelines/overview.mdx +++ b/docs/source/api/pipelines/overview.mdx @@ -41,19 +41,21 @@ If you are looking for *official* training examples, please have a look at [exam The following table summarizes all officially supported pipelines, their corresponding paper, and if available a colab notebook to directly try them out. -| Pipeline | Paper | Tasks | Colab -|---|---|:---:|:---:| -| [ddpm](./ddpm) | [**Denoising Diffusion Probabilistic Models**](https://arxiv.org/abs/2006.11239) | Unconditional Image Generation | -| [ddim](./ddim) | [**Denoising Diffusion Implicit Models**](https://arxiv.org/abs/2010.02502) | Unconditional Image Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/training_example.ipynb) -| [latent_diffusion](./latent_diffusion) | [**High-Resolution Image Synthesis with Latent Diffusion Models**](https://arxiv.org/abs/2112.10752)| Text-to-Image Generation | -| [latent_diffusion_uncond](./latent_diffusion_uncond) | [**High-Resolution Image Synthesis with Latent Diffusion Models**](https://arxiv.org/abs/2112.10752) | Unconditional Image Generation | -| [pndm](./pndm) | [**Pseudo Numerical Methods for Diffusion Models on Manifolds**](https://arxiv.org/abs/2202.09778) | Unconditional Image Generation | -| [score_sde_ve](./score_sde_ve) | [**Score-Based Generative Modeling through Stochastic Differential Equations**](https://openreview.net/forum?id=PxTIG12RRHS) | Unconditional Image Generation | -| [score_sde_vp](./score_sde_vp) | [**Score-Based Generative Modeling through Stochastic Differential Equations**](https://openreview.net/forum?id=PxTIG12RRHS) | Unconditional Image Generation | -| [stable_diffusion](./stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Text-to-Image Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/training_example.ipynb) -| [stable_diffusion](./stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Image-to-Image 
Text-Guided Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/image_2_image_using_diffusers.ipynb) -| [stable_diffusion](./stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Text-Guided Image Inpainting | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/in_painting_with_stable_diffusion_using_diffusers.ipynb) -| [stochastic_karras_ve](./stochastic_karras_ve) | [**Elucidating the Design Space of Diffusion-Based Generative Models**](https://arxiv.org/abs/2206.00364) | Unconditional Image Generation | +| Pipeline | Paper | Tasks | Colab +|------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------:|:---:| +| [ddpm](./ddpm) | [**Denoising Diffusion Probabilistic Models**](https://arxiv.org/abs/2006.11239) | Unconditional Image Generation | +| [ddim](./ddim) | [**Denoising Diffusion Implicit Models**](https://arxiv.org/abs/2010.02502) | Unconditional Image Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/training_example.ipynb) +| [latent_diffusion](./latent_diffusion) | [**High-Resolution Image Synthesis with Latent Diffusion Models**](https://arxiv.org/abs/2112.10752) | Text-to-Image Generation | +| [latent_diffusion_uncond](./latent_diffusion_uncond) | [**High-Resolution Image Synthesis with Latent Diffusion Models**](https://arxiv.org/abs/2112.10752) | Unconditional Image Generation | +| [pndm](./pndm) | [**Pseudo Numerical Methods for Diffusion Models on Manifolds**](https://arxiv.org/abs/2202.09778) | Unconditional Image Generation | +| [score_sde_ve](./score_sde_ve) | [**Score-Based Generative Modeling through Stochastic Differential Equations**](https://openreview.net/forum?id=PxTIG12RRHS) | Unconditional Image Generation | +| [score_sde_vp](./score_sde_vp) | [**Score-Based Generative Modeling through Stochastic Differential Equations**](https://openreview.net/forum?id=PxTIG12RRHS) | Unconditional Image Generation | +| [stable_diffusion](./stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Text-to-Image Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/training_example.ipynb) +| [stable_diffusion](./stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Image-to-Image Text-Guided Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/image_2_image_using_diffusers.ipynb) +| [stable_diffusion](./stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Text-Guided Image Inpainting | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/in_painting_with_stable_diffusion_using_diffusers.ipynb) +| [stochastic_karras_ve](./stochastic_karras_ve) | 
[**Elucidating the Design Space of Diffusion-Based Generative Models**](https://arxiv.org/abs/2206.00364) | Unconditional Image Generation | +| [repaint](./repaint) | [**RePaint: Inpainting using Denoising Diffusion Probabilistic Models**](https://arxiv.org/abs/2201.09865) | Image Inpainting | + **Note**: Pipelines are simple examples of how to play around with the diffusion systems as described in the corresponding papers. diff --git a/docs/source/api/pipelines/repaint.mdx b/docs/source/api/pipelines/repaint.mdx new file mode 100644 index 0000000000..0b7de8a457 --- /dev/null +++ b/docs/source/api/pipelines/repaint.mdx @@ -0,0 +1,77 @@ + + +# RePaint + +## Overview + +[RePaint: Inpainting using Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2201.09865) (PNDM) by Andreas Lugmayr, Martin Danelljan, Andres Romero, Fisher Yu, Radu Timofte, Luc Van Gool. + +The abstract of the paper is the following: + +Free-form inpainting is the task of adding new content to an image in the regions specified by an arbitrary binary mask. Most existing approaches train for a certain distribution of masks, which limits their generalization capabilities to unseen mask types. Furthermore, training with pixel-wise and perceptual losses often leads to simple textural extensions towards the missing areas instead of semantically meaningful generation. In this work, we propose RePaint: A Denoising Diffusion Probabilistic Model (DDPM) based inpainting approach that is applicable to even extreme masks. We employ a pretrained unconditional DDPM as the generative prior. To condition the generation process, we only alter the reverse diffusion iterations by sampling the unmasked regions using the given image information. Since this technique does not modify or condition the original DDPM network itself, the model produces high-quality and diverse output images for any inpainting form. We validate our method for both faces and general-purpose image inpainting using standard and extreme masks. +RePaint outperforms state-of-the-art Autoregressive, and GAN approaches for at least five out of six mask distributions. + +The original codebase can be found [here](https://github.com/andreas128/RePaint). 
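To make the conditioning step concrete: at each reverse step the known (unmasked) region is obtained by forward-diffusing the original image to the current noise level, the unknown (masked) region comes from the model's reverse step, and the two are merged with the mask. Below is a minimal sketch of that merge (illustrative only; the `repaint_combine` helper and its arguments are assumptions, and the actual implementation lives in `RePaintScheduler.step` further down in this patch).

```python
import torch


def repaint_combine(x_unknown_prev, original_image, mask, alpha_prod_t, generator=None):
    # Forward-diffuse the *known* pixels of the original image to the current noise level
    noise = torch.randn(original_image.shape, generator=generator, device=original_image.device)
    x_known_prev = alpha_prod_t**0.5 * original_image + (1 - alpha_prod_t) ** 0.5 * noise
    # Keep known pixels (mask == 1) from the original image and take unknown pixels
    # (mask == 0) from the denoising model's reverse step
    return mask * x_known_prev + (1.0 - mask) * x_unknown_prev
```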
+ +## Available Pipelines: + +| Pipeline | Tasks | Colab +|-------------------------------------------------------------------------------------------------------------------------------|--------------------|:---:| +| [pipeline_repaint.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/repaint/pipeline_repaint.py) | *Image Inpainting* | - | + +## Usage example + +```python +from io import BytesIO + +import torch + +import PIL +import requests +from diffusers import RePaintPipeline, RePaintScheduler + + +def download_image(url): + response = requests.get(url) + return PIL.Image.open(BytesIO(response.content)).convert("RGB") + + +img_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/repaint/celeba_hq_256.png" +mask_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/repaint/mask_256.png" + +# Load the original image and the mask as PIL images +original_image = download_image(img_url).resize((256, 256)) +mask_image = download_image(mask_url).resize((256, 256)) + +# Load the RePaint scheduler and pipeline based on a pretrained DDPM model +scheduler = RePaintScheduler.from_config("google/ddpm-ema-celebahq-256") +pipe = RePaintPipeline.from_pretrained("google/ddpm-ema-celebahq-256", scheduler=scheduler) +pipe = pipe.to("cuda") + +generator = torch.Generator(device="cuda").manual_seed(0) +output = pipe( + original_image=original_image, + mask_image=mask_image, + num_inference_steps=250, + eta=0.0, + jump_length=10, + jump_n_sample=10, + generator=generator, +) +inpainted_image = output.images[0] +``` + +## RePaintPipeline +[[autodoc]] pipelines.repaint.pipeline_repaint.RePaintPipeline + - __call__ + diff --git a/docs/source/api/schedulers.mdx b/docs/source/api/schedulers.mdx index 6616a3e515..6e7da10e33 100644 --- a/docs/source/api/schedulers.mdx +++ b/docs/source/api/schedulers.mdx @@ -127,4 +127,14 @@ Fast scheduler which often times generates good outputs with 20-30 steps. Ancestral sampling with Euler method steps. Based on the original (k-diffusion)[https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L72] implementation by Katherine Crowson. Fast scheduler which often times generates good outputs with 20-30 steps. -[[autodoc]] EulerAncestralDiscreteScheduler \ No newline at end of file +[[autodoc]] EulerAncestralDiscreteScheduler + + +#### RePaint scheduler + +DDPM-based inpainting scheduler for unsupervised inpainting with extreme masks. +Intended for use with [`RePaintPipeline`]. 
+Based on the paper [RePaint: Inpainting using Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2201.09865) +and the original implementation by Andreas Lugmayr et al.: https://github.com/andreas128/RePaint + +[[autodoc]] RePaintScheduler \ No newline at end of file diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index 49c3e82b8e..1a9a7d74eb 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -36,6 +36,7 @@ if is_torch_available(): KarrasVePipeline, LDMPipeline, PNDMPipeline, + RePaintPipeline, ScoreSdeVePipeline, ) from .schedulers import ( @@ -46,6 +47,7 @@ if is_torch_available(): IPNDMScheduler, KarrasVeScheduler, PNDMScheduler, + RePaintScheduler, SchedulerMixin, ScoreSdeVeScheduler, ) diff --git a/src/diffusers/pipelines/__init__.py b/src/diffusers/pipelines/__init__.py index b3124af390..8015d4e114 100644 --- a/src/diffusers/pipelines/__init__.py +++ b/src/diffusers/pipelines/__init__.py @@ -7,6 +7,7 @@ if is_torch_available(): from .ddpm import DDPMPipeline from .latent_diffusion_uncond import LDMPipeline from .pndm import PNDMPipeline + from .repaint import RePaintPipeline from .score_sde_ve import ScoreSdeVePipeline from .stochastic_karras_ve import KarrasVePipeline else: diff --git a/src/diffusers/pipelines/repaint/__init__.py b/src/diffusers/pipelines/repaint/__init__.py new file mode 100644 index 0000000000..16bc86d1ce --- /dev/null +++ b/src/diffusers/pipelines/repaint/__init__.py @@ -0,0 +1 @@ +from .pipeline_repaint import RePaintPipeline diff --git a/src/diffusers/pipelines/repaint/pipeline_repaint.py b/src/diffusers/pipelines/repaint/pipeline_repaint.py new file mode 100644 index 0000000000..7af88f6275 --- /dev/null +++ b/src/diffusers/pipelines/repaint/pipeline_repaint.py @@ -0,0 +1,140 @@ +# Copyright 2022 ETH Zurich Computer Vision Lab and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from typing import Optional, Tuple, Union + +import numpy as np +import torch + +import PIL +from tqdm.auto import tqdm + +from ...models import UNet2DModel +from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput +from ...schedulers import RePaintScheduler + + +def _preprocess_image(image: PIL.Image.Image): + image = np.array(image.convert("RGB")) + image = image[None].transpose(0, 3, 1, 2) + image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0 + return image + + +def _preprocess_mask(mask: PIL.Image.Image): + mask = np.array(mask.convert("L")) + mask = mask.astype(np.float32) / 255.0 + mask = mask[None, None] + mask[mask < 0.5] = 0 + mask[mask >= 0.5] = 1 + mask = torch.from_numpy(mask) + return mask + + +class RePaintPipeline(DiffusionPipeline): + unet: UNet2DModel + scheduler: RePaintScheduler + + def __init__(self, unet, scheduler): + super().__init__() + self.register_modules(unet=unet, scheduler=scheduler) + + @torch.no_grad() + def __call__( + self, + original_image: Union[torch.FloatTensor, PIL.Image.Image], + mask_image: Union[torch.FloatTensor, PIL.Image.Image], + num_inference_steps: int = 250, + eta: float = 0.0, + jump_length: int = 10, + jump_n_sample: int = 10, + generator: Optional[torch.Generator] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + ) -> Union[ImagePipelineOutput, Tuple]: + r""" + Args: + original_image (`torch.FloatTensor` or `PIL.Image.Image`): + The original image to inpaint on. + mask_image (`torch.FloatTensor` or `PIL.Image.Image`): + The mask_image where 0.0 values define which part of the original image to inpaint (change). + num_inference_steps (`int`, *optional*, defaults to 1000): + The number of denoising steps. More denoising steps usually lead to a higher quality image at the + expense of slower inference. + eta (`float`): + The weight of noise for added noise in a diffusion step. Its value is between 0.0 and 1.0 - 0.0 is DDIM + and 1.0 is DDPM scheduler respectively. + jump_length (`int`, *optional*, defaults to 10): + The number of steps taken forward in time before going backward in time for a single jump ("j" in + RePaint paper). Take a look at Figure 9 and 10 in https://arxiv.org/pdf/2201.09865.pdf. + jump_n_sample (`int`, *optional*, defaults to 10): + The number of times we will make forward time jump for a given chosen time sample. Take a look at + Figure 9 and 10 in https://arxiv.org/pdf/2201.09865.pdf. + generator (`torch.Generator`, *optional*): + A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation + deterministic. + output_type (`str`, *optional*, defaults to `"pil"`): + The output format of the generate image. Choose between + [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. + + Returns: + [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if + `return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the + generated images. 
+ """ + + if not isinstance(original_image, torch.FloatTensor): + original_image = _preprocess_image(original_image) + original_image = original_image.to(self.device) + if not isinstance(mask_image, torch.FloatTensor): + mask_image = _preprocess_mask(mask_image) + mask_image = mask_image.to(self.device) + + # sample gaussian noise to begin the loop + image = torch.randn( + original_image.shape, + generator=generator, + device=self.device, + ) + image = image.to(self.device) + + # set step values + self.scheduler.set_timesteps(num_inference_steps, jump_length, jump_n_sample, self.device) + self.scheduler.eta = eta + + t_last = self.scheduler.timesteps[0] + 1 + for i, t in enumerate(tqdm(self.scheduler.timesteps)): + if t < t_last: + # predict the noise residual + model_output = self.unet(image, t).sample + # compute previous image: x_t -> x_t-1 + image = self.scheduler.step(model_output, t, image, original_image, mask_image, generator).prev_sample + + else: + # compute the reverse: x_t-1 -> x_t + image = self.scheduler.undo_step(image, t_last, generator) + t_last = t + + image = (image / 2 + 0.5).clamp(0, 1) + image = image.cpu().permute(0, 2, 3, 1).numpy() + if output_type == "pil": + image = self.numpy_to_pil(image) + + if not return_dict: + return (image,) + + return ImagePipelineOutput(images=image) diff --git a/src/diffusers/schedulers/__init__.py b/src/diffusers/schedulers/__init__.py index c3999d2cac..a1915ed8d2 100644 --- a/src/diffusers/schedulers/__init__.py +++ b/src/diffusers/schedulers/__init__.py @@ -24,6 +24,7 @@ if is_torch_available(): from .scheduling_ipndm import IPNDMScheduler from .scheduling_karras_ve import KarrasVeScheduler from .scheduling_pndm import PNDMScheduler + from .scheduling_repaint import RePaintScheduler from .scheduling_sde_ve import ScoreSdeVeScheduler from .scheduling_sde_vp import ScoreSdeVpScheduler from .scheduling_utils import SchedulerMixin diff --git a/src/diffusers/schedulers/scheduling_repaint.py b/src/diffusers/schedulers/scheduling_repaint.py new file mode 100644 index 0000000000..1751f41c66 --- /dev/null +++ b/src/diffusers/schedulers/scheduling_repaint.py @@ -0,0 +1,322 @@ +# Copyright 2022 ETH Zurich Computer Vision Lab and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from dataclasses import dataclass +from typing import Optional, Tuple, Union + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput +from .scheduling_utils import SchedulerMixin + + +@dataclass +class RePaintSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's step function output. + + Args: + prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): + Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. 
+ pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): + The predicted denoised sample (x_{0}) based on the model output from + the current timestep. `pred_original_sample` can be used to preview progress or for guidance. + """ + + prev_sample: torch.FloatTensor + pred_original_sample: torch.FloatTensor + + +def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + + Args: + num_diffusion_timesteps (`int`): the number of betas to produce. + max_beta (`float`): the maximum beta to use; use values lower than 1 to + prevent singularities. + + Returns: + betas (`np.ndarray`): the betas used by the scheduler to step the model outputs + """ + + def alpha_bar(time_step): + return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2 + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class RePaintScheduler(SchedulerMixin, ConfigMixin): + """ + RePaint is a schedule for DDPM inpainting inside a given mask. + + [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` + function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. + [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and + [`~ConfigMixin.from_config`] functions. + + For more details, see the original paper: https://arxiv.org/pdf/2201.09865.pdf + + Args: + num_train_timesteps (`int`): number of diffusion steps used to train the model. + beta_start (`float`): the starting `beta` value of inference. + beta_end (`float`): the final `beta` value. + beta_schedule (`str`): + the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + eta (`float`): + The weight of noise for added noise in a diffusion step. Its value is between 0.0 and 1.0 -0.0 is DDIM and + 1.0 is DDPM scheduler respectively. + trained_betas (`np.ndarray`, optional): + option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. + variance_type (`str`): + options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small`, + `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`. + clip_sample (`bool`, default `True`): + option to clip predicted sample between -1 and 1 for numerical stability. 
+ + """ + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + eta: float = 0.0, + trained_betas: Optional[np.ndarray] = None, + clip_sample: bool = True, + ): + if trained_betas is not None: + self.betas = torch.from_numpy(trained_betas) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = ( + torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + ) + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + elif beta_schedule == "sigmoid": + # GeoDiff sigmoid schedule + betas = torch.linspace(-6, 6, num_train_timesteps) + self.betas = torch.sigmoid(betas) * (beta_end - beta_start) + beta_start + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + self.one = torch.tensor(1.0) + + self.final_alpha_cumprod = torch.tensor(1.0) + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # setable values + self.num_inference_steps = None + self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy()) + + self.eta = eta + + def scale_model_input(self, sample: torch.FloatTensor, timestep: Optional[int] = None) -> torch.FloatTensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.FloatTensor`): input sample + timestep (`int`, optional): current timestep + + Returns: + `torch.FloatTensor`: scaled input sample + """ + return sample + + def set_timesteps( + self, + num_inference_steps: int, + jump_length: int = 10, + jump_n_sample: int = 10, + device: Union[str, torch.device] = None, + ): + num_inference_steps = min(self.config.num_train_timesteps, num_inference_steps) + self.num_inference_steps = num_inference_steps + + timesteps = [] + + jumps = {} + for j in range(0, num_inference_steps - jump_length, jump_length): + jumps[j] = jump_n_sample - 1 + + t = num_inference_steps + while t >= 1: + t = t - 1 + timesteps.append(t) + + if jumps.get(t, 0) > 0: + jumps[t] = jumps[t] - 1 + for _ in range(jump_length): + t = t + 1 + timesteps.append(t) + + timesteps = np.array(timesteps) * (self.config.num_train_timesteps // self.num_inference_steps) + self.timesteps = torch.from_numpy(timesteps).to(device) + + def _get_variance(self, t): + prev_timestep = t - self.config.num_train_timesteps // self.num_inference_steps + + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + # For t > 0, compute predicted variance βt (see formula (6) and (7) from + # https://arxiv.org/pdf/2006.11239.pdf) and sample from it to get + # previous sample x_{t-1} ~ N(pred_prev_sample, variance) == add + # variance to pred_sample + # Is equivalent to formula (16) in https://arxiv.org/pdf/2010.02502.pdf + # without eta. 
+ # variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * self.betas[t] + variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev) + + return variance + + def step( + self, + model_output: torch.FloatTensor, + timestep: int, + sample: torch.FloatTensor, + original_image: torch.FloatTensor, + mask: torch.FloatTensor, + generator: Optional[torch.Generator] = None, + return_dict: bool = True, + ) -> Union[RePaintSchedulerOutput, Tuple]: + """ + Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion + process from the learned model outputs (most often the predicted noise). + + Args: + model_output (`torch.FloatTensor`): direct output from learned + diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + current instance of sample being created by diffusion process. + original_image (`torch.FloatTensor`): + the original image to inpaint on. + mask (`torch.FloatTensor`): + the mask where 0.0 values define which part of the original image to inpaint (change). + generator (`torch.Generator`, *optional*): random number generator. + return_dict (`bool`): option for returning tuple rather than + DDPMSchedulerOutput class + + Returns: + [`~schedulers.scheduling_utils.RePaintSchedulerOutput`] or `tuple`: + [`~schedulers.scheduling_utils.RePaintSchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When + returning a tuple, the first element is the sample tensor. + + """ + t = timestep + prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps + + # 1. compute alphas, betas + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + + # 2. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (15) from https://arxiv.org/pdf/2006.11239.pdf + pred_original_sample = (sample - beta_prod_t**0.5 * model_output) / alpha_prod_t**0.5 + + # 3. Clip "predicted x_0" + if self.config.clip_sample: + pred_original_sample = torch.clamp(pred_original_sample, -1, 1) + + # We choose to follow RePaint Algorithm 1 to get x_{t-1}, however we + # substitute formula (7) in the algorithm coming from DDPM paper + # (formula (4) Algorithm 2 - Sampling) with formula (12) from DDIM paper. + # DDIM schedule gives the same results as DDPM with eta = 1.0 + # Noise is being reused in 7. and 8., but no impact on quality has + # been observed. + + # 5. Add noise + noise = torch.randn( + model_output.shape, dtype=model_output.dtype, generator=generator, device=model_output.device + ) + std_dev_t = self.eta * self._get_variance(timestep) ** 0.5 + + variance = 0 + if t > 0 and self.eta > 0: + variance = std_dev_t * noise + + # 6. compute "direction pointing to x_t" of formula (12) + # from https://arxiv.org/pdf/2010.02502.pdf + pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** 0.5 * model_output + + # 7. compute x_{t-1} of formula (12) from https://arxiv.org/pdf/2010.02502.pdf + prev_unknown_part = alpha_prod_t_prev**0.5 * pred_original_sample + pred_sample_direction + variance + + # 8. Algorithm 1 Line 5 https://arxiv.org/pdf/2201.09865.pdf + prev_known_part = (alpha_prod_t**0.5) * original_image + ((1 - alpha_prod_t) ** 0.5) * noise + + # 9. 
Algorithm 1 Line 8 https://arxiv.org/pdf/2201.09865.pdf + pred_prev_sample = mask * prev_known_part + (1.0 - mask) * prev_unknown_part + + if not return_dict: + return ( + pred_prev_sample, + pred_original_sample, + ) + + return RePaintSchedulerOutput(prev_sample=pred_prev_sample, pred_original_sample=pred_original_sample) + + def undo_step(self, sample, timestep, generator=None): + n = self.config.num_train_timesteps // self.num_inference_steps + + for i in range(n): + beta = self.betas[timestep + i] + noise = torch.randn(sample.shape, generator=generator, device=sample.device) + + # 10. Algorithm 1 Line 10 https://arxiv.org/pdf/2201.09865.pdf + sample = (1 - beta) ** 0.5 * sample + beta**0.5 * noise + + return sample + + def add_noise( + self, + original_samples: torch.FloatTensor, + noise: torch.FloatTensor, + timesteps: torch.IntTensor, + ) -> torch.FloatTensor: + raise NotImplementedError("Use `DDPMScheduler.add_noise()` to train for sampling with RePaint.") + + def __len__(self): + return self.config.num_train_timesteps diff --git a/src/diffusers/utils/dummy_pt_objects.py b/src/diffusers/utils/dummy_pt_objects.py index 5dd5832797..63aa20962f 100644 --- a/src/diffusers/utils/dummy_pt_objects.py +++ b/src/diffusers/utils/dummy_pt_objects.py @@ -227,6 +227,21 @@ class PNDMPipeline(metaclass=DummyObject): requires_backends(cls, ["torch"]) +class RePaintPipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + class ScoreSdeVePipeline(metaclass=DummyObject): _backends = ["torch"] @@ -347,6 +362,21 @@ class PNDMScheduler(metaclass=DummyObject): requires_backends(cls, ["torch"]) +class RePaintScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + class SchedulerMixin(metaclass=DummyObject): _backends = ["torch"] diff --git a/tests/pipelines/repaint/__init__.py b/tests/pipelines/repaint/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/pipelines/repaint/test_repaint.py b/tests/pipelines/repaint/test_repaint.py new file mode 100644 index 0000000000..23544dfd24 --- /dev/null +++ b/tests/pipelines/repaint/test_repaint.py @@ -0,0 +1,65 @@ +# coding=utf-8 +# Copyright 2022 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
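Before the full integration test below, the resampling ("jump") schedule that the scheduler builds can be inspected on its own. This is a small sketch with arbitrary illustrative values; the schedule repeatedly steps backwards in time and periodically moves forward again, and the forward moves are where the pipeline re-noises the sample via `undo_step`.

```python
from diffusers import RePaintScheduler

scheduler = RePaintScheduler(num_train_timesteps=1000)
# Small illustrative values for the number of steps and the jump parameters
scheduler.set_timesteps(num_inference_steps=10, jump_length=3, jump_n_sample=2)
print(scheduler.timesteps)  # descending timesteps interleaved with forward jumps
```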
+ +import unittest + +import numpy as np +import torch + +from diffusers import RePaintPipeline, RePaintScheduler, UNet2DModel +from diffusers.utils.testing_utils import load_image, require_torch_gpu, slow, torch_device + + +torch.backends.cuda.matmul.allow_tf32 = False + + +@slow +@require_torch_gpu +class RepaintPipelineIntegrationTests(unittest.TestCase): + def test_celebahq(self): + original_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/" + "repaint/celeba_hq_256.png" + ) + mask_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/repaint/mask_256.png" + ) + expected_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/" + "repaint/celeba_hq_256_result.png" + ) + expected_image = np.array(expected_image, dtype=np.float32) / 255.0 + + model_id = "google/ddpm-ema-celebahq-256" + unet = UNet2DModel.from_pretrained(model_id) + scheduler = RePaintScheduler.from_config(model_id) + + repaint = RePaintPipeline(unet=unet, scheduler=scheduler).to(torch_device) + + generator = torch.Generator(device=torch_device).manual_seed(0) + output = repaint( + original_image, + mask_image, + num_inference_steps=250, + eta=0.0, + jump_length=10, + jump_n_sample=10, + generator=generator, + output_type="np", + ) + image = output.images[0] + + assert image.shape == (256, 256, 3) + assert np.abs(expected_image - image).mean() < 1e-2 From 269109dbfbbdbe2800535239b881e96e1828a0ef Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Thu, 3 Nov 2022 15:49:20 +0100 Subject: [PATCH 19/88] Continuation of #1035 (#1120) * remove batch size from repeat * repeat empty string if uncond_tokens is none * fix inpaint pipes * return back whitespace to pass code quality * Apply suggestions from code review * Fix typos. Co-authored-by: Had --- examples/community/interpolate_stable_diffusion.py | 4 ++-- examples/community/speech_to_image_diffusion.py | 4 ++-- examples/community/wildcard_stable_diffusion.py | 4 ++-- .../pipelines/stable_diffusion/pipeline_stable_diffusion.py | 4 ++-- .../stable_diffusion/pipeline_stable_diffusion_img2img.py | 6 ++++-- .../stable_diffusion/pipeline_stable_diffusion_inpaint.py | 4 ++-- .../pipeline_stable_diffusion_inpaint_legacy.py | 6 ++++-- 7 files changed, 18 insertions(+), 14 deletions(-) diff --git a/examples/community/interpolate_stable_diffusion.py b/examples/community/interpolate_stable_diffusion.py index bbb1b0f9e6..de1c6f687a 100644 --- a/examples/community/interpolate_stable_diffusion.py +++ b/examples/community/interpolate_stable_diffusion.py @@ -278,7 +278,7 @@ class StableDiffusionWalkPipeline(DiffusionPipeline): if do_classifier_free_guidance: uncond_tokens: List[str] if negative_prompt is None: - uncond_tokens = [""] + uncond_tokens = [""] * batch_size elif type(prompt) is not type(negative_prompt): raise TypeError( f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" @@ -307,7 +307,7 @@ class StableDiffusionWalkPipeline(DiffusionPipeline): # duplicate unconditional embeddings for each generation per prompt, using mps friendly method seq_len = uncond_embeddings.shape[1] - uncond_embeddings = uncond_embeddings.repeat(batch_size, num_images_per_prompt, 1) + uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1) uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) # For classifier free guidance, we need to do two forward passes. 
diff --git a/examples/community/speech_to_image_diffusion.py b/examples/community/speech_to_image_diffusion.py index 8b24914cb7..1a9d296e81 100644 --- a/examples/community/speech_to_image_diffusion.py +++ b/examples/community/speech_to_image_diffusion.py @@ -148,7 +148,7 @@ class SpeechToImagePipeline(DiffusionPipeline): if do_classifier_free_guidance: uncond_tokens: List[str] if negative_prompt is None: - uncond_tokens = [""] + uncond_tokens = [""] * batch_size elif type(prompt) is not type(negative_prompt): raise TypeError( f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" @@ -177,7 +177,7 @@ class SpeechToImagePipeline(DiffusionPipeline): # duplicate unconditional embeddings for each generation per prompt, using mps friendly method seq_len = uncond_embeddings.shape[1] - uncond_embeddings = uncond_embeddings.repeat(batch_size, num_images_per_prompt, 1) + uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1) uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) # For classifier free guidance, we need to do two forward passes. diff --git a/examples/community/wildcard_stable_diffusion.py b/examples/community/wildcard_stable_diffusion.py index 79cb4feb1e..b0f6375d50 100644 --- a/examples/community/wildcard_stable_diffusion.py +++ b/examples/community/wildcard_stable_diffusion.py @@ -295,7 +295,7 @@ class WildcardStableDiffusionPipeline(DiffusionPipeline): if do_classifier_free_guidance: uncond_tokens: List[str] if negative_prompt is None: - uncond_tokens = [""] + uncond_tokens = [""] * batch_size elif type(prompt) is not type(negative_prompt): raise TypeError( f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" @@ -324,7 +324,7 @@ class WildcardStableDiffusionPipeline(DiffusionPipeline): # duplicate unconditional embeddings for each generation per prompt, using mps friendly method seq_len = uncond_embeddings.shape[1] - uncond_embeddings = uncond_embeddings.repeat(batch_size, num_images_per_prompt, 1) + uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1) uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) # For classifier free guidance, we need to do two forward passes. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 3c1eb734a4..1ccc87804e 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -297,7 +297,7 @@ class StableDiffusionPipeline(DiffusionPipeline): if do_classifier_free_guidance: uncond_tokens: List[str] if negative_prompt is None: - uncond_tokens = [""] + uncond_tokens = [""] * batch_size elif type(prompt) is not type(negative_prompt): raise TypeError( f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" @@ -326,7 +326,7 @@ class StableDiffusionPipeline(DiffusionPipeline): # duplicate unconditional embeddings for each generation per prompt, using mps friendly method seq_len = uncond_embeddings.shape[1] - uncond_embeddings = uncond_embeddings.repeat(batch_size, num_images_per_prompt, 1) + uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1) uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) # For classifier free guidance, we need to do two forward passes. 
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index e61fb27acc..8284bac850 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -295,7 +295,7 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): if do_classifier_free_guidance: uncond_tokens: List[str] if negative_prompt is None: - uncond_tokens = [""] + uncond_tokens = [""] * batch_size elif type(prompt) is not type(negative_prompt): raise TypeError( f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" @@ -319,7 +319,9 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0] # duplicate unconditional embeddings for each generation per prompt - uncond_embeddings = uncond_embeddings.repeat_interleave(batch_size * num_images_per_prompt, dim=0) + seq_len = uncond_embeddings.shape[1] + uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1) + uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) # For classifier free guidance, we need to do two forward passes. # Here we concatenate the unconditional and text embeddings into a single batch diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index bbe6ee6083..c200892ef6 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -302,7 +302,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): if do_classifier_free_guidance: uncond_tokens: List[str] if negative_prompt is None: - uncond_tokens = [""] + uncond_tokens = [""] * batch_size elif type(prompt) is not type(negative_prompt): raise TypeError( f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" @@ -331,7 +331,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): # duplicate unconditional embeddings for each generation per prompt, using mps friendly method seq_len = uncond_embeddings.shape[1] - uncond_embeddings = uncond_embeddings.repeat(batch_size, num_images_per_prompt, 1) + uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1) uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) # For classifier free guidance, we need to do two forward passes. 
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py index 6db3624177..5c06b74bfa 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py @@ -284,7 +284,7 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline): if do_classifier_free_guidance: uncond_tokens: List[str] if negative_prompt is None: - uncond_tokens = [""] + uncond_tokens = [""] * batch_size elif type(prompt) is not type(negative_prompt): raise TypeError( f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" @@ -312,7 +312,9 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline): uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0] # duplicate unconditional embeddings for each generation per prompt - uncond_embeddings = uncond_embeddings.repeat_interleave(batch_size * num_images_per_prompt, dim=0) + seq_len = uncond_embeddings.shape[1] + uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1) + uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) # For classifier free guidance, we need to do two forward passes. # Here we concatenate the unconditional and text embeddings into a single batch From ef2ea33c3bc061fffa8bc4ccd640306ca1a1847d Mon Sep 17 00:00:00 2001 From: Will Berman Date: Thu, 3 Nov 2022 08:10:28 -0700 Subject: [PATCH 20/88] VQ-diffusion (#658) * Changes for VQ-diffusion VQVAE Add specify dimension of embeddings to VQModel: `VQModel` will by default set the dimension of embeddings to the number of latent channels. The VQ-diffusion VQVAE has a smaller embedding dimension, 128, than number of latent channels, 256. Add AttnDownEncoderBlock2D and AttnUpDecoderBlock2D to the up and down unet block helpers. VQ-diffusion's VQVAE uses those two block types. * Changes for VQ-diffusion transformer Modify attention.py so SpatialTransformer can be used for VQ-diffusion's transformer. SpatialTransformer: - Can now operate over discrete inputs (classes of vector embeddings) as well as continuous. 
- `in_channels` was made optional in the constructor so two locations where it was passed as a positional arg were moved to kwargs - modified forward pass to take optional timestep embeddings ImagePositionalEmbeddings: - added to provide positional embeddings to discrete inputs for latent pixels BasicTransformerBlock: - norm layers were made configurable so that the VQ-diffusion could use AdaLayerNorm with timestep embeddings - modified forward pass to take optional timestep embeddings CrossAttention: - now may optionally take a bias parameter for its query, key, and value linear layers FeedForward: - Internal layers are now configurable ApproximateGELU: - Activation function in VQ-diffusion's feedforward layer AdaLayerNorm: - Norm layer modified to incorporate timestep embeddings * Add VQ-diffusion scheduler * Add VQ-diffusion pipeline * Add VQ-diffusion convert script to diffusers * Add VQ-diffusion dummy objects * Add VQ-diffusion markdown docs * Add VQ-diffusion tests * some renaming * some fixes * more renaming * correct * fix typo * correct weights * finalize * fix tests * Apply suggestions from code review Co-authored-by: Anton Lozhkov * Apply suggestions from code review Co-authored-by: Pedro Cuenca * finish * finish * up Co-authored-by: Patrick von Platen Co-authored-by: Anton Lozhkov Co-authored-by: Pedro Cuenca --- docs/source/_toctree.yml | 2 + docs/source/api/models.mdx | 6 + docs/source/api/pipelines/overview.mdx | 28 +- docs/source/api/pipelines/vq_diffusion.mdx | 34 + docs/source/api/schedulers.mdx | 9 +- docs/source/index.mdx | 2 + scripts/convert_vq_diffusion_to_diffusers.py | 885 ++++++++++++++++++ src/diffusers/__init__.py | 4 +- src/diffusers/models/__init__.py | 1 + src/diffusers/models/attention.py | 444 ++++++--- src/diffusers/models/attention_flax.py | 2 +- src/diffusers/models/embeddings.py | 65 ++ src/diffusers/models/unet_2d_blocks.py | 94 +- src/diffusers/models/unet_2d_blocks_flax.py | 8 +- src/diffusers/models/vae.py | 22 +- src/diffusers/pipelines/__init__.py | 1 + .../pipelines/vq_diffusion/__init__.py | 1 + .../vq_diffusion/pipeline_vq_diffusion.py | 253 +++++ src/diffusers/schedulers/__init__.py | 1 + .../schedulers/scheduling_vq_diffusion.py | 494 ++++++++++ src/diffusers/utils/dummy_pt_objects.py | 45 + tests/pipelines/vq_diffusion/__init__.py | 0 .../vq_diffusion/test_vq_diffusion.py | 175 ++++ tests/test_layers_utils.py | 183 +++- tests/test_scheduler.py | 138 ++- 25 files changed, 2674 insertions(+), 223 deletions(-) create mode 100644 docs/source/api/pipelines/vq_diffusion.mdx create mode 100644 scripts/convert_vq_diffusion_to_diffusers.py create mode 100644 src/diffusers/pipelines/vq_diffusion/__init__.py create mode 100644 src/diffusers/pipelines/vq_diffusion/pipeline_vq_diffusion.py create mode 100644 src/diffusers/schedulers/scheduling_vq_diffusion.py create mode 100644 tests/pipelines/vq_diffusion/__init__.py create mode 100644 tests/pipelines/vq_diffusion/test_vq_diffusion.py diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index 331d4fff78..70d64b80de 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -96,6 +96,8 @@ title: "Stochastic Karras VE" - local: api/pipelines/dance_diffusion title: "Dance Diffusion" + - local: api/pipelines/vq_diffusion + title: "VQ Diffusion" - local: api/pipelines/repaint title: "RePaint" title: "Pipelines" diff --git a/docs/source/api/models.mdx b/docs/source/api/models.mdx index c3f5e65edf..2e1e8798a7 100644 --- a/docs/source/api/models.mdx +++ b/docs/source/api/models.mdx @@ 
-49,6 +49,12 @@ The models are built on the base class ['ModelMixin'] that is a `torch.nn.module ## AutoencoderKL [[autodoc]] AutoencoderKL +## Transformer2DModel +[[autodoc]] Transformer2DModel + +## Transformer2DModelOutput +[[autodoc]] models.attention.Transformer2DModelOutput + ## FlaxModelMixin [[autodoc]] FlaxModelMixin diff --git a/docs/source/api/pipelines/overview.mdx b/docs/source/api/pipelines/overview.mdx index a53a2f8b41..5a15473cf1 100644 --- a/docs/source/api/pipelines/overview.mdx +++ b/docs/source/api/pipelines/overview.mdx @@ -41,22 +41,22 @@ If you are looking for *official* training examples, please have a look at [exam The following table summarizes all officially supported pipelines, their corresponding paper, and if available a colab notebook to directly try them out. -| Pipeline | Paper | Tasks | Colab -|------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------:|:---:| -| [ddpm](./ddpm) | [**Denoising Diffusion Probabilistic Models**](https://arxiv.org/abs/2006.11239) | Unconditional Image Generation | -| [ddim](./ddim) | [**Denoising Diffusion Implicit Models**](https://arxiv.org/abs/2010.02502) | Unconditional Image Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/training_example.ipynb) -| [latent_diffusion](./latent_diffusion) | [**High-Resolution Image Synthesis with Latent Diffusion Models**](https://arxiv.org/abs/2112.10752) | Text-to-Image Generation | -| [latent_diffusion_uncond](./latent_diffusion_uncond) | [**High-Resolution Image Synthesis with Latent Diffusion Models**](https://arxiv.org/abs/2112.10752) | Unconditional Image Generation | -| [pndm](./pndm) | [**Pseudo Numerical Methods for Diffusion Models on Manifolds**](https://arxiv.org/abs/2202.09778) | Unconditional Image Generation | -| [score_sde_ve](./score_sde_ve) | [**Score-Based Generative Modeling through Stochastic Differential Equations**](https://openreview.net/forum?id=PxTIG12RRHS) | Unconditional Image Generation | -| [score_sde_vp](./score_sde_vp) | [**Score-Based Generative Modeling through Stochastic Differential Equations**](https://openreview.net/forum?id=PxTIG12RRHS) | Unconditional Image Generation | -| [stable_diffusion](./stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Text-to-Image Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/training_example.ipynb) -| [stable_diffusion](./stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Image-to-Image Text-Guided Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/image_2_image_using_diffusers.ipynb) -| [stable_diffusion](./stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Text-Guided Image Inpainting | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/in_painting_with_stable_diffusion_using_diffusers.ipynb) -| 
[stochastic_karras_ve](./stochastic_karras_ve) | [**Elucidating the Design Space of Diffusion-Based Generative Models**](https://arxiv.org/abs/2206.00364) | Unconditional Image Generation | +| Pipeline | Paper | Tasks | Colab +|---|---|:---:|:---:| +| [ddpm](./ddpm) | [**Denoising Diffusion Probabilistic Models**](https://arxiv.org/abs/2006.11239) | Unconditional Image Generation | +| [ddim](./ddim) | [**Denoising Diffusion Implicit Models**](https://arxiv.org/abs/2010.02502) | Unconditional Image Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/training_example.ipynb) +| [latent_diffusion](./latent_diffusion) | [**High-Resolution Image Synthesis with Latent Diffusion Models**](https://arxiv.org/abs/2112.10752)| Text-to-Image Generation | +| [latent_diffusion_uncond](./latent_diffusion_uncond) | [**High-Resolution Image Synthesis with Latent Diffusion Models**](https://arxiv.org/abs/2112.10752) | Unconditional Image Generation | +| [pndm](./pndm) | [**Pseudo Numerical Methods for Diffusion Models on Manifolds**](https://arxiv.org/abs/2202.09778) | Unconditional Image Generation | +| [score_sde_ve](./score_sde_ve) | [**Score-Based Generative Modeling through Stochastic Differential Equations**](https://openreview.net/forum?id=PxTIG12RRHS) | Unconditional Image Generation | +| [score_sde_vp](./score_sde_vp) | [**Score-Based Generative Modeling through Stochastic Differential Equations**](https://openreview.net/forum?id=PxTIG12RRHS) | Unconditional Image Generation | +| [stable_diffusion](./stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Text-to-Image Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/training_example.ipynb) +| [stable_diffusion](./stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Image-to-Image Text-Guided Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/image_2_image_using_diffusers.ipynb) +| [stable_diffusion](./stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Text-Guided Image Inpainting | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/in_painting_with_stable_diffusion_using_diffusers.ipynb) +| [stochastic_karras_ve](./stochastic_karras_ve) | [**Elucidating the Design Space of Diffusion-Based Generative Models**](https://arxiv.org/abs/2206.00364) | Unconditional Image Generation | +| [vq_diffusion](./vq_diffusion) | [**Vector Quantized Diffusion Model for Text-to-Image Synthesis**](https://arxiv.org/abs/2111.14822) | Text-to-Image Generation | | [repaint](./repaint) | [**RePaint: Inpainting using Denoising Diffusion Probabilistic Models**](https://arxiv.org/abs/2201.09865) | Image Inpainting | - **Note**: Pipelines are simple examples of how to play around with the diffusion systems as described in the corresponding papers. However, most of them can be adapted to use different scheduler components or even different model components. Some pipeline examples are shown in the [Examples](#examples) below. 
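For the new `vq_diffusion` entry above, a minimal usage sketch might look as follows, assuming a checkpoint converted with the script added later in this PR has been published to the Hub (the `microsoft/vq-diffusion-ithq` repo id and the call arguments below are illustrative assumptions, not taken from this patch):

```python
import torch
from diffusers import VQDiffusionPipeline

# The repo id below is an example; any checkpoint produced by the conversion
# script in this PR should work here.
pipe = VQDiffusionPipeline.from_pretrained("microsoft/vq-diffusion-ithq")
pipe = pipe.to("cuda")

generator = torch.Generator(device="cuda").manual_seed(0)
output = pipe("a teddy bear playing in the pool", num_inference_steps=100, generator=generator)
output.images[0].save("vq_diffusion_sample.png")
```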
diff --git a/docs/source/api/pipelines/vq_diffusion.mdx b/docs/source/api/pipelines/vq_diffusion.mdx new file mode 100644 index 0000000000..92cc903eee --- /dev/null +++ b/docs/source/api/pipelines/vq_diffusion.mdx @@ -0,0 +1,34 @@ + + +# VQDiffusion + +## Overview + +[Vector Quantized Diffusion Model for Text-to-Image Synthesis](https://arxiv.org/abs/2111.14822) by Shuyang Gu, Dong Chen, Jianmin Bao, Fang Wen, Bo Zhang, Dongdong Chen, Lu Yuan, Baining Guo + +The abstract of the paper is the following: + +We present the vector quantized diffusion (VQ-Diffusion) model for text-to-image generation. This method is based on a vector quantized variational autoencoder (VQ-VAE) whose latent space is modeled by a conditional variant of the recently developed Denoising Diffusion Probabilistic Model (DDPM). We find that this latent-space method is well-suited for text-to-image generation tasks because it not only eliminates the unidirectional bias with existing methods but also allows us to incorporate a mask-and-replace diffusion strategy to avoid the accumulation of errors, which is a serious problem with existing methods. Our experiments show that the VQ-Diffusion produces significantly better text-to-image generation results when compared with conventional autoregressive (AR) models with similar numbers of parameters. Compared with previous GAN-based text-to-image methods, our VQ-Diffusion can handle more complex scenes and improve the synthesized image quality by a large margin. Finally, we show that the image generation computation in our method can be made highly efficient by reparameterization. With traditional AR methods, the text-to-image generation time increases linearly with the output image resolution and hence is quite time consuming even for normal size images. The VQ-Diffusion allows us to achieve a better trade-off between quality and speed. Our experiments indicate that the VQ-Diffusion model with the reparameterization is fifteen times faster than traditional AR methods while achieving a better image quality. + +The original codebase can be found [here](https://github.com/microsoft/VQ-Diffusion). + +## Available Pipelines: + +| Pipeline | Tasks | Colab +|---|---|:---:| +| [pipeline_vq_diffusion.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/vq_diffusion/pipeline_vq_diffusion.py) | *Text-to-Image Generation* | - | + + +## VQDiffusionPipeline +[[autodoc]] pipelines.vq_diffusion.pipeline_vq_diffusion.VQDiffusionPipeline + - __call__ diff --git a/docs/source/api/schedulers.mdx b/docs/source/api/schedulers.mdx index 6e7da10e33..f073f6b379 100644 --- a/docs/source/api/schedulers.mdx +++ b/docs/source/api/schedulers.mdx @@ -113,7 +113,6 @@ Score SDE-VP is under construction. [[autodoc]] schedulers.scheduling_sde_vp.ScoreSdeVpScheduler - #### Euler scheduler Euler scheduler (Algorithm 2) from the paper [Elucidating the Design Space of Diffusion-Based Generative Models](https://arxiv.org/abs/2206.00364) by Karras et al. (2022). Based on the original [k-diffusion](https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L51) implementation by Katherine Crowson. @@ -130,6 +129,12 @@ Fast scheduler which often times generates good outputs with 20-30 steps. 
[[autodoc]] EulerAncestralDiscreteScheduler +#### VQDiffusionScheduler + +Original paper can be found [here](https://arxiv.org/abs/2111.14822) + +[[autodoc]] VQDiffusionScheduler + #### RePaint scheduler DDPM-based inpainting scheduler for unsupervised inpainting with extreme masks. @@ -137,4 +142,4 @@ Intended for use with [`RePaintPipeline`]. Based on the paper [RePaint: Inpainting using Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2201.09865) and the original implementation by Andreas Lugmayr et al.: https://github.com/andreas128/RePaint -[[autodoc]] RePaintScheduler \ No newline at end of file +[[autodoc]] RePaintScheduler diff --git a/docs/source/index.mdx b/docs/source/index.mdx index 392b223999..62a3e88f17 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -34,6 +34,7 @@ available a colab notebook to directly try them out. | Pipeline | Paper | Tasks | Colab |---|---|:---:|:---:| +| [dance_diffusion](./api/pipelines/dance_diffusion) | [**Dance Diffusion**](https://github.com/williamberman/diffusers.git) | Unconditional Audio Generation | | [ddpm](./api/pipelines/ddpm) | [**Denoising Diffusion Probabilistic Models**](https://arxiv.org/abs/2006.11239) | Unconditional Image Generation | | [ddim](./api/pipelines/ddim) | [**Denoising Diffusion Implicit Models**](https://arxiv.org/abs/2010.02502) | Unconditional Image Generation | | [latent_diffusion](./api/pipelines/latent_diffusion) | [**High-Resolution Image Synthesis with Latent Diffusion Models**](https://arxiv.org/abs/2112.10752)| Text-to-Image Generation | @@ -45,5 +46,6 @@ available a colab notebook to directly try them out. | [stable_diffusion](./api/pipelines/stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Image-to-Image Text-Guided Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/image_2_image_using_diffusers.ipynb) | [stable_diffusion](./api/pipelines/stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Text-Guided Image Inpainting | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/in_painting_with_stable_diffusion_using_diffusers.ipynb) | [stochastic_karras_ve](./api/pipelines/stochastic_karras_ve) | [**Elucidating the Design Space of Diffusion-Based Generative Models**](https://arxiv.org/abs/2206.00364) | Unconditional Image Generation | +| [vq_diffusion](./api/pipelines/vq_diffusion) | [Vector Quantized Diffusion Model for Text-to-Image Synthesis](https://arxiv.org/abs/2111.14822) | Text-to-Image Generation | **Note**: Pipelines are simple examples of how to play around with the diffusion systems as described in the corresponding papers. diff --git a/scripts/convert_vq_diffusion_to_diffusers.py b/scripts/convert_vq_diffusion_to_diffusers.py new file mode 100644 index 0000000000..ae105e3036 --- /dev/null +++ b/scripts/convert_vq_diffusion_to_diffusers.py @@ -0,0 +1,885 @@ +""" +This script ports models from VQ-diffusion (https://github.com/microsoft/VQ-Diffusion) to diffusers. + +It currently only supports porting the ITHQ dataset. + +ITHQ dataset: +```sh +# From the root directory of diffusers. 
+
+# Download the VQVAE checkpoint
+# (the URL is quoted so the shell does not interpret the `&` characters in the SAS token)
+$ wget 'https://facevcstandard.blob.core.windows.net/v-zhictang/Improved-VQ-Diffusion_model_release/ithq_vqvae.pth?sv=2020-10-02&st=2022-05-30T15%3A17%3A18Z&se=2030-05-31T15%3A17%3A00Z&sr=b&sp=r&sig=1jVavHFPpUjDs%2FTO1V3PTezaNbPp2Nx8MxiWI7y6fEY%3D' -O ithq_vqvae.pth
+
+# Download the VQVAE config
+# NOTE that in VQ-diffusion the documented file is `configs/ithq.yaml` but the target class
+# `image_synthesis.modeling.codecs.image_codec.ema_vqvae.PatchVQVAE`
+# loads `OUTPUT/pretrained_model/taming_dvae/config.yaml`
+$ wget https://raw.githubusercontent.com/microsoft/VQ-Diffusion/main/OUTPUT/pretrained_model/taming_dvae/config.yaml -O ithq_vqvae.yaml
+
+# Download the main model checkpoint
+$ wget 'https://facevcstandard.blob.core.windows.net/v-zhictang/Improved-VQ-Diffusion_model_release/ithq_learnable.pth?sv=2020-10-02&st=2022-05-30T10%3A22%3A06Z&se=2030-05-31T10%3A22%3A00Z&sr=b&sp=r&sig=GOE%2Bza02%2FPnGxYVOOPtwrTR4RA3%2F5NVgMxdW4kjaEZ8%3D' -O ithq_learnable.pth
+
+# Download the main model config
+$ wget https://raw.githubusercontent.com/microsoft/VQ-Diffusion/main/configs/ithq.yaml -O ithq.yaml
+
+# run the convert script
+$ python ./scripts/convert_vq_diffusion_to_diffusers.py \
+  --checkpoint_path ./ithq_learnable.pth \
+  --original_config_file ./ithq.yaml \
+  --vqvae_checkpoint_path ./ithq_vqvae.pth \
+  --vqvae_original_config_file ./ithq_vqvae.yaml \
+  --dump_path
+```
+"""
+
+import argparse
+import tempfile
+
+import torch
+
+import yaml
+from accelerate import init_empty_weights, load_checkpoint_and_dispatch
+from diffusers import VQDiffusionPipeline, VQDiffusionScheduler, VQModel
+from diffusers.models.attention import Transformer2DModel
+from transformers import CLIPTextModel, CLIPTokenizer
+from yaml.loader import FullLoader
+
+
+try:
+    from omegaconf import OmegaConf
+except ImportError:
+    raise ImportError(
+        "OmegaConf is required to convert the VQ Diffusion checkpoints. Please install it with `pip install"
+        " OmegaConf`."
+    )
+
+# vqvae model
+
+PORTED_VQVAES = ["image_synthesis.modeling.codecs.image_codec.patch_vqgan.PatchVQGAN"]
+
+
+def vqvae_model_from_original_config(original_config):
+    assert original_config.target in PORTED_VQVAES, f"{original_config.target} has not yet been ported to diffusers."
+ + original_config = original_config.params + + original_encoder_config = original_config.encoder_config.params + original_decoder_config = original_config.decoder_config.params + + in_channels = original_encoder_config.in_channels + out_channels = original_decoder_config.out_ch + + down_block_types = get_down_block_types(original_encoder_config) + up_block_types = get_up_block_types(original_decoder_config) + + assert original_encoder_config.ch == original_decoder_config.ch + assert original_encoder_config.ch_mult == original_decoder_config.ch_mult + block_out_channels = tuple( + [original_encoder_config.ch * a_ch_mult for a_ch_mult in original_encoder_config.ch_mult] + ) + + assert original_encoder_config.num_res_blocks == original_decoder_config.num_res_blocks + layers_per_block = original_encoder_config.num_res_blocks + + assert original_encoder_config.z_channels == original_decoder_config.z_channels + latent_channels = original_encoder_config.z_channels + + num_vq_embeddings = original_config.n_embed + + # Hard coded value for ResnetBlock.GoupNorm(num_groups) in VQ-diffusion + norm_num_groups = 32 + + e_dim = original_config.embed_dim + + model = VQModel( + in_channels=in_channels, + out_channels=out_channels, + down_block_types=down_block_types, + up_block_types=up_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + latent_channels=latent_channels, + num_vq_embeddings=num_vq_embeddings, + norm_num_groups=norm_num_groups, + vq_embed_dim=e_dim, + ) + + return model + + +def get_down_block_types(original_encoder_config): + attn_resolutions = coerce_attn_resolutions(original_encoder_config.attn_resolutions) + num_resolutions = len(original_encoder_config.ch_mult) + resolution = coerce_resolution(original_encoder_config.resolution) + + curr_res = resolution + down_block_types = [] + + for _ in range(num_resolutions): + if curr_res in attn_resolutions: + down_block_type = "AttnDownEncoderBlock2D" + else: + down_block_type = "DownEncoderBlock2D" + + down_block_types.append(down_block_type) + + curr_res = [r // 2 for r in curr_res] + + return down_block_types + + +def get_up_block_types(original_decoder_config): + attn_resolutions = coerce_attn_resolutions(original_decoder_config.attn_resolutions) + num_resolutions = len(original_decoder_config.ch_mult) + resolution = coerce_resolution(original_decoder_config.resolution) + + curr_res = [r // 2 ** (num_resolutions - 1) for r in resolution] + up_block_types = [] + + for _ in reversed(range(num_resolutions)): + if curr_res in attn_resolutions: + up_block_type = "AttnUpDecoderBlock2D" + else: + up_block_type = "UpDecoderBlock2D" + + up_block_types.append(up_block_type) + + curr_res = [r * 2 for r in curr_res] + + return up_block_types + + +def coerce_attn_resolutions(attn_resolutions): + attn_resolutions = OmegaConf.to_object(attn_resolutions) + attn_resolutions_ = [] + for ar in attn_resolutions: + if isinstance(ar, (list, tuple)): + attn_resolutions_.append(list(ar)) + else: + attn_resolutions_.append([ar, ar]) + return attn_resolutions_ + + +def coerce_resolution(resolution): + resolution = OmegaConf.to_object(resolution) + if isinstance(resolution, int): + resolution = [resolution, resolution] # H, W + elif isinstance(resolution, (tuple, list)): + resolution = list(resolution) + else: + raise ValueError("Unknown type of resolution:", resolution) + return resolution + + +# done vqvae model + +# vqvae checkpoint + + +def vqvae_original_checkpoint_to_diffusers_checkpoint(model, checkpoint): + 
diffusers_checkpoint = {} + + diffusers_checkpoint.update(vqvae_encoder_to_diffusers_checkpoint(model, checkpoint)) + + # quant_conv + + diffusers_checkpoint.update( + { + "quant_conv.weight": checkpoint["quant_conv.weight"], + "quant_conv.bias": checkpoint["quant_conv.bias"], + } + ) + + # quantize + diffusers_checkpoint.update({"quantize.embedding.weight": checkpoint["quantize.embedding"]}) + + # post_quant_conv + diffusers_checkpoint.update( + { + "post_quant_conv.weight": checkpoint["post_quant_conv.weight"], + "post_quant_conv.bias": checkpoint["post_quant_conv.bias"], + } + ) + + # decoder + diffusers_checkpoint.update(vqvae_decoder_to_diffusers_checkpoint(model, checkpoint)) + + return diffusers_checkpoint + + +def vqvae_encoder_to_diffusers_checkpoint(model, checkpoint): + diffusers_checkpoint = {} + + # conv_in + diffusers_checkpoint.update( + { + "encoder.conv_in.weight": checkpoint["encoder.conv_in.weight"], + "encoder.conv_in.bias": checkpoint["encoder.conv_in.bias"], + } + ) + + # down_blocks + for down_block_idx, down_block in enumerate(model.encoder.down_blocks): + diffusers_down_block_prefix = f"encoder.down_blocks.{down_block_idx}" + down_block_prefix = f"encoder.down.{down_block_idx}" + + # resnets + for resnet_idx, resnet in enumerate(down_block.resnets): + diffusers_resnet_prefix = f"{diffusers_down_block_prefix}.resnets.{resnet_idx}" + resnet_prefix = f"{down_block_prefix}.block.{resnet_idx}" + + diffusers_checkpoint.update( + vqvae_resnet_to_diffusers_checkpoint( + resnet, checkpoint, diffusers_resnet_prefix=diffusers_resnet_prefix, resnet_prefix=resnet_prefix + ) + ) + + # downsample + + # do not include the downsample when on the last down block + # There is no downsample on the last down block + if down_block_idx != len(model.encoder.down_blocks) - 1: + # There's a single downsample in the original checkpoint but a list of downsamples + # in the diffusers model. 
+ diffusers_downsample_prefix = f"{diffusers_down_block_prefix}.downsamplers.0.conv" + downsample_prefix = f"{down_block_prefix}.downsample.conv" + diffusers_checkpoint.update( + { + f"{diffusers_downsample_prefix}.weight": checkpoint[f"{downsample_prefix}.weight"], + f"{diffusers_downsample_prefix}.bias": checkpoint[f"{downsample_prefix}.bias"], + } + ) + + # attentions + + if hasattr(down_block, "attentions"): + for attention_idx, _ in enumerate(down_block.attentions): + diffusers_attention_prefix = f"{diffusers_down_block_prefix}.attentions.{attention_idx}" + attention_prefix = f"{down_block_prefix}.attn.{attention_idx}" + diffusers_checkpoint.update( + vqvae_attention_to_diffusers_checkpoint( + checkpoint, + diffusers_attention_prefix=diffusers_attention_prefix, + attention_prefix=attention_prefix, + ) + ) + + # mid block + + # mid block attentions + + # There is a single hardcoded attention block in the middle of the VQ-diffusion encoder + diffusers_attention_prefix = "encoder.mid_block.attentions.0" + attention_prefix = "encoder.mid.attn_1" + diffusers_checkpoint.update( + vqvae_attention_to_diffusers_checkpoint( + checkpoint, diffusers_attention_prefix=diffusers_attention_prefix, attention_prefix=attention_prefix + ) + ) + + # mid block resnets + + for diffusers_resnet_idx, resnet in enumerate(model.encoder.mid_block.resnets): + diffusers_resnet_prefix = f"encoder.mid_block.resnets.{diffusers_resnet_idx}" + + # the hardcoded prefixes to `block_` are 1 and 2 + orig_resnet_idx = diffusers_resnet_idx + 1 + # There are two hardcoded resnets in the middle of the VQ-diffusion encoder + resnet_prefix = f"encoder.mid.block_{orig_resnet_idx}" + + diffusers_checkpoint.update( + vqvae_resnet_to_diffusers_checkpoint( + resnet, checkpoint, diffusers_resnet_prefix=diffusers_resnet_prefix, resnet_prefix=resnet_prefix + ) + ) + + diffusers_checkpoint.update( + { + # conv_norm_out + "encoder.conv_norm_out.weight": checkpoint["encoder.norm_out.weight"], + "encoder.conv_norm_out.bias": checkpoint["encoder.norm_out.bias"], + # conv_out + "encoder.conv_out.weight": checkpoint["encoder.conv_out.weight"], + "encoder.conv_out.bias": checkpoint["encoder.conv_out.bias"], + } + ) + + return diffusers_checkpoint + + +def vqvae_decoder_to_diffusers_checkpoint(model, checkpoint): + diffusers_checkpoint = {} + + # conv in + diffusers_checkpoint.update( + { + "decoder.conv_in.weight": checkpoint["decoder.conv_in.weight"], + "decoder.conv_in.bias": checkpoint["decoder.conv_in.bias"], + } + ) + + # up_blocks + + for diffusers_up_block_idx, up_block in enumerate(model.decoder.up_blocks): + # up_blocks are stored in reverse order in the VQ-diffusion checkpoint + orig_up_block_idx = len(model.decoder.up_blocks) - 1 - diffusers_up_block_idx + + diffusers_up_block_prefix = f"decoder.up_blocks.{diffusers_up_block_idx}" + up_block_prefix = f"decoder.up.{orig_up_block_idx}" + + # resnets + for resnet_idx, resnet in enumerate(up_block.resnets): + diffusers_resnet_prefix = f"{diffusers_up_block_prefix}.resnets.{resnet_idx}" + resnet_prefix = f"{up_block_prefix}.block.{resnet_idx}" + + diffusers_checkpoint.update( + vqvae_resnet_to_diffusers_checkpoint( + resnet, checkpoint, diffusers_resnet_prefix=diffusers_resnet_prefix, resnet_prefix=resnet_prefix + ) + ) + + # upsample + + # there is no up sample on the last up block + if diffusers_up_block_idx != len(model.decoder.up_blocks) - 1: + # There's a single upsample in the VQ-diffusion checkpoint but a list of downsamples + # in the diffusers model. 
+ diffusers_downsample_prefix = f"{diffusers_up_block_prefix}.upsamplers.0.conv" + downsample_prefix = f"{up_block_prefix}.upsample.conv" + diffusers_checkpoint.update( + { + f"{diffusers_downsample_prefix}.weight": checkpoint[f"{downsample_prefix}.weight"], + f"{diffusers_downsample_prefix}.bias": checkpoint[f"{downsample_prefix}.bias"], + } + ) + + # attentions + + if hasattr(up_block, "attentions"): + for attention_idx, _ in enumerate(up_block.attentions): + diffusers_attention_prefix = f"{diffusers_up_block_prefix}.attentions.{attention_idx}" + attention_prefix = f"{up_block_prefix}.attn.{attention_idx}" + diffusers_checkpoint.update( + vqvae_attention_to_diffusers_checkpoint( + checkpoint, + diffusers_attention_prefix=diffusers_attention_prefix, + attention_prefix=attention_prefix, + ) + ) + + # mid block + + # mid block attentions + + # There is a single hardcoded attention block in the middle of the VQ-diffusion decoder + diffusers_attention_prefix = "decoder.mid_block.attentions.0" + attention_prefix = "decoder.mid.attn_1" + diffusers_checkpoint.update( + vqvae_attention_to_diffusers_checkpoint( + checkpoint, diffusers_attention_prefix=diffusers_attention_prefix, attention_prefix=attention_prefix + ) + ) + + # mid block resnets + + for diffusers_resnet_idx, resnet in enumerate(model.encoder.mid_block.resnets): + diffusers_resnet_prefix = f"decoder.mid_block.resnets.{diffusers_resnet_idx}" + + # the hardcoded prefixes to `block_` are 1 and 2 + orig_resnet_idx = diffusers_resnet_idx + 1 + # There are two hardcoded resnets in the middle of the VQ-diffusion decoder + resnet_prefix = f"decoder.mid.block_{orig_resnet_idx}" + + diffusers_checkpoint.update( + vqvae_resnet_to_diffusers_checkpoint( + resnet, checkpoint, diffusers_resnet_prefix=diffusers_resnet_prefix, resnet_prefix=resnet_prefix + ) + ) + + diffusers_checkpoint.update( + { + # conv_norm_out + "decoder.conv_norm_out.weight": checkpoint["decoder.norm_out.weight"], + "decoder.conv_norm_out.bias": checkpoint["decoder.norm_out.bias"], + # conv_out + "decoder.conv_out.weight": checkpoint["decoder.conv_out.weight"], + "decoder.conv_out.bias": checkpoint["decoder.conv_out.bias"], + } + ) + + return diffusers_checkpoint + + +def vqvae_resnet_to_diffusers_checkpoint(resnet, checkpoint, *, diffusers_resnet_prefix, resnet_prefix): + rv = { + # norm1 + f"{diffusers_resnet_prefix}.norm1.weight": checkpoint[f"{resnet_prefix}.norm1.weight"], + f"{diffusers_resnet_prefix}.norm1.bias": checkpoint[f"{resnet_prefix}.norm1.bias"], + # conv1 + f"{diffusers_resnet_prefix}.conv1.weight": checkpoint[f"{resnet_prefix}.conv1.weight"], + f"{diffusers_resnet_prefix}.conv1.bias": checkpoint[f"{resnet_prefix}.conv1.bias"], + # norm2 + f"{diffusers_resnet_prefix}.norm2.weight": checkpoint[f"{resnet_prefix}.norm2.weight"], + f"{diffusers_resnet_prefix}.norm2.bias": checkpoint[f"{resnet_prefix}.norm2.bias"], + # conv2 + f"{diffusers_resnet_prefix}.conv2.weight": checkpoint[f"{resnet_prefix}.conv2.weight"], + f"{diffusers_resnet_prefix}.conv2.bias": checkpoint[f"{resnet_prefix}.conv2.bias"], + } + + if resnet.conv_shortcut is not None: + rv.update( + { + f"{diffusers_resnet_prefix}.conv_shortcut.weight": checkpoint[f"{resnet_prefix}.nin_shortcut.weight"], + f"{diffusers_resnet_prefix}.conv_shortcut.bias": checkpoint[f"{resnet_prefix}.nin_shortcut.bias"], + } + ) + + return rv + + +def vqvae_attention_to_diffusers_checkpoint(checkpoint, *, diffusers_attention_prefix, attention_prefix): + return { + # group_norm + 
f"{diffusers_attention_prefix}.group_norm.weight": checkpoint[f"{attention_prefix}.norm.weight"], + f"{diffusers_attention_prefix}.group_norm.bias": checkpoint[f"{attention_prefix}.norm.bias"], + # query + f"{diffusers_attention_prefix}.query.weight": checkpoint[f"{attention_prefix}.q.weight"][:, :, 0, 0], + f"{diffusers_attention_prefix}.query.bias": checkpoint[f"{attention_prefix}.q.bias"], + # key + f"{diffusers_attention_prefix}.key.weight": checkpoint[f"{attention_prefix}.k.weight"][:, :, 0, 0], + f"{diffusers_attention_prefix}.key.bias": checkpoint[f"{attention_prefix}.k.bias"], + # value + f"{diffusers_attention_prefix}.value.weight": checkpoint[f"{attention_prefix}.v.weight"][:, :, 0, 0], + f"{diffusers_attention_prefix}.value.bias": checkpoint[f"{attention_prefix}.v.bias"], + # proj_attn + f"{diffusers_attention_prefix}.proj_attn.weight": checkpoint[f"{attention_prefix}.proj_out.weight"][ + :, :, 0, 0 + ], + f"{diffusers_attention_prefix}.proj_attn.bias": checkpoint[f"{attention_prefix}.proj_out.bias"], + } + + +# done vqvae checkpoint + +# transformer model + +PORTED_DIFFUSIONS = ["image_synthesis.modeling.transformers.diffusion_transformer.DiffusionTransformer"] +PORTED_TRANSFORMERS = ["image_synthesis.modeling.transformers.transformer_utils.Text2ImageTransformer"] +PORTED_CONTENT_EMBEDDINGS = ["image_synthesis.modeling.embeddings.dalle_mask_image_embedding.DalleMaskImageEmbedding"] + + +def transformer_model_from_original_config( + original_diffusion_config, original_transformer_config, original_content_embedding_config +): + assert ( + original_diffusion_config.target in PORTED_DIFFUSIONS + ), f"{original_diffusion_config.target} has not yet been ported to diffusers." + assert ( + original_transformer_config.target in PORTED_TRANSFORMERS + ), f"{original_transformer_config.target} has not yet been ported to diffusers." + assert ( + original_content_embedding_config.target in PORTED_CONTENT_EMBEDDINGS + ), f"{original_content_embedding_config.target} has not yet been ported to diffusers." + + original_diffusion_config = original_diffusion_config.params + original_transformer_config = original_transformer_config.params + original_content_embedding_config = original_content_embedding_config.params + + inner_dim = original_transformer_config["n_embd"] + + n_heads = original_transformer_config["n_head"] + + # VQ-Diffusion gives dimension of the multi-headed attention layers as the + # number of attention heads times the sequence length (the dimension) of a + # single head. We want to specify our attention blocks with those values + # specified separately + assert inner_dim % n_heads == 0 + d_head = inner_dim // n_heads + + depth = original_transformer_config["n_layer"] + context_dim = original_transformer_config["condition_dim"] + + num_embed = original_content_embedding_config["num_embed"] + # the number of embeddings in the transformer includes the mask embedding. + # the content embedding (the vqvae) does not include the mask embedding. 
+ num_embed = num_embed + 1 + + height = original_transformer_config["content_spatial_size"][0] + width = original_transformer_config["content_spatial_size"][1] + + assert width == height, "width has to be equal to height" + dropout = original_transformer_config["resid_pdrop"] + num_embeds_ada_norm = original_diffusion_config["diffusion_step"] + + model_kwargs = { + "attention_bias": True, + "cross_attention_dim": context_dim, + "attention_head_dim": d_head, + "num_layers": depth, + "dropout": dropout, + "num_attention_heads": n_heads, + "num_vector_embeds": num_embed, + "num_embeds_ada_norm": num_embeds_ada_norm, + "norm_num_groups": 32, + "sample_size": width, + "activation_fn": "geglu-approximate", + } + + model = Transformer2DModel(**model_kwargs) + return model + + +# done transformer model + +# transformer checkpoint + + +def transformer_original_checkpoint_to_diffusers_checkpoint(model, checkpoint): + diffusers_checkpoint = {} + + transformer_prefix = "transformer.transformer" + + diffusers_latent_image_embedding_prefix = "latent_image_embedding" + latent_image_embedding_prefix = f"{transformer_prefix}.content_emb" + + # DalleMaskImageEmbedding + diffusers_checkpoint.update( + { + f"{diffusers_latent_image_embedding_prefix}.emb.weight": checkpoint[ + f"{latent_image_embedding_prefix}.emb.weight" + ], + f"{diffusers_latent_image_embedding_prefix}.height_emb.weight": checkpoint[ + f"{latent_image_embedding_prefix}.height_emb.weight" + ], + f"{diffusers_latent_image_embedding_prefix}.width_emb.weight": checkpoint[ + f"{latent_image_embedding_prefix}.width_emb.weight" + ], + } + ) + + # transformer blocks + for transformer_block_idx, transformer_block in enumerate(model.transformer_blocks): + diffusers_transformer_block_prefix = f"transformer_blocks.{transformer_block_idx}" + transformer_block_prefix = f"{transformer_prefix}.blocks.{transformer_block_idx}" + + # ada norm block + diffusers_ada_norm_prefix = f"{diffusers_transformer_block_prefix}.norm1" + ada_norm_prefix = f"{transformer_block_prefix}.ln1" + + diffusers_checkpoint.update( + transformer_ada_norm_to_diffusers_checkpoint( + checkpoint, diffusers_ada_norm_prefix=diffusers_ada_norm_prefix, ada_norm_prefix=ada_norm_prefix + ) + ) + + # attention block + diffusers_attention_prefix = f"{diffusers_transformer_block_prefix}.attn1" + attention_prefix = f"{transformer_block_prefix}.attn1" + + diffusers_checkpoint.update( + transformer_attention_to_diffusers_checkpoint( + checkpoint, diffusers_attention_prefix=diffusers_attention_prefix, attention_prefix=attention_prefix + ) + ) + + # ada norm block + diffusers_ada_norm_prefix = f"{diffusers_transformer_block_prefix}.norm2" + ada_norm_prefix = f"{transformer_block_prefix}.ln1_1" + + diffusers_checkpoint.update( + transformer_ada_norm_to_diffusers_checkpoint( + checkpoint, diffusers_ada_norm_prefix=diffusers_ada_norm_prefix, ada_norm_prefix=ada_norm_prefix + ) + ) + + # attention block + diffusers_attention_prefix = f"{diffusers_transformer_block_prefix}.attn2" + attention_prefix = f"{transformer_block_prefix}.attn2" + + diffusers_checkpoint.update( + transformer_attention_to_diffusers_checkpoint( + checkpoint, diffusers_attention_prefix=diffusers_attention_prefix, attention_prefix=attention_prefix + ) + ) + + # norm block + diffusers_norm_block_prefix = f"{diffusers_transformer_block_prefix}.norm3" + norm_block_prefix = f"{transformer_block_prefix}.ln2" + + diffusers_checkpoint.update( + { + f"{diffusers_norm_block_prefix}.weight": checkpoint[f"{norm_block_prefix}.weight"], + 
f"{diffusers_norm_block_prefix}.bias": checkpoint[f"{norm_block_prefix}.bias"], + } + ) + + # feedforward block + diffusers_feedforward_prefix = f"{diffusers_transformer_block_prefix}.ff" + feedforward_prefix = f"{transformer_block_prefix}.mlp" + + diffusers_checkpoint.update( + transformer_feedforward_to_diffusers_checkpoint( + checkpoint, + diffusers_feedforward_prefix=diffusers_feedforward_prefix, + feedforward_prefix=feedforward_prefix, + ) + ) + + # to logits + + diffusers_norm_out_prefix = "norm_out" + norm_out_prefix = f"{transformer_prefix}.to_logits.0" + + diffusers_checkpoint.update( + { + f"{diffusers_norm_out_prefix}.weight": checkpoint[f"{norm_out_prefix}.weight"], + f"{diffusers_norm_out_prefix}.bias": checkpoint[f"{norm_out_prefix}.bias"], + } + ) + + diffusers_out_prefix = "out" + out_prefix = f"{transformer_prefix}.to_logits.1" + + diffusers_checkpoint.update( + { + f"{diffusers_out_prefix}.weight": checkpoint[f"{out_prefix}.weight"], + f"{diffusers_out_prefix}.bias": checkpoint[f"{out_prefix}.bias"], + } + ) + + return diffusers_checkpoint + + +def transformer_ada_norm_to_diffusers_checkpoint(checkpoint, *, diffusers_ada_norm_prefix, ada_norm_prefix): + return { + f"{diffusers_ada_norm_prefix}.emb.weight": checkpoint[f"{ada_norm_prefix}.emb.weight"], + f"{diffusers_ada_norm_prefix}.linear.weight": checkpoint[f"{ada_norm_prefix}.linear.weight"], + f"{diffusers_ada_norm_prefix}.linear.bias": checkpoint[f"{ada_norm_prefix}.linear.bias"], + } + + +def transformer_attention_to_diffusers_checkpoint(checkpoint, *, diffusers_attention_prefix, attention_prefix): + return { + # key + f"{diffusers_attention_prefix}.to_k.weight": checkpoint[f"{attention_prefix}.key.weight"], + f"{diffusers_attention_prefix}.to_k.bias": checkpoint[f"{attention_prefix}.key.bias"], + # query + f"{diffusers_attention_prefix}.to_q.weight": checkpoint[f"{attention_prefix}.query.weight"], + f"{diffusers_attention_prefix}.to_q.bias": checkpoint[f"{attention_prefix}.query.bias"], + # value + f"{diffusers_attention_prefix}.to_v.weight": checkpoint[f"{attention_prefix}.value.weight"], + f"{diffusers_attention_prefix}.to_v.bias": checkpoint[f"{attention_prefix}.value.bias"], + # linear out + f"{diffusers_attention_prefix}.to_out.0.weight": checkpoint[f"{attention_prefix}.proj.weight"], + f"{diffusers_attention_prefix}.to_out.0.bias": checkpoint[f"{attention_prefix}.proj.bias"], + } + + +def transformer_feedforward_to_diffusers_checkpoint(checkpoint, *, diffusers_feedforward_prefix, feedforward_prefix): + return { + f"{diffusers_feedforward_prefix}.net.0.proj.weight": checkpoint[f"{feedforward_prefix}.0.weight"], + f"{diffusers_feedforward_prefix}.net.0.proj.bias": checkpoint[f"{feedforward_prefix}.0.bias"], + f"{diffusers_feedforward_prefix}.net.2.weight": checkpoint[f"{feedforward_prefix}.2.weight"], + f"{diffusers_feedforward_prefix}.net.2.bias": checkpoint[f"{feedforward_prefix}.2.bias"], + } + + +# done transformer checkpoint + + +def read_config_file(filename): + # The yaml file contains annotations that certain values should + # loaded as tuples. By default, OmegaConf will panic when reading + # these. Instead, we can manually read the yaml with the FullLoader and then + # construct the OmegaConf object. + with open(filename) as f: + original_config = yaml.load(f, FullLoader) + + return OmegaConf.create(original_config) + + +# We take separate arguments for the vqvae because the ITHQ vqvae config file +# is separate from the config file for the rest of the model. 
+if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--vqvae_checkpoint_path", + default=None, + type=str, + required=True, + help="Path to the vqvae checkpoint to convert.", + ) + + parser.add_argument( + "--vqvae_original_config_file", + default=None, + type=str, + required=True, + help="The YAML config file corresponding to the original architecture for the vqvae.", + ) + + parser.add_argument( + "--checkpoint_path", default=None, type=str, required=True, help="Path to the checkpoint to convert." + ) + + parser.add_argument( + "--original_config_file", + default=None, + type=str, + required=True, + help="The YAML config file corresponding to the original architecture.", + ) + + parser.add_argument("--dump_path", default=None, type=str, required=True, help="Path to the output model.") + + parser.add_argument( + "--checkpoint_load_device", + default="cpu", + type=str, + required=False, + help="The device passed to `map_location` when loading checkpoints.", + ) + + # See link for how ema weights are always selected + # https://github.com/microsoft/VQ-Diffusion/blob/3c98e77f721db7c787b76304fa2c96a36c7b00af/inference_VQ_Diffusion.py#L65 + parser.add_argument( + "--no_use_ema", + action="store_true", + required=False, + help=( + "Set to not use the ema weights from the original VQ-Diffusion checkpoint. You probably do not want to set" + " it as the original VQ-Diffusion always uses the ema weights when loading models." + ), + ) + + args = parser.parse_args() + + use_ema = not args.no_use_ema + + print(f"loading checkpoints to {args.checkpoint_load_device}") + + checkpoint_map_location = torch.device(args.checkpoint_load_device) + + # vqvae_model + + print(f"loading vqvae, config: {args.vqvae_original_config_file}, checkpoint: {args.vqvae_checkpoint_path}") + + vqvae_original_config = read_config_file(args.vqvae_original_config_file).model + vqvae_checkpoint = torch.load(args.vqvae_checkpoint_path, map_location=checkpoint_map_location)["model"] + + with init_empty_weights(): + vqvae_model = vqvae_model_from_original_config(vqvae_original_config) + + vqvae_diffusers_checkpoint = vqvae_original_checkpoint_to_diffusers_checkpoint(vqvae_model, vqvae_checkpoint) + + with tempfile.NamedTemporaryFile() as vqvae_diffusers_checkpoint_file: + torch.save(vqvae_diffusers_checkpoint, vqvae_diffusers_checkpoint_file.name) + del vqvae_diffusers_checkpoint + del vqvae_checkpoint + load_checkpoint_and_dispatch(vqvae_model, vqvae_diffusers_checkpoint_file.name, device_map="auto") + + print("done loading vqvae") + + # done vqvae_model + + # transformer_model + + print( + f"loading transformer, config: {args.original_config_file}, checkpoint: {args.checkpoint_path}, use ema:" + f" {use_ema}" + ) + + original_config = read_config_file(args.original_config_file).model + + diffusion_config = original_config.params.diffusion_config + transformer_config = original_config.params.diffusion_config.params.transformer_config + content_embedding_config = original_config.params.diffusion_config.params.content_emb_config + + pre_checkpoint = torch.load(args.checkpoint_path, map_location=checkpoint_map_location) + + if use_ema: + if "ema" in pre_checkpoint: + checkpoint = {} + for k, v in pre_checkpoint["model"].items(): + checkpoint[k] = v + + for k, v in pre_checkpoint["ema"].items(): + # The ema weights are only used on the transformer. To mimic their key as if they came + # from the state_dict for the top level model, we prefix with an additional "transformer." 
+ # See the source linked in the args.use_ema config for more information. + checkpoint[f"transformer.{k}"] = v + else: + print("attempted to load ema weights but no ema weights are specified in the loaded checkpoint.") + checkpoint = pre_checkpoint["model"] + else: + checkpoint = pre_checkpoint["model"] + + del pre_checkpoint + + with init_empty_weights(): + transformer_model = transformer_model_from_original_config( + diffusion_config, transformer_config, content_embedding_config + ) + + diffusers_transformer_checkpoint = transformer_original_checkpoint_to_diffusers_checkpoint( + transformer_model, checkpoint + ) + + with tempfile.NamedTemporaryFile() as diffusers_transformer_checkpoint_file: + torch.save(diffusers_transformer_checkpoint, diffusers_transformer_checkpoint_file.name) + del diffusers_transformer_checkpoint + del checkpoint + load_checkpoint_and_dispatch(transformer_model, diffusers_transformer_checkpoint_file.name, device_map="auto") + + print("done loading transformer") + + # done transformer_model + + # text encoder + + print("loading CLIP text encoder") + + clip_name = "openai/clip-vit-base-patch32" + + # The original VQ-Diffusion specifies the pad value by the int used in the + # returned tokens. Each model uses `0` as the pad value. The transformers clip api + # specifies the pad value via the token before it has been tokenized. The `!` pad + # token is the same as padding with the `0` pad value. + pad_token = "!" + + tokenizer_model = CLIPTokenizer.from_pretrained(clip_name, pad_token=pad_token, device_map="auto") + + assert tokenizer_model.convert_tokens_to_ids(pad_token) == 0 + + text_encoder_model = CLIPTextModel.from_pretrained( + clip_name, + # `CLIPTextModel` does not support device_map="auto" + # device_map="auto" + ) + + print("done loading CLIP text encoder") + + # done text encoder + + # scheduler + + scheduler_model = VQDiffusionScheduler( + # the scheduler has the same number of embeddings as the transformer + num_vec_classes=transformer_model.num_vector_embeds + ) + + # done scheduler + + print(f"saving VQ diffusion model, path: {args.dump_path}") + + pipe = VQDiffusionPipeline( + vqvae=vqvae_model, + transformer=transformer_model, + tokenizer=tokenizer_model, + text_encoder=text_encoder_model, + scheduler=scheduler_model, + ) + pipe.save_pretrained(args.dump_path) + + print("done writing VQ diffusion model") diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index 1a9a7d74eb..00052109e3 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -18,7 +18,7 @@ from .utils import logging if is_torch_available(): from .modeling_utils import ModelMixin - from .models import AutoencoderKL, UNet1DModel, UNet2DConditionModel, UNet2DModel, VQModel + from .models import AutoencoderKL, Transformer2DModel, UNet1DModel, UNet2DConditionModel, UNet2DModel, VQModel from .optimization import ( get_constant_schedule, get_constant_schedule_with_warmup, @@ -38,6 +38,7 @@ if is_torch_available(): PNDMPipeline, RePaintPipeline, ScoreSdeVePipeline, + VQDiffusionPipeline, ) from .schedulers import ( DDIMScheduler, @@ -50,6 +51,7 @@ if is_torch_available(): RePaintScheduler, SchedulerMixin, ScoreSdeVeScheduler, + VQDiffusionScheduler, ) from .training_utils import EMAModel else: diff --git a/src/diffusers/models/__init__.py b/src/diffusers/models/__init__.py index c5d53b2feb..5b101d1691 100644 --- a/src/diffusers/models/__init__.py +++ b/src/diffusers/models/__init__.py @@ -16,6 +16,7 @@ from ..utils import is_flax_available, is_torch_available if 
is_torch_available(): + from .attention import Transformer2DModel from .unet_1d import UNet1DModel from .unet_2d import UNet2DModel from .unet_2d_condition import UNet2DConditionModel diff --git a/src/diffusers/models/attention.py b/src/diffusers/models/attention.py index 372c8492b4..bac85e2f39 100644 --- a/src/diffusers/models/attention.py +++ b/src/diffusers/models/attention.py @@ -12,13 +12,30 @@ # See the License for the specific language governing permissions and # limitations under the License. import math +from dataclasses import dataclass from typing import Optional import torch import torch.nn.functional as F from torch import nn -from diffusers.utils.import_utils import is_xformers_available +from ..configuration_utils import ConfigMixin, register_to_config +from ..modeling_utils import ModelMixin +from ..models.embeddings import ImagePositionalEmbeddings +from ..utils import BaseOutput +from ..utils.import_utils import is_xformers_available + + +@dataclass +class Transformer2DModelOutput(BaseOutput): + """ + Args: + sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` or `(batch size, num_vector_embeds - 1, num_latent_pixels)` if [`Transformer2DModel`] is discrete): + Hidden states conditioned on `encoder_hidden_states` input. If discrete, returns probability distributions + for the unnoised latent pixels. + """ + + sample: torch.FloatTensor if is_xformers_available(): @@ -28,6 +45,186 @@ else: xformers = None +class Transformer2DModel(ModelMixin, ConfigMixin): + """ + Transformer model for image-like data. Takes either discrete (classes of vector embeddings) or continuous (actual + embeddings) inputs. + + When input is continuous: First, project the input (aka embedding) and reshape to b, t, d. Then apply standard + transformer action. Finally, reshape to image. + + When input is discrete: First, input (classes of latent pixels) is converted to embeddings and has positional + embeddings applied, see `ImagePositionalEmbeddings`. Then apply standard transformer action. Finally, predict + classes of unnoised image. + + Note that it is assumed one of the input classes is the masked latent pixel. The predicted classes of the unnoised + image do not contain a prediction for the masked pixel as the unnoised image cannot be masked. + + Parameters: + num_attention_heads (`int`, *optional*, defaults to 16): The number of heads to use for multi-head attention. + attention_head_dim (`int`, *optional*, defaults to 88): The number of channels in each head. + in_channels (`int`, *optional*): + Pass if the input is continuous. The number of channels in the input and output. + num_layers (`int`, *optional*, defaults to 1): The number of layers of Transformer blocks to use. + dropout (`float`, *optional*, defaults to 0.1): The dropout probability to use. + cross_attention_dim (`int`, *optional*): The number of context dimensions to use. + sample_size (`int`, *optional*): Pass if the input is discrete. The width of the latent images. + Note that this is fixed at training time as it is used for learning a number of position embeddings. See + `ImagePositionalEmbeddings`. + num_vector_embeds (`int`, *optional*): + Pass if the input is discrete. The number of classes of the vector embeddings of the latent pixels. + Includes the class for the masked latent pixel. + activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to be used in feed-forward. + num_embeds_ada_norm ( `int`, *optional*): Pass if at least one of the norm_layers is `AdaLayerNorm`. 
+ The number of diffusion steps used during training. Note that this is fixed at training time as it is used + to learn a number of embeddings that are added to the hidden states. During inference, you can denoise for + up to but not more than steps than `num_embeds_ada_norm`. + attention_bias (`bool`, *optional*): + Configure if the TransformerBlocks' attention should contain a bias parameter. + """ + + @register_to_config + def __init__( + self, + num_attention_heads: int = 16, + attention_head_dim: int = 88, + in_channels: Optional[int] = None, + num_layers: int = 1, + dropout: float = 0.0, + norm_num_groups: int = 32, + cross_attention_dim: Optional[int] = None, + attention_bias: bool = False, + sample_size: Optional[int] = None, + num_vector_embeds: Optional[int] = None, + activation_fn: str = "geglu", + num_embeds_ada_norm: Optional[int] = None, + ): + super().__init__() + self.num_attention_heads = num_attention_heads + self.attention_head_dim = attention_head_dim + inner_dim = num_attention_heads * attention_head_dim + + # 1. Transformer2DModel can process both standard continous images of shape `(batch_size, num_channels, width, height)` as well as quantized image embeddings of shape `(batch_size, num_image_vectors)` + # Define whether input is continuous or discrete depending on configuration + self.is_input_continuous = in_channels is not None + self.is_input_vectorized = num_vector_embeds is not None + + if self.is_input_continuous and self.is_input_vectorized: + raise ValueError( + f"Cannot define both `in_channels`: {in_channels} and `num_vector_embeds`: {num_vector_embeds}. Make" + " sure that either `in_channels` or `num_vector_embeds` is None." + ) + elif not self.is_input_continuous and not self.is_input_vectorized: + raise ValueError( + f"Has to define either `in_channels`: {in_channels} or `num_vector_embeds`: {num_vector_embeds}. Make" + " sure that either `in_channels` or `num_vector_embeds` is not None." + ) + + # 2. Define input layers + if self.is_input_continuous: + self.in_channels = in_channels + + self.norm = torch.nn.GroupNorm(num_groups=norm_num_groups, num_channels=in_channels, eps=1e-6, affine=True) + self.proj_in = nn.Conv2d(in_channels, inner_dim, kernel_size=1, stride=1, padding=0) + elif self.is_input_vectorized: + assert sample_size is not None, "Transformer2DModel over discrete input must provide sample_size" + assert num_vector_embeds is not None, "Transformer2DModel over discrete input must provide num_embed" + + self.height = sample_size + self.width = sample_size + self.num_vector_embeds = num_vector_embeds + self.num_latent_pixels = self.height * self.width + + self.latent_image_embedding = ImagePositionalEmbeddings( + num_embed=num_vector_embeds, embed_dim=inner_dim, height=self.height, width=self.width + ) + + # 3. Define transformers blocks + self.transformer_blocks = nn.ModuleList( + [ + BasicTransformerBlock( + inner_dim, + num_attention_heads, + attention_head_dim, + dropout=dropout, + cross_attention_dim=cross_attention_dim, + activation_fn=activation_fn, + num_embeds_ada_norm=num_embeds_ada_norm, + attention_bias=attention_bias, + ) + for d in range(num_layers) + ] + ) + + # 4. 
Define output layers + if self.is_input_continuous: + self.proj_out = nn.Conv2d(inner_dim, in_channels, kernel_size=1, stride=1, padding=0) + elif self.is_input_vectorized: + self.norm_out = nn.LayerNorm(inner_dim) + self.out = nn.Linear(inner_dim, self.num_vector_embeds - 1) + + def _set_attention_slice(self, slice_size): + for block in self.transformer_blocks: + block._set_attention_slice(slice_size) + + def forward(self, hidden_states, encoder_hidden_states=None, timestep=None, return_dict: bool = True): + """ + Args: + hidden_states ( When discrete, `torch.LongTensor` of shape `(batch size, num latent pixels)`. + When continous, `torch.FloatTensor` of shape `(batch size, channel, height, width)`): Input + hidden_states + encoder_hidden_states ( `torch.LongTensor` of shape `(batch size, context dim)`, *optional*): + Conditional embeddings for cross attention layer. If not given, cross-attention defaults to + self-attention. + timestep ( `torch.long`, *optional*): + Optional timestep to be applied as an embedding in AdaLayerNorm's. Used to indicate denoising step. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`models.unet_2d_condition.UNet2DConditionOutput`] instead of a plain tuple. + + Returns: + [`~models.attention.Transformer2DModelOutput`] or `tuple`: [`~models.attention.Transformer2DModelOutput`] + if `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is the sample + tensor. + """ + # 1. Input + if self.is_input_continuous: + batch, channel, height, weight = hidden_states.shape + residual = hidden_states + hidden_states = self.norm(hidden_states) + hidden_states = self.proj_in(hidden_states) + inner_dim = hidden_states.shape[1] + hidden_states = hidden_states.permute(0, 2, 3, 1).reshape(batch, height * weight, inner_dim) + elif self.is_input_vectorized: + hidden_states = self.latent_image_embedding(hidden_states) + + # 2. Blocks + for block in self.transformer_blocks: + hidden_states = block(hidden_states, context=encoder_hidden_states, timestep=timestep) + + # 3. Output + if self.is_input_continuous: + hidden_states = hidden_states.reshape(batch, height, weight, inner_dim).permute(0, 3, 1, 2) + hidden_states = self.proj_out(hidden_states) + output = hidden_states + residual + elif self.is_input_vectorized: + hidden_states = self.norm_out(hidden_states) + logits = self.out(hidden_states) + # (batch, self.num_vector_embeds - 1, self.num_latent_pixels) + logits = logits.permute(0, 2, 1) + + # log(p(x_0)) + output = F.log_softmax(logits.double(), dim=1).float() + + if not return_dict: + return (output,) + + return Transformer2DModelOutput(sample=output) + + def _set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): + for block in self.transformer_blocks: + block._set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) + + class AttentionBlock(nn.Module): """ An attention block that allows spatial positions to attend to each other. Originally ported from here, but adapted @@ -36,19 +233,19 @@ class AttentionBlock(nn.Module): Uses three q, k, v linear layers to compute attention. Parameters: - channels (:obj:`int`): The number of channels in the input and output. - num_head_channels (:obj:`int`, *optional*): + channels (`int`): The number of channels in the input and output. + num_head_channels (`int`, *optional*): The number of channels in each head. If None, then `num_heads` = 1. 
- num_groups (:obj:`int`, *optional*, defaults to 32): The number of groups to use for group norm. - rescale_output_factor (:obj:`float`, *optional*, defaults to 1.0): The factor to rescale the output by. - eps (:obj:`float`, *optional*, defaults to 1e-5): The epsilon value to use for group norm. + norm_num_groups (`int`, *optional*, defaults to 32): The number of groups to use for group norm. + rescale_output_factor (`float`, *optional*, defaults to 1.0): The factor to rescale the output by. + eps (`float`, *optional*, defaults to 1e-5): The epsilon value to use for group norm. """ def __init__( self, channels: int, num_head_channels: Optional[int] = None, - num_groups: int = 32, + norm_num_groups: int = 32, rescale_output_factor: float = 1.0, eps: float = 1e-5, ): @@ -57,7 +254,7 @@ class AttentionBlock(nn.Module): self.num_heads = channels // num_head_channels if num_head_channels is not None else 1 self.num_head_size = num_head_channels - self.group_norm = nn.GroupNorm(num_channels=channels, num_groups=num_groups, eps=eps, affine=True) + self.group_norm = nn.GroupNorm(num_channels=channels, num_groups=norm_num_groups, eps=eps, affine=True) # define q,k,v as linear layers self.query = nn.Linear(channels, channels) @@ -113,107 +310,61 @@ class AttentionBlock(nn.Module): return hidden_states -class SpatialTransformer(nn.Module): - """ - Transformer block for image-like data. First, project the input (aka embedding) and reshape to b, t, d. Then apply - standard transformer action. Finally, reshape to image. - - Parameters: - in_channels (:obj:`int`): The number of channels in the input and output. - n_heads (:obj:`int`): The number of heads to use for multi-head attention. - d_head (:obj:`int`): The number of channels in each head. - depth (:obj:`int`, *optional*, defaults to 1): The number of layers of Transformer blocks to use. - dropout (:obj:`float`, *optional*, defaults to 0.1): The dropout probability to use. - context_dim (:obj:`int`, *optional*): The number of context dimensions to use. 
- """ - - def __init__( - self, - in_channels: int, - n_heads: int, - d_head: int, - depth: int = 1, - dropout: float = 0.0, - num_groups: int = 32, - context_dim: Optional[int] = None, - ): - super().__init__() - self.n_heads = n_heads - self.d_head = d_head - self.in_channels = in_channels - inner_dim = n_heads * d_head - self.norm = torch.nn.GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True) - - self.proj_in = nn.Conv2d(in_channels, inner_dim, kernel_size=1, stride=1, padding=0) - - self.transformer_blocks = nn.ModuleList( - [ - BasicTransformerBlock(inner_dim, n_heads, d_head, dropout=dropout, context_dim=context_dim) - for d in range(depth) - ] - ) - - self.proj_out = nn.Conv2d(inner_dim, in_channels, kernel_size=1, stride=1, padding=0) - - def _set_attention_slice(self, slice_size): - for block in self.transformer_blocks: - block._set_attention_slice(slice_size) - - def _set_use_memory_efficient_attention_xformers(self, use_memory_efficient_attention_xformers: bool): - for block in self.transformer_blocks: - block._set_use_memory_efficient_attention_xformers(use_memory_efficient_attention_xformers) - - def forward(self, hidden_states, context=None): - # note: if no context is given, cross-attention defaults to self-attention - batch, channel, height, width = hidden_states.shape - residual = hidden_states - hidden_states = self.norm(hidden_states) - hidden_states = self.proj_in(hidden_states) - inner_dim = hidden_states.shape[1] - hidden_states = hidden_states.permute(0, 2, 3, 1).reshape(batch, height * width, inner_dim) - for block in self.transformer_blocks: - hidden_states = block(hidden_states, context=context) - hidden_states = hidden_states.reshape(batch, height, width, inner_dim).permute(0, 3, 1, 2) - hidden_states = self.proj_out(hidden_states) - return hidden_states + residual - - class BasicTransformerBlock(nn.Module): r""" A basic Transformer block. Parameters: - dim (:obj:`int`): The number of channels in the input and output. - n_heads (:obj:`int`): The number of heads to use for multi-head attention. - d_head (:obj:`int`): The number of channels in each head. - dropout (:obj:`float`, *optional*, defaults to 0.0): The dropout probability to use. - context_dim (:obj:`int`, *optional*): The size of the context vector for cross attention. - gated_ff (:obj:`bool`, *optional*, defaults to :obj:`False`): Whether to use a gated feed-forward network. - checkpoint (:obj:`bool`, *optional*, defaults to :obj:`False`): Whether to use checkpointing. + dim (`int`): The number of channels in the input and output. + num_attention_heads (`int`): The number of heads to use for multi-head attention. + attention_head_dim (`int`): The number of channels in each head. + dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use. + cross_attention_dim (`int`, *optional*): The size of the context vector for cross attention. + activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to be used in feed-forward. + num_embeds_ada_norm (: + obj: `int`, *optional*): The number of diffusion steps used during training. See `Transformer2DModel`. + attention_bias (: + obj: `bool`, *optional*, defaults to `False`): Configure if the attentions should contain a bias parameter. 
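+
+    A minimal illustrative sketch of calling the renamed block (the dimensions below are arbitrary and only meant to
+    show the expected call shapes):
+
+    ```
+    block = BasicTransformerBlock(dim=320, num_attention_heads=8, attention_head_dim=40, cross_attention_dim=768)
+    sample = block(torch.randn(2, 4096, 320), context=torch.randn(2, 77, 768))  # -> (2, 4096, 320)
+    ```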
""" def __init__( self, dim: int, - n_heads: int, - d_head: int, + num_attention_heads: int, + attention_head_dim: int, dropout=0.0, - context_dim: Optional[int] = None, - gated_ff: bool = True, - checkpoint: bool = True, + cross_attention_dim: Optional[int] = None, + activation_fn: str = "geglu", + num_embeds_ada_norm: Optional[int] = None, + attention_bias: bool = False, ): super().__init__() self.attn1 = CrossAttention( - query_dim=dim, heads=n_heads, dim_head=d_head, dropout=dropout + query_dim=dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + dropout=dropout, + bias=attention_bias, ) # is a self-attention - self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff) + self.ff = FeedForward(dim, dropout=dropout, activation_fn=activation_fn) self.attn2 = CrossAttention( - query_dim=dim, context_dim=context_dim, heads=n_heads, dim_head=d_head, dropout=dropout + query_dim=dim, + cross_attention_dim=cross_attention_dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + dropout=dropout, + bias=attention_bias, ) # is self-attn if context is none - self.norm1 = nn.LayerNorm(dim) - self.norm2 = nn.LayerNorm(dim) + + # layer norms + self.use_ada_layer_norm = num_embeds_ada_norm is not None + if self.use_ada_layer_norm: + self.norm1 = AdaLayerNorm(dim, num_embeds_ada_norm) + self.norm2 = AdaLayerNorm(dim, num_embeds_ada_norm) + else: + self.norm1 = nn.LayerNorm(dim) + self.norm2 = nn.LayerNorm(dim) self.norm3 = nn.LayerNorm(dim) - self.checkpoint = checkpoint def _set_attention_slice(self, slice_size): self.attn1._slice_size = slice_size @@ -245,10 +396,22 @@ class BasicTransformerBlock(nn.Module): self.attn1._use_memory_efficient_attention_xformers = use_memory_efficient_attention_xformers self.attn2._use_memory_efficient_attention_xformers = use_memory_efficient_attention_xformers - def forward(self, hidden_states, context=None): - hidden_states = self.attn1(self.norm1(hidden_states)) + hidden_states - hidden_states = self.attn2(self.norm2(hidden_states), context=context) + hidden_states + def forward(self, hidden_states, context=None, timestep=None): + # 1. Self-Attention + norm_hidden_states = ( + self.norm1(hidden_states, timestep) if self.use_ada_layer_norm else self.norm1(hidden_states) + ) + hidden_states = self.attn1(norm_hidden_states) + hidden_states + + # 2. Cross-Attention + norm_hidden_states = ( + self.norm2(hidden_states, timestep) if self.use_ada_layer_norm else self.norm2(hidden_states) + ) + hidden_states = self.attn2(norm_hidden_states, context=context) + hidden_states + + # 3. Feed-forward hidden_states = self.ff(self.norm3(hidden_states)) + hidden_states + return hidden_states @@ -257,20 +420,28 @@ class CrossAttention(nn.Module): A cross attention layer. Parameters: - query_dim (:obj:`int`): The number of channels in the query. - context_dim (:obj:`int`, *optional*): + query_dim (`int`): The number of channels in the query. + cross_attention_dim (`int`, *optional*): The number of channels in the context. If not given, defaults to `query_dim`. - heads (:obj:`int`, *optional*, defaults to 8): The number of heads to use for multi-head attention. - dim_head (:obj:`int`, *optional*, defaults to 64): The number of channels in each head. - dropout (:obj:`float`, *optional*, defaults to 0.0): The dropout probability to use. + heads (`int`, *optional*, defaults to 8): The number of heads to use for multi-head attention. + dim_head (`int`, *optional*, defaults to 64): The number of channels in each head. 
+ dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use. + bias (`bool`, *optional*, defaults to False): + Set to `True` for the query, key, and value linear layers to contain a bias parameter. """ def __init__( - self, query_dim: int, context_dim: Optional[int] = None, heads: int = 8, dim_head: int = 64, dropout: int = 0.0 + self, + query_dim: int, + cross_attention_dim: Optional[int] = None, + heads: int = 8, + dim_head: int = 64, + dropout: float = 0.0, + bias=False, ): super().__init__() inner_dim = dim_head * heads - context_dim = context_dim if context_dim is not None else query_dim + cross_attention_dim = cross_attention_dim if cross_attention_dim is not None else query_dim self.scale = dim_head**-0.5 self.heads = heads @@ -280,9 +451,9 @@ class CrossAttention(nn.Module): self._slice_size = None self._use_memory_efficient_attention_xformers = False - self.to_q = nn.Linear(query_dim, inner_dim, bias=False) - self.to_k = nn.Linear(context_dim, inner_dim, bias=False) - self.to_v = nn.Linear(context_dim, inner_dim, bias=False) + self.to_q = nn.Linear(query_dim, inner_dim, bias=bias) + self.to_k = nn.Linear(cross_attention_dim, inner_dim, bias=bias) + self.to_v = nn.Linear(cross_attention_dim, inner_dim, bias=bias) self.to_out = nn.ModuleList([]) self.to_out.append(nn.Linear(inner_dim, query_dim)) @@ -394,23 +565,33 @@ class FeedForward(nn.Module): A feed-forward layer. Parameters: - dim (:obj:`int`): The number of channels in the input. - dim_out (:obj:`int`, *optional*): The number of channels in the output. If not given, defaults to `dim`. - mult (:obj:`int`, *optional*, defaults to 4): The multiplier to use for the hidden dimension. - glu (:obj:`bool`, *optional*, defaults to :obj:`False`): Whether to use GLU activation. - dropout (:obj:`float`, *optional*, defaults to 0.0): The dropout probability to use. + dim (`int`): The number of channels in the input. + dim_out (`int`, *optional*): The number of channels in the output. If not given, defaults to `dim`. + mult (`int`, *optional*, defaults to 4): The multiplier to use for the hidden dimension. + dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use. + activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to be used in feed-forward. """ def __init__( - self, dim: int, dim_out: Optional[int] = None, mult: int = 4, glu: bool = False, dropout: float = 0.0 + self, + dim: int, + dim_out: Optional[int] = None, + mult: int = 4, + dropout: float = 0.0, + activation_fn: str = "geglu", ): super().__init__() inner_dim = int(dim * mult) dim_out = dim_out if dim_out is not None else dim - self.net = nn.ModuleList([]) + if activation_fn == "geglu": + geglu = GEGLU(dim, inner_dim) + elif activation_fn == "geglu-approximate": + geglu = ApproximateGELU(dim, inner_dim) + + self.net = nn.ModuleList([]) # project in - self.net.append(GEGLU(dim, inner_dim)) + self.net.append(geglu) # project dropout self.net.append(nn.Dropout(dropout)) # project out @@ -428,8 +609,8 @@ class GEGLU(nn.Module): A variant of the gated linear unit activation function from https://arxiv.org/abs/2002.05202. Parameters: - dim_in (:obj:`int`): The number of channels in the input. - dim_out (:obj:`int`): The number of channels in the output. + dim_in (`int`): The number of channels in the input. + dim_out (`int`): The number of channels in the output. 
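+
+    A small illustrative sketch (the shapes are arbitrary): the projection doubles the width, then the gated half
+    modulates the other half.
+
+    ```
+    geglu = GEGLU(dim_in=320, dim_out=1280)
+    out = geglu(torch.randn(2, 64, 320))  # -> (2, 64, 1280)
+    ```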
""" def __init__(self, dim_in: int, dim_out: int): @@ -445,3 +626,38 @@ class GEGLU(nn.Module): def forward(self, hidden_states): hidden_states, gate = self.proj(hidden_states).chunk(2, dim=-1) return hidden_states * self.gelu(gate) + + +class ApproximateGELU(nn.Module): + """ + The approximate form of Gaussian Error Linear Unit (GELU) + + For more details, see section 2: https://arxiv.org/abs/1606.08415 + """ + + def __init__(self, dim_in: int, dim_out: int): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out) + + def forward(self, x): + x = self.proj(x) + return x * torch.sigmoid(1.702 * x) + + +class AdaLayerNorm(nn.Module): + """ + Norm layer modified to incorporate timestep embeddings. + """ + + def __init__(self, embedding_dim, num_embeddings): + super().__init__() + self.emb = nn.Embedding(num_embeddings, embedding_dim) + self.silu = nn.SiLU() + self.linear = nn.Linear(embedding_dim, embedding_dim * 2) + self.norm = nn.LayerNorm(embedding_dim, elementwise_affine=False) + + def forward(self, x, timestep): + emb = self.linear(self.silu(self.emb(timestep))) + scale, shift = torch.chunk(emb, 2) + x = self.norm(x) * (1 + scale) + shift + return x diff --git a/src/diffusers/models/attention_flax.py b/src/diffusers/models/attention_flax.py index 1745265b91..1b86094747 100644 --- a/src/diffusers/models/attention_flax.py +++ b/src/diffusers/models/attention_flax.py @@ -142,7 +142,7 @@ class FlaxBasicTransformerBlock(nn.Module): return hidden_states -class FlaxSpatialTransformer(nn.Module): +class FlaxTransformer2DModel(nn.Module): r""" A Spatial Transformer layer with Gated Linear Unit (GLU) activation function as described in: https://arxiv.org/pdf/1506.02025.pdf diff --git a/src/diffusers/models/embeddings.py b/src/diffusers/models/embeddings.py index 35715e17fc..b09d43fc2e 100644 --- a/src/diffusers/models/embeddings.py +++ b/src/diffusers/models/embeddings.py @@ -126,3 +126,68 @@ class GaussianFourierProjection(nn.Module): else: out = torch.cat([torch.sin(x_proj), torch.cos(x_proj)], dim=-1) return out + + +class ImagePositionalEmbeddings(nn.Module): + """ + Converts latent image classes into vector embeddings. Sums the vector embeddings with positional embeddings for the + height and width of the latent space. + + For more details, see figure 10 of the dall-e paper: https://arxiv.org/abs/2102.12092 + + For VQ-diffusion: + + Output vector embeddings are used as input for the transformer. + + Note that the vector embeddings for the transformer are different than the vector embeddings from the VQVAE. + + Args: + num_embed (`int`): + Number of embeddings for the latent pixels embeddings. + height (`int`): + Height of the latent image i.e. the number of height embeddings. + width (`int`): + Width of the latent image i.e. the number of width embeddings. + embed_dim (`int`): + Dimension of the produced vector embeddings. Used for the latent pixel, height, and width embeddings. 
+ """ + + def __init__( + self, + num_embed: int, + height: int, + width: int, + embed_dim: int, + ): + super().__init__() + + self.height = height + self.width = width + self.num_embed = num_embed + self.embed_dim = embed_dim + + self.emb = nn.Embedding(self.num_embed, embed_dim) + self.height_emb = nn.Embedding(self.height, embed_dim) + self.width_emb = nn.Embedding(self.width, embed_dim) + + def forward(self, index): + emb = self.emb(index) + + height_emb = self.height_emb(torch.arange(self.height, device=index.device).view(1, self.height)) + + # 1 x H x D -> 1 x H x 1 x D + height_emb = height_emb.unsqueeze(2) + + width_emb = self.width_emb(torch.arange(self.width, device=index.device).view(1, self.width)) + + # 1 x W x D -> 1 x 1 x W x D + width_emb = width_emb.unsqueeze(1) + + pos_emb = height_emb + width_emb + + # 1 x H x W x D -> 1 x L xD + pos_emb = pos_emb.view(1, self.height * self.width, -1) + + emb = emb + pos_emb[:, : emb.shape[1], :] + + return emb diff --git a/src/diffusers/models/unet_2d_blocks.py b/src/diffusers/models/unet_2d_blocks.py index ae4fe2d8bb..4132ccbd0c 100644 --- a/src/diffusers/models/unet_2d_blocks.py +++ b/src/diffusers/models/unet_2d_blocks.py @@ -15,7 +15,7 @@ import numpy as np import torch from torch import nn -from .attention import AttentionBlock, SpatialTransformer +from .attention import AttentionBlock, Transformer2DModel from .resnet import Downsample2D, FirDownsample2D, FirUpsample2D, ResnetBlock2D, Upsample2D @@ -109,6 +109,19 @@ def get_down_block( resnet_groups=resnet_groups, downsample_padding=downsample_padding, ) + elif down_block_type == "AttnDownEncoderBlock2D": + return AttnDownEncoderBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + resnet_groups=resnet_groups, + downsample_padding=downsample_padding, + attn_num_head_channels=attn_num_head_channels, + ) + raise ValueError(f"{down_block_type} does not exist.") def get_up_block( @@ -200,6 +213,17 @@ def get_up_block( resnet_act_fn=resnet_act_fn, resnet_groups=resnet_groups, ) + elif up_block_type == "AttnUpDecoderBlock2D": + return AttnUpDecoderBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + resnet_groups=resnet_groups, + attn_num_head_channels=attn_num_head_channels, + ) raise ValueError(f"{up_block_type} does not exist.") @@ -249,7 +273,7 @@ class UNetMidBlock2D(nn.Module): num_head_channels=attn_num_head_channels, rescale_output_factor=output_scale_factor, eps=resnet_eps, - num_groups=resnet_groups, + norm_num_groups=resnet_groups, ) ) resnets.append( @@ -325,13 +349,13 @@ class UNetMidBlock2DCrossAttn(nn.Module): for _ in range(num_layers): attentions.append( - SpatialTransformer( - in_channels, + Transformer2DModel( attn_num_head_channels, in_channels // attn_num_head_channels, - depth=1, - context_dim=cross_attention_dim, - num_groups=resnet_groups, + in_channels=in_channels, + num_layers=1, + cross_attention_dim=cross_attention_dim, + norm_num_groups=resnet_groups, ) ) resnets.append( @@ -374,7 +398,7 @@ class UNetMidBlock2DCrossAttn(nn.Module): def forward(self, hidden_states, temb=None, encoder_hidden_states=None): hidden_states = self.resnets[0](hidden_states, temb) for attn, resnet in zip(self.attentions, self.resnets[1:]): - hidden_states = attn(hidden_states, encoder_hidden_states) + hidden_states = attn(hidden_states, 
encoder_hidden_states).sample hidden_states = resnet(hidden_states, temb) return hidden_states @@ -427,7 +451,7 @@ class AttnDownBlock2D(nn.Module): num_head_channels=attn_num_head_channels, rescale_output_factor=output_scale_factor, eps=resnet_eps, - num_groups=resnet_groups, + norm_num_groups=resnet_groups, ) ) @@ -506,13 +530,13 @@ class CrossAttnDownBlock2D(nn.Module): ) ) attentions.append( - SpatialTransformer( - out_channels, + Transformer2DModel( attn_num_head_channels, out_channels // attn_num_head_channels, - depth=1, - context_dim=cross_attention_dim, - num_groups=resnet_groups, + in_channels=out_channels, + num_layers=1, + cross_attention_dim=cross_attention_dim, + norm_num_groups=resnet_groups, ) ) self.attentions = nn.ModuleList(attentions) @@ -556,19 +580,22 @@ class CrossAttnDownBlock2D(nn.Module): for resnet, attn in zip(self.resnets, self.attentions): if self.training and self.gradient_checkpointing: - def create_custom_forward(module): + def create_custom_forward(module, return_dict=None): def custom_forward(*inputs): - return module(*inputs) + if return_dict is not None: + return module(*inputs, return_dict=return_dict) + else: + return module(*inputs) return custom_forward hidden_states = torch.utils.checkpoint.checkpoint(create_custom_forward(resnet), hidden_states, temb) hidden_states = torch.utils.checkpoint.checkpoint( - create_custom_forward(attn), hidden_states, encoder_hidden_states - ) + create_custom_forward(attn, return_dict=False), hidden_states, encoder_hidden_states + )[0] else: hidden_states = resnet(hidden_states, temb) - hidden_states = attn(hidden_states, context=encoder_hidden_states) + hidden_states = attn(hidden_states, encoder_hidden_states=encoder_hidden_states).sample output_states += (hidden_states,) @@ -763,7 +790,7 @@ class AttnDownEncoderBlock2D(nn.Module): num_head_channels=attn_num_head_channels, rescale_output_factor=output_scale_factor, eps=resnet_eps, - num_groups=resnet_groups, + norm_num_groups=resnet_groups, ) ) @@ -1014,7 +1041,7 @@ class AttnUpBlock2D(nn.Module): num_head_channels=attn_num_head_channels, rescale_output_factor=output_scale_factor, eps=resnet_eps, - num_groups=resnet_groups, + norm_num_groups=resnet_groups, ) ) @@ -1089,13 +1116,13 @@ class CrossAttnUpBlock2D(nn.Module): ) ) attentions.append( - SpatialTransformer( - out_channels, + Transformer2DModel( attn_num_head_channels, out_channels // attn_num_head_channels, - depth=1, - context_dim=cross_attention_dim, - num_groups=resnet_groups, + in_channels=out_channels, + num_layers=1, + cross_attention_dim=cross_attention_dim, + norm_num_groups=resnet_groups, ) ) self.attentions = nn.ModuleList(attentions) @@ -1145,19 +1172,22 @@ class CrossAttnUpBlock2D(nn.Module): if self.training and self.gradient_checkpointing: - def create_custom_forward(module): + def create_custom_forward(module, return_dict=None): def custom_forward(*inputs): - return module(*inputs) + if return_dict is not None: + return module(*inputs, return_dict=return_dict) + else: + return module(*inputs) return custom_forward hidden_states = torch.utils.checkpoint.checkpoint(create_custom_forward(resnet), hidden_states, temb) hidden_states = torch.utils.checkpoint.checkpoint( - create_custom_forward(attn), hidden_states, encoder_hidden_states - ) + create_custom_forward(attn, return_dict=False), hidden_states, encoder_hidden_states + )[0] else: hidden_states = resnet(hidden_states, temb) - hidden_states = attn(hidden_states, context=encoder_hidden_states) + hidden_states = attn(hidden_states, 
encoder_hidden_states=encoder_hidden_states).sample if self.upsamplers is not None: for upsampler in self.upsamplers: @@ -1337,7 +1367,7 @@ class AttnUpDecoderBlock2D(nn.Module): num_head_channels=attn_num_head_channels, rescale_output_factor=output_scale_factor, eps=resnet_eps, - num_groups=resnet_groups, + norm_num_groups=resnet_groups, ) ) diff --git a/src/diffusers/models/unet_2d_blocks_flax.py b/src/diffusers/models/unet_2d_blocks_flax.py index baa71beabe..5798385b9d 100644 --- a/src/diffusers/models/unet_2d_blocks_flax.py +++ b/src/diffusers/models/unet_2d_blocks_flax.py @@ -15,7 +15,7 @@ import flax.linen as nn import jax.numpy as jnp -from .attention_flax import FlaxSpatialTransformer +from .attention_flax import FlaxTransformer2DModel from .resnet_flax import FlaxDownsample2D, FlaxResnetBlock2D, FlaxUpsample2D @@ -63,7 +63,7 @@ class FlaxCrossAttnDownBlock2D(nn.Module): ) resnets.append(res_block) - attn_block = FlaxSpatialTransformer( + attn_block = FlaxTransformer2DModel( in_channels=self.out_channels, n_heads=self.attn_num_head_channels, d_head=self.out_channels // self.attn_num_head_channels, @@ -196,7 +196,7 @@ class FlaxCrossAttnUpBlock2D(nn.Module): ) resnets.append(res_block) - attn_block = FlaxSpatialTransformer( + attn_block = FlaxTransformer2DModel( in_channels=self.out_channels, n_heads=self.attn_num_head_channels, d_head=self.out_channels // self.attn_num_head_channels, @@ -326,7 +326,7 @@ class FlaxUNetMidBlock2DCrossAttn(nn.Module): attentions = [] for _ in range(self.num_layers): - attn_block = FlaxSpatialTransformer( + attn_block = FlaxTransformer2DModel( in_channels=self.in_channels, n_heads=self.attn_num_head_channels, d_head=self.in_channels // self.attn_num_head_channels, diff --git a/src/diffusers/models/vae.py b/src/diffusers/models/vae.py index 5f5a47dada..30de343d08 100644 --- a/src/diffusers/models/vae.py +++ b/src/diffusers/models/vae.py @@ -233,14 +233,16 @@ class VectorQuantizer(nn.Module): # NOTE: due to a bug the beta term was applied to the wrong term. for # backwards compatibility we use the buggy version by default, but you can # specify legacy=False to fix it. - def __init__(self, n_e, e_dim, beta, remap=None, unknown_index="random", sane_index_shape=False, legacy=True): + def __init__( + self, n_e, vq_embed_dim, beta, remap=None, unknown_index="random", sane_index_shape=False, legacy=True + ): super().__init__() self.n_e = n_e - self.e_dim = e_dim + self.vq_embed_dim = vq_embed_dim self.beta = beta self.legacy = legacy - self.embedding = nn.Embedding(self.n_e, self.e_dim) + self.embedding = nn.Embedding(self.n_e, self.vq_embed_dim) self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e) self.remap = remap @@ -287,7 +289,7 @@ class VectorQuantizer(nn.Module): def forward(self, z): # reshape z -> (batch, height, width, channel) and flatten z = z.permute(0, 2, 3, 1).contiguous() - z_flattened = z.view(-1, self.e_dim) + z_flattened = z.view(-1, self.vq_embed_dim) # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z d = ( @@ -409,6 +411,7 @@ class VQModel(ModelMixin, ConfigMixin): latent_channels (`int`, *optional*, defaults to `3`): Number of channels in the latent space. sample_size (`int`, *optional*, defaults to `32`): TODO num_vq_embeddings (`int`, *optional*, defaults to `256`): Number of codebook vectors in the VQ-VAE. + vq_embed_dim (`int`, *optional*): Hidden dim of codebook vectors in the VQ-VAE. 
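+
+    For example, a hypothetical configuration that only overrides the codebook settings (all other arguments keep
+    their defaults):
+
+    ```
+    vqvae = VQModel(num_vq_embeddings=512, vq_embed_dim=4)
+    ```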
""" @register_to_config @@ -425,6 +428,7 @@ class VQModel(ModelMixin, ConfigMixin): sample_size: int = 32, num_vq_embeddings: int = 256, norm_num_groups: int = 32, + vq_embed_dim: Optional[int] = None, ): super().__init__() @@ -440,11 +444,11 @@ class VQModel(ModelMixin, ConfigMixin): double_z=False, ) - self.quant_conv = torch.nn.Conv2d(latent_channels, latent_channels, 1) - self.quantize = VectorQuantizer( - num_vq_embeddings, latent_channels, beta=0.25, remap=None, sane_index_shape=False - ) - self.post_quant_conv = torch.nn.Conv2d(latent_channels, latent_channels, 1) + vq_embed_dim = vq_embed_dim if vq_embed_dim is not None else latent_channels + + self.quant_conv = torch.nn.Conv2d(latent_channels, vq_embed_dim, 1) + self.quantize = VectorQuantizer(num_vq_embeddings, vq_embed_dim, beta=0.25, remap=None, sane_index_shape=False) + self.post_quant_conv = torch.nn.Conv2d(vq_embed_dim, latent_channels, 1) # pass init params to Decoder self.decoder = Decoder( diff --git a/src/diffusers/pipelines/__init__.py b/src/diffusers/pipelines/__init__.py index 8015d4e114..bb3440b2bf 100644 --- a/src/diffusers/pipelines/__init__.py +++ b/src/diffusers/pipelines/__init__.py @@ -21,6 +21,7 @@ if is_torch_available() and is_transformers_available(): StableDiffusionInpaintPipelineLegacy, StableDiffusionPipeline, ) + from .vq_diffusion import VQDiffusionPipeline if is_transformers_available() and is_onnx_available(): from .stable_diffusion import ( diff --git a/src/diffusers/pipelines/vq_diffusion/__init__.py b/src/diffusers/pipelines/vq_diffusion/__init__.py new file mode 100644 index 0000000000..edf6f570f5 --- /dev/null +++ b/src/diffusers/pipelines/vq_diffusion/__init__.py @@ -0,0 +1 @@ +from .pipeline_vq_diffusion import VQDiffusionPipeline diff --git a/src/diffusers/pipelines/vq_diffusion/pipeline_vq_diffusion.py b/src/diffusers/pipelines/vq_diffusion/pipeline_vq_diffusion.py new file mode 100644 index 0000000000..6e5325ba7e --- /dev/null +++ b/src/diffusers/pipelines/vq_diffusion/pipeline_vq_diffusion.py @@ -0,0 +1,253 @@ +# Copyright 2022 Microsoft and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Callable, List, Optional, Tuple, Union + +import torch + +from diffusers import Transformer2DModel, VQModel +from diffusers.schedulers.scheduling_vq_diffusion import VQDiffusionScheduler +from transformers import CLIPTextModel, CLIPTokenizer + +from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput +from ...utils import logging + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class VQDiffusionPipeline(DiffusionPipeline): + r""" + Pipeline for text-to-image generation using VQ Diffusion + + This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the + library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 
+ + Args: + vqvae ([`VQModel`]): + Vector Quantized Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent + representations. + text_encoder ([`CLIPTextModel`]): + Frozen text-encoder. VQ Diffusion uses the text portion of + [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically + the [clip-vit-base-patch32](https://huggingface.co/openai/clip-vit-base-patch32) variant. + tokenizer (`CLIPTokenizer`): + Tokenizer of class + [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). + transformer ([`Transformer2DModel`]): + Conditional transformer to denoise the encoded image latents. + scheduler ([`VQDiffusionScheduler`]): + A scheduler to be used in combination with `transformer` to denoise the encoded image latents. + """ + + vqvae: VQModel + text_encoder: CLIPTextModel + tokenizer: CLIPTokenizer + transformer: Transformer2DModel + scheduler: VQDiffusionScheduler + + def __init__( + self, + vqvae: VQModel, + text_encoder: CLIPTextModel, + tokenizer: CLIPTokenizer, + transformer: Transformer2DModel, + scheduler: VQDiffusionScheduler, + ): + super().__init__() + + self.register_modules( + vqvae=vqvae, + transformer=transformer, + text_encoder=text_encoder, + tokenizer=tokenizer, + scheduler=scheduler, + ) + + @torch.no_grad() + def __call__( + self, + prompt: Union[str, List[str]], + num_inference_steps: int = 100, + truncation_rate: float = 1.0, + num_images_per_prompt: int = 1, + generator: Optional[torch.Generator] = None, + latents: Optional[torch.FloatTensor] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, + callback_steps: Optional[int] = 1, + ) -> Union[ImagePipelineOutput, Tuple]: + """ + Function invoked when calling the pipeline for generation. + + Args: + prompt (`str` or `List[str]`): + The prompt or prompts to guide the image generation. + num_inference_steps (`int`, *optional*, defaults to 100): + The number of denoising steps. More denoising steps usually lead to a higher quality image at the + expense of slower inference. + truncation_rate (`float`, *optional*, defaults to 1.0 (equivalent to no truncation)): + Used to "truncate" the predicted classes for x_0 such that the cumulative probability for a pixel is at + most `truncation_rate`. The lowest probabilities that would increase the cumulative probability above + `truncation_rate` are set to zero. + num_images_per_prompt (`int`, *optional*, defaults to 1): + The number of images to generate per prompt. + generator (`torch.Generator`, *optional*): + A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation + deterministic. + latents (`torch.FloatTensor` of shape (batch), *optional*): + Pre-generated noisy latents to be used as inputs for image generation. Must be valid embedding indices. + Can be used to tweak the same generation with different prompts. If not provided, a latents tensor will + be generated of completely masked latent pixels. + output_type (`str`, *optional*, defaults to `"pil"`): + The output format of the generated image. Choose between + [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. 
+            callback (`Callable`, *optional*):
+                A function that will be called every `callback_steps` steps during inference. The function will be
+                called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+            callback_steps (`int`, *optional*, defaults to 1):
+                The frequency at which the `callback` function will be called. If not specified, the callback will be
+                called at every step.
+
+        Returns:
+            [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipeline_utils.ImagePipelineOutput`] if
+            `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is a list with the
+            generated images.
+        """
+        if isinstance(prompt, str):
+            batch_size = 1
+        elif isinstance(prompt, list):
+            batch_size = len(prompt)
+        else:
+            raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
+
+        batch_size = batch_size * num_images_per_prompt
+
+        if (callback_steps is None) or (
+            callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0)
+        ):
+            raise ValueError(
+                f"`callback_steps` has to be a positive integer but is {callback_steps} of type"
+                f" {type(callback_steps)}."
+            )
+
+        # get prompt text embeddings
+        text_inputs = self.tokenizer(
+            prompt,
+            padding="max_length",
+            max_length=self.tokenizer.model_max_length,
+            return_tensors="pt",
+        )
+        text_input_ids = text_inputs.input_ids
+
+        if text_input_ids.shape[-1] > self.tokenizer.model_max_length:
+            removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :])
+            logger.warning(
+                "The following part of your input was truncated because CLIP can only handle sequences up to"
+                f" {self.tokenizer.model_max_length} tokens: {removed_text}"
+            )
+            text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length]
+        text_embeddings = self.text_encoder(text_input_ids.to(self.device))[0]
+
+        # NOTE: This additional step of normalizing the text embeddings is from VQ-Diffusion.
+        # While CLIP does normalize the pooled output of the text transformer when combining
+        # the image and text embeddings, CLIP does not directly normalize the last hidden state.
+        #
+        # CLIP normalizing the pooled output.
+        # https://github.com/huggingface/transformers/blob/d92e22d1f28324f513f3080e5c47c071a3916721/src/transformers/models/clip/modeling_clip.py#L1052-L1053
+        text_embeddings = text_embeddings / text_embeddings.norm(dim=-1, keepdim=True)
+
+        # duplicate text embeddings for each generation per prompt
+        text_embeddings = text_embeddings.repeat_interleave(num_images_per_prompt, dim=0)
+
+        # get the initial completely masked latents unless the user supplied them
+
+        latents_shape = (batch_size, self.transformer.num_latent_pixels)
+        if latents is None:
+            mask_class = self.transformer.num_vector_embeds - 1
+            latents = torch.full(latents_shape, mask_class).to(self.device)
+        else:
+            if latents.shape != latents_shape:
+                raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}")
+            if (latents < 0).any() or (latents >= self.transformer.num_vector_embeds).any():
+                raise ValueError(
+                    "Unexpected latents value(s). All latents must be valid embedding indices i.e. in the range 0,"
+                    f" {self.transformer.num_vector_embeds - 1} (inclusive)."
+ ) + latents = latents.to(self.device) + + # set timesteps + self.scheduler.set_timesteps(num_inference_steps, device=self.device) + + timesteps_tensor = self.scheduler.timesteps.to(self.device) + + sample = latents + + for i, t in enumerate(self.progress_bar(timesteps_tensor)): + # predict the un-noised image + # model_output == `log_p_x_0` + model_output = self.transformer(sample, encoder_hidden_states=text_embeddings, timestep=t).sample + + model_output = self.truncate(model_output, truncation_rate) + + # remove `log(0)`'s (`-inf`s) + model_output = model_output.clamp(-70) + + # compute the previous noisy sample x_t -> x_t-1 + sample = self.scheduler.step(model_output, timestep=t, sample=sample, generator=generator).prev_sample + + # call the callback, if provided + if callback is not None and i % callback_steps == 0: + callback(i, t, sample) + + embedding_channels = self.vqvae.config.vq_embed_dim + embeddings_shape = (batch_size, self.transformer.height, self.transformer.width, embedding_channels) + embeddings = self.vqvae.quantize.get_codebook_entry(sample, shape=embeddings_shape) + image = self.vqvae.decode(embeddings, force_not_quantize=True).sample + + image = (image / 2 + 0.5).clamp(0, 1) + image = image.cpu().permute(0, 2, 3, 1).numpy() + + if output_type == "pil": + image = self.numpy_to_pil(image) + + if not return_dict: + return (image,) + + return ImagePipelineOutput(images=image) + + def truncate(self, log_p_x_0: torch.FloatTensor, truncation_rate: float) -> torch.FloatTensor: + """ + Truncates log_p_x_0 such that for each column vector, the total cumulative probability is `truncation_rate` The + lowest probabilities that would increase the cumulative probability above `truncation_rate` are set to zero. + """ + sorted_log_p_x_0, indices = torch.sort(log_p_x_0, 1, descending=True) + sorted_p_x_0 = torch.exp(sorted_log_p_x_0) + keep_mask = sorted_p_x_0.cumsum(dim=1) < truncation_rate + + # Ensure that at least the largest probability is not zeroed out + all_true = torch.full_like(keep_mask[:, 0:1, :], True) + keep_mask = torch.cat((all_true, keep_mask), dim=1) + keep_mask = keep_mask[:, :-1, :] + + keep_mask = keep_mask.gather(1, indices.argsort(1)) + + rv = log_p_x_0.clone() + + rv[~keep_mask] = -torch.inf # -inf = log(0) + + return rv diff --git a/src/diffusers/schedulers/__init__.py b/src/diffusers/schedulers/__init__.py index a1915ed8d2..1be541ba8b 100644 --- a/src/diffusers/schedulers/__init__.py +++ b/src/diffusers/schedulers/__init__.py @@ -28,6 +28,7 @@ if is_torch_available(): from .scheduling_sde_ve import ScoreSdeVeScheduler from .scheduling_sde_vp import ScoreSdeVpScheduler from .scheduling_utils import SchedulerMixin + from .scheduling_vq_diffusion import VQDiffusionScheduler else: from ..utils.dummy_pt_objects import * # noqa F403 diff --git a/src/diffusers/schedulers/scheduling_vq_diffusion.py b/src/diffusers/schedulers/scheduling_vq_diffusion.py new file mode 100644 index 0000000000..dbe320d998 --- /dev/null +++ b/src/diffusers/schedulers/scheduling_vq_diffusion.py @@ -0,0 +1,494 @@ +# Copyright 2022 Microsoft and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from typing import Optional, Tuple, Union + +import numpy as np +import torch +import torch.nn.functional as F + +from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import BaseOutput +from .scheduling_utils import SchedulerMixin + + +@dataclass +class VQDiffusionSchedulerOutput(BaseOutput): + """ + Output class for the scheduler's step function output. + + Args: + prev_sample (`torch.LongTensor` of shape `(batch size, num latent pixels)`): + Computed sample x_{t-1} of previous timestep. `prev_sample` should be used as next model input in the + denoising loop. + """ + + prev_sample: torch.LongTensor + + +def index_to_log_onehot(x: torch.LongTensor, num_classes: int) -> torch.FloatTensor: + """ + Convert batch of vector of class indices into batch of log onehot vectors + + Args: + x (`torch.LongTensor` of shape `(batch size, vector length)`): + Batch of class indices + + num_classes (`int`): + number of classes to be used for the onehot vectors + + Returns: + `torch.FloatTensor` of shape `(batch size, num classes, vector length)`: + Log onehot vectors + """ + x_onehot = F.one_hot(x, num_classes) + x_onehot = x_onehot.permute(0, 2, 1) + log_x = torch.log(x_onehot.float().clamp(min=1e-30)) + return log_x + + +def gumbel_noised(logits: torch.FloatTensor, generator: Optional[torch.Generator]) -> torch.FloatTensor: + """ + Apply gumbel noise to `logits` + """ + uniform = torch.rand(logits.shape, device=logits.device, generator=generator) + gumbel_noise = -torch.log(-torch.log(uniform + 1e-30) + 1e-30) + noised = gumbel_noise + logits + return noised + + +def alpha_schedules(num_diffusion_timesteps: int, alpha_cum_start=0.99999, alpha_cum_end=0.000009): + """ + Cumulative and non-cumulative alpha schedules. + + See section 4.1. + """ + att = ( + np.arange(0, num_diffusion_timesteps) / (num_diffusion_timesteps - 1) * (alpha_cum_end - alpha_cum_start) + + alpha_cum_start + ) + att = np.concatenate(([1], att)) + at = att[1:] / att[:-1] + att = np.concatenate((att[1:], [1])) + return at, att + + +def gamma_schedules(num_diffusion_timesteps: int, gamma_cum_start=0.000009, gamma_cum_end=0.99999): + """ + Cumulative and non-cumulative gamma schedules. + + See section 4.1. + """ + ctt = ( + np.arange(0, num_diffusion_timesteps) / (num_diffusion_timesteps - 1) * (gamma_cum_end - gamma_cum_start) + + gamma_cum_start + ) + ctt = np.concatenate(([0], ctt)) + one_minus_ctt = 1 - ctt + one_minus_ct = one_minus_ctt[1:] / one_minus_ctt[:-1] + ct = 1 - one_minus_ct + ctt = np.concatenate((ctt[1:], [0])) + return ct, ctt + + +class VQDiffusionScheduler(SchedulerMixin, ConfigMixin): + """ + The VQ-diffusion transformer outputs predicted probabilities of the initial unnoised image. + + The VQ-diffusion scheduler converts the transformer's output into a sample for the unnoised image at the previous + diffusion timestep. + + [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` + function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. 
+ [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and + [`~ConfigMixin.from_config`] functions. + + For more details, see the original paper: https://arxiv.org/abs/2111.14822 + + Args: + num_vec_classes (`int`): + The number of classes of the vector embeddings of the latent pixels. Includes the class for the masked + latent pixel. + + num_train_timesteps (`int`): + Number of diffusion steps used to train the model. + + alpha_cum_start (`float`): + The starting cumulative alpha value. + + alpha_cum_end (`float`): + The ending cumulative alpha value. + + gamma_cum_start (`float`): + The starting cumulative gamma value. + + gamma_cum_end (`float`): + The ending cumulative gamma value. + """ + + @register_to_config + def __init__( + self, + num_vec_classes: int, + num_train_timesteps: int = 100, + alpha_cum_start: float = 0.99999, + alpha_cum_end: float = 0.000009, + gamma_cum_start: float = 0.000009, + gamma_cum_end: float = 0.99999, + ): + self.num_embed = num_vec_classes + + # By convention, the index for the mask class is the last class index + self.mask_class = self.num_embed - 1 + + at, att = alpha_schedules(num_train_timesteps, alpha_cum_start=alpha_cum_start, alpha_cum_end=alpha_cum_end) + ct, ctt = gamma_schedules(num_train_timesteps, gamma_cum_start=gamma_cum_start, gamma_cum_end=gamma_cum_end) + + num_non_mask_classes = self.num_embed - 1 + bt = (1 - at - ct) / num_non_mask_classes + btt = (1 - att - ctt) / num_non_mask_classes + + at = torch.tensor(at.astype("float64")) + bt = torch.tensor(bt.astype("float64")) + ct = torch.tensor(ct.astype("float64")) + log_at = torch.log(at) + log_bt = torch.log(bt) + log_ct = torch.log(ct) + + att = torch.tensor(att.astype("float64")) + btt = torch.tensor(btt.astype("float64")) + ctt = torch.tensor(ctt.astype("float64")) + log_cumprod_at = torch.log(att) + log_cumprod_bt = torch.log(btt) + log_cumprod_ct = torch.log(ctt) + + self.log_at = log_at.float() + self.log_bt = log_bt.float() + self.log_ct = log_ct.float() + self.log_cumprod_at = log_cumprod_at.float() + self.log_cumprod_bt = log_cumprod_bt.float() + self.log_cumprod_ct = log_cumprod_ct.float() + + # setable values + self.num_inference_steps = None + self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy()) + + def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None): + """ + Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference. + + Args: + num_inference_steps (`int`): + the number of diffusion steps used when generating samples with a pre-trained model. + + device (`str` or `torch.device`): + device to place the timesteps and the diffusion process parameters (alpha, beta, gamma) on. 
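+
+        A minimal illustrative sketch (the class count and step count below are arbitrary):
+
+        ```
+        scheduler = VQDiffusionScheduler(num_vec_classes=4097)
+        scheduler.set_timesteps(num_inference_steps=50, device="cpu")
+        ```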
+ """ + self.num_inference_steps = num_inference_steps + timesteps = np.arange(0, self.num_inference_steps)[::-1].copy() + self.timesteps = torch.from_numpy(timesteps).to(device) + + self.log_at = self.log_at.to(device) + self.log_bt = self.log_bt.to(device) + self.log_ct = self.log_ct.to(device) + self.log_cumprod_at = self.log_cumprod_at.to(device) + self.log_cumprod_bt = self.log_cumprod_bt.to(device) + self.log_cumprod_ct = self.log_cumprod_ct.to(device) + + def step( + self, + model_output: torch.FloatTensor, + timestep: torch.long, + sample: torch.LongTensor, + generator: Optional[torch.Generator] = None, + return_dict: bool = True, + ) -> Union[VQDiffusionSchedulerOutput, Tuple]: + """ + Predict the sample at the previous timestep via the reverse transition distribution i.e. Equation (11). See the + docstring for `self.q_posterior` for more in depth docs on how Equation (11) is computed. + + Args: + log_p_x_0: (`torch.FloatTensor` of shape `(batch size, num classes - 1, num latent pixels)`): + The log probabilities for the predicted classes of the initial latent pixels. Does not include a + prediction for the masked class as the initial unnoised image cannot be masked. + + t (`torch.long`): + The timestep that determines which transition matrices are used. + + x_t: (`torch.LongTensor` of shape `(batch size, num latent pixels)`): + The classes of each latent pixel at time `t` + + generator: (`torch.Generator` or None): + RNG for the noise applied to p(x_{t-1} | x_t) before it is sampled from. + + return_dict (`bool`): + option for returning tuple rather than VQDiffusionSchedulerOutput class + + Returns: + [`~schedulers.scheduling_utils.VQDiffusionSchedulerOutput`] or `tuple`: + [`~schedulers.scheduling_utils.VQDiffusionSchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. + When returning a tuple, the first element is the sample tensor. + """ + if timestep == 0: + log_p_x_t_min_1 = model_output + else: + log_p_x_t_min_1 = self.q_posterior(model_output, sample, timestep) + + log_p_x_t_min_1 = gumbel_noised(log_p_x_t_min_1, generator) + + x_t_min_1 = log_p_x_t_min_1.argmax(dim=1) + + if not return_dict: + return (x_t_min_1,) + + return VQDiffusionSchedulerOutput(prev_sample=x_t_min_1) + + def q_posterior(self, log_p_x_0, x_t, t): + """ + Calculates the log probabilities for the predicted classes of the image at timestep `t-1`. I.e. Equation (11). + + Instead of directly computing equation (11), we use Equation (5) to restate Equation (11) in terms of only + forward probabilities. + + Equation (11) stated in terms of forward probabilities via Equation (5): + + Where: + - the sum is over x_0 = {C_0 ... C_{k-1}} (classes for x_0) + + p(x_{t-1} | x_t) = sum( q(x_t | x_{t-1}) * q(x_{t-1} | x_0) * p(x_0) / q(x_t | x_0) ) + + Args: + log_p_x_0: (`torch.FloatTensor` of shape `(batch size, num classes - 1, num latent pixels)`): + The log probabilities for the predicted classes of the initial latent pixels. Does not include a + prediction for the masked class as the initial unnoised image cannot be masked. + + x_t: (`torch.LongTensor` of shape `(batch size, num latent pixels)`): + The classes of each latent pixel at time `t` + + t (torch.Long): + The timestep that determines which transition matrix is used. + + Returns: + `torch.FloatTensor` of shape `(batch size, num classes, num latent pixels)`: + The log probabilities for the predicted classes of the image at timestep `t-1`. I.e. Equation (11). 
+ """ + log_onehot_x_t = index_to_log_onehot(x_t, self.num_embed) + + log_q_x_t_given_x_0 = self.log_Q_t_transitioning_to_known_class( + t=t, x_t=x_t, log_onehot_x_t=log_onehot_x_t, cumulative=True + ) + + log_q_t_given_x_t_min_1 = self.log_Q_t_transitioning_to_known_class( + t=t, x_t=x_t, log_onehot_x_t=log_onehot_x_t, cumulative=False + ) + + # p_0(x_0=C_0 | x_t) / q(x_t | x_0=C_0) ... p_n(x_0=C_0 | x_t) / q(x_t | x_0=C_0) + # . . . + # . . . + # . . . + # p_0(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) ... p_n(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) + q = log_p_x_0 - log_q_x_t_given_x_0 + + # sum_0 = p_0(x_0=C_0 | x_t) / q(x_t | x_0=C_0) + ... + p_0(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}), ... , + # sum_n = p_n(x_0=C_0 | x_t) / q(x_t | x_0=C_0) + ... + p_n(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) + q_log_sum_exp = torch.logsumexp(q, dim=1, keepdim=True) + + # p_0(x_0=C_0 | x_t) / q(x_t | x_0=C_0) / sum_0 ... p_n(x_0=C_0 | x_t) / q(x_t | x_0=C_0) / sum_n + # . . . + # . . . + # . . . + # p_0(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) / sum_0 ... p_n(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) / sum_n + q = q - q_log_sum_exp + + # (p_0(x_0=C_0 | x_t) / q(x_t | x_0=C_0) / sum_0) * a_cumulative_{t-1} + b_cumulative_{t-1} ... (p_n(x_0=C_0 | x_t) / q(x_t | x_0=C_0) / sum_n) * a_cumulative_{t-1} + b_cumulative_{t-1} + # . . . + # . . . + # . . . + # (p_0(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) / sum_0) * a_cumulative_{t-1} + b_cumulative_{t-1} ... (p_n(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) / sum_n) * a_cumulative_{t-1} + b_cumulative_{t-1} + # c_cumulative_{t-1} ... c_cumulative_{t-1} + q = self.apply_cumulative_transitions(q, t - 1) + + # ((p_0(x_0=C_0 | x_t) / q(x_t | x_0=C_0) / sum_0) * a_cumulative_{t-1} + b_cumulative_{t-1}) * q(x_t | x_{t-1}=C_0) * sum_0 ... ((p_n(x_0=C_0 | x_t) / q(x_t | x_0=C_0) / sum_n) * a_cumulative_{t-1} + b_cumulative_{t-1}) * q(x_t | x_{t-1}=C_0) * sum_n + # . . . + # . . . + # . . . + # ((p_0(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) / sum_0) * a_cumulative_{t-1} + b_cumulative_{t-1}) * q(x_t | x_{t-1}=C_{k-1}) * sum_0 ... ((p_n(x_0=C_{k-1} | x_t) / q(x_t | x_0=C_{k-1}) / sum_n) * a_cumulative_{t-1} + b_cumulative_{t-1}) * q(x_t | x_{t-1}=C_{k-1}) * sum_n + # c_cumulative_{t-1} * q(x_t | x_{t-1}=C_k) * sum_0 ... c_cumulative_{t-1} * q(x_t | x_{t-1}=C_k) * sum_0 + log_p_x_t_min_1 = q + log_q_t_given_x_t_min_1 + q_log_sum_exp + + # For each column, there are two possible cases. + # + # Where: + # - sum(p_n(x_0))) is summing over all classes for x_0 + # - C_i is the class transitioning from (not to be confused with c_t and c_cumulative_t being used for gamma's) + # - C_j is the class transitioning to + # + # 1. x_t is masked i.e. x_t = c_k + # + # Simplifying the expression, the column vector is: + # . + # . + # . + # (c_t / c_cumulative_t) * (a_cumulative_{t-1} * p_n(x_0 = C_i | x_t) + b_cumulative_{t-1} * sum(p_n(x_0))) + # . + # . + # . + # (c_cumulative_{t-1} / c_cumulative_t) * sum(p_n(x_0)) + # + # From equation (11) stated in terms of forward probabilities, the last row is trivially verified. + # + # For the other rows, we can state the equation as ... + # + # (c_t / c_cumulative_t) * [b_cumulative_{t-1} * p(x_0=c_0) + ... + (a_cumulative_{t-1} + b_cumulative_{t-1}) * p(x_0=C_i) + ... + b_cumulative_{k-1} * p(x_0=c_{k-1})] + # + # This verifies the other rows. + # + # 2. x_t is not masked + # + # Simplifying the expression, there are two cases for the rows of the column vector, where C_j = C_i and where C_j != C_i: + # . + # . + # . 
+ # C_j != C_i: b_t * ((b_cumulative_{t-1} / b_cumulative_t) * p_n(x_0 = c_0) + ... + ((a_cumulative_{t-1} + b_cumulative_{t-1}) / b_cumulative_t) * p_n(x_0 = C_i) + ... + (b_cumulative_{t-1} / (a_cumulative_t + b_cumulative_t)) * p_n(c_0=C_j) + ... + (b_cumulative_{t-1} / b_cumulative_t) * p_n(x_0 = c_{k-1})) + # . + # . + # . + # C_j = C_i: (a_t + b_t) * ((b_cumulative_{t-1} / b_cumulative_t) * p_n(x_0 = c_0) + ... + ((a_cumulative_{t-1} + b_cumulative_{t-1}) / (a_cumulative_t + b_cumulative_t)) * p_n(x_0 = C_i = C_j) + ... + (b_cumulative_{t-1} / b_cumulative_t) * p_n(x_0 = c_{k-1})) + # . + # . + # . + # 0 + # + # The last row is trivially verified. The other rows can be verified by directly expanding equation (11) stated in terms of forward probabilities. + return log_p_x_t_min_1 + + def log_Q_t_transitioning_to_known_class( + self, *, t: torch.int, x_t: torch.LongTensor, log_onehot_x_t: torch.FloatTensor, cumulative: bool + ): + """ + Returns the log probabilities of the rows from the (cumulative or non-cumulative) transition matrix for each + latent pixel in `x_t`. + + See equation (7) for the complete non-cumulative transition matrix. The complete cumulative transition matrix + is the same structure except the parameters (alpha, beta, gamma) are the cumulative analogs. + + Args: + t (torch.Long): + The timestep that determines which transition matrix is used. + + x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`): + The classes of each latent pixel at time `t`. + + log_onehot_x_t (`torch.FloatTensor` of shape `(batch size, num classes, num latent pixels)`): + The log one-hot vectors of `x_t` + + cumulative (`bool`): + If cumulative is `False`, we use the single step transition matrix `t-1`->`t`. If cumulative is `True`, + we use the cumulative transition matrix `0`->`t`. + + Returns: + `torch.FloatTensor` of shape `(batch size, num classes - 1, num latent pixels)`: + Each _column_ of the returned matrix is a _row_ of log probabilities of the complete probability + transition matrix. + + When non cumulative, returns `self.num_classes - 1` rows because the initial latent pixel cannot be + masked. + + Where: + - `q_n` is the probability distribution for the forward process of the `n`th latent pixel. + - C_0 is a class of a latent pixel embedding + - C_k is the class of the masked latent pixel + + non-cumulative result (omitting logarithms): + ``` + q_0(x_t | x_{t-1} = C_0) ... q_n(x_t | x_{t-1} = C_0) + . . . + . . . + . . . + q_0(x_t | x_{t-1} = C_k) ... q_n(x_t | x_{t-1} = C_k) + ``` + + cumulative result (omitting logarithms): + ``` + q_0_cumulative(x_t | x_0 = C_0) ... q_n_cumulative(x_t | x_0 = C_0) + . . . + . . . + . . . + q_0_cumulative(x_t | x_0 = C_{k-1}) ... q_n_cumulative(x_t | x_0 = C_{k-1}) + ``` + """ + if cumulative: + a = self.log_cumprod_at[t] + b = self.log_cumprod_bt[t] + c = self.log_cumprod_ct[t] + else: + a = self.log_at[t] + b = self.log_bt[t] + c = self.log_ct[t] + + if not cumulative: + # The values in the onehot vector can also be used as the logprobs for transitioning + # from masked latent pixels. If we are not calculating the cumulative transitions, + # we need to save these vectors to be re-appended to the final matrix so the values + # aren't overwritten. 
+ # + # `P(x_t!=mask|x_{t-1=mask}) = 0` and 0 will be the value of the last row of the onehot vector + # if x_t is not masked + # + # `P(x_t=mask|x_{t-1=mask}) = 1` and 1 will be the value of the last row of the onehot vector + # if x_t is masked + log_onehot_x_t_transitioning_from_masked = log_onehot_x_t[:, -1, :].unsqueeze(1) + + # `index_to_log_onehot` will add onehot vectors for masked pixels, + # so the default one hot matrix has one too many rows. See the doc string + # for an explanation of the dimensionality of the returned matrix. + log_onehot_x_t = log_onehot_x_t[:, :-1, :] + + # this is a cheeky trick to produce the transition probabilities using log one-hot vectors. + # + # Don't worry about what values this sets in the columns that mark transitions + # to masked latent pixels. They are overwrote later with the `mask_class_mask`. + # + # Looking at the below logspace formula in non-logspace, each value will evaluate to either + # `1 * a + b = a + b` where `log_Q_t` has the one hot value in the column + # or + # `0 * a + b = b` where `log_Q_t` has the 0 values in the column. + # + # See equation 7 for more details. + log_Q_t = (log_onehot_x_t + a).logaddexp(b) + + # The whole column of each masked pixel is `c` + mask_class_mask = x_t == self.mask_class + mask_class_mask = mask_class_mask.unsqueeze(1).expand(-1, self.num_embed - 1, -1) + log_Q_t[mask_class_mask] = c + + if not cumulative: + log_Q_t = torch.cat((log_Q_t, log_onehot_x_t_transitioning_from_masked), dim=1) + + return log_Q_t + + def apply_cumulative_transitions(self, q, t): + bsz = q.shape[0] + a = self.log_cumprod_at[t] + b = self.log_cumprod_bt[t] + c = self.log_cumprod_ct[t] + + num_latent_pixels = q.shape[2] + c = c.expand(bsz, 1, num_latent_pixels) + + q = (q + a).logaddexp(b) + q = torch.cat((q, c), dim=1) + + return q diff --git a/src/diffusers/utils/dummy_pt_objects.py b/src/diffusers/utils/dummy_pt_objects.py index 63aa20962f..833f2b6c50 100644 --- a/src/diffusers/utils/dummy_pt_objects.py +++ b/src/diffusers/utils/dummy_pt_objects.py @@ -34,6 +34,21 @@ class AutoencoderKL(metaclass=DummyObject): requires_backends(cls, ["torch"]) +class Transformer2DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + class UNet1DModel(metaclass=DummyObject): _backends = ["torch"] @@ -257,6 +272,21 @@ class ScoreSdeVePipeline(metaclass=DummyObject): requires_backends(cls, ["torch"]) +class VQDiffusionPipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + class DDIMScheduler(metaclass=DummyObject): _backends = ["torch"] @@ -407,6 +437,21 @@ class ScoreSdeVeScheduler(metaclass=DummyObject): requires_backends(cls, ["torch"]) +class VQDiffusionScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + class 
EMAModel(metaclass=DummyObject): _backends = ["torch"] diff --git a/tests/pipelines/vq_diffusion/__init__.py b/tests/pipelines/vq_diffusion/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/pipelines/vq_diffusion/test_vq_diffusion.py b/tests/pipelines/vq_diffusion/test_vq_diffusion.py new file mode 100644 index 0000000000..5eb32d40d4 --- /dev/null +++ b/tests/pipelines/vq_diffusion/test_vq_diffusion.py @@ -0,0 +1,175 @@ +# coding=utf-8 +# Copyright 2022 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import unittest + +import numpy as np +import torch + +from diffusers import Transformer2DModel, VQDiffusionPipeline, VQDiffusionScheduler, VQModel +from diffusers.utils import load_image, slow, torch_device +from diffusers.utils.testing_utils import require_torch_gpu +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer + +from ...test_pipelines_common import PipelineTesterMixin + + +torch.backends.cuda.matmul.allow_tf32 = False + + +class VQDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + def tearDown(self): + # clean up the VRAM after each test + super().tearDown() + gc.collect() + torch.cuda.empty_cache() + + @property + def num_embed(self): + return 12 + + @property + def num_embeds_ada_norm(self): + return 12 + + @property + def dummy_vqvae(self): + torch.manual_seed(0) + model = VQModel( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=3, + num_vq_embeddings=self.num_embed, + vq_embed_dim=3, + ) + return model + + @property + def dummy_tokenizer(self): + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + return tokenizer + + @property + def dummy_text_encoder(self): + torch.manual_seed(0) + config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + ) + return CLIPTextModel(config) + + @property + def dummy_transformer(self): + torch.manual_seed(0) + + height = 12 + width = 12 + + model_kwargs = { + "attention_bias": True, + "cross_attention_dim": 32, + "attention_head_dim": height * width, + "num_attention_heads": 1, + "num_vector_embeds": self.num_embed, + "num_embeds_ada_norm": self.num_embeds_ada_norm, + "norm_num_groups": 32, + "sample_size": width, + "activation_fn": "geglu-approximate", + } + + model = Transformer2DModel(**model_kwargs) + return model + + def test_vq_diffusion(self): + device = "cpu" + + vqvae = self.dummy_vqvae + text_encoder = self.dummy_text_encoder + tokenizer = self.dummy_tokenizer + transformer = self.dummy_transformer + scheduler = VQDiffusionScheduler(self.num_embed) + + pipe = VQDiffusionPipeline( + vqvae=vqvae, text_encoder=text_encoder, tokenizer=tokenizer, transformer=transformer, scheduler=scheduler + ) + pipe = 
pipe.to(device) + pipe.set_progress_bar_config(disable=None) + + prompt = "teddy bear playing in the pool" + + generator = torch.Generator(device=device).manual_seed(0) + output = pipe([prompt], generator=generator, num_inference_steps=2, output_type="np") + image = output.images + + generator = torch.Generator(device=device).manual_seed(0) + image_from_tuple = pipe( + [prompt], generator=generator, output_type="np", return_dict=False, num_inference_steps=2 + )[0] + + image_slice = image[0, -3:, -3:, -1] + image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1] + + assert image.shape == (1, 24, 24, 3) + + expected_slice = np.array([0.6583, 0.6410, 0.5325, 0.5635, 0.5563, 0.4234, 0.6008, 0.5491, 0.4880]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 + + +@slow +@require_torch_gpu +class VQDiffusionPipelineIntegrationTests(unittest.TestCase): + def tearDown(self): + # clean up the VRAM after each test + super().tearDown() + gc.collect() + torch.cuda.empty_cache() + + def test_vq_diffusion(self): + expected_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/vq_diffusion/teddy_bear_pool.png" + ) + expected_image = np.array(expected_image, dtype=np.float32) / 255.0 + + pipeline = VQDiffusionPipeline.from_pretrained("microsoft/vq-diffusion-ithq") + pipeline = pipeline.to(torch_device) + pipeline.set_progress_bar_config(disable=None) + + generator = torch.Generator(device=torch_device).manual_seed(0) + output = pipeline( + "teddy bear playing in the pool", + truncation_rate=0.86, + num_images_per_prompt=1, + generator=generator, + output_type="np", + ) + + image = output.images[0] + + assert image.shape == (256, 256, 3) + assert np.abs(expected_image - image).max() < 1e-2 diff --git a/tests/test_layers_utils.py b/tests/test_layers_utils.py index cf531fbf3f..911ec548b3 100755 --- a/tests/test_layers_utils.py +++ b/tests/test_layers_utils.py @@ -18,8 +18,9 @@ import unittest import numpy as np import torch +from torch import nn -from diffusers.models.attention import AttentionBlock, SpatialTransformer +from diffusers.models.attention import GEGLU, AdaLayerNorm, ApproximateGELU, AttentionBlock, Transformer2DModel from diffusers.models.embeddings import get_timestep_embedding from diffusers.models.resnet import Downsample2D, Upsample2D from diffusers.utils import torch_device @@ -235,7 +236,7 @@ class AttentionBlockTests(unittest.TestCase): num_head_channels=1, rescale_output_factor=1.0, eps=1e-6, - num_groups=32, + norm_num_groups=32, ).to(torch_device) with torch.no_grad(): attention_scores = attentionBlock(sample) @@ -259,7 +260,7 @@ class AttentionBlockTests(unittest.TestCase): channels=512, rescale_output_factor=1.0, eps=1e-6, - num_groups=32, + norm_num_groups=32, ).to(torch_device) with torch.no_grad(): attention_scores = attentionBlock(sample) @@ -273,22 +274,22 @@ class AttentionBlockTests(unittest.TestCase): assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3) -class SpatialTransformerTests(unittest.TestCase): +class Transformer2DModelTests(unittest.TestCase): def test_spatial_transformer_default(self): torch.manual_seed(0) if torch.cuda.is_available(): torch.cuda.manual_seed_all(0) sample = torch.randn(1, 32, 64, 64).to(torch_device) - spatial_transformer_block = SpatialTransformer( + spatial_transformer_block = Transformer2DModel( in_channels=32, - n_heads=1, - d_head=32, + num_attention_heads=1, + 
attention_head_dim=32, dropout=0.0, - context_dim=None, + cross_attention_dim=None, ).to(torch_device) with torch.no_grad(): - attention_scores = spatial_transformer_block(sample) + attention_scores = spatial_transformer_block(sample).sample assert attention_scores.shape == (1, 32, 64, 64) output_slice = attention_scores[0, -1, -3:, -3:] @@ -298,22 +299,22 @@ class SpatialTransformerTests(unittest.TestCase): ) assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3) - def test_spatial_transformer_context_dim(self): + def test_spatial_transformer_cross_attention_dim(self): torch.manual_seed(0) if torch.cuda.is_available(): torch.cuda.manual_seed_all(0) sample = torch.randn(1, 64, 64, 64).to(torch_device) - spatial_transformer_block = SpatialTransformer( + spatial_transformer_block = Transformer2DModel( in_channels=64, - n_heads=2, - d_head=32, + num_attention_heads=2, + attention_head_dim=32, dropout=0.0, - context_dim=64, + cross_attention_dim=64, ).to(torch_device) with torch.no_grad(): context = torch.randn(1, 4, 64).to(torch_device) - attention_scores = spatial_transformer_block(sample, context) + attention_scores = spatial_transformer_block(sample, context).sample assert attention_scores.shape == (1, 64, 64, 64) output_slice = attention_scores[0, -1, -3:, -3:] @@ -323,6 +324,44 @@ class SpatialTransformerTests(unittest.TestCase): ) assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3) + def test_spatial_transformer_timestep(self): + torch.manual_seed(0) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(0) + + num_embeds_ada_norm = 5 + + sample = torch.randn(1, 64, 64, 64).to(torch_device) + spatial_transformer_block = Transformer2DModel( + in_channels=64, + num_attention_heads=2, + attention_head_dim=32, + dropout=0.0, + cross_attention_dim=64, + num_embeds_ada_norm=num_embeds_ada_norm, + ).to(torch_device) + with torch.no_grad(): + timestep_1 = torch.tensor(1, dtype=torch.long).to(torch_device) + timestep_2 = torch.tensor(2, dtype=torch.long).to(torch_device) + attention_scores_1 = spatial_transformer_block(sample, timestep=timestep_1).sample + attention_scores_2 = spatial_transformer_block(sample, timestep=timestep_2).sample + + assert attention_scores_1.shape == (1, 64, 64, 64) + assert attention_scores_2.shape == (1, 64, 64, 64) + + output_slice_1 = attention_scores_1[0, -1, -3:, -3:] + output_slice_2 = attention_scores_2[0, -1, -3:, -3:] + + expected_slice_1 = torch.tensor( + [-0.1874, -0.9704, -1.4290, -1.3357, 1.5138, 0.3036, -0.0976, -1.1667, 0.1283], device=torch_device + ) + expected_slice_2 = torch.tensor( + [-0.3493, -1.0924, -1.6161, -1.5016, 1.4245, 0.1367, -0.2526, -1.3109, -0.0547], device=torch_device + ) + + assert torch.allclose(output_slice_1.flatten(), expected_slice_1, atol=1e-3) + assert torch.allclose(output_slice_2.flatten(), expected_slice_2, atol=1e-3) + def test_spatial_transformer_dropout(self): torch.manual_seed(0) if torch.cuda.is_available(): @@ -330,18 +369,18 @@ class SpatialTransformerTests(unittest.TestCase): sample = torch.randn(1, 32, 64, 64).to(torch_device) spatial_transformer_block = ( - SpatialTransformer( + Transformer2DModel( in_channels=32, - n_heads=2, - d_head=16, + num_attention_heads=2, + attention_head_dim=16, dropout=0.3, - context_dim=None, + cross_attention_dim=None, ) .to(torch_device) .eval() ) with torch.no_grad(): - attention_scores = spatial_transformer_block(sample) + attention_scores = spatial_transformer_block(sample).sample assert attention_scores.shape == (1, 32, 64, 64) 
output_slice = attention_scores[0, -1, -3:, -3:] @@ -350,3 +389,107 @@ class SpatialTransformerTests(unittest.TestCase): [-1.2448, -0.0190, -0.9471, -1.5140, 0.7069, -1.0144, -2.1077, 0.9099, -1.0091], device=torch_device ) assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3) + + @unittest.skipIf(torch_device == "mps", "MPS does not support float64") + def test_spatial_transformer_discrete(self): + torch.manual_seed(0) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(0) + + num_embed = 5 + + sample = torch.randint(0, num_embed, (1, 32)).to(torch_device) + spatial_transformer_block = ( + Transformer2DModel( + num_attention_heads=1, + attention_head_dim=32, + num_vector_embeds=num_embed, + sample_size=16, + ) + .to(torch_device) + .eval() + ) + + with torch.no_grad(): + attention_scores = spatial_transformer_block(sample).sample + + assert attention_scores.shape == (1, num_embed - 1, 32) + + output_slice = attention_scores[0, -2:, -3:] + + expected_slice = torch.tensor([-0.8957, -1.8370, -1.3390, -0.9152, -0.5187, -1.1702], device=torch_device) + assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3) + + def test_spatial_transformer_default_norm_layers(self): + spatial_transformer_block = Transformer2DModel(num_attention_heads=1, attention_head_dim=32, in_channels=32) + + assert spatial_transformer_block.transformer_blocks[0].norm1.__class__ == nn.LayerNorm + assert spatial_transformer_block.transformer_blocks[0].norm2.__class__ == nn.LayerNorm + assert spatial_transformer_block.transformer_blocks[0].norm3.__class__ == nn.LayerNorm + + def test_spatial_transformer_ada_norm_layers(self): + spatial_transformer_block = Transformer2DModel( + num_attention_heads=1, + attention_head_dim=32, + in_channels=32, + num_embeds_ada_norm=5, + ) + + assert spatial_transformer_block.transformer_blocks[0].norm1.__class__ == AdaLayerNorm + assert spatial_transformer_block.transformer_blocks[0].norm2.__class__ == AdaLayerNorm + assert spatial_transformer_block.transformer_blocks[0].norm3.__class__ == nn.LayerNorm + + def test_spatial_transformer_default_ff_layers(self): + spatial_transformer_block = Transformer2DModel( + num_attention_heads=1, + attention_head_dim=32, + in_channels=32, + ) + + assert spatial_transformer_block.transformer_blocks[0].ff.net[0].__class__ == GEGLU + assert spatial_transformer_block.transformer_blocks[0].ff.net[1].__class__ == nn.Dropout + assert spatial_transformer_block.transformer_blocks[0].ff.net[2].__class__ == nn.Linear + + dim = 32 + inner_dim = 128 + + # First dimension change + assert spatial_transformer_block.transformer_blocks[0].ff.net[0].proj.in_features == dim + # NOTE: inner_dim * 2 because GEGLU + assert spatial_transformer_block.transformer_blocks[0].ff.net[0].proj.out_features == inner_dim * 2 + + # Second dimension change + assert spatial_transformer_block.transformer_blocks[0].ff.net[2].in_features == inner_dim + assert spatial_transformer_block.transformer_blocks[0].ff.net[2].out_features == dim + + def test_spatial_transformer_geglu_approx_ff_layers(self): + spatial_transformer_block = Transformer2DModel( + num_attention_heads=1, + attention_head_dim=32, + in_channels=32, + activation_fn="geglu-approximate", + ) + + assert spatial_transformer_block.transformer_blocks[0].ff.net[0].__class__ == ApproximateGELU + assert spatial_transformer_block.transformer_blocks[0].ff.net[1].__class__ == nn.Dropout + assert spatial_transformer_block.transformer_blocks[0].ff.net[2].__class__ == nn.Linear + + dim = 32 + inner_dim = 
128 + + # First dimension change + assert spatial_transformer_block.transformer_blocks[0].ff.net[0].proj.in_features == dim + assert spatial_transformer_block.transformer_blocks[0].ff.net[0].proj.out_features == inner_dim + + # Second dimension change + assert spatial_transformer_block.transformer_blocks[0].ff.net[2].in_features == inner_dim + assert spatial_transformer_block.transformer_blocks[0].ff.net[2].out_features == dim + + def test_spatial_transformer_attention_bias(self): + spatial_transformer_block = Transformer2DModel( + num_attention_heads=1, attention_head_dim=32, in_channels=32, attention_bias=True + ) + + assert spatial_transformer_block.transformer_blocks[0].attn1.to_q.bias is not None + assert spatial_transformer_block.transformer_blocks[0].attn1.to_k.bias is not None + assert spatial_transformer_block.transformer_blocks[0].attn1.to_v.bias is not None diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 9285eed20f..29186aaac9 100755 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -19,6 +19,7 @@ from typing import Dict, List, Tuple import numpy as np import torch +import torch.nn.functional as F from diffusers import ( DDIMScheduler, @@ -29,6 +30,7 @@ from diffusers import ( LMSDiscreteScheduler, PNDMScheduler, ScoreSdeVeScheduler, + VQDiffusionScheduler, ) from diffusers.utils import torch_device @@ -85,12 +87,18 @@ class SchedulerCommonTest(unittest.TestCase): if scheduler_class in (EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, LMSDiscreteScheduler): time_step = float(time_step) - sample = self.dummy_sample - residual = 0.1 * sample - scheduler_config = self.get_scheduler_config(**config) scheduler = scheduler_class(**scheduler_config) + if scheduler_class == VQDiffusionScheduler: + num_vec_classes = scheduler_config["num_vec_classes"] + sample = self.dummy_sample(num_vec_classes) + model = self.dummy_model(num_vec_classes) + residual = model(sample, time_step) + else: + sample = self.dummy_sample + residual = 0.1 * sample + with tempfile.TemporaryDirectory() as tmpdirname: scheduler.save_config(tmpdirname) new_scheduler = scheduler_class.from_config(tmpdirname) @@ -122,12 +130,18 @@ class SchedulerCommonTest(unittest.TestCase): if scheduler_class in (EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, LMSDiscreteScheduler): time_step = float(time_step) - sample = self.dummy_sample - residual = 0.1 * sample - scheduler_config = self.get_scheduler_config() scheduler = scheduler_class(**scheduler_config) + if scheduler_class == VQDiffusionScheduler: + num_vec_classes = scheduler_config["num_vec_classes"] + sample = self.dummy_sample(num_vec_classes) + model = self.dummy_model(num_vec_classes) + residual = model(sample, time_step) + else: + sample = self.dummy_sample + residual = 0.1 * sample + with tempfile.TemporaryDirectory() as tmpdirname: scheduler.save_config(tmpdirname) new_scheduler = scheduler_class.from_config(tmpdirname) @@ -154,15 +168,21 @@ class SchedulerCommonTest(unittest.TestCase): num_inference_steps = kwargs.pop("num_inference_steps", None) for scheduler_class in self.scheduler_classes: - sample = self.dummy_sample - residual = 0.1 * sample + timestep = 1 + if scheduler_class in (EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, LMSDiscreteScheduler): + timestep = float(timestep) scheduler_config = self.get_scheduler_config() scheduler = scheduler_class(**scheduler_config) - timestep = 1 - if scheduler_class in (EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, LMSDiscreteScheduler): - timestep = 
float(timestep) + if scheduler_class == VQDiffusionScheduler: + num_vec_classes = scheduler_config["num_vec_classes"] + sample = self.dummy_sample(num_vec_classes) + model = self.dummy_model(num_vec_classes) + residual = model(sample, timestep) + else: + sample = self.dummy_sample + residual = 0.1 * sample with tempfile.TemporaryDirectory() as tmpdirname: scheduler.save_config(tmpdirname) @@ -200,8 +220,14 @@ class SchedulerCommonTest(unittest.TestCase): scheduler_config = self.get_scheduler_config() scheduler = scheduler_class(**scheduler_config) - sample = self.dummy_sample - residual = 0.1 * sample + if scheduler_class == VQDiffusionScheduler: + num_vec_classes = scheduler_config["num_vec_classes"] + sample = self.dummy_sample(num_vec_classes) + model = self.dummy_model(num_vec_classes) + residual = model(sample, timestep_0) + else: + sample = self.dummy_sample + residual = 0.1 * sample if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): scheduler.set_timesteps(num_inference_steps) @@ -255,8 +281,14 @@ class SchedulerCommonTest(unittest.TestCase): scheduler_config = self.get_scheduler_config() scheduler = scheduler_class(**scheduler_config) - sample = self.dummy_sample - residual = 0.1 * sample + if scheduler_class == VQDiffusionScheduler: + num_vec_classes = scheduler_config["num_vec_classes"] + sample = self.dummy_sample(num_vec_classes) + model = self.dummy_model(num_vec_classes) + residual = model(sample, timestep) + else: + sample = self.dummy_sample + residual = 0.1 * sample if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): scheduler.set_timesteps(num_inference_steps) @@ -284,22 +316,26 @@ class SchedulerCommonTest(unittest.TestCase): for scheduler_class in self.scheduler_classes: scheduler_config = self.get_scheduler_config() scheduler = scheduler_class(**scheduler_config) - self.assertTrue( - hasattr(scheduler, "init_noise_sigma"), - f"{scheduler_class} does not implement a required attribute `init_noise_sigma`", - ) - self.assertTrue( - hasattr(scheduler, "scale_model_input"), - f"{scheduler_class} does not implement a required class method `scale_model_input(sample, timestep)`", - ) + + if scheduler_class != VQDiffusionScheduler: + self.assertTrue( + hasattr(scheduler, "init_noise_sigma"), + f"{scheduler_class} does not implement a required attribute `init_noise_sigma`", + ) + self.assertTrue( + hasattr(scheduler, "scale_model_input"), + f"{scheduler_class} does not implement a required class method `scale_model_input(sample," + " timestep)`", + ) self.assertTrue( hasattr(scheduler, "step"), f"{scheduler_class} does not implement a required class method `step(...)`", ) - sample = self.dummy_sample - scaled_sample = scheduler.scale_model_input(sample, 0.0) - self.assertEqual(sample.shape, scaled_sample.shape) + if scheduler_class != VQDiffusionScheduler: + sample = self.dummy_sample + scaled_sample = scheduler.scale_model_input(sample, 0.0) + self.assertEqual(sample.shape, scaled_sample.shape) def test_add_noise_device(self): for scheduler_class in self.scheduler_classes: @@ -1238,3 +1274,53 @@ class IPNDMSchedulerTest(SchedulerCommonTest): result_mean = torch.mean(torch.abs(sample)) assert abs(result_mean.item() - 2540529) < 10 + + +class VQDiffusionSchedulerTest(SchedulerCommonTest): + scheduler_classes = (VQDiffusionScheduler,) + + def get_scheduler_config(self, **kwargs): + config = { + "num_vec_classes": 4097, + "num_train_timesteps": 100, + } + + config.update(**kwargs) + return config + + def dummy_sample(self, 
num_vec_classes): + batch_size = 4 + height = 8 + width = 8 + + sample = torch.randint(0, num_vec_classes, (batch_size, height * width)) + + return sample + + @property + def dummy_sample_deter(self): + assert False + + def dummy_model(self, num_vec_classes): + def model(sample, t, *args): + batch_size, num_latent_pixels = sample.shape + logits = torch.rand((batch_size, num_vec_classes - 1, num_latent_pixels)) + return_value = F.log_softmax(logits.double(), dim=1).float() + return return_value + + return model + + def test_timesteps(self): + for timesteps in [2, 5, 100, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_num_vec_classes(self): + for num_vec_classes in [5, 100, 1000, 4000]: + self.check_over_configs(num_vec_classes=num_vec_classes) + + def test_time_indices(self): + for t in [0, 50, 99]: + self.check_over_forward(time_step=t) + + def test_add_noise_device(self): + pass From 7482178162b779506a54538f2cf2565c8b88c597 Mon Sep 17 00:00:00 2001 From: Suraj Patil Date: Thu, 3 Nov 2022 17:25:57 +0100 Subject: [PATCH 21/88] =?UTF-8?q?default=20fast=20model=20loading=20?= =?UTF-8?q?=F0=9F=94=A5=20(#1115)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * make accelerate hard dep * default fast init * move params to cpu when device map is None * handle device_map=None * handle torch < 1.9 * remove device_map="auto" * style * add accelerate in torch extra * remove accelerate from extras["test"] * raise an error if torch is available but not accelerate * update installation docs * Apply suggestions from code review Co-authored-by: Patrick von Platen * improve defautl loading speed even further, allow disabling fats loading * address review comments * adapt the tests * fix test_stable_diffusion_fast_load * fix test_read_init * temp fix for dummy checks * Trigger Build * Apply suggestions from code review Co-authored-by: Anton Lozhkov Co-authored-by: Patrick von Platen Co-authored-by: Anton Lozhkov --- README.md | 16 +- docs/source/installation.mdx | 40 +- setup.py | 3 +- src/diffusers/__init__.py | 8 + src/diffusers/modeling_utils.py | 47 ++- src/diffusers/pipeline_utils.py | 10 + .../dummy_torch_and_accelerate_objects.py | 392 ++++++++++++++++++ tests/models/test_models_unet_1d.py | 2 +- tests/models/test_models_unet_2d.py | 28 +- tests/models/test_models_vae.py | 5 +- .../dance_diffusion/test_dance_diffusion.py | 6 +- tests/pipelines/ddim/test_ddim.py | 4 +- tests/pipelines/ddpm/test_ddpm.py | 2 +- tests/pipelines/karras_ve/test_karras_ve.py | 2 +- .../latent_diffusion/test_latent_diffusion.py | 4 +- tests/pipelines/pndm/test_pndm.py | 2 +- .../score_sde_ve/test_score_sde_ve.py | 2 +- .../stable_diffusion/test_stable_diffusion.py | 38 +- .../test_stable_diffusion_img2img.py | 6 +- .../test_stable_diffusion_inpaint.py | 11 +- .../test_stable_diffusion_inpaint_legacy.py | 9 +- tests/repo_utils/test_check_dummies.py | 4 +- tests/test_pipelines.py | 32 +- 23 files changed, 564 insertions(+), 109 deletions(-) create mode 100644 src/diffusers/utils/dummy_torch_and_accelerate_objects.py diff --git a/README.md b/README.md index 03365a2a50..9c44cff2c8 100644 --- a/README.md +++ b/README.md @@ -27,10 +27,12 @@ More precisely, 🤗 Diffusers offers: ## Installation +### For PyTorch + **With `pip`** ```bash -pip install --upgrade diffusers +pip install --upgrade diffusers[torch] ``` **With `conda`** @@ -39,6 +41,14 @@ pip install --upgrade diffusers conda install -c conda-forge diffusers ``` +### For Flax + +**With `pip`** + +```bash 
+pip install --upgrade diffusers[flax]
+```
+
 **Apple Silicon (M1/M2) support**
 
 Please, refer to [the documentation](https://huggingface.co/docs/diffusers/optimization/mps).
@@ -354,7 +364,7 @@ There are many ways to try running Diffusers! Here we outline code-focused tools
 If you want to run the code yourself 💻, you can try out:
 - [Text-to-Image Latent Diffusion](https://huggingface.co/CompVis/ldm-text2im-large-256)
 ```python
-# !pip install diffusers transformers
+# !pip install diffusers["torch"] transformers
 from diffusers import DiffusionPipeline
 
 device = "cuda"
@@ -373,7 +383,7 @@ image.save("squirrel.png")
 ```
 - [Unconditional Diffusion with discrete scheduler](https://huggingface.co/google/ddpm-celebahq-256)
 ```python
-# !pip install diffusers
+# !pip install diffusers["torch"]
 from diffusers import DDPMPipeline, DDIMPipeline, PNDMPipeline
 
 model_id = "google/ddpm-celebahq-256"
diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx
index 1c9460eca8..9c93b35956 100644
--- a/docs/source/installation.mdx
+++ b/docs/source/installation.mdx
@@ -12,9 +12,12 @@ specific language governing permissions and limitations under the License.
 
 # Installation
 
-Install Diffusers for with PyTorch. Support for other libraries will come in the future
+Install 🤗 Diffusers for whichever deep learning library you’re working with.
 
-🤗 Diffusers is tested on Python 3.7+, and PyTorch 1.7.0+.
+🤗 Diffusers is tested on Python 3.7+, PyTorch 1.7.0+ and Flax. Follow the installation instructions below for the deep learning library you are using:
+
+- [PyTorch](https://pytorch.org/get-started/locally/) installation instructions.
+- [Flax](https://flax.readthedocs.io/en/latest/) installation instructions.
 
 ## Install with pip
 
@@ -36,12 +39,30 @@ source .env/bin/activate
 
 Now you're ready to install 🤗 Diffusers with the following command:
 
+**For PyTorch**
+
 ```bash
-pip install diffusers
+pip install diffusers["torch"]
+```
+
+**For Flax**
+
+```bash
+pip install diffusers["flax"]
 ```
 
 ## Install from source
 
+Before installing `diffusers` from source, make sure you have `torch` and `accelerate` installed.
+
+For `torch` installation, refer to the `torch` [docs](https://pytorch.org/get-started/locally/#start-locally).
+
+To install `accelerate`:
+
+```bash
+pip install accelerate
+```
+
 Install 🤗 Diffusers from source with the following command:
 
 ```bash
@@ -67,7 +88,18 @@ Clone the repository and install 🤗 Diffusers with the following commands:
 ```bash
 git clone https://github.com/huggingface/diffusers.git
 cd diffusers
-pip install -e .
+```
+
+**For PyTorch**
+
+```
+pip install -e ".[torch]"
+```
+
+**For Flax**
+
+```
+pip install -e ".[flax]"
 ```
 
 These commands will link the folder you cloned the repository to and your Python library paths.
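A quick way to confirm that the editable install resolves to your local clone (a minimal sketch for illustration only; the printed version and path depend on your checkout) is:

```python
# Verify that `diffusers` is importable and points at the cloned repository
# rather than a copy installed into site-packages.
import diffusers

print(diffusers.__version__)  # version of the local checkout
print(diffusers.__file__)     # should point inside the cloned diffusers/ folder
```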
diff --git a/setup.py b/setup.py index 8904242a31..becd4cbb87 100644 --- a/setup.py +++ b/setup.py @@ -178,7 +178,6 @@ extras["quality"] = deps_list("black", "isort", "flake8", "hf-doc-builder") extras["docs"] = deps_list("hf-doc-builder") extras["training"] = deps_list("accelerate", "datasets", "tensorboard", "modelcards") extras["test"] = deps_list( - "accelerate", "datasets", "parameterized", "pytest", @@ -188,7 +187,7 @@ extras["test"] = deps_list( "torchvision", "transformers" ) -extras["torch"] = deps_list("torch") +extras["torch"] = deps_list("torch", "accelerate") if os.name == "nt": # windows extras["flax"] = [] # jax is not supported on windows diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index 00052109e3..61ac2425db 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -1,4 +1,5 @@ from .utils import ( + is_accelerate_available, is_flax_available, is_inflect_available, is_onnx_available, @@ -16,6 +17,13 @@ from .onnx_utils import OnnxRuntimeModel from .utils import logging +# This will create an extra dummy file "dummy_torch_and_accelerate_objects.py" +# TODO: (patil-suraj, anton-l) maybe import everything under is_torch_and_accelerate_available +if is_torch_available() and not is_accelerate_available(): + error_msg = "Please install the `accelerate` library to use Diffusers with PyTorch. You can do so by running `pip install diffusers[torch]`. Or if torch is already installed, you can run `pip install accelerate`." # noqa: E501 + raise ImportError(error_msg) + + if is_torch_available(): from .modeling_utils import ModelMixin from .models import AutoencoderKL, Transformer2DModel, UNet1DModel, UNet2DConditionModel, UNet2DModel, VQModel diff --git a/src/diffusers/modeling_utils.py b/src/diffusers/modeling_utils.py index c62caf8028..f469763671 100644 --- a/src/diffusers/modeling_utils.py +++ b/src/diffusers/modeling_utils.py @@ -21,7 +21,9 @@ from typing import Callable, List, Optional, Tuple, Union import torch from torch import Tensor, device -from diffusers.utils import is_accelerate_available +import accelerate +from accelerate.utils import set_module_tensor_to_device +from accelerate.utils.versions import is_torch_version from huggingface_hub import hf_hub_download from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError from requests import HTTPError @@ -268,6 +270,19 @@ class ModelMixin(torch.nn.Module): Mirror source to accelerate downloads in China. If you are from China and have an accessibility problem, you can set this option to resolve it. Note that we do not guarantee the timeliness or safety. Please refer to the mirror site for more information. + device_map (`str` or `Dict[str, Union[int, str, torch.device]]`, *optional*): + A map that specifies where each submodule should go. It doesn't need to be refined to each + parameter/buffer name, once a given module name is inside, every submodule of it will be sent to the + same device. + + To have Accelerate compute the most optimized `device_map` automatically, set `device_map="auto"`. For + more information about each option see [designing a device + map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map). + fast_load (`bool`, *optional*, defaults to `True`): + Speed up model loading by not initializing the weights and only loading the pre-trained weights. This + also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the + model. 
This is only supported when torch version >= 1.9.0. If you are using an older version of torch,
+                this argument will be ignored and the model will be loaded normally.
 
 
@@ -296,6 +311,16 @@ class ModelMixin(torch.nn.Module):
         torch_dtype = kwargs.pop("torch_dtype", None)
         subfolder = kwargs.pop("subfolder", None)
         device_map = kwargs.pop("device_map", None)
+        fast_load = kwargs.pop("fast_load", True)
+
+        # Check if we can handle device_map and dispatching the weights
+        if device_map is not None and not is_torch_version(">=", "1.9.0"):
+            raise NotImplementedError("Loading and dispatching requires torch >= 1.9.0")
+
+        # Fast init is only possible if torch version is >= 1.9.0
+        _INIT_EMPTY_WEIGHTS = fast_load or device_map is not None
+        if _INIT_EMPTY_WEIGHTS and not is_torch_version(">=", "1.9.0"):
+            logger.warn("Loading with `fast_load` requires torch >= 1.9.0. Falling back to normal loading.")
 
         user_agent = {
             "diffusers": __version__,
@@ -378,12 +403,8 @@ class ModelMixin(torch.nn.Module):
 
         # restore default dtype
-        if device_map == "auto":
-            if is_accelerate_available():
-                import accelerate
-            else:
-                raise ImportError("Please install accelerate via `pip install accelerate`")
-
+        if _INIT_EMPTY_WEIGHTS:
+            # Instantiate model with empty weights
             with accelerate.init_empty_weights():
                 model, unused_kwargs = cls.from_config(
                     config_path,
@@ -400,7 +421,17 @@ class ModelMixin(torch.nn.Module):
                     **kwargs,
                 )
 
-            accelerate.load_checkpoint_and_dispatch(model, model_file, device_map)
+            # if device_map is None, load the state dict and move the params from meta device to the cpu
+            if device_map is None:
+                param_device = "cpu"
+                state_dict = load_state_dict(model_file)
+                # move the params from meta device to cpu
+                for param_name, param in state_dict.items():
+                    set_module_tensor_to_device(model, param_name, param_device, value=param)
+            else:  # else let accelerate handle loading and dispatching.
+                # Load weights and dispatch according to the device_map
+                # by default the device_map is None and the weights are loaded on the CPU
+                accelerate.load_checkpoint_and_dispatch(model, model_file, device_map)
 
         loading_info = {
             "missing_keys": [],
diff --git a/src/diffusers/pipeline_utils.py b/src/diffusers/pipeline_utils.py
index 4ba8d2d930..5c248ec1a9 100644
--- a/src/diffusers/pipeline_utils.py
+++ b/src/diffusers/pipeline_utils.py
@@ -380,6 +380,7 @@ class DiffusionPipeline(ConfigMixin):
         provider = kwargs.pop("provider", None)
         sess_options = kwargs.pop("sess_options", None)
         device_map = kwargs.pop("device_map", None)
+        fast_load = kwargs.pop("fast_load", True)
 
         # 1. Download the checkpoints and configs
         # use snapshot download here to get it working from from_pretrained
@@ -572,6 +573,15 @@ class DiffusionPipeline(ConfigMixin):
             and version.parse(version.parse(transformers.__version__).base_version) >= version.parse("4.20.0")
         )
 
+        if is_diffusers_model:
+            loading_kwargs["fast_load"] = fast_load
+
+        # When loading a transformers model, if the device_map is None, the weights will be initialized as opposed to diffusers.
+        # To make default loading faster we set the `low_cpu_mem_usage=fast_load` flag which is `True` by default.
+        # This makes sure that the weights won't be initialized which significantly speeds up loading.
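+        # For illustration: passing `fast_load=False`, e.g. `DiffusionPipeline.from_pretrained(model_id, fast_load=False)`,
+        # disables both of these fast paths, so the sub-models are fully initialized before their weights are loaded.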
+ if is_transformers_model and device_map is None: + loading_kwargs["low_cpu_mem_usage"] = fast_load + if is_diffusers_model or is_transformers_model: loading_kwargs["device_map"] = device_map diff --git a/src/diffusers/utils/dummy_torch_and_accelerate_objects.py b/src/diffusers/utils/dummy_torch_and_accelerate_objects.py new file mode 100644 index 0000000000..a1650f62db --- /dev/null +++ b/src/diffusers/utils/dummy_torch_and_accelerate_objects.py @@ -0,0 +1,392 @@ +# This file is autogenerated by the command `make fix-copies`, do not edit. +# flake8: noqa + +from ..utils import DummyObject, requires_backends + + +class ModelMixin(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class AutoencoderKL(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class UNet1DModel(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class UNet2DConditionModel(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class UNet2DModel(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class VQModel(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +def get_constant_schedule(*args, **kwargs): + requires_backends(get_constant_schedule, ["torch", "accelerate"]) + + +def get_constant_schedule_with_warmup(*args, **kwargs): + requires_backends(get_constant_schedule_with_warmup, ["torch", "accelerate"]) + + +def get_cosine_schedule_with_warmup(*args, **kwargs): + requires_backends(get_cosine_schedule_with_warmup, ["torch", "accelerate"]) + + +def get_cosine_with_hard_restarts_schedule_with_warmup(*args, **kwargs): + requires_backends(get_cosine_with_hard_restarts_schedule_with_warmup, 
["torch", "accelerate"]) + + +def get_linear_schedule_with_warmup(*args, **kwargs): + requires_backends(get_linear_schedule_with_warmup, ["torch", "accelerate"]) + + +def get_polynomial_decay_schedule_with_warmup(*args, **kwargs): + requires_backends(get_polynomial_decay_schedule_with_warmup, ["torch", "accelerate"]) + + +def get_scheduler(*args, **kwargs): + requires_backends(get_scheduler, ["torch", "accelerate"]) + + +class DiffusionPipeline(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class DanceDiffusionPipeline(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class DDIMPipeline(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class DDPMPipeline(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class KarrasVePipeline(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class LDMPipeline(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class PNDMPipeline(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class ScoreSdeVePipeline(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def 
from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class DDIMScheduler(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class DDPMScheduler(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class EulerAncestralDiscreteScheduler(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class EulerDiscreteScheduler(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class IPNDMScheduler(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class KarrasVeScheduler(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class PNDMScheduler(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class SchedulerMixin(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class ScoreSdeVeScheduler(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, 
**kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + +class EMAModel(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) diff --git a/tests/models/test_models_unet_1d.py b/tests/models/test_models_unet_1d.py index 286c7525e2..c274ce4192 100644 --- a/tests/models/test_models_unet_1d.py +++ b/tests/models/test_models_unet_1d.py @@ -28,7 +28,7 @@ class UnetModel1DTests(unittest.TestCase): @slow def test_unet_1d_maestro(self): model_id = "harmonai/maestro-150k" - model = UNet1DModel.from_pretrained(model_id, subfolder="unet", device_map="auto") + model = UNet1DModel.from_pretrained(model_id, subfolder="unet") model.to(torch_device) sample_size = 65536 diff --git a/tests/models/test_models_unet_2d.py b/tests/models/test_models_unet_2d.py index 20371708a4..feee724577 100644 --- a/tests/models/test_models_unet_2d.py +++ b/tests/models/test_models_unet_2d.py @@ -125,9 +125,7 @@ class UNetLDMModelTests(ModelTesterMixin, unittest.TestCase): @unittest.skipIf(torch_device != "cuda", "This test is supposed to run on GPU") def test_from_pretrained_accelerate(self): - model, _ = UNet2DModel.from_pretrained( - "fusing/unet-ldm-dummy-update", output_loading_info=True, device_map="auto" - ) + model, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True) model.to(torch_device) image = model(**self.dummy_input).sample @@ -135,9 +133,8 @@ class UNetLDMModelTests(ModelTesterMixin, unittest.TestCase): @unittest.skipIf(torch_device != "cuda", "This test is supposed to run on GPU") def test_from_pretrained_accelerate_wont_change_results(self): - model_accelerate, _ = UNet2DModel.from_pretrained( - "fusing/unet-ldm-dummy-update", output_loading_info=True, device_map="auto" - ) + # by defautl model loading will use accelerate as `fast_load=True` + model_accelerate, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True) model_accelerate.to(torch_device) model_accelerate.eval() @@ -159,7 +156,7 @@ class UNetLDMModelTests(ModelTesterMixin, unittest.TestCase): gc.collect() model_normal_load, _ = UNet2DModel.from_pretrained( - "fusing/unet-ldm-dummy-update", output_loading_info=True, device_map="auto" + "fusing/unet-ldm-dummy-update", output_loading_info=True, fast_init=False ) model_normal_load.to(torch_device) model_normal_load.eval() @@ -173,9 +170,8 @@ class UNetLDMModelTests(ModelTesterMixin, unittest.TestCase): gc.collect() tracemalloc.start() - model_accelerate, _ = UNet2DModel.from_pretrained( - "fusing/unet-ldm-dummy-update", output_loading_info=True, device_map="auto" - ) + # by defautl model loading will use accelerate as `fast_load=True` + model_accelerate, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True) model_accelerate.to(torch_device) model_accelerate.eval() _, peak_accelerate = tracemalloc.get_traced_memory() @@ -184,7 +180,9 @@ class UNetLDMModelTests(ModelTesterMixin, unittest.TestCase): torch.cuda.empty_cache() gc.collect() - model_normal_load, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True) + model_normal_load, _ 
= UNet2DModel.from_pretrained( + "fusing/unet-ldm-dummy-update", output_loading_info=True, fast_init=False + ) model_normal_load.to(torch_device) model_normal_load.eval() _, peak_normal = tracemalloc.get_traced_memory() @@ -348,9 +346,7 @@ class NCSNppModelTests(ModelTesterMixin, unittest.TestCase): @slow def test_from_pretrained_hub(self): - model, loading_info = UNet2DModel.from_pretrained( - "google/ncsnpp-celebahq-256", output_loading_info=True, device_map="auto" - ) + model, loading_info = UNet2DModel.from_pretrained("google/ncsnpp-celebahq-256", output_loading_info=True) self.assertIsNotNone(model) self.assertEqual(len(loading_info["missing_keys"]), 0) @@ -364,7 +360,7 @@ class NCSNppModelTests(ModelTesterMixin, unittest.TestCase): @slow def test_output_pretrained_ve_mid(self): - model = UNet2DModel.from_pretrained("google/ncsnpp-celebahq-256", device_map="auto") + model = UNet2DModel.from_pretrained("google/ncsnpp-celebahq-256") model.to(torch_device) torch.manual_seed(0) @@ -439,7 +435,7 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase): torch_dtype = torch.float16 if fp16 else torch.float32 model = UNet2DConditionModel.from_pretrained( - model_id, subfolder="unet", torch_dtype=torch_dtype, revision=revision, device_map="auto" + model_id, subfolder="unet", torch_dtype=torch_dtype, revision=revision ) model.to(torch_device).eval() diff --git a/tests/models/test_models_vae.py b/tests/models/test_models_vae.py index 3da7b50e34..1693657561 100644 --- a/tests/models/test_models_vae.py +++ b/tests/models/test_models_vae.py @@ -155,7 +155,10 @@ class AutoencoderKLIntegrationTests(unittest.TestCase): torch_dtype = torch.float16 if fp16 else torch.float32 model = AutoencoderKL.from_pretrained( - model_id, subfolder="vae", torch_dtype=torch_dtype, revision=revision, device_map="auto" + model_id, + subfolder="vae", + torch_dtype=torch_dtype, + revision=revision, ) model.to(torch_device).eval() diff --git a/tests/pipelines/dance_diffusion/test_dance_diffusion.py b/tests/pipelines/dance_diffusion/test_dance_diffusion.py index 737d1c57d1..72e67e4479 100644 --- a/tests/pipelines/dance_diffusion/test_dance_diffusion.py +++ b/tests/pipelines/dance_diffusion/test_dance_diffusion.py @@ -86,7 +86,7 @@ class PipelineIntegrationTests(unittest.TestCase): def test_dance_diffusion(self): device = torch_device - pipe = DanceDiffusionPipeline.from_pretrained("harmonai/maestro-150k", device_map="auto") + pipe = DanceDiffusionPipeline.from_pretrained("harmonai/maestro-150k") pipe = pipe.to(device) pipe.set_progress_bar_config(disable=None) @@ -103,9 +103,7 @@ class PipelineIntegrationTests(unittest.TestCase): def test_dance_diffusion_fp16(self): device = torch_device - pipe = DanceDiffusionPipeline.from_pretrained( - "harmonai/maestro-150k", torch_dtype=torch.float16, device_map="auto" - ) + pipe = DanceDiffusionPipeline.from_pretrained("harmonai/maestro-150k", torch_dtype=torch.float16) pipe = pipe.to(device) pipe.set_progress_bar_config(disable=None) diff --git a/tests/pipelines/ddim/test_ddim.py b/tests/pipelines/ddim/test_ddim.py index 41280a9bb4..4445fe7fee 100644 --- a/tests/pipelines/ddim/test_ddim.py +++ b/tests/pipelines/ddim/test_ddim.py @@ -78,7 +78,7 @@ class DDIMPipelineIntegrationTests(unittest.TestCase): def test_inference_ema_bedroom(self): model_id = "google/ddpm-ema-bedroom-256" - unet = UNet2DModel.from_pretrained(model_id, device_map="auto") + unet = UNet2DModel.from_pretrained(model_id) scheduler = DDIMScheduler.from_config(model_id) ddpm = DDIMPipeline(unet=unet, 
scheduler=scheduler) @@ -97,7 +97,7 @@ class DDIMPipelineIntegrationTests(unittest.TestCase): def test_inference_cifar10(self): model_id = "google/ddpm-cifar10-32" - unet = UNet2DModel.from_pretrained(model_id, device_map="auto") + unet = UNet2DModel.from_pretrained(model_id) scheduler = DDIMScheduler() ddim = DDIMPipeline(unet=unet, scheduler=scheduler) diff --git a/tests/pipelines/ddpm/test_ddpm.py b/tests/pipelines/ddpm/test_ddpm.py index c6b24309ce..c58e2db38f 100644 --- a/tests/pipelines/ddpm/test_ddpm.py +++ b/tests/pipelines/ddpm/test_ddpm.py @@ -38,7 +38,7 @@ class DDPMPipelineIntegrationTests(unittest.TestCase): def test_inference_cifar10(self): model_id = "google/ddpm-cifar10-32" - unet = UNet2DModel.from_pretrained(model_id, device_map="auto") + unet = UNet2DModel.from_pretrained(model_id) scheduler = DDPMScheduler.from_config(model_id) ddpm = DDPMPipeline(unet=unet, scheduler=scheduler) diff --git a/tests/pipelines/karras_ve/test_karras_ve.py b/tests/pipelines/karras_ve/test_karras_ve.py index caca35f693..1fafa1cb40 100644 --- a/tests/pipelines/karras_ve/test_karras_ve.py +++ b/tests/pipelines/karras_ve/test_karras_ve.py @@ -70,7 +70,7 @@ class KarrasVePipelineFastTests(PipelineTesterMixin, unittest.TestCase): class KarrasVePipelineIntegrationTests(unittest.TestCase): def test_inference(self): model_id = "google/ncsnpp-celebahq-256" - model = UNet2DModel.from_pretrained(model_id, device_map="auto") + model = UNet2DModel.from_pretrained(model_id) scheduler = KarrasVeScheduler() pipe = KarrasVePipeline(unet=model, scheduler=scheduler) diff --git a/tests/pipelines/latent_diffusion/test_latent_diffusion.py b/tests/pipelines/latent_diffusion/test_latent_diffusion.py index beb30a24f6..085cdb4e76 100644 --- a/tests/pipelines/latent_diffusion/test_latent_diffusion.py +++ b/tests/pipelines/latent_diffusion/test_latent_diffusion.py @@ -121,7 +121,7 @@ class LDMTextToImagePipelineFastTests(PipelineTesterMixin, unittest.TestCase): @require_torch class LDMTextToImagePipelineIntegrationTests(unittest.TestCase): def test_inference_text2img(self): - ldm = LDMTextToImagePipeline.from_pretrained("CompVis/ldm-text2im-large-256", device_map="auto") + ldm = LDMTextToImagePipeline.from_pretrained("CompVis/ldm-text2im-large-256") ldm.to(torch_device) ldm.set_progress_bar_config(disable=None) @@ -138,7 +138,7 @@ class LDMTextToImagePipelineIntegrationTests(unittest.TestCase): assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 def test_inference_text2img_fast(self): - ldm = LDMTextToImagePipeline.from_pretrained("CompVis/ldm-text2im-large-256", device_map="auto") + ldm = LDMTextToImagePipeline.from_pretrained("CompVis/ldm-text2im-large-256") ldm.to(torch_device) ldm.set_progress_bar_config(disable=None) diff --git a/tests/pipelines/pndm/test_pndm.py b/tests/pipelines/pndm/test_pndm.py index 02f5b1fe4f..5d9212223e 100644 --- a/tests/pipelines/pndm/test_pndm.py +++ b/tests/pipelines/pndm/test_pndm.py @@ -71,7 +71,7 @@ class PNDMPipelineIntegrationTests(unittest.TestCase): def test_inference_cifar10(self): model_id = "google/ddpm-cifar10-32" - unet = UNet2DModel.from_pretrained(model_id, device_map="auto") + unet = UNet2DModel.from_pretrained(model_id) scheduler = PNDMScheduler() pndm = PNDMPipeline(unet=unet, scheduler=scheduler) diff --git a/tests/pipelines/score_sde_ve/test_score_sde_ve.py b/tests/pipelines/score_sde_ve/test_score_sde_ve.py index e2f7bc22ec..55dcc1cea1 100644 --- a/tests/pipelines/score_sde_ve/test_score_sde_ve.py +++ b/tests/pipelines/score_sde_ve/test_score_sde_ve.py 
@@ -72,7 +72,7 @@ class ScoreSdeVeipelineFastTests(PipelineTesterMixin, unittest.TestCase): class ScoreSdeVePipelineIntegrationTests(unittest.TestCase): def test_inference(self): model_id = "google/ncsnpp-church-256" - model = UNet2DModel.from_pretrained(model_id, device_map="auto") + model = UNet2DModel.from_pretrained(model_id) scheduler = ScoreSdeVeScheduler.from_config(model_id) diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py index ded2470cc2..0f77987355 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py @@ -631,7 +631,7 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase): def test_stable_diffusion(self): # make sure here that pndm scheduler skips prk - sd_pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-1", device_map="auto") + sd_pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-1") sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) @@ -653,9 +653,7 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase): def test_stable_diffusion_fast_ddim(self): scheduler = DDIMScheduler.from_config("CompVis/stable-diffusion-v1-1", subfolder="scheduler") - sd_pipe = StableDiffusionPipeline.from_pretrained( - "CompVis/stable-diffusion-v1-1", scheduler=scheduler, device_map="auto" - ) + sd_pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-1", scheduler=scheduler) sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) @@ -674,7 +672,7 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase): def test_lms_stable_diffusion_pipeline(self): model_id = "CompVis/stable-diffusion-v1-1" - pipe = StableDiffusionPipeline.from_pretrained(model_id, device_map="auto").to(torch_device) + pipe = StableDiffusionPipeline.from_pretrained(model_id).to(torch_device) pipe.set_progress_bar_config(disable=None) scheduler = LMSDiscreteScheduler.from_config(model_id, subfolder="scheduler") pipe.scheduler = scheduler @@ -693,9 +691,7 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase): def test_stable_diffusion_memory_chunking(self): torch.cuda.reset_peak_memory_stats() model_id = "CompVis/stable-diffusion-v1-4" - pipe = StableDiffusionPipeline.from_pretrained( - model_id, revision="fp16", torch_dtype=torch.float16, device_map="auto" - ) + pipe = StableDiffusionPipeline.from_pretrained(model_id, revision="fp16", torch_dtype=torch.float16) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) @@ -732,9 +728,7 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase): def test_stable_diffusion_text2img_pipeline_fp16(self): torch.cuda.reset_peak_memory_stats() model_id = "CompVis/stable-diffusion-v1-4" - pipe = StableDiffusionPipeline.from_pretrained( - model_id, revision="fp16", device_map="auto", torch_dtype=torch.float16 - ) + pipe = StableDiffusionPipeline.from_pretrained(model_id, revision="fp16", torch_dtype=torch.float16) pipe = pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) @@ -767,11 +761,7 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase): expected_image = np.array(expected_image, dtype=np.float32) / 255.0 model_id = "CompVis/stable-diffusion-v1-4" - pipe = StableDiffusionPipeline.from_pretrained( - model_id, - safety_checker=None, - device_map="auto", - ) + pipe = StableDiffusionPipeline.from_pretrained(model_id, 
safety_checker=None) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) pipe.enable_attention_slicing() @@ -812,7 +802,7 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase): test_callback_fn.has_been_called = False pipe = StableDiffusionPipeline.from_pretrained( - "CompVis/stable-diffusion-v1-4", revision="fp16", torch_dtype=torch.float16, device_map="auto" + "CompVis/stable-diffusion-v1-4", revision="fp16", torch_dtype=torch.float16 ) pipe = pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) @@ -833,23 +823,23 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase): assert test_callback_fn.has_been_called assert number_of_steps == 51 - def test_stable_diffusion_accelerate_auto_device(self): + def test_stable_diffusion_fast_load(self): pipeline_id = "CompVis/stable-diffusion-v1-4" start_time = time.time() - pipeline_normal_load = StableDiffusionPipeline.from_pretrained( + pipeline_fast_load = StableDiffusionPipeline.from_pretrained( pipeline_id, revision="fp16", torch_dtype=torch.float16 ) - pipeline_normal_load.to(torch_device) - normal_load_time = time.time() - start_time + pipeline_fast_load.to(torch_device) + fast_load_time = time.time() - start_time start_time = time.time() _ = StableDiffusionPipeline.from_pretrained( - pipeline_id, revision="fp16", torch_dtype=torch.float16, use_auth_token=True, device_map="auto" + pipeline_id, revision="fp16", torch_dtype=torch.float16, use_auth_token=True, fast_load=False ) - meta_device_load_time = time.time() - start_time + normal_load_time = time.time() - start_time - assert 2 * meta_device_load_time < normal_load_time + assert 2 * fast_load_time < normal_load_time @unittest.skipIf(torch_device == "cpu", "This test is supposed to run on GPU") def test_stable_diffusion_pipeline_with_unet_on_gpu_only(self): diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py index 78d001e3c7..f5a92fac87 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py @@ -488,7 +488,6 @@ class StableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase): pipe = StableDiffusionImg2ImgPipeline.from_pretrained( model_id, safety_checker=None, - device_map="auto", ) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) @@ -529,7 +528,6 @@ class StableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase): model_id, scheduler=lms, safety_checker=None, - device_map="auto", ) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) @@ -581,7 +579,9 @@ class StableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase): init_image = init_image.resize((768, 512)) pipe = StableDiffusionImg2ImgPipeline.from_pretrained( - "CompVis/stable-diffusion-v1-4", revision="fp16", torch_dtype=torch.float16, device_map="auto" + "CompVis/stable-diffusion-v1-4", + revision="fp16", + torch_dtype=torch.float16, ) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py index f5a8b3cf9e..44a7a3249a 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py @@ -284,11 +284,7 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): ) model_id = 
"runwayml/stable-diffusion-inpainting" - pipe = StableDiffusionInpaintPipeline.from_pretrained( - model_id, - safety_checker=None, - device_map="auto", - ) + pipe = StableDiffusionInpaintPipeline.from_pretrained(model_id, safety_checker=None) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) pipe.enable_attention_slicing() @@ -328,7 +324,6 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): revision="fp16", torch_dtype=torch.float16, safety_checker=None, - device_map="auto", ) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) @@ -365,9 +360,7 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): model_id = "runwayml/stable-diffusion-inpainting" pndm = PNDMScheduler.from_config(model_id, subfolder="scheduler") - pipe = StableDiffusionInpaintPipeline.from_pretrained( - model_id, safety_checker=None, scheduler=pndm, device_map="auto" - ) + pipe = StableDiffusionInpaintPipeline.from_pretrained(model_id, safety_checker=None, scheduler=pndm) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) pipe.enable_attention_slicing() diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint_legacy.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint_legacy.py index 81deba67f2..c5b2572fb7 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint_legacy.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint_legacy.py @@ -364,11 +364,7 @@ class StableDiffusionInpaintLegacyPipelineIntegrationTests(unittest.TestCase): ) model_id = "CompVis/stable-diffusion-v1-4" - pipe = StableDiffusionInpaintPipeline.from_pretrained( - model_id, - safety_checker=None, - device_map="auto", - ) + pipe = StableDiffusionInpaintPipeline.from_pretrained(model_id, safety_checker=None) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) pipe.enable_attention_slicing() @@ -411,7 +407,6 @@ class StableDiffusionInpaintLegacyPipelineIntegrationTests(unittest.TestCase): model_id, scheduler=lms, safety_checker=None, - device_map="auto", ) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) @@ -468,7 +463,7 @@ class StableDiffusionInpaintLegacyPipelineIntegrationTests(unittest.TestCase): ) pipe = StableDiffusionInpaintPipeline.from_pretrained( - "CompVis/stable-diffusion-v1-4", revision="fp16", torch_dtype=torch.float16, device_map="auto" + "CompVis/stable-diffusion-v1-4", revision="fp16", torch_dtype=torch.float16 ) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) diff --git a/tests/repo_utils/test_check_dummies.py b/tests/repo_utils/test_check_dummies.py index d8fa9ce105..0331b5e8c2 100644 --- a/tests/repo_utils/test_check_dummies.py +++ b/tests/repo_utils/test_check_dummies.py @@ -52,13 +52,13 @@ class CheckDummiesTester(unittest.TestCase): def test_read_init(self): objects = read_init() # We don't assert on the exact list of keys to allow for smooth grow of backend-specific objects - self.assertIn("torch", objects) + self.assertIn("torch_and_accelerate", objects) self.assertIn("torch_and_transformers", objects) self.assertIn("flax_and_transformers", objects) self.assertIn("torch_and_transformers_and_onnx", objects) # Likewise, we can't assert on the exact content of a key - self.assertIn("UNet2DModel", objects["torch"]) + self.assertIn("UNet2DModel", objects["torch_and_accelerate"]) self.assertIn("FlaxUNet2DConditionModel", objects["flax"]) self.assertIn("StableDiffusionPipeline", objects["torch_and_transformers"]) 
self.assertIn("FlaxStableDiffusionPipeline", objects["flax_and_transformers"]) diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index 1654518f1e..b8316075fa 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -128,7 +128,7 @@ class CustomPipelineTests(unittest.TestCase): def test_load_pipeline_from_git(self): clip_model_id = "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" - feature_extractor = CLIPFeatureExtractor.from_pretrained(clip_model_id, device_map="auto") + feature_extractor = CLIPFeatureExtractor.from_pretrained(clip_model_id) clip_model = CLIPModel.from_pretrained(clip_model_id, torch_dtype=torch.float16) pipeline = DiffusionPipeline.from_pretrained( @@ -138,7 +138,6 @@ class CustomPipelineTests(unittest.TestCase): feature_extractor=feature_extractor, torch_dtype=torch.float16, revision="fp16", - device_map="auto", ) pipeline.enable_attention_slicing() pipeline = pipeline.to(torch_device) @@ -333,9 +332,7 @@ class PipelineSlowTests(unittest.TestCase): def test_smart_download(self): model_id = "hf-internal-testing/unet-pipeline-dummy" with tempfile.TemporaryDirectory() as tmpdirname: - _ = DiffusionPipeline.from_pretrained( - model_id, cache_dir=tmpdirname, force_download=True, device_map="auto" - ) + _ = DiffusionPipeline.from_pretrained(model_id, cache_dir=tmpdirname, force_download=True) local_repo_name = "--".join(["models"] + model_id.split("/")) snapshot_dir = os.path.join(tmpdirname, local_repo_name, "snapshots") snapshot_dir = os.path.join(snapshot_dir, os.listdir(snapshot_dir)[0]) @@ -359,7 +356,10 @@ class PipelineSlowTests(unittest.TestCase): with tempfile.TemporaryDirectory() as tmpdirname: with CaptureLogger(logger) as cap_logger: DiffusionPipeline.from_pretrained( - model_id, not_used=True, cache_dir=tmpdirname, force_download=True, device_map="auto" + model_id, + not_used=True, + cache_dir=tmpdirname, + force_download=True, ) assert cap_logger.out == "Keyword arguments {'not_used': True} not recognized.\n" @@ -383,7 +383,7 @@ class PipelineSlowTests(unittest.TestCase): with tempfile.TemporaryDirectory() as tmpdirname: ddpm.save_pretrained(tmpdirname) - new_ddpm = DDPMPipeline.from_pretrained(tmpdirname, device_map="auto") + new_ddpm = DDPMPipeline.from_pretrained(tmpdirname) new_ddpm.to(torch_device) generator = torch.manual_seed(0) @@ -399,11 +399,11 @@ class PipelineSlowTests(unittest.TestCase): scheduler = DDPMScheduler(num_train_timesteps=10) - ddpm = DDPMPipeline.from_pretrained(model_path, scheduler=scheduler, device_map="auto") + ddpm = DDPMPipeline.from_pretrained(model_path, scheduler=scheduler) ddpm = ddpm.to(torch_device) ddpm.set_progress_bar_config(disable=None) - ddpm_from_hub = DiffusionPipeline.from_pretrained(model_path, scheduler=scheduler, device_map="auto") + ddpm_from_hub = DiffusionPipeline.from_pretrained(model_path, scheduler=scheduler) ddpm_from_hub = ddpm_from_hub.to(torch_device) ddpm_from_hub.set_progress_bar_config(disable=None) @@ -421,14 +421,12 @@ class PipelineSlowTests(unittest.TestCase): scheduler = DDPMScheduler(num_train_timesteps=10) # pass unet into DiffusionPipeline - unet = UNet2DModel.from_pretrained(model_path, device_map="auto") - ddpm_from_hub_custom_model = DiffusionPipeline.from_pretrained( - model_path, unet=unet, scheduler=scheduler, device_map="auto" - ) + unet = UNet2DModel.from_pretrained(model_path) + ddpm_from_hub_custom_model = DiffusionPipeline.from_pretrained(model_path, unet=unet, scheduler=scheduler) ddpm_from_hub_custom_model = ddpm_from_hub_custom_model.to(torch_device) 
ddpm_from_hub_custom_model.set_progress_bar_config(disable=None) - ddpm_from_hub = DiffusionPipeline.from_pretrained(model_path, scheduler=scheduler, device_map="auto") + ddpm_from_hub = DiffusionPipeline.from_pretrained(model_path, scheduler=scheduler) ddpm_from_hub = ddpm_from_hub.to(torch_device) ddpm_from_hub_custom_model.set_progress_bar_config(disable=None) @@ -443,7 +441,7 @@ class PipelineSlowTests(unittest.TestCase): def test_output_format(self): model_path = "google/ddpm-cifar10-32" - pipe = DDIMPipeline.from_pretrained(model_path, device_map="auto") + pipe = DDIMPipeline.from_pretrained(model_path) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) @@ -467,7 +465,7 @@ class PipelineSlowTests(unittest.TestCase): def test_ddpm_ddim_equality(self, seed): model_id = "google/ddpm-cifar10-32" - unet = UNet2DModel.from_pretrained(model_id, device_map="auto") + unet = UNet2DModel.from_pretrained(model_id) ddpm_scheduler = DDPMScheduler() ddim_scheduler = DDIMScheduler() @@ -498,7 +496,7 @@ class PipelineSlowTests(unittest.TestCase): def test_ddpm_ddim_equality_batched(self, seed): model_id = "google/ddpm-cifar10-32" - unet = UNet2DModel.from_pretrained(model_id, device_map="auto") + unet = UNet2DModel.from_pretrained(model_id) ddpm_scheduler = DDPMScheduler() ddim_scheduler = DDIMScheduler() From 988c82227db1a41846a9aae5c83750dcfc334f66 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 3 Nov 2022 17:32:39 +0100 Subject: [PATCH 22/88] fix copies --- .../dummy_torch_and_accelerate_objects.py | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/src/diffusers/utils/dummy_torch_and_accelerate_objects.py b/src/diffusers/utils/dummy_torch_and_accelerate_objects.py index a1650f62db..c2c423eeab 100644 --- a/src/diffusers/utils/dummy_torch_and_accelerate_objects.py +++ b/src/diffusers/utils/dummy_torch_and_accelerate_objects.py @@ -34,6 +34,21 @@ class AutoencoderKL(metaclass=DummyObject): requires_backends(cls, ["torch", "accelerate"]) +class Transformer2DModel(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + class UNet1DModel(metaclass=DummyObject): _backends = ["torch", "accelerate"] @@ -227,6 +242,21 @@ class PNDMPipeline(metaclass=DummyObject): requires_backends(cls, ["torch", "accelerate"]) +class RePaintPipeline(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + class ScoreSdeVePipeline(metaclass=DummyObject): _backends = ["torch", "accelerate"] @@ -242,6 +272,21 @@ class ScoreSdeVePipeline(metaclass=DummyObject): requires_backends(cls, ["torch", "accelerate"]) +class VQDiffusionPipeline(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, 
**kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + class DDIMScheduler(metaclass=DummyObject): _backends = ["torch", "accelerate"] @@ -347,6 +392,21 @@ class PNDMScheduler(metaclass=DummyObject): requires_backends(cls, ["torch", "accelerate"]) +class RePaintScheduler(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + class SchedulerMixin(metaclass=DummyObject): _backends = ["torch", "accelerate"] @@ -377,6 +437,21 @@ class ScoreSdeVeScheduler(metaclass=DummyObject): requires_backends(cls, ["torch", "accelerate"]) +class VQDiffusionScheduler(metaclass=DummyObject): + _backends = ["torch", "accelerate"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "accelerate"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "accelerate"]) + + class EMAModel(metaclass=DummyObject): _backends = ["torch", "accelerate"] From 42bb459457d77d6185f74cbc32f2a08b08876af5 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 3 Nov 2022 18:11:18 +0100 Subject: [PATCH 23/88] [Low cpu memory] Correct naming and improve default usage (#1122) * correct naming * finish * Apply suggestions from code review * Apply suggestions from code review Co-authored-by: Suraj Patil Co-authored-by: Suraj Patil --- src/diffusers/modeling_utils.py | 34 ++++++++++---- src/diffusers/pipeline_utils.py | 44 +++++++++++++++---- tests/models/test_models_unet_2d.py | 8 ++-- .../stable_diffusion/test_stable_diffusion.py | 12 ++--- 4 files changed, 71 insertions(+), 27 deletions(-) diff --git a/src/diffusers/modeling_utils.py b/src/diffusers/modeling_utils.py index f469763671..9e05672bf1 100644 --- a/src/diffusers/modeling_utils.py +++ b/src/diffusers/modeling_utils.py @@ -35,6 +35,12 @@ from .utils import CONFIG_NAME, DIFFUSERS_CACHE, HUGGINGFACE_CO_RESOLVE_ENDPOINT logger = logging.get_logger(__name__) +if is_torch_version(">=", "1.9.0"): + _LOW_CPU_MEM_USAGE_DEFAULT = True +else: + _LOW_CPU_MEM_USAGE_DEFAULT = False + + def get_parameter_device(parameter: torch.nn.Module): try: return next(parameter.parameters()).device @@ -278,11 +284,11 @@ class ModelMixin(torch.nn.Module): To have Accelerate compute the most optimized `device_map` automatically, set `device_map="auto"`. For more information about each option see [designing a device map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map). - fast_load (`bool`, *optional*, defaults to `True`): + low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): Speed up model loading by not initializing the weights and only loading the pre-trained weights. This also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model. This is only supported when torch version >= 1.9.0. If you are using an older version of torch, - this argument will be ignored and the model will be loaded normally. + setting this argument to `True` will raise an error. 
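
For readers skimming the docstring change above: a minimal sketch of how the renamed `low_cpu_mem_usage` flag is intended to be used once this patch is applied. The checkpoint id is taken from the tests touched later in this patch; the rest is illustrative, not part of the diff.

```python
# Illustrative only: assumes the post-patch diffusers API described in the
# docstring above. "fusing/unet-ldm-dummy-update" is the dummy checkpoint
# already used by the model tests in this patch.
from diffusers import UNet2DModel

# Default: with torch >= 1.9.0 the flag resolves to True, so weights are
# materialized via accelerate's empty-weight init and peak CPU memory stays
# close to 1x the model size.
model = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update")

# Opting out forces the regular (slower, more memory-hungry) loading path.
# Per the checks added later in this patch, low_cpu_mem_usage=True on
# torch < 1.9.0 raises NotImplementedError, and low_cpu_mem_usage=False
# combined with a device_map raises ValueError.
model_normal = UNet2DModel.from_pretrained(
    "fusing/unet-ldm-dummy-update", low_cpu_mem_usage=False
)
```
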
@@ -311,16 +317,26 @@ class ModelMixin(torch.nn.Module): torch_dtype = kwargs.pop("torch_dtype", None) subfolder = kwargs.pop("subfolder", None) device_map = kwargs.pop("device_map", None) - fast_load = kwargs.pop("fast_load", True) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT) # Check if we can handle device_map and dispatching the weights if device_map is not None and not is_torch_version(">=", "1.9.0"): - raise NotImplementedError("Loading and dispatching requires torch >= 1.9.0") + raise NotImplementedError( + "Loading and dispatching requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `device_map=None`." + ) - # Fast init is only possible if torch version is >= 1.9.0 - _INIT_EMPTY_WEIGHTS = fast_load or device_map is not None - if _INIT_EMPTY_WEIGHTS and not is_torch_version(">=", "1.9.0"): - logger.warn("Loading with `fast_load` requires torch >= 1.9.0. Falling back to normal loading.") + if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `low_cpu_mem_usage=False`." + ) + + if low_cpu_mem_usage is False and device_map is not None: + raise ValueError( + f"You cannot set `low_cpu_mem_usage` to `False` while using device_map={device_map} for loading and" + " dispatching. Please make sure to set `low_cpu_mem_usage=True`." + ) user_agent = { "diffusers": __version__, @@ -403,7 +419,7 @@ class ModelMixin(torch.nn.Module): # restore default dtype - if _INIT_EMPTY_WEIGHTS: + if low_cpu_mem_usage: # Instantiate model with empty weights with accelerate.init_empty_weights(): model, unused_kwargs = cls.from_config( diff --git a/src/diffusers/pipeline_utils.py b/src/diffusers/pipeline_utils.py index 5c248ec1a9..36c2d5b888 100644 --- a/src/diffusers/pipeline_utils.py +++ b/src/diffusers/pipeline_utils.py @@ -25,6 +25,7 @@ import torch import diffusers import PIL +from accelerate.utils.versions import is_torch_version from huggingface_hub import snapshot_download from packaging import version from PIL import Image @@ -33,6 +34,7 @@ from tqdm.auto import tqdm from .configuration_utils import ConfigMixin from .dynamic_modules_utils import get_class_from_dynamic_module from .hub_utils import http_user_agent +from .modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT from .schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from .utils import ( CONFIG_NAME, @@ -328,6 +330,19 @@ class DiffusionPipeline(ConfigMixin): Mirror source to accelerate downloads in China. If you are from China and have an accessibility problem, you can set this option to resolve it. Note that we do not guarantee the timeliness or safety. Please refer to the mirror site for more information. specify the folder name here. + device_map (`str` or `Dict[str, Union[int, str, torch.device]]`, *optional*): + A map that specifies where each submodule should go. It doesn't need to be refined to each + parameter/buffer name, once a given module name is inside, every submodule of it will be sent to the + same device. + + To have Accelerate compute the most optimized `device_map` automatically, set `device_map="auto"`. For + more information about each option see [designing a device + map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map). 
+ low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`): + Speed up model loading by not initializing the weights and only loading the pre-trained weights. This + also tries to not use more than 1x model size in CPU memory (including peak memory) while loading the + model. This is only supported when torch version >= 1.9.0. If you are using an older version of torch, + setting this argument to `True` will raise an error. kwargs (remaining dictionary of keyword arguments, *optional*): Can be used to overwrite load - and saveable variables - *i.e.* the pipeline components - of the @@ -380,7 +395,25 @@ class DiffusionPipeline(ConfigMixin): provider = kwargs.pop("provider", None) sess_options = kwargs.pop("sess_options", None) device_map = kwargs.pop("device_map", None) - fast_load = kwargs.pop("fast_load", True) + low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT) + + if device_map is not None and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Loading and dispatching requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `device_map=None`." + ) + + if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"): + raise NotImplementedError( + "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set" + " `low_cpu_mem_usage=False`." + ) + + if low_cpu_mem_usage is False and device_map is not None: + raise ValueError( + f"You cannot set `low_cpu_mem_usage` to False while using device_map={device_map} for loading and" + " dispatching. Please make sure to set `low_cpu_mem_usage=True`." + ) # 1. Download the checkpoints and configs # use snapshot download here to get it working from from_pretrained @@ -573,17 +606,12 @@ class DiffusionPipeline(ConfigMixin): and version.parse(version.parse(transformers.__version__).base_version) >= version.parse("4.20.0") ) - if is_diffusers_model: - loading_kwargs["fast_load"] = fast_load - # When loading a transformers model, if the device_map is None, the weights will be initialized as opposed to diffusers. - # To make default loading faster we set the `low_cpu_mem_usage=fast_load` flag which is `True` by default. + # To make default loading faster we set the `low_cpu_mem_usage=low_cpu_mem_usage` flag which is `True` by default. # This makes sure that the weights won't be initialized which significantly speeds up loading. 
- if is_transformers_model and device_map is None: - loading_kwargs["low_cpu_mem_usage"] = fast_load - if is_diffusers_model or is_transformers_model: loading_kwargs["device_map"] = device_map + loading_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage # check if the module is in a subdirectory if os.path.isdir(os.path.join(cached_folder, name)): diff --git a/tests/models/test_models_unet_2d.py b/tests/models/test_models_unet_2d.py index feee724577..71ddf1a134 100644 --- a/tests/models/test_models_unet_2d.py +++ b/tests/models/test_models_unet_2d.py @@ -133,7 +133,7 @@ class UNetLDMModelTests(ModelTesterMixin, unittest.TestCase): @unittest.skipIf(torch_device != "cuda", "This test is supposed to run on GPU") def test_from_pretrained_accelerate_wont_change_results(self): - # by defautl model loading will use accelerate as `fast_load=True` + # by defautl model loading will use accelerate as `low_cpu_mem_usage=True` model_accelerate, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True) model_accelerate.to(torch_device) model_accelerate.eval() @@ -156,7 +156,7 @@ class UNetLDMModelTests(ModelTesterMixin, unittest.TestCase): gc.collect() model_normal_load, _ = UNet2DModel.from_pretrained( - "fusing/unet-ldm-dummy-update", output_loading_info=True, fast_init=False + "fusing/unet-ldm-dummy-update", output_loading_info=True, low_cpu_mem_usage=False ) model_normal_load.to(torch_device) model_normal_load.eval() @@ -170,7 +170,7 @@ class UNetLDMModelTests(ModelTesterMixin, unittest.TestCase): gc.collect() tracemalloc.start() - # by defautl model loading will use accelerate as `fast_load=True` + # by defautl model loading will use accelerate as `low_cpu_mem_usage=True` model_accelerate, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True) model_accelerate.to(torch_device) model_accelerate.eval() @@ -181,7 +181,7 @@ class UNetLDMModelTests(ModelTesterMixin, unittest.TestCase): gc.collect() model_normal_load, _ = UNet2DModel.from_pretrained( - "fusing/unet-ldm-dummy-update", output_loading_info=True, fast_init=False + "fusing/unet-ldm-dummy-update", output_loading_info=True, low_cpu_mem_usage=False ) model_normal_load.to(torch_device) model_normal_load.eval() diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py index 0f77987355..b01094a607 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py @@ -823,23 +823,23 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase): assert test_callback_fn.has_been_called assert number_of_steps == 51 - def test_stable_diffusion_fast_load(self): + def test_stable_diffusion_low_cpu_mem_usage(self): pipeline_id = "CompVis/stable-diffusion-v1-4" start_time = time.time() - pipeline_fast_load = StableDiffusionPipeline.from_pretrained( + pipeline_low_cpu_mem_usage = StableDiffusionPipeline.from_pretrained( pipeline_id, revision="fp16", torch_dtype=torch.float16 ) - pipeline_fast_load.to(torch_device) - fast_load_time = time.time() - start_time + pipeline_low_cpu_mem_usage.to(torch_device) + low_cpu_mem_usage_time = time.time() - start_time start_time = time.time() _ = StableDiffusionPipeline.from_pretrained( - pipeline_id, revision="fp16", torch_dtype=torch.float16, use_auth_token=True, fast_load=False + pipeline_id, revision="fp16", torch_dtype=torch.float16, use_auth_token=True, low_cpu_mem_usage=False ) normal_load_time = time.time() 
- start_time - assert 2 * fast_load_time < normal_load_time + assert 2 * low_cpu_mem_usage_time < normal_load_time @unittest.skipIf(torch_device == "cpu", "This test is supposed to run on GPU") def test_stable_diffusion_pipeline_with_unet_on_gpu_only(self): From 7b030a7d68888ab08cbec3d7a9c371f1568c219b Mon Sep 17 00:00:00 2001 From: Suraj Patil Date: Thu, 3 Nov 2022 18:13:18 +0100 Subject: [PATCH 24/88] handle device for randn in euler step (#1124) * handle device for randn in euler step * convert device to str --- .../schedulers/scheduling_euler_ancestral_discrete.py | 11 ++++++++++- src/diffusers/schedulers/scheduling_euler_discrete.py | 11 ++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py index 3fe52a4980..134b45a73b 100644 --- a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py @@ -217,7 +217,16 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin): prev_sample = sample + derivative * dt device = model_output.device if torch.is_tensor(model_output) else "cpu" - noise = torch.randn(model_output.shape, dtype=model_output.dtype, generator=generator).to(device) + if str(device) == "mps": + # randn does not work reproducibly on mps + noise = torch.randn(model_output.shape, dtype=model_output.dtype, device="cpu", generator=generator).to( + device + ) + else: + noise = torch.randn(model_output.shape, dtype=model_output.dtype, device=device, generator=generator).to( + device + ) + prev_sample = prev_sample + noise * sigma_up if not return_dict: diff --git a/src/diffusers/schedulers/scheduling_euler_discrete.py b/src/diffusers/schedulers/scheduling_euler_discrete.py index 93aeb8cc38..6425072ac3 100644 --- a/src/diffusers/schedulers/scheduling_euler_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_discrete.py @@ -214,7 +214,16 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin): gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0.0 device = model_output.device if torch.is_tensor(model_output) else "cpu" - noise = torch.randn(model_output.shape, dtype=model_output.dtype, generator=generator).to(device) + if str(device) == "mps": + # randn does not work reproducibly on mps + noise = torch.randn(model_output.shape, dtype=model_output.dtype, device="cpu", generator=generator).to( + device + ) + else: + noise = torch.randn(model_output.shape, dtype=model_output.dtype, device=device, generator=generator).to( + device + ) + eps = noise * s_noise sigma_hat = sigma * (gamma + 1) From 118c5be94a2b8eb90fa41a2ceb59b3a8de9e0218 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Thu, 3 Nov 2022 18:17:23 +0100 Subject: [PATCH 25/88] Docs: Do not require PyTorch nightlies (#1123) Do not require PyTorch nightlies. --- docs/source/optimization/mps.mdx | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/docs/source/optimization/mps.mdx b/docs/source/optimization/mps.mdx index 4eeabc6587..8a2d5ad763 100644 --- a/docs/source/optimization/mps.mdx +++ b/docs/source/optimization/mps.mdx @@ -19,11 +19,8 @@ specific language governing permissions and limitations under the License. - Mac computer with Apple silicon (M1/M2) hardware. - macOS 12.6 or later (13.0 or later recommended). - arm64 version of Python. -- PyTorch 1.13.0 RC (Release Candidate). 
You can install it with `pip` using: +- PyTorch 1.13. You can install it with `pip` or `conda` using the instructions in https://pytorch.org/get-started/locally/. -``` -pip3 install --pre torch --extra-index-url https://download.pytorch.org/whl/test/cpu -``` ## Inference Pipeline @@ -63,4 +60,4 @@ pipeline.enable_attention_slicing() ## Known Issues - As mentioned above, we are investigating a strange [first-time inference issue](https://github.com/huggingface/diffusers/issues/372). -- Generating multiple prompts in a batch [crashes or doesn't work reliably](https://github.com/huggingface/diffusers/issues/363). We believe this is related to the [`mps` backend in PyTorch](https://github.com/pytorch/pytorch/issues/84039). For now, we recommend to iterate instead of batching. +- Generating multiple prompts in a batch [crashes or doesn't work reliably](https://github.com/huggingface/diffusers/issues/363). We believe this is related to the [`mps` backend in PyTorch](https://github.com/pytorch/pytorch/issues/84039). This is being resolved, but for now we recommend to iterate instead of batching. From 1578679ff4a4ff8157214081438aa7d78f13b4fc Mon Sep 17 00:00:00 2001 From: anton-l Date: Thu, 3 Nov 2022 18:47:20 +0100 Subject: [PATCH 26/88] Release: v0.7.0 --- setup.py | 2 +- src/diffusers/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index becd4cbb87..16b64fe81a 100644 --- a/setup.py +++ b/setup.py @@ -210,7 +210,7 @@ install_requires = [ setup( name="diffusers", - version="0.7.0.dev0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots) + version="0.7.0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots) description="Diffusers", long_description=open("README.md", "r", encoding="utf-8").read(), long_description_content_type="text/markdown", diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index 61ac2425db..3d268128a7 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -10,7 +10,7 @@ from .utils import ( ) -__version__ = "0.7.0.dev0" +__version__ = "0.7.0" from .configuration_utils import ConfigMixin from .onnx_utils import OnnxRuntimeModel From 33108bfa6b13a3b0c279871678eb07f184fd28e9 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 3 Nov 2022 17:54:48 +0000 Subject: [PATCH 27/88] Correct VQDiffusion Pipeline import --- ...3c81220be0a04e4543e6fd9b0f290547749cc06cfb | 324 ++++++++++++++++++ ...20be0a04e4543e6fd9b0f290547749cc06cfb.json | 1 + .../bbbcb9f65616524d6199fa3bc16dc0500fb2cbbb | 102 ++++++ .../refs/main | 1 + .../pipeline.py | 1 + src/diffusers/__init__.py | 2 +- .../dummy_torch_and_accelerate_objects.py | 15 - .../dummy_torch_and_transformers_objects.py | 15 + 8 files changed, 445 insertions(+), 16 deletions(-) create mode 100644 clip_guided_stable_diffusion/72392adcdf265e793b0dc13d166393a9d1367724bb03f6faca8cfb1c91c30827.8d4a13da440f0a37b6d42d3c81220be0a04e4543e6fd9b0f290547749cc06cfb create mode 100644 clip_guided_stable_diffusion/72392adcdf265e793b0dc13d166393a9d1367724bb03f6faca8cfb1c91c30827.8d4a13da440f0a37b6d42d3c81220be0a04e4543e6fd9b0f290547749cc06cfb.json create mode 100644 hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/blobs/bbbcb9f65616524d6199fa3bc16dc0500fb2cbbb create mode 100644 hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/refs/main create mode 120000 
hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/snapshots/b8fa12635e53eebebc22f95ee863e7af4fc2fb07/pipeline.py diff --git a/clip_guided_stable_diffusion/72392adcdf265e793b0dc13d166393a9d1367724bb03f6faca8cfb1c91c30827.8d4a13da440f0a37b6d42d3c81220be0a04e4543e6fd9b0f290547749cc06cfb b/clip_guided_stable_diffusion/72392adcdf265e793b0dc13d166393a9d1367724bb03f6faca8cfb1c91c30827.8d4a13da440f0a37b6d42d3c81220be0a04e4543e6fd9b0f290547749cc06cfb new file mode 100644 index 0000000000..2c86e9130f --- /dev/null +++ b/clip_guided_stable_diffusion/72392adcdf265e793b0dc13d166393a9d1367724bb03f6faca8cfb1c91c30827.8d4a13da440f0a37b6d42d3c81220be0a04e4543e6fd9b0f290547749cc06cfb @@ -0,0 +1,324 @@ +import inspect +from typing import List, Optional, Union + +import torch +from torch import nn +from torch.nn import functional as F + +from diffusers import AutoencoderKL, DiffusionPipeline, LMSDiscreteScheduler, PNDMScheduler, UNet2DConditionModel +from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput +from torchvision import transforms +from transformers import CLIPFeatureExtractor, CLIPModel, CLIPTextModel, CLIPTokenizer + + +class MakeCutouts(nn.Module): + def __init__(self, cut_size, cut_power=1.0): + super().__init__() + + self.cut_size = cut_size + self.cut_power = cut_power + + def forward(self, pixel_values, num_cutouts): + sideY, sideX = pixel_values.shape[2:4] + max_size = min(sideX, sideY) + min_size = min(sideX, sideY, self.cut_size) + cutouts = [] + for _ in range(num_cutouts): + size = int(torch.rand([]) ** self.cut_power * (max_size - min_size) + min_size) + offsetx = torch.randint(0, sideX - size + 1, ()) + offsety = torch.randint(0, sideY - size + 1, ()) + cutout = pixel_values[:, :, offsety : offsety + size, offsetx : offsetx + size] + cutouts.append(F.adaptive_avg_pool2d(cutout, self.cut_size)) + return torch.cat(cutouts) + + +def spherical_dist_loss(x, y): + x = F.normalize(x, dim=-1) + y = F.normalize(y, dim=-1) + return (x - y).norm(dim=-1).div(2).arcsin().pow(2).mul(2) + + +def set_requires_grad(model, value): + for param in model.parameters(): + param.requires_grad = value + + +class CLIPGuidedStableDiffusion(DiffusionPipeline): + """CLIP guided stable diffusion based on the amazing repo by @crowsonkb and @Jack000 + - https://github.com/Jack000/glid-3-xl + - https://github.dev/crowsonkb/k-diffusion + """ + + def __init__( + self, + vae: AutoencoderKL, + text_encoder: CLIPTextModel, + clip_model: CLIPModel, + tokenizer: CLIPTokenizer, + unet: UNet2DConditionModel, + scheduler: Union[PNDMScheduler, LMSDiscreteScheduler], + feature_extractor: CLIPFeatureExtractor, + ): + super().__init__() + self.register_modules( + vae=vae, + text_encoder=text_encoder, + clip_model=clip_model, + tokenizer=tokenizer, + unet=unet, + scheduler=scheduler, + feature_extractor=feature_extractor, + ) + + self.normalize = transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std) + self.make_cutouts = MakeCutouts(feature_extractor.size) + + set_requires_grad(self.text_encoder, False) + set_requires_grad(self.clip_model, False) + + def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): + if slice_size == "auto": + # half the attention head size is usually a good trade-off between + # speed and memory + slice_size = self.unet.config.attention_head_dim // 2 + self.unet.set_attention_slice(slice_size) + + def disable_attention_slicing(self): + 
self.enable_attention_slicing(None) + + def freeze_vae(self): + set_requires_grad(self.vae, False) + + def unfreeze_vae(self): + set_requires_grad(self.vae, True) + + def freeze_unet(self): + set_requires_grad(self.unet, False) + + def unfreeze_unet(self): + set_requires_grad(self.unet, True) + + @torch.enable_grad() + def cond_fn( + self, + latents, + timestep, + index, + text_embeddings, + noise_pred_original, + text_embeddings_clip, + clip_guidance_scale, + num_cutouts, + use_cutouts=True, + ): + latents = latents.detach().requires_grad_() + + if isinstance(self.scheduler, LMSDiscreteScheduler): + sigma = self.scheduler.sigmas[index] + # the model input needs to be scaled to match the continuous ODE formulation in K-LMS + latent_model_input = latents / ((sigma**2 + 1) ** 0.5) + else: + latent_model_input = latents + + # predict the noise residual + noise_pred = self.unet(latent_model_input, timestep, encoder_hidden_states=text_embeddings).sample + + if isinstance(self.scheduler, PNDMScheduler): + alpha_prod_t = self.scheduler.alphas_cumprod[timestep] + beta_prod_t = 1 - alpha_prod_t + # compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf + pred_original_sample = (latents - beta_prod_t ** (0.5) * noise_pred) / alpha_prod_t ** (0.5) + + fac = torch.sqrt(beta_prod_t) + sample = pred_original_sample * (fac) + latents * (1 - fac) + elif isinstance(self.scheduler, LMSDiscreteScheduler): + sigma = self.scheduler.sigmas[index] + sample = latents - sigma * noise_pred + else: + raise ValueError(f"scheduler type {type(self.scheduler)} not supported") + + sample = 1 / 0.18215 * sample + image = self.vae.decode(sample).sample + image = (image / 2 + 0.5).clamp(0, 1) + + if use_cutouts: + image = self.make_cutouts(image, num_cutouts) + else: + image = transforms.Resize(self.feature_extractor.size)(image) + image = self.normalize(image).to(latents.dtype) + + image_embeddings_clip = self.clip_model.get_image_features(image) + image_embeddings_clip = image_embeddings_clip / image_embeddings_clip.norm(p=2, dim=-1, keepdim=True) + + if use_cutouts: + dists = spherical_dist_loss(image_embeddings_clip, text_embeddings_clip) + dists = dists.view([num_cutouts, sample.shape[0], -1]) + loss = dists.sum(2).mean(0).sum() * clip_guidance_scale + else: + loss = spherical_dist_loss(image_embeddings_clip, text_embeddings_clip).mean() * clip_guidance_scale + + grads = -torch.autograd.grad(loss, latents)[0] + + if isinstance(self.scheduler, LMSDiscreteScheduler): + latents = latents.detach() + grads * (sigma**2) + noise_pred = noise_pred_original + else: + noise_pred = noise_pred_original - torch.sqrt(beta_prod_t) * grads + return noise_pred, latents + + @torch.no_grad() + def __call__( + self, + prompt: Union[str, List[str]], + height: Optional[int] = 512, + width: Optional[int] = 512, + num_inference_steps: Optional[int] = 50, + guidance_scale: Optional[float] = 7.5, + num_images_per_prompt: Optional[int] = 1, + clip_guidance_scale: Optional[float] = 100, + clip_prompt: Optional[Union[str, List[str]]] = None, + num_cutouts: Optional[int] = 4, + use_cutouts: Optional[bool] = True, + generator: Optional[torch.Generator] = None, + latents: Optional[torch.FloatTensor] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + ): + if isinstance(prompt, str): + batch_size = 1 + elif isinstance(prompt, list): + batch_size = len(prompt) + else: + raise ValueError(f"`prompt` has to be of type `str` or `list` but is 
{type(prompt)}") + + if height % 8 != 0 or width % 8 != 0: + raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.") + + # get prompt text embeddings + text_input = self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + truncation=True, + return_tensors="pt", + ) + text_embeddings = self.text_encoder(text_input.input_ids.to(self.device))[0] + # duplicate text embeddings for each generation per prompt + text_embeddings = text_embeddings.repeat_interleave(num_images_per_prompt, dim=0) + + if clip_guidance_scale > 0: + if clip_prompt is not None: + clip_text_input = self.tokenizer( + clip_prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + truncation=True, + return_tensors="pt", + ).input_ids.to(self.device) + else: + clip_text_input = text_input.input_ids.to(self.device) + text_embeddings_clip = self.clip_model.get_text_features(clip_text_input) + text_embeddings_clip = text_embeddings_clip / text_embeddings_clip.norm(p=2, dim=-1, keepdim=True) + # duplicate text embeddings clip for each generation per prompt + text_embeddings_clip = text_embeddings_clip.repeat_interleave(num_images_per_prompt, dim=0) + + # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) + # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` + # corresponds to doing no classifier free guidance. + do_classifier_free_guidance = guidance_scale > 1.0 + # get unconditional embeddings for classifier free guidance + if do_classifier_free_guidance: + max_length = text_input.input_ids.shape[-1] + uncond_input = self.tokenizer([""], padding="max_length", max_length=max_length, return_tensors="pt") + uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0] + # duplicate unconditional embeddings for each generation per prompt + uncond_embeddings = uncond_embeddings.repeat_interleave(num_images_per_prompt, dim=0) + + # For classifier free guidance, we need to do two forward passes. + # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) + + # get the initial random noise unless the user supplied it + + # Unlike in other pipelines, latents need to be generated in the target device + # for 1-to-1 results reproducibility with the CompVis implementation. + # However this currently doesn't work in `mps`. 
+ latents_shape = (batch_size * num_images_per_prompt, self.unet.in_channels, height // 8, width // 8) + latents_dtype = text_embeddings.dtype + if latents is None: + if self.device.type == "mps": + # randn does not work reproducibly on mps + latents = torch.randn(latents_shape, generator=generator, device="cpu", dtype=latents_dtype).to( + self.device + ) + else: + latents = torch.randn(latents_shape, generator=generator, device=self.device, dtype=latents_dtype) + else: + if latents.shape != latents_shape: + raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}") + latents = latents.to(self.device) + + # set timesteps + accepts_offset = "offset" in set(inspect.signature(self.scheduler.set_timesteps).parameters.keys()) + extra_set_kwargs = {} + if accepts_offset: + extra_set_kwargs["offset"] = 1 + + self.scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs) + + # Some schedulers like PNDM have timesteps as arrays + # It's more optimized to move all timesteps to correct device beforehand + timesteps_tensor = self.scheduler.timesteps.to(self.device) + + # scale the initial noise by the standard deviation required by the scheduler + latents = latents * self.scheduler.init_noise_sigma + + for i, t in enumerate(self.progress_bar(timesteps_tensor)): + # expand the latents if we are doing classifier free guidance + latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + + # predict the noise residual + noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample + + # perform classifier free guidance + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + # perform clip guidance + if clip_guidance_scale > 0: + text_embeddings_for_guidance = ( + text_embeddings.chunk(2)[1] if do_classifier_free_guidance else text_embeddings + ) + noise_pred, latents = self.cond_fn( + latents, + t, + i, + text_embeddings_for_guidance, + noise_pred, + text_embeddings_clip, + clip_guidance_scale, + num_cutouts, + use_cutouts, + ) + + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step(noise_pred, t, latents).prev_sample + + # scale and decode the image latents with vae + latents = 1 / 0.18215 * latents + image = self.vae.decode(latents).sample + + image = (image / 2 + 0.5).clamp(0, 1) + image = image.cpu().permute(0, 2, 3, 1).numpy() + + if output_type == "pil": + image = self.numpy_to_pil(image) + + if not return_dict: + return (image, None) + + return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=None) diff --git a/clip_guided_stable_diffusion/72392adcdf265e793b0dc13d166393a9d1367724bb03f6faca8cfb1c91c30827.8d4a13da440f0a37b6d42d3c81220be0a04e4543e6fd9b0f290547749cc06cfb.json b/clip_guided_stable_diffusion/72392adcdf265e793b0dc13d166393a9d1367724bb03f6faca8cfb1c91c30827.8d4a13da440f0a37b6d42d3c81220be0a04e4543e6fd9b0f290547749cc06cfb.json new file mode 100644 index 0000000000..ebadb9070e --- /dev/null +++ b/clip_guided_stable_diffusion/72392adcdf265e793b0dc13d166393a9d1367724bb03f6faca8cfb1c91c30827.8d4a13da440f0a37b6d42d3c81220be0a04e4543e6fd9b0f290547749cc06cfb.json @@ -0,0 +1 @@ +{"url": "https://raw.githubusercontent.com/huggingface/diffusers/main/examples/community/clip_guided_stable_diffusion.py", "etag": 
"W/\"3e4886ba6cb31f36f75ec5127cd691e562bb04d1f0ff257edbe1c182fd6a210a\""} \ No newline at end of file diff --git a/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/blobs/bbbcb9f65616524d6199fa3bc16dc0500fb2cbbb b/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/blobs/bbbcb9f65616524d6199fa3bc16dc0500fb2cbbb new file mode 100644 index 0000000000..bbbcb9f656 --- /dev/null +++ b/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/blobs/bbbcb9f65616524d6199fa3bc16dc0500fb2cbbb @@ -0,0 +1,102 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +# limitations under the License. + + +from typing import Optional, Tuple, Union + +import torch + +from diffusers.pipeline_utils import DiffusionPipeline, ImagePipelineOutput + + +class CustomPipeline(DiffusionPipeline): + r""" + This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the + library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) + + Parameters: + unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image. + scheduler ([`SchedulerMixin`]): + A scheduler to be used in combination with `unet` to denoise the encoded image. Can be one of + [`DDPMScheduler`], or [`DDIMScheduler`]. + """ + + def __init__(self, unet, scheduler): + super().__init__() + self.register_modules(unet=unet, scheduler=scheduler) + + @torch.no_grad() + def __call__( + self, + batch_size: int = 1, + generator: Optional[torch.Generator] = None, + eta: float = 0.0, + num_inference_steps: int = 50, + output_type: Optional[str] = "pil", + return_dict: bool = True, + **kwargs, + ) -> Union[ImagePipelineOutput, Tuple]: + r""" + Args: + batch_size (`int`, *optional*, defaults to 1): + The number of images to generate. + generator (`torch.Generator`, *optional*): + A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation + deterministic. + eta (`float`, *optional*, defaults to 0.0): + The eta parameter which controls the scale of the variance (0 is DDIM and 1 is one type of DDPM). + num_inference_steps (`int`, *optional*, defaults to 50): + The number of denoising steps. More denoising steps usually lead to a higher quality image at the + expense of slower inference. + output_type (`str`, *optional*, defaults to `"pil"`): + The output format of the generate image. Choose between + [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. + + Returns: + [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if + `return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the + generated images. 
+ """ + + # Sample gaussian noise to begin loop + image = torch.randn( + (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), + generator=generator, + ) + image = image.to(self.device) + + # set step values + self.scheduler.set_timesteps(num_inference_steps) + + for t in self.progress_bar(self.scheduler.timesteps): + # 1. predict noise model_output + model_output = self.unet(image, t).sample + + # 2. predict previous mean of image x_t-1 and add variance depending on eta + # eta corresponds to η in paper and should be between [0, 1] + # do x_t -> x_t-1 + image = self.scheduler.step(model_output, t, image, eta).prev_sample + + image = (image / 2 + 0.5).clamp(0, 1) + image = image.cpu().permute(0, 2, 3, 1).numpy() + if output_type == "pil": + image = self.numpy_to_pil(image) + + if not return_dict: + return (image,) + + return ImagePipelineOutput(images=image), "This is a test" \ No newline at end of file diff --git a/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/refs/main b/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/refs/main new file mode 100644 index 0000000000..152c8af681 --- /dev/null +++ b/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/refs/main @@ -0,0 +1 @@ +b8fa12635e53eebebc22f95ee863e7af4fc2fb07 \ No newline at end of file diff --git a/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/snapshots/b8fa12635e53eebebc22f95ee863e7af4fc2fb07/pipeline.py b/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/snapshots/b8fa12635e53eebebc22f95ee863e7af4fc2fb07/pipeline.py new file mode 120000 index 0000000000..47bb968080 --- /dev/null +++ b/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/snapshots/b8fa12635e53eebebc22f95ee863e7af4fc2fb07/pipeline.py @@ -0,0 +1 @@ +../../blobs/bbbcb9f65616524d6199fa3bc16dc0500fb2cbbb \ No newline at end of file diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index 61ac2425db..0ded715b6d 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -46,7 +46,6 @@ if is_torch_available(): PNDMPipeline, RePaintPipeline, ScoreSdeVePipeline, - VQDiffusionPipeline, ) from .schedulers import ( DDIMScheduler, @@ -77,6 +76,7 @@ if is_torch_available() and is_transformers_available(): StableDiffusionInpaintPipeline, StableDiffusionInpaintPipelineLegacy, StableDiffusionPipeline, + VQDiffusionPipeline, ) else: from .utils.dummy_torch_and_transformers_objects import * # noqa F403 diff --git a/src/diffusers/utils/dummy_torch_and_accelerate_objects.py b/src/diffusers/utils/dummy_torch_and_accelerate_objects.py index c2c423eeab..335e3ca24d 100644 --- a/src/diffusers/utils/dummy_torch_and_accelerate_objects.py +++ b/src/diffusers/utils/dummy_torch_and_accelerate_objects.py @@ -272,21 +272,6 @@ class ScoreSdeVePipeline(metaclass=DummyObject): requires_backends(cls, ["torch", "accelerate"]) -class VQDiffusionPipeline(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - class 
DDIMScheduler(metaclass=DummyObject): _backends = ["torch", "accelerate"] diff --git a/src/diffusers/utils/dummy_torch_and_transformers_objects.py b/src/diffusers/utils/dummy_torch_and_transformers_objects.py index 615444425c..bdc9e4f6c0 100644 --- a/src/diffusers/utils/dummy_torch_and_transformers_objects.py +++ b/src/diffusers/utils/dummy_torch_and_transformers_objects.py @@ -4,6 +4,21 @@ from ..utils import DummyObject, requires_backends +class VQDiffusionPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + class LDMTextToImagePipeline(metaclass=DummyObject): _backends = ["torch", "transformers"] From a24862cdaf0dedf3430b2d5cdcdabe2ebc0a7dd8 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 3 Nov 2022 17:55:14 +0000 Subject: [PATCH 28/88] Correct VQDiffusion Pipeline import --- ...3c81220be0a04e4543e6fd9b0f290547749cc06cfb | 324 ------------------ ...20be0a04e4543e6fd9b0f290547749cc06cfb.json | 1 - .../bbbcb9f65616524d6199fa3bc16dc0500fb2cbbb | 102 ------ .../refs/main | 1 - .../pipeline.py | 1 - 5 files changed, 429 deletions(-) delete mode 100644 clip_guided_stable_diffusion/72392adcdf265e793b0dc13d166393a9d1367724bb03f6faca8cfb1c91c30827.8d4a13da440f0a37b6d42d3c81220be0a04e4543e6fd9b0f290547749cc06cfb delete mode 100644 clip_guided_stable_diffusion/72392adcdf265e793b0dc13d166393a9d1367724bb03f6faca8cfb1c91c30827.8d4a13da440f0a37b6d42d3c81220be0a04e4543e6fd9b0f290547749cc06cfb.json delete mode 100644 hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/blobs/bbbcb9f65616524d6199fa3bc16dc0500fb2cbbb delete mode 100644 hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/refs/main delete mode 120000 hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/snapshots/b8fa12635e53eebebc22f95ee863e7af4fc2fb07/pipeline.py diff --git a/clip_guided_stable_diffusion/72392adcdf265e793b0dc13d166393a9d1367724bb03f6faca8cfb1c91c30827.8d4a13da440f0a37b6d42d3c81220be0a04e4543e6fd9b0f290547749cc06cfb b/clip_guided_stable_diffusion/72392adcdf265e793b0dc13d166393a9d1367724bb03f6faca8cfb1c91c30827.8d4a13da440f0a37b6d42d3c81220be0a04e4543e6fd9b0f290547749cc06cfb deleted file mode 100644 index 2c86e9130f..0000000000 --- a/clip_guided_stable_diffusion/72392adcdf265e793b0dc13d166393a9d1367724bb03f6faca8cfb1c91c30827.8d4a13da440f0a37b6d42d3c81220be0a04e4543e6fd9b0f290547749cc06cfb +++ /dev/null @@ -1,324 +0,0 @@ -import inspect -from typing import List, Optional, Union - -import torch -from torch import nn -from torch.nn import functional as F - -from diffusers import AutoencoderKL, DiffusionPipeline, LMSDiscreteScheduler, PNDMScheduler, UNet2DConditionModel -from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput -from torchvision import transforms -from transformers import CLIPFeatureExtractor, CLIPModel, CLIPTextModel, CLIPTokenizer - - -class MakeCutouts(nn.Module): - def __init__(self, cut_size, cut_power=1.0): - super().__init__() - - self.cut_size = cut_size - self.cut_power = cut_power - - def forward(self, pixel_values, num_cutouts): - sideY, sideX = pixel_values.shape[2:4] 
- max_size = min(sideX, sideY) - min_size = min(sideX, sideY, self.cut_size) - cutouts = [] - for _ in range(num_cutouts): - size = int(torch.rand([]) ** self.cut_power * (max_size - min_size) + min_size) - offsetx = torch.randint(0, sideX - size + 1, ()) - offsety = torch.randint(0, sideY - size + 1, ()) - cutout = pixel_values[:, :, offsety : offsety + size, offsetx : offsetx + size] - cutouts.append(F.adaptive_avg_pool2d(cutout, self.cut_size)) - return torch.cat(cutouts) - - -def spherical_dist_loss(x, y): - x = F.normalize(x, dim=-1) - y = F.normalize(y, dim=-1) - return (x - y).norm(dim=-1).div(2).arcsin().pow(2).mul(2) - - -def set_requires_grad(model, value): - for param in model.parameters(): - param.requires_grad = value - - -class CLIPGuidedStableDiffusion(DiffusionPipeline): - """CLIP guided stable diffusion based on the amazing repo by @crowsonkb and @Jack000 - - https://github.com/Jack000/glid-3-xl - - https://github.dev/crowsonkb/k-diffusion - """ - - def __init__( - self, - vae: AutoencoderKL, - text_encoder: CLIPTextModel, - clip_model: CLIPModel, - tokenizer: CLIPTokenizer, - unet: UNet2DConditionModel, - scheduler: Union[PNDMScheduler, LMSDiscreteScheduler], - feature_extractor: CLIPFeatureExtractor, - ): - super().__init__() - self.register_modules( - vae=vae, - text_encoder=text_encoder, - clip_model=clip_model, - tokenizer=tokenizer, - unet=unet, - scheduler=scheduler, - feature_extractor=feature_extractor, - ) - - self.normalize = transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std) - self.make_cutouts = MakeCutouts(feature_extractor.size) - - set_requires_grad(self.text_encoder, False) - set_requires_grad(self.clip_model, False) - - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): - if slice_size == "auto": - # half the attention head size is usually a good trade-off between - # speed and memory - slice_size = self.unet.config.attention_head_dim // 2 - self.unet.set_attention_slice(slice_size) - - def disable_attention_slicing(self): - self.enable_attention_slicing(None) - - def freeze_vae(self): - set_requires_grad(self.vae, False) - - def unfreeze_vae(self): - set_requires_grad(self.vae, True) - - def freeze_unet(self): - set_requires_grad(self.unet, False) - - def unfreeze_unet(self): - set_requires_grad(self.unet, True) - - @torch.enable_grad() - def cond_fn( - self, - latents, - timestep, - index, - text_embeddings, - noise_pred_original, - text_embeddings_clip, - clip_guidance_scale, - num_cutouts, - use_cutouts=True, - ): - latents = latents.detach().requires_grad_() - - if isinstance(self.scheduler, LMSDiscreteScheduler): - sigma = self.scheduler.sigmas[index] - # the model input needs to be scaled to match the continuous ODE formulation in K-LMS - latent_model_input = latents / ((sigma**2 + 1) ** 0.5) - else: - latent_model_input = latents - - # predict the noise residual - noise_pred = self.unet(latent_model_input, timestep, encoder_hidden_states=text_embeddings).sample - - if isinstance(self.scheduler, PNDMScheduler): - alpha_prod_t = self.scheduler.alphas_cumprod[timestep] - beta_prod_t = 1 - alpha_prod_t - # compute predicted original sample from predicted noise also called - # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf - pred_original_sample = (latents - beta_prod_t ** (0.5) * noise_pred) / alpha_prod_t ** (0.5) - - fac = torch.sqrt(beta_prod_t) - sample = pred_original_sample * (fac) + latents * (1 - fac) - elif isinstance(self.scheduler, 
LMSDiscreteScheduler): - sigma = self.scheduler.sigmas[index] - sample = latents - sigma * noise_pred - else: - raise ValueError(f"scheduler type {type(self.scheduler)} not supported") - - sample = 1 / 0.18215 * sample - image = self.vae.decode(sample).sample - image = (image / 2 + 0.5).clamp(0, 1) - - if use_cutouts: - image = self.make_cutouts(image, num_cutouts) - else: - image = transforms.Resize(self.feature_extractor.size)(image) - image = self.normalize(image).to(latents.dtype) - - image_embeddings_clip = self.clip_model.get_image_features(image) - image_embeddings_clip = image_embeddings_clip / image_embeddings_clip.norm(p=2, dim=-1, keepdim=True) - - if use_cutouts: - dists = spherical_dist_loss(image_embeddings_clip, text_embeddings_clip) - dists = dists.view([num_cutouts, sample.shape[0], -1]) - loss = dists.sum(2).mean(0).sum() * clip_guidance_scale - else: - loss = spherical_dist_loss(image_embeddings_clip, text_embeddings_clip).mean() * clip_guidance_scale - - grads = -torch.autograd.grad(loss, latents)[0] - - if isinstance(self.scheduler, LMSDiscreteScheduler): - latents = latents.detach() + grads * (sigma**2) - noise_pred = noise_pred_original - else: - noise_pred = noise_pred_original - torch.sqrt(beta_prod_t) * grads - return noise_pred, latents - - @torch.no_grad() - def __call__( - self, - prompt: Union[str, List[str]], - height: Optional[int] = 512, - width: Optional[int] = 512, - num_inference_steps: Optional[int] = 50, - guidance_scale: Optional[float] = 7.5, - num_images_per_prompt: Optional[int] = 1, - clip_guidance_scale: Optional[float] = 100, - clip_prompt: Optional[Union[str, List[str]]] = None, - num_cutouts: Optional[int] = 4, - use_cutouts: Optional[bool] = True, - generator: Optional[torch.Generator] = None, - latents: Optional[torch.FloatTensor] = None, - output_type: Optional[str] = "pil", - return_dict: bool = True, - ): - if isinstance(prompt, str): - batch_size = 1 - elif isinstance(prompt, list): - batch_size = len(prompt) - else: - raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") - - if height % 8 != 0 or width % 8 != 0: - raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.") - - # get prompt text embeddings - text_input = self.tokenizer( - prompt, - padding="max_length", - max_length=self.tokenizer.model_max_length, - truncation=True, - return_tensors="pt", - ) - text_embeddings = self.text_encoder(text_input.input_ids.to(self.device))[0] - # duplicate text embeddings for each generation per prompt - text_embeddings = text_embeddings.repeat_interleave(num_images_per_prompt, dim=0) - - if clip_guidance_scale > 0: - if clip_prompt is not None: - clip_text_input = self.tokenizer( - clip_prompt, - padding="max_length", - max_length=self.tokenizer.model_max_length, - truncation=True, - return_tensors="pt", - ).input_ids.to(self.device) - else: - clip_text_input = text_input.input_ids.to(self.device) - text_embeddings_clip = self.clip_model.get_text_features(clip_text_input) - text_embeddings_clip = text_embeddings_clip / text_embeddings_clip.norm(p=2, dim=-1, keepdim=True) - # duplicate text embeddings clip for each generation per prompt - text_embeddings_clip = text_embeddings_clip.repeat_interleave(num_images_per_prompt, dim=0) - - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) - # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` - # corresponds to doing no classifier free guidance. 
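The comment above is the key to how `guidance_scale` is applied a few lines further down. A minimal, self-contained sketch of the combination step (tensor names here are illustrative and not the pipeline's own variables):

```python
import torch

def apply_cfg(noise_pred_uncond, noise_pred_text, guidance_scale):
    # guidance_scale == 1.0 reduces to the text-conditioned prediction alone,
    # i.e. no classifier-free guidance; larger values extrapolate away from the
    # unconditional prediction and toward the text condition.
    return noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)

uncond = torch.randn(1, 4, 64, 64)
text_cond = torch.randn(1, 4, 64, 64)
guided = apply_cfg(uncond, text_cond, guidance_scale=7.5)
```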
- do_classifier_free_guidance = guidance_scale > 1.0 - # get unconditional embeddings for classifier free guidance - if do_classifier_free_guidance: - max_length = text_input.input_ids.shape[-1] - uncond_input = self.tokenizer([""], padding="max_length", max_length=max_length, return_tensors="pt") - uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0] - # duplicate unconditional embeddings for each generation per prompt - uncond_embeddings = uncond_embeddings.repeat_interleave(num_images_per_prompt, dim=0) - - # For classifier free guidance, we need to do two forward passes. - # Here we concatenate the unconditional and text embeddings into a single batch - # to avoid doing two forward passes - text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) - - # get the initial random noise unless the user supplied it - - # Unlike in other pipelines, latents need to be generated in the target device - # for 1-to-1 results reproducibility with the CompVis implementation. - # However this currently doesn't work in `mps`. - latents_shape = (batch_size * num_images_per_prompt, self.unet.in_channels, height // 8, width // 8) - latents_dtype = text_embeddings.dtype - if latents is None: - if self.device.type == "mps": - # randn does not work reproducibly on mps - latents = torch.randn(latents_shape, generator=generator, device="cpu", dtype=latents_dtype).to( - self.device - ) - else: - latents = torch.randn(latents_shape, generator=generator, device=self.device, dtype=latents_dtype) - else: - if latents.shape != latents_shape: - raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}") - latents = latents.to(self.device) - - # set timesteps - accepts_offset = "offset" in set(inspect.signature(self.scheduler.set_timesteps).parameters.keys()) - extra_set_kwargs = {} - if accepts_offset: - extra_set_kwargs["offset"] = 1 - - self.scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs) - - # Some schedulers like PNDM have timesteps as arrays - # It's more optimized to move all timesteps to correct device beforehand - timesteps_tensor = self.scheduler.timesteps.to(self.device) - - # scale the initial noise by the standard deviation required by the scheduler - latents = latents * self.scheduler.init_noise_sigma - - for i, t in enumerate(self.progress_bar(timesteps_tensor)): - # expand the latents if we are doing classifier free guidance - latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents - latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) - - # predict the noise residual - noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample - - # perform classifier free guidance - if do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) - - # perform clip guidance - if clip_guidance_scale > 0: - text_embeddings_for_guidance = ( - text_embeddings.chunk(2)[1] if do_classifier_free_guidance else text_embeddings - ) - noise_pred, latents = self.cond_fn( - latents, - t, - i, - text_embeddings_for_guidance, - noise_pred, - text_embeddings_clip, - clip_guidance_scale, - num_cutouts, - use_cutouts, - ) - - # compute the previous noisy sample x_t -> x_t-1 - latents = self.scheduler.step(noise_pred, t, latents).prev_sample - - # scale and decode the image latents with vae - latents = 1 / 0.18215 * latents - image = 
self.vae.decode(latents).sample - - image = (image / 2 + 0.5).clamp(0, 1) - image = image.cpu().permute(0, 2, 3, 1).numpy() - - if output_type == "pil": - image = self.numpy_to_pil(image) - - if not return_dict: - return (image, None) - - return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=None) diff --git a/clip_guided_stable_diffusion/72392adcdf265e793b0dc13d166393a9d1367724bb03f6faca8cfb1c91c30827.8d4a13da440f0a37b6d42d3c81220be0a04e4543e6fd9b0f290547749cc06cfb.json b/clip_guided_stable_diffusion/72392adcdf265e793b0dc13d166393a9d1367724bb03f6faca8cfb1c91c30827.8d4a13da440f0a37b6d42d3c81220be0a04e4543e6fd9b0f290547749cc06cfb.json deleted file mode 100644 index ebadb9070e..0000000000 --- a/clip_guided_stable_diffusion/72392adcdf265e793b0dc13d166393a9d1367724bb03f6faca8cfb1c91c30827.8d4a13da440f0a37b6d42d3c81220be0a04e4543e6fd9b0f290547749cc06cfb.json +++ /dev/null @@ -1 +0,0 @@ -{"url": "https://raw.githubusercontent.com/huggingface/diffusers/main/examples/community/clip_guided_stable_diffusion.py", "etag": "W/\"3e4886ba6cb31f36f75ec5127cd691e562bb04d1f0ff257edbe1c182fd6a210a\""} \ No newline at end of file diff --git a/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/blobs/bbbcb9f65616524d6199fa3bc16dc0500fb2cbbb b/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/blobs/bbbcb9f65616524d6199fa3bc16dc0500fb2cbbb deleted file mode 100644 index bbbcb9f656..0000000000 --- a/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/blobs/bbbcb9f65616524d6199fa3bc16dc0500fb2cbbb +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright 2022 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and - -# limitations under the License. - - -from typing import Optional, Tuple, Union - -import torch - -from diffusers.pipeline_utils import DiffusionPipeline, ImagePipelineOutput - - -class CustomPipeline(DiffusionPipeline): - r""" - This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the - library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) - - Parameters: - unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image. - scheduler ([`SchedulerMixin`]): - A scheduler to be used in combination with `unet` to denoise the encoded image. Can be one of - [`DDPMScheduler`], or [`DDIMScheduler`]. - """ - - def __init__(self, unet, scheduler): - super().__init__() - self.register_modules(unet=unet, scheduler=scheduler) - - @torch.no_grad() - def __call__( - self, - batch_size: int = 1, - generator: Optional[torch.Generator] = None, - eta: float = 0.0, - num_inference_steps: int = 50, - output_type: Optional[str] = "pil", - return_dict: bool = True, - **kwargs, - ) -> Union[ImagePipelineOutput, Tuple]: - r""" - Args: - batch_size (`int`, *optional*, defaults to 1): - The number of images to generate. 
- generator (`torch.Generator`, *optional*): - A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation - deterministic. - eta (`float`, *optional*, defaults to 0.0): - The eta parameter which controls the scale of the variance (0 is DDIM and 1 is one type of DDPM). - num_inference_steps (`int`, *optional*, defaults to 50): - The number of denoising steps. More denoising steps usually lead to a higher quality image at the - expense of slower inference. - output_type (`str`, *optional*, defaults to `"pil"`): - The output format of the generate image. Choose between - [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. - return_dict (`bool`, *optional*, defaults to `True`): - Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. - - Returns: - [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if - `return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the - generated images. - """ - - # Sample gaussian noise to begin loop - image = torch.randn( - (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), - generator=generator, - ) - image = image.to(self.device) - - # set step values - self.scheduler.set_timesteps(num_inference_steps) - - for t in self.progress_bar(self.scheduler.timesteps): - # 1. predict noise model_output - model_output = self.unet(image, t).sample - - # 2. predict previous mean of image x_t-1 and add variance depending on eta - # eta corresponds to η in paper and should be between [0, 1] - # do x_t -> x_t-1 - image = self.scheduler.step(model_output, t, image, eta).prev_sample - - image = (image / 2 + 0.5).clamp(0, 1) - image = image.cpu().permute(0, 2, 3, 1).numpy() - if output_type == "pil": - image = self.numpy_to_pil(image) - - if not return_dict: - return (image,) - - return ImagePipelineOutput(images=image), "This is a test" \ No newline at end of file diff --git a/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/refs/main b/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/refs/main deleted file mode 100644 index 152c8af681..0000000000 --- a/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/refs/main +++ /dev/null @@ -1 +0,0 @@ -b8fa12635e53eebebc22f95ee863e7af4fc2fb07 \ No newline at end of file diff --git a/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/snapshots/b8fa12635e53eebebc22f95ee863e7af4fc2fb07/pipeline.py b/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/snapshots/b8fa12635e53eebebc22f95ee863e7af4fc2fb07/pipeline.py deleted file mode 120000 index 47bb968080..0000000000 --- a/hf-internal-testing/diffusers-dummy-pipeline/models--hf-internal-testing--diffusers-dummy-pipeline/snapshots/b8fa12635e53eebebc22f95ee863e7af4fc2fb07/pipeline.py +++ /dev/null @@ -1 +0,0 @@ -../../blobs/bbbcb9f65616524d6199fa3bc16dc0500fb2cbbb \ No newline at end of file From bde4880c9cceada20b387d3110061c65249dabcc Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 3 Nov 2022 17:57:51 +0000 Subject: [PATCH 29/88] make style --- .../dummy_torch_and_transformers_objects.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git 
a/src/diffusers/utils/dummy_torch_and_transformers_objects.py b/src/diffusers/utils/dummy_torch_and_transformers_objects.py index bdc9e4f6c0..ea85a8f57e 100644 --- a/src/diffusers/utils/dummy_torch_and_transformers_objects.py +++ b/src/diffusers/utils/dummy_torch_and_transformers_objects.py @@ -4,21 +4,6 @@ from ..utils import DummyObject, requires_backends -class VQDiffusionPipeline(metaclass=DummyObject): - _backends = ["torch", "transformers"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "transformers"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "transformers"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "transformers"]) - - class LDMTextToImagePipeline(metaclass=DummyObject): _backends = ["torch", "transformers"] @@ -92,3 +77,18 @@ class StableDiffusionPipeline(metaclass=DummyObject): @classmethod def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["torch", "transformers"]) + + +class VQDiffusionPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) From c62b3a2e7e9cb1665db1062d96cc13118350ad8d Mon Sep 17 00:00:00 2001 From: "Duong A. Nguyen" <38061659+duongna21@users.noreply.github.com> Date: Fri, 4 Nov 2022 19:49:57 +0700 Subject: [PATCH 30/88] [Flax] Fix sample batch size DreamBooth (#1129) fix sample batch size --- examples/dreambooth/train_dreambooth_flax.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/dreambooth/train_dreambooth_flax.py b/examples/dreambooth/train_dreambooth_flax.py index d2652606b5..84493b1d94 100644 --- a/examples/dreambooth/train_dreambooth_flax.py +++ b/examples/dreambooth/train_dreambooth_flax.py @@ -361,7 +361,8 @@ def main(): logger.info(f"Number of class images to sample: {num_new_images}.") sample_dataset = PromptDataset(args.class_prompt, num_new_images) - sample_dataloader = torch.utils.data.DataLoader(sample_dataset, batch_size=args.sample_batch_size) + total_sample_batch_size = args.sample_batch_size * jax.local_device_count() + sample_dataloader = torch.utils.data.DataLoader(sample_dataset, batch_size=total_sample_batch_size) for example in tqdm( sample_dataloader, desc="Generating class images", disable=not jax.process_index() == 0 From 1d0f3c211ebb13897859bd457c218c64b5e8a805 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Fri, 4 Nov 2022 14:58:52 +0100 Subject: [PATCH 31/88] Move accelerate to a soft-dependency (#1134) * finish * finish * Update src/diffusers/modeling_utils.py * Update src/diffusers/pipeline_utils.py Co-authored-by: Anton Lozhkov * more fixes * fix Co-authored-by: Anton Lozhkov --- src/diffusers/__init__.py | 8 - src/diffusers/modeling_utils.py | 34 +- src/diffusers/pipeline_utils.py | 12 +- src/diffusers/utils/__init__.py | 1 + src/diffusers/utils/dummy_pt_objects.py | 15 - .../dummy_torch_and_accelerate_objects.py | 452 ------------------ src/diffusers/utils/import_utils.py | 38 ++ tests/repo_utils/test_check_dummies.py | 4 +- 8 files changed, 82 insertions(+), 482 deletions(-) delete mode 100644 src/diffusers/utils/dummy_torch_and_accelerate_objects.py diff --git a/src/diffusers/__init__.py 
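The DreamBooth fix above works because, under JAX data parallelism, every host-local device consumes its own slice of each batch, so the sampling dataloader has to yield the per-device batch size multiplied by the number of local devices. A rough sketch of that sizing logic (values are hypothetical and a JAX install is assumed):

```python
import jax

per_device_batch_size = 4  # hypothetical, mirrors args.sample_batch_size
# the dataloader batch is sharded across local devices, so scale it up accordingly
total_batch_size = per_device_batch_size * jax.local_device_count()
print(total_batch_size)
```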
b/src/diffusers/__init__.py index 22b6589973..e4a69641d5 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -1,5 +1,4 @@ from .utils import ( - is_accelerate_available, is_flax_available, is_inflect_available, is_onnx_available, @@ -17,13 +16,6 @@ from .onnx_utils import OnnxRuntimeModel from .utils import logging -# This will create an extra dummy file "dummy_torch_and_accelerate_objects.py" -# TODO: (patil-suraj, anton-l) maybe import everything under is_torch_and_accelerate_available -if is_torch_available() and not is_accelerate_available(): - error_msg = "Please install the `accelerate` library to use Diffusers with PyTorch. You can do so by running `pip install diffusers[torch]`. Or if torch is already installed, you can run `pip install accelerate`." # noqa: E501 - raise ImportError(error_msg) - - if is_torch_available(): from .modeling_utils import ModelMixin from .models import AutoencoderKL, Transformer2DModel, UNet1DModel, UNet2DConditionModel, UNet2DModel, VQModel diff --git a/src/diffusers/modeling_utils.py b/src/diffusers/modeling_utils.py index 9e05672bf1..1e91ccd56a 100644 --- a/src/diffusers/modeling_utils.py +++ b/src/diffusers/modeling_utils.py @@ -21,15 +21,20 @@ from typing import Callable, List, Optional, Tuple, Union import torch from torch import Tensor, device -import accelerate -from accelerate.utils import set_module_tensor_to_device -from accelerate.utils.versions import is_torch_version from huggingface_hub import hf_hub_download from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError from requests import HTTPError from . import __version__ -from .utils import CONFIG_NAME, DIFFUSERS_CACHE, HUGGINGFACE_CO_RESOLVE_ENDPOINT, WEIGHTS_NAME, logging +from .utils import ( + CONFIG_NAME, + DIFFUSERS_CACHE, + HUGGINGFACE_CO_RESOLVE_ENDPOINT, + WEIGHTS_NAME, + is_accelerate_available, + is_torch_version, + logging, +) logger = logging.get_logger(__name__) @@ -41,6 +46,12 @@ else: _LOW_CPU_MEM_USAGE_DEFAULT = False +if is_accelerate_available(): + import accelerate + from accelerate.utils import set_module_tensor_to_device + from accelerate.utils.versions import is_torch_version + + def get_parameter_device(parameter: torch.nn.Module): try: return next(parameter.parameters()).device @@ -319,6 +330,21 @@ class ModelMixin(torch.nn.Module): device_map = kwargs.pop("device_map", None) low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT) + if low_cpu_mem_usage and not is_accelerate_available(): + low_cpu_mem_usage = False + logger.warn( + "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the" + " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install" + " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip" + " install accelerate\n```\n." + ) + + if device_map is not None and not is_accelerate_available(): + raise NotImplementedError( + "Loading and dispatching requires `accelerate`. Please make sure to install accelerate or set" + " `device_map=None`. You can install accelerate with `pip install accelerate`." 
+ ) + # Check if we can handle device_map and dispatching the weights if device_map is not None and not is_torch_version(">=", "1.9.0"): raise NotImplementedError( diff --git a/src/diffusers/pipeline_utils.py b/src/diffusers/pipeline_utils.py index 36c2d5b888..97e196e723 100644 --- a/src/diffusers/pipeline_utils.py +++ b/src/diffusers/pipeline_utils.py @@ -25,7 +25,6 @@ import torch import diffusers import PIL -from accelerate.utils.versions import is_torch_version from huggingface_hub import snapshot_download from packaging import version from PIL import Image @@ -43,6 +42,8 @@ from .utils import ( WEIGHTS_NAME, BaseOutput, deprecate, + is_accelerate_available, + is_torch_version, is_transformers_available, logging, ) @@ -397,6 +398,15 @@ class DiffusionPipeline(ConfigMixin): device_map = kwargs.pop("device_map", None) low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT) + if low_cpu_mem_usage and not is_accelerate_available(): + low_cpu_mem_usage = False + logger.warn( + "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the" + " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install" + " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip" + " install accelerate\n```\n." + ) + if device_map is not None and not is_torch_version(">=", "1.9.0"): raise NotImplementedError( "Loading and dispatching requires torch >= 1.9.0. Please either update your PyTorch version or set" diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py index 7395f4edfa..3fa477e7dc 100644 --- a/src/diffusers/utils/__init__.py +++ b/src/diffusers/utils/__init__.py @@ -31,6 +31,7 @@ from .import_utils import ( is_scipy_available, is_tf_available, is_torch_available, + is_torch_version, is_transformers_available, is_unidecode_available, requires_backends, diff --git a/src/diffusers/utils/dummy_pt_objects.py b/src/diffusers/utils/dummy_pt_objects.py index 833f2b6c50..25aa82d6c5 100644 --- a/src/diffusers/utils/dummy_pt_objects.py +++ b/src/diffusers/utils/dummy_pt_objects.py @@ -272,21 +272,6 @@ class ScoreSdeVePipeline(metaclass=DummyObject): requires_backends(cls, ["torch"]) -class VQDiffusionPipeline(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch"]) - - class DDIMScheduler(metaclass=DummyObject): _backends = ["torch"] diff --git a/src/diffusers/utils/dummy_torch_and_accelerate_objects.py b/src/diffusers/utils/dummy_torch_and_accelerate_objects.py deleted file mode 100644 index 335e3ca24d..0000000000 --- a/src/diffusers/utils/dummy_torch_and_accelerate_objects.py +++ /dev/null @@ -1,452 +0,0 @@ -# This file is autogenerated by the command `make fix-copies`, do not edit. 
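The pattern added to `modeling_utils.py` and `pipeline_utils.py` above degrades gracefully when `accelerate` is missing instead of failing at import time: optional fast paths fall back with a warning, while features that cannot work without it raise. A simplified sketch of that logic (not the exact library code):

```python
from diffusers.utils import is_accelerate_available, logging

logger = logging.get_logger(__name__)

def resolve_loading_options(low_cpu_mem_usage=True, device_map=None):
    # the fast-init path is optional, so silently fall back when accelerate is absent
    if low_cpu_mem_usage and not is_accelerate_available():
        low_cpu_mem_usage = False
        logger.warn("`accelerate` not found; defaulting to low_cpu_mem_usage=False.")
    # device_map dispatching genuinely needs accelerate, so refuse outright
    if device_map is not None and not is_accelerate_available():
        raise NotImplementedError(
            "Loading with a device_map requires `accelerate` (`pip install accelerate`)."
        )
    return low_cpu_mem_usage, device_map
```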
-# flake8: noqa - -from ..utils import DummyObject, requires_backends - - -class ModelMixin(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class AutoencoderKL(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class Transformer2DModel(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class UNet1DModel(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class UNet2DConditionModel(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class UNet2DModel(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class VQModel(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -def get_constant_schedule(*args, **kwargs): - requires_backends(get_constant_schedule, ["torch", "accelerate"]) - - -def get_constant_schedule_with_warmup(*args, **kwargs): - requires_backends(get_constant_schedule_with_warmup, ["torch", "accelerate"]) - - -def get_cosine_schedule_with_warmup(*args, **kwargs): - requires_backends(get_cosine_schedule_with_warmup, ["torch", "accelerate"]) - - -def get_cosine_with_hard_restarts_schedule_with_warmup(*args, **kwargs): - requires_backends(get_cosine_with_hard_restarts_schedule_with_warmup, ["torch", "accelerate"]) - - -def get_linear_schedule_with_warmup(*args, **kwargs): - requires_backends(get_linear_schedule_with_warmup, ["torch", 
"accelerate"]) - - -def get_polynomial_decay_schedule_with_warmup(*args, **kwargs): - requires_backends(get_polynomial_decay_schedule_with_warmup, ["torch", "accelerate"]) - - -def get_scheduler(*args, **kwargs): - requires_backends(get_scheduler, ["torch", "accelerate"]) - - -class DiffusionPipeline(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class DanceDiffusionPipeline(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class DDIMPipeline(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class DDPMPipeline(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class KarrasVePipeline(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class LDMPipeline(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class PNDMPipeline(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class RePaintPipeline(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class ScoreSdeVePipeline(metaclass=DummyObject): - _backends = 
["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class DDIMScheduler(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class DDPMScheduler(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class EulerAncestralDiscreteScheduler(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class EulerDiscreteScheduler(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class IPNDMScheduler(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class KarrasVeScheduler(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class PNDMScheduler(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class RePaintScheduler(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", 
"accelerate"]) - - -class SchedulerMixin(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class ScoreSdeVeScheduler(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class VQDiffusionScheduler(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - -class EMAModel(metaclass=DummyObject): - _backends = ["torch", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch", "accelerate"]) - - @classmethod - def from_config(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["torch", "accelerate"]) diff --git a/src/diffusers/utils/import_utils.py b/src/diffusers/utils/import_utils.py index 4ea02dcc94..005cbb6170 100644 --- a/src/diffusers/utils/import_utils.py +++ b/src/diffusers/utils/import_utils.py @@ -15,11 +15,14 @@ Import utilities: Utilities related to imports and our lazy inits. """ import importlib.util +import operator as op import os import sys from collections import OrderedDict +from typing import Union from packaging import version +from packaging.version import Version, parse from . import logging @@ -40,6 +43,8 @@ USE_TF = os.environ.get("USE_TF", "AUTO").upper() USE_TORCH = os.environ.get("USE_TORCH", "AUTO").upper() USE_JAX = os.environ.get("USE_FLAX", "AUTO").upper() +STR_OPERATION_TO_FUNC = {">": op.gt, ">=": op.ge, "==": op.eq, "!=": op.ne, "<=": op.le, "<": op.lt} + _torch_version = "N/A" if USE_TORCH in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TF not in ENV_VARS_TRUE_VALUES: _torch_available = importlib.util.find_spec("torch") is not None @@ -309,3 +314,36 @@ class DummyObject(type): if key.startswith("_"): return super().__getattr__(cls, key) requires_backends(cls, cls._backends) + + +# This function was copied from: https://github.com/huggingface/accelerate/blob/874c4967d94badd24f893064cc3bef45f57cadf7/src/accelerate/utils/versions.py#L319 +def compare_versions(library_or_version: Union[str, Version], operation: str, requirement_version: str): + """ + Args: + Compares a library version to some requirement using a given operation. + library_or_version (`str` or `packaging.version.Version`): + A library name or a version to check. + operation (`str`): + A string representation of an operator, such as `">"` or `"<="`. 
+ requirement_version (`str`): + The version to compare the library version against + """ + if operation not in STR_OPERATION_TO_FUNC.keys(): + raise ValueError(f"`operation` must be one of {list(STR_OPERATION_TO_FUNC.keys())}, received {operation}") + operation = STR_OPERATION_TO_FUNC[operation] + if isinstance(library_or_version, str): + library_or_version = parse(importlib_metadata.version(library_or_version)) + return operation(library_or_version, parse(requirement_version)) + + +# This function was copied from: https://github.com/huggingface/accelerate/blob/874c4967d94badd24f893064cc3bef45f57cadf7/src/accelerate/utils/versions.py#L338 +def is_torch_version(operation: str, version: str): + """ + Args: + Compares the current PyTorch version to a given reference with an operation. + operation (`str`): + A string representation of an operator, such as `">"` or `"<="` + version (`str`): + A string version of PyTorch + """ + return compare_versions(parse(_torch_version), operation, version) diff --git a/tests/repo_utils/test_check_dummies.py b/tests/repo_utils/test_check_dummies.py index 0331b5e8c2..d8fa9ce105 100644 --- a/tests/repo_utils/test_check_dummies.py +++ b/tests/repo_utils/test_check_dummies.py @@ -52,13 +52,13 @@ class CheckDummiesTester(unittest.TestCase): def test_read_init(self): objects = read_init() # We don't assert on the exact list of keys to allow for smooth grow of backend-specific objects - self.assertIn("torch_and_accelerate", objects) + self.assertIn("torch", objects) self.assertIn("torch_and_transformers", objects) self.assertIn("flax_and_transformers", objects) self.assertIn("torch_and_transformers_and_onnx", objects) # Likewise, we can't assert on the exact content of a key - self.assertIn("UNet2DModel", objects["torch_and_accelerate"]) + self.assertIn("UNet2DModel", objects["torch"]) self.assertIn("FlaxUNet2DConditionModel", objects["flax"]) self.assertIn("StableDiffusionPipeline", objects["torch_and_transformers"]) self.assertIn("FlaxStableDiffusionPipeline", objects["flax_and_transformers"]) From af7b1c3bf233346e9bf100d0dc6fc5f752bc5cca Mon Sep 17 00:00:00 2001 From: webbigdata-jp <87654083+webbigdata-jp@users.noreply.github.com> Date: Sat, 5 Nov 2022 00:45:58 +0900 Subject: [PATCH 32/88] fix 404 link in example/README.mb (#1136) fix 404 link in README.mb --- examples/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/README.md b/examples/README.md index 56b1b81b90..29872a7a16 100644 --- a/examples/README.md +++ b/examples/README.md @@ -38,9 +38,9 @@ Training examples show how to pretrain or fine-tune diffusion models for a varie | Task | 🤗 Accelerate | 🤗 Datasets | Colab |---|---|:---:|:---:| -| [**Unconditional Image Generation**](./unconditional_training) | ✅ | ✅ | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/training_example.ipynb) -| [**Text-to-Image fine-tuning**](./text2image) | ✅ | ✅ | -| [**Textual Inversion**](./text_inversion) | ✅ | - | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_textual_inversion_training.ipynb) +| [**Unconditional Image Generation**](./unconditional_image_generation) | ✅ | ✅ | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/training_example.ipynb) +| 
[**Text-to-Image fine-tuning**](./text_to_image) | ✅ | ✅ | +| [**Textual Inversion**](./textual_inversion) | ✅ | - | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_textual_inversion_training.ipynb) | [**Dreambooth**](./dreambooth) | ✅ | - | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_dreambooth_training.ipynb) From 2c108693cc20bf5f1a134d09a8cea1c49122d56e Mon Sep 17 00:00:00 2001 From: Lewington-pitsos Date: Sat, 5 Nov 2022 03:54:01 +1100 Subject: [PATCH 33/88] Test precision increases (#1113) * increase the precision of slice-based tests and make the default test case easier to single out * increase precision of unit tests which already rely on float comparisons Co-authored-by: Patrick von Platen --- .../stable_diffusion/test_stable_diffusion.py | 12 +++--- .../test_stable_diffusion_img2img.py | 38 +++++++++---------- 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py index b01094a607..89fac46e74 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py @@ -34,7 +34,7 @@ from diffusers import ( UNet2DModel, VQModel, ) -from diffusers.utils import floats_tensor, load_image, slow, torch_device +from diffusers.utils import floats_tensor, load_numpy, slow, torch_device from diffusers.utils.testing_utils import require_torch_gpu from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer @@ -753,12 +753,10 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase): # however, they should be extremely close. 
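Storing the reference output as a float32 `.npy` array rather than a PNG is what makes the tighter `1e-3` tolerance meaningful: an 8-bit PNG can only represent the expected image to within roughly `1/255 ≈ 4e-3` per channel. A small sketch of the comparison pattern (the arrays here are synthetic stand-ins):

```python
import numpy as np

expected = np.random.rand(512, 512, 3).astype(np.float32)  # stands in for load_numpy(...)
generated = expected + np.float32(5e-4)                     # stands in for a pipeline output
assert np.abs(expected - generated).max() < 1e-3
```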
assert diff.mean() < 2e-2 - def test_stable_diffusion_text2img_pipeline(self): - expected_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/text2img/astronaut_riding_a_horse.png" + def test_stable_diffusion_text2img_pipeline_default(self): + expected_image = load_numpy( + "https://huggingface.co/datasets/lewington/expected-images/resolve/main/astronaut_riding_a_horse.npy" ) - expected_image = np.array(expected_image, dtype=np.float32) / 255.0 model_id = "CompVis/stable-diffusion-v1-4" pipe = StableDiffusionPipeline.from_pretrained(model_id, safety_checker=None) @@ -773,7 +771,7 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase): image = output.images[0] assert image.shape == (512, 512, 3) - assert np.abs(expected_image - image).max() < 1e-2 + assert np.abs(expected_image - image).max() < 1e-3 def test_stable_diffusion_text2img_intermediate_state(self): number_of_steps = 0 diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py index f5a92fac87..ca8bc191d2 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py @@ -29,7 +29,7 @@ from diffusers import ( UNet2DModel, VQModel, ) -from diffusers.utils import floats_tensor, load_image, slow, torch_device +from diffusers.utils import floats_tensor, load_image, load_numpy, slow, torch_device from diffusers.utils.testing_utils import require_torch_gpu from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer @@ -156,7 +156,7 @@ class StableDiffusionImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.Test return extract - def test_stable_diffusion_img2img(self): + def test_stable_diffusion_img2img_default_case(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator unet = self.dummy_cond_unet scheduler = PNDMScheduler(skip_prk_steps=True) @@ -208,8 +208,8 @@ class StableDiffusionImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.Test assert image.shape == (1, 32, 32, 3) expected_slice = np.array([0.4492, 0.3865, 0.4222, 0.5854, 0.5139, 0.4379, 0.4193, 0.48, 0.4218]) - assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 - assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-3 def test_stable_diffusion_img2img_negative_prompt(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator @@ -251,7 +251,7 @@ class StableDiffusionImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.Test assert image.shape == (1, 32, 32, 3) expected_slice = np.array([0.4065, 0.3783, 0.4050, 0.5266, 0.4781, 0.4252, 0.4203, 0.4692, 0.4365]) - assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 def test_stable_diffusion_img2img_multiple_init_images(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator @@ -293,7 +293,7 @@ class StableDiffusionImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.Test assert image.shape == (2, 32, 32, 3) expected_slice = np.array([0.5144, 0.4447, 0.4735, 0.6676, 0.5526, 0.5454, 0.645, 0.5149, 0.4689]) - assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + assert np.abs(image_slice.flatten() - 
expected_slice).max() < 1e-3 def test_stable_diffusion_img2img_k_lms(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator @@ -348,8 +348,8 @@ class StableDiffusionImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.Test assert image.shape == (1, 32, 32, 3) expected_slice = np.array([0.4367, 0.4986, 0.4372, 0.6706, 0.5665, 0.444, 0.5864, 0.6019, 0.5203]) - assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 - assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-3 def test_stable_diffusion_img2img_num_images_per_prompt(self): device = "cpu" @@ -472,17 +472,15 @@ class StableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase): gc.collect() torch.cuda.empty_cache() - def test_stable_diffusion_img2img_pipeline(self): + def test_stable_diffusion_img2img_pipeline_default(self): init_image = load_image( "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/img2img/sketch-mountains-input.jpg" ) - expected_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/img2img/fantasy_landscape.png" - ) init_image = init_image.resize((768, 512)) - expected_image = np.array(expected_image, dtype=np.float32) / 255.0 + expected_image = load_numpy( + "https://huggingface.co/datasets/lewington/expected-images/resolve/main/fantasy_landscape.npy" + ) model_id = "CompVis/stable-diffusion-v1-4" pipe = StableDiffusionImg2ImgPipeline.from_pretrained( @@ -508,19 +506,17 @@ class StableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase): assert image.shape == (512, 768, 3) # img2img is flaky across GPUs even in fp32, so using MAE here - assert np.abs(expected_image - image).mean() < 1e-2 + assert np.abs(expected_image - image).mean() < 1e-3 def test_stable_diffusion_img2img_pipeline_k_lms(self): init_image = load_image( "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/img2img/sketch-mountains-input.jpg" ) - expected_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/img2img/fantasy_landscape_k_lms.png" - ) init_image = init_image.resize((768, 512)) - expected_image = np.array(expected_image, dtype=np.float32) / 255.0 + expected_image = load_numpy( + "https://huggingface.co/datasets/lewington/expected-images/resolve/main/fantasy_landscape_k_lms.npy" + ) model_id = "CompVis/stable-diffusion-v1-4" lms = LMSDiscreteScheduler.from_config(model_id, subfolder="scheduler") @@ -548,7 +544,7 @@ class StableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase): assert image.shape == (512, 768, 3) # img2img is flaky across GPUs even in fp32, so using MAE here - assert np.abs(expected_image - image).mean() < 1e-2 + assert np.abs(expected_image - image).mean() < 1e-3 def test_stable_diffusion_img2img_intermediate_state(self): number_of_steps = 0 From 5b20d3b3d7ca38c298bc8425897efbc3b4aece6f Mon Sep 17 00:00:00 2001 From: Chenguo Lin Date: Sat, 5 Nov 2022 01:05:19 +0800 Subject: [PATCH 34/88] fix the parameter naming in `self.downsamplers` (#1108) Co-authored-by: Patrick von Platen --- src/diffusers/models/unet_2d_blocks.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/diffusers/models/unet_2d_blocks.py b/src/diffusers/models/unet_2d_blocks.py index 4132ccbd0c..770043f053 100644 
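The `self.downsamplers` fix that follows is channel bookkeeping: inside each down block the resnets have already mapped `in_channels` to `out_channels`, so the `Downsample2D` built after them must be constructed with `out_channels`. A toy illustration of the same idea (layer types and shapes are hypothetical, not the library's blocks):

```python
import torch
from torch import nn

in_channels, out_channels = 64, 128
resnet = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)   # stand-in for the resnets
downsample = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=2, padding=1)

x = torch.randn(1, in_channels, 32, 32)
x = resnet(x)        # -> (1, 128, 32, 32): the channel count changes here
x = downsample(x)    # -> (1, 128, 16, 16): so the downsampler must expect 128, not 64
```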
--- a/src/diffusers/models/unet_2d_blocks.py +++ b/src/diffusers/models/unet_2d_blocks.py @@ -462,7 +462,7 @@ class AttnDownBlock2D(nn.Module): self.downsamplers = nn.ModuleList( [ Downsample2D( - in_channels, use_conv=True, out_channels=out_channels, padding=downsample_padding, name="op" + out_channels, use_conv=True, out_channels=out_channels, padding=downsample_padding, name="op" ) ] ) @@ -546,7 +546,7 @@ class CrossAttnDownBlock2D(nn.Module): self.downsamplers = nn.ModuleList( [ Downsample2D( - in_channels, use_conv=True, out_channels=out_channels, padding=downsample_padding, name="op" + out_channels, use_conv=True, out_channels=out_channels, padding=downsample_padding, name="op" ) ] ) @@ -651,7 +651,7 @@ class DownBlock2D(nn.Module): self.downsamplers = nn.ModuleList( [ Downsample2D( - in_channels, use_conv=True, out_channels=out_channels, padding=downsample_padding, name="op" + out_channels, use_conv=True, out_channels=out_channels, padding=downsample_padding, name="op" ) ] ) @@ -729,7 +729,7 @@ class DownEncoderBlock2D(nn.Module): self.downsamplers = nn.ModuleList( [ Downsample2D( - in_channels, use_conv=True, out_channels=out_channels, padding=downsample_padding, name="op" + out_channels, use_conv=True, out_channels=out_channels, padding=downsample_padding, name="op" ) ] ) @@ -801,7 +801,7 @@ class AttnDownEncoderBlock2D(nn.Module): self.downsamplers = nn.ModuleList( [ Downsample2D( - in_channels, use_conv=True, out_channels=out_channels, padding=downsample_padding, name="op" + out_channels, use_conv=True, out_channels=out_channels, padding=downsample_padding, name="op" ) ] ) @@ -886,7 +886,7 @@ class AttnSkipDownBlock2D(nn.Module): down=True, kernel="fir", ) - self.downsamplers = nn.ModuleList([FirDownsample2D(in_channels, out_channels=out_channels)]) + self.downsamplers = nn.ModuleList([FirDownsample2D(out_channels, out_channels=out_channels)]) self.skip_conv = nn.Conv2d(3, out_channels, kernel_size=(1, 1), stride=(1, 1)) else: self.resnet_down = None @@ -966,7 +966,7 @@ class SkipDownBlock2D(nn.Module): down=True, kernel="fir", ) - self.downsamplers = nn.ModuleList([FirDownsample2D(in_channels, out_channels=out_channels)]) + self.downsamplers = nn.ModuleList([FirDownsample2D(out_channels, out_channels=out_channels)]) self.skip_conv = nn.Conv2d(3, out_channels, kernel_size=(1, 1), stride=(1, 1)) else: self.resnet_down = None From a4802294630b823125a836c8d02b83aac13eab6a Mon Sep 17 00:00:00 2001 From: SkyTNT Date: Sat, 5 Nov 2022 01:38:37 +0800 Subject: [PATCH 35/88] [Community Pipeline] lpw_stable_diffusion: add xformers_memory_efficient_attention and sequential_cpu_offload (#1130) lpw_stable_diffusion: xformers and cpu_offload --- examples/community/lpw_stable_diffusion.py | 60 +++++++++++++++++-- .../community/lpw_stable_diffusion_onnx.py | 2 +- 2 files changed, 56 insertions(+), 6 deletions(-) diff --git a/examples/community/lpw_stable_diffusion.py b/examples/community/lpw_stable_diffusion.py index 74aed2fec8..39a266dec9 100644 --- a/examples/community/lpw_stable_diffusion.py +++ b/examples/community/lpw_stable_diffusion.py @@ -12,7 +12,7 @@ from diffusers.pipeline_utils import DiffusionPipeline from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler -from diffusers.utils import deprecate, logging +from diffusers.utils import deprecate, is_accelerate_available, logging from 
transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer @@ -340,13 +340,15 @@ def get_weighted_text_embeddings( # assign weights to the prompts and normalize in the sense of mean # TODO: should we normalize by chunk or in a whole (current implementation)? if (not skip_parsing) and (not skip_weighting): - previous_mean = text_embeddings.mean(axis=[-2, -1]) + previous_mean = text_embeddings.float().mean(axis=[-2, -1]).to(text_embeddings.dtype) text_embeddings *= prompt_weights.unsqueeze(-1) - text_embeddings *= (previous_mean / text_embeddings.mean(axis=[-2, -1])).unsqueeze(-1).unsqueeze(-1) + current_mean = text_embeddings.float().mean(axis=[-2, -1]).to(text_embeddings.dtype) + text_embeddings *= (previous_mean / current_mean).unsqueeze(-1).unsqueeze(-1) if uncond_prompt is not None: - previous_mean = uncond_embeddings.mean(axis=[-2, -1]) + previous_mean = uncond_embeddings.float().mean(axis=[-2, -1]).to(uncond_embeddings.dtype) uncond_embeddings *= uncond_weights.unsqueeze(-1) - uncond_embeddings *= (previous_mean / uncond_embeddings.mean(axis=[-2, -1])).unsqueeze(-1).unsqueeze(-1) + current_mean = uncond_embeddings.float().mean(axis=[-2, -1]).to(uncond_embeddings.dtype) + uncond_embeddings *= (previous_mean / current_mean).unsqueeze(-1).unsqueeze(-1) if uncond_prompt is not None: return text_embeddings, uncond_embeddings @@ -431,6 +433,19 @@ class StableDiffusionLongPromptWeightingPipeline(DiffusionPipeline): new_config["steps_offset"] = 1 scheduler._internal_dict = FrozenDict(new_config) + if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + deprecation_message = ( + f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." + " `clip_sample` should be set to False in the configuration file. Please make sure to update the" + " config accordingly as not setting `clip_sample` in the config might lead to incorrect results in" + " future versions. If you have downloaded this checkpoint from the Hugging Face Hub, it would be very" + " nice if you could open a Pull request for the `scheduler/scheduler_config.json` file" + ) + deprecate("clip_sample not set", "1.0.0", deprecation_message, standard_warn=False) + new_config = dict(scheduler.config) + new_config["clip_sample"] = False + scheduler._internal_dict = FrozenDict(new_config) + if safety_checker is None: logger.warn( f"You have disabled the safety checker for {self.__class__} by passing `safety_checker=None`. Ensure" @@ -451,6 +466,24 @@ class StableDiffusionLongPromptWeightingPipeline(DiffusionPipeline): feature_extractor=feature_extractor, ) + def enable_xformers_memory_efficient_attention(self): + r""" + Enable memory efficient attention as implemented in xformers. + + When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference + time. Speed up at training time is not guaranteed. + + Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention + is used. + """ + self.unet.set_use_memory_efficient_attention_xformers(True) + + def disable_xformers_memory_efficient_attention(self): + r""" + Disable memory efficient attention as implemented in xformers. + """ + self.unet.set_use_memory_efficient_attention_xformers(False) + def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): r""" Enable sliced attention computation. 
@@ -478,6 +511,23 @@ class StableDiffusionLongPromptWeightingPipeline(DiffusionPipeline): # set slice_size = `None` to disable `attention slicing` self.enable_attention_slicing(None) + def enable_sequential_cpu_offload(self): + r""" + Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, + text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a + `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called. + """ + if is_accelerate_available(): + from accelerate import cpu_offload + else: + raise ImportError("Please install accelerate via `pip install accelerate`") + + device = self.device + + for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.safety_checker]: + if cpu_offloaded_model is not None: + cpu_offload(cpu_offloaded_model, device) + @torch.no_grad() def __call__( self, diff --git a/examples/community/lpw_stable_diffusion_onnx.py b/examples/community/lpw_stable_diffusion_onnx.py index 69b942f9ef..12e306a612 100644 --- a/examples/community/lpw_stable_diffusion_onnx.py +++ b/examples/community/lpw_stable_diffusion_onnx.py @@ -701,7 +701,7 @@ class OnnxStableDiffusionLongPromptWeightingPipeline(DiffusionPipeline): clip_input=safety_checker_input[i : i + 1], images=image[i : i + 1] ) images.append(image_i) - has_nsfw_concept.append(has_nsfw_concept_i) + has_nsfw_concept.append(has_nsfw_concept_i[0]) image = np.concatenate(images) else: has_nsfw_concept = None From 2fcae69f2a181960c37be8e3081b9da0f2dc3b21 Mon Sep 17 00:00:00 2001 From: Anton Lozhkov Date: Fri, 4 Nov 2022 19:06:24 +0100 Subject: [PATCH 36/88] Bump to 0.8.0.dev0 (#1131) * Bump to 0.8.0.dev0 * deprecate int timesteps * style --- setup.py | 2 +- src/diffusers/__init__.py | 2 +- .../scheduling_euler_ancestral_discrete.py | 16 ++------- .../schedulers/scheduling_euler_discrete.py | 16 ++------- .../schedulers/scheduling_lms_discrete.py | 33 ++----------------- 5 files changed, 9 insertions(+), 60 deletions(-) diff --git a/setup.py b/setup.py index 16b64fe81a..1bb6af4b10 100644 --- a/setup.py +++ b/setup.py @@ -210,7 +210,7 @@ install_requires = [ setup( name="diffusers", - version="0.7.0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots) + version="0.8.0.dev0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots) description="Diffusers", long_description=open("README.md", "r", encoding="utf-8").read(), long_description_content_type="text/markdown", diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index e4a69641d5..9c954c3816 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -9,7 +9,7 @@ from .utils import ( ) -__version__ = "0.7.0" +__version__ = "0.8.0.dev0" from .configuration_utils import ConfigMixin from .onnx_utils import OnnxRuntimeModel diff --git a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py index 134b45a73b..fe45b3d591 100644 --- a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py @@ -19,7 +19,7 @@ import numpy as np import torch from ..configuration_utils import ConfigMixin, register_to_config -from ..utils import BaseOutput, deprecate, logging +from ..utils import BaseOutput, logging from .scheduling_utils import SchedulerMixin @@ -253,19 +253,7 @@ class 
EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin): timesteps = timesteps.to(original_samples.device) schedule_timesteps = self.timesteps - - if isinstance(timesteps, torch.IntTensor) or isinstance(timesteps, torch.LongTensor): - deprecate( - "timesteps as indices", - "0.8.0", - "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to" - " `EulerAncestralDiscreteScheduler.add_noise()` will not be supported in future versions. Make sure to" - " pass values from `scheduler.timesteps` as timesteps.", - standard_warn=False, - ) - step_indices = timesteps - else: - step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps] + step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps] sigma = self.sigmas[step_indices].flatten() while len(sigma.shape) < len(original_samples.shape): diff --git a/src/diffusers/schedulers/scheduling_euler_discrete.py b/src/diffusers/schedulers/scheduling_euler_discrete.py index 6425072ac3..0cb31a4512 100644 --- a/src/diffusers/schedulers/scheduling_euler_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_discrete.py @@ -19,7 +19,7 @@ import numpy as np import torch from ..configuration_utils import ConfigMixin, register_to_config -from ..utils import BaseOutput, deprecate, logging +from ..utils import BaseOutput, logging from .scheduling_utils import SchedulerMixin @@ -262,19 +262,7 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin): timesteps = timesteps.to(original_samples.device) schedule_timesteps = self.timesteps - - if isinstance(timesteps, torch.IntTensor) or isinstance(timesteps, torch.LongTensor): - deprecate( - "timesteps as indices", - "0.8.0", - "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to" - " `EulerDiscreteScheduler.add_noise()` will not be supported in future versions. Make sure to" - " pass values from `scheduler.timesteps` as timesteps.", - standard_warn=False, - ) - step_indices = timesteps - else: - step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps] + step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps] sigma = self.sigmas[step_indices].flatten() while len(sigma.shape) < len(original_samples.shape): diff --git a/src/diffusers/schedulers/scheduling_lms_discrete.py b/src/diffusers/schedulers/scheduling_lms_discrete.py index 43e577b409..8d633267c6 100644 --- a/src/diffusers/schedulers/scheduling_lms_discrete.py +++ b/src/diffusers/schedulers/scheduling_lms_discrete.py @@ -21,7 +21,7 @@ import torch from scipy import integrate from ..configuration_utils import ConfigMixin, register_to_config -from ..utils import BaseOutput, deprecate +from ..utils import BaseOutput from .scheduling_utils import SchedulerMixin @@ -211,22 +211,7 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin): if isinstance(timestep, torch.Tensor): timestep = timestep.to(self.timesteps.device) - if ( - isinstance(timestep, int) - or isinstance(timestep, torch.IntTensor) - or isinstance(timestep, torch.LongTensor) - ): - deprecate( - "timestep as an index", - "0.8.0", - "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to" - " `LMSDiscreteScheduler.step()` will not be supported in future versions. Make sure to pass" - " one of the `scheduler.timesteps` as a timestep.", - standard_warn=False, - ) - step_index = timestep - else: - step_index = (self.timesteps == timestep).nonzero().item() + step_index = (self.timesteps == timestep).nonzero().item() sigma = self.sigmas[step_index] # 1. 
compute predicted original sample (x_0) from sigma-scaled predicted noise @@ -269,19 +254,7 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin): timesteps = timesteps.to(original_samples.device) schedule_timesteps = self.timesteps - - if isinstance(timesteps, torch.IntTensor) or isinstance(timesteps, torch.LongTensor): - deprecate( - "timesteps as indices", - "0.8.0", - "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to" - " `LMSDiscreteScheduler.add_noise()` will not be supported in future versions. Make sure to" - " pass values from `scheduler.timesteps` as timesteps.", - standard_warn=False, - ) - step_indices = timesteps - else: - step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps] + step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps] sigma = self.sigmas[step_indices].flatten() while len(sigma.shape) < len(original_samples.shape): From 1172c9634b4a32d6e82301e3d59ce17005e13e85 Mon Sep 17 00:00:00 2001 From: Pi Esposito Date: Fri, 4 Nov 2022 15:25:28 -0300 Subject: [PATCH 37/88] add enable sequential cpu offloading to other stable diffusion pipelines (#1085) * add enable sequential cpu offloading to other stable diffusion pipelines * trigger ci * fix styling * interpolate before converting to device to avoid breking when cpu_offload is enabled with fp16 Co-authored-by: Pedro Gengo * style again I need to stop forgething this thing * fix inpainting bug that could cause device misalignment Co-authored-by: Pedro Gengo * Apply suggestions from code review Co-authored-by: Pedro Gengo Co-authored-by: Patrick von Platen --- .../pipeline_stable_diffusion_img2img.py | 18 +++++++ .../pipeline_stable_diffusion_inpaint.py | 28 ++++++++++- .../stable_diffusion/test_stable_diffusion.py | 2 +- .../test_stable_diffusion_img2img.py | 45 ++++++++++++++++++ .../test_stable_diffusion_inpaint.py | 47 ++++++++++++++++++- 5 files changed, 136 insertions(+), 4 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 8284bac850..08b14b36be 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -5,6 +5,7 @@ import numpy as np import torch import PIL +from diffusers.utils import is_accelerate_available from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer from ...configuration_utils import FrozenDict @@ -151,6 +152,23 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): # set slice_size = `None` to disable `set_attention_slice` self.enable_attention_slicing(None) + def enable_sequential_cpu_offload(self): + r""" + Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, + text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a + `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called. 
+ """ + if is_accelerate_available(): + from accelerate import cpu_offload + else: + raise ImportError("Please install accelerate via `pip install accelerate`") + + device = torch.device("cuda") + + for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.safety_checker]: + if cpu_offloaded_model is not None: + cpu_offload(cpu_offloaded_model, device) + def enable_xformers_memory_efficient_attention(self): r""" Enable memory efficient attention as implemented in xformers. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index c200892ef6..34e8231c63 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -5,6 +5,7 @@ import numpy as np import torch import PIL +from diffusers.utils import is_accelerate_available from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer from ...configuration_utils import FrozenDict @@ -151,6 +152,23 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): # set slice_size = `None` to disable `attention slicing` self.enable_attention_slicing(None) + def enable_sequential_cpu_offload(self): + r""" + Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, + text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a + `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called. + """ + if is_accelerate_available(): + from accelerate import cpu_offload + else: + raise ImportError("Please install accelerate via `pip install accelerate`") + + device = torch.device("cuda") + + for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.safety_checker]: + if cpu_offloaded_model is not None: + cpu_offload(cpu_offloaded_model, device) + def enable_xformers_memory_efficient_attention(self): r""" Enable memory efficient attention as implemented in xformers. 
@@ -361,11 +379,14 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): # prepare mask and masked_image mask, masked_image = prepare_mask_and_masked_image(image, mask_image) - mask = mask.to(device=self.device, dtype=text_embeddings.dtype) - masked_image = masked_image.to(device=self.device, dtype=text_embeddings.dtype) # resize the mask to latents shape as we concatenate the mask to the latents + # we do that before converting to dtype to avoid breaking in case we're using cpu_offload + # and half precision mask = torch.nn.functional.interpolate(mask, size=(height // 8, width // 8)) + mask = mask.to(device=self.device, dtype=text_embeddings.dtype) + + masked_image = masked_image.to(device=self.device, dtype=text_embeddings.dtype) # encode the mask image into latents space so we can concatenate it to the latents masked_image_latents = self.vae.encode(masked_image).latent_dist.sample(generator=generator) @@ -380,6 +401,9 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): torch.cat([masked_image_latents] * 2) if do_classifier_free_guidance else masked_image_latents ) + # aligning device to prevent device errors when concating it with the latent model input + masked_image_latents = masked_image_latents.to(device=self.device, dtype=text_embeddings.dtype) + num_channels_mask = mask.shape[1] num_channels_masked_image = masked_image_latents.shape[1] diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py index 89fac46e74..a83299eaf9 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py @@ -840,7 +840,7 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase): assert 2 * low_cpu_mem_usage_time < normal_load_time @unittest.skipIf(torch_device == "cpu", "This test is supposed to run on GPU") - def test_stable_diffusion_pipeline_with_unet_on_gpu_only(self): + def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self): torch.cuda.empty_cache() torch.cuda.reset_max_memory_allocated() diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py index ca8bc191d2..2d29e1b806 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py @@ -599,3 +599,48 @@ class StableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase): ) assert test_callback_fn.has_been_called assert number_of_steps == 38 + + def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self): + torch.cuda.empty_cache() + torch.cuda.reset_max_memory_allocated() + + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/img2img/sketch-mountains-input.jpg" + ) + expected_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/img2img/fantasy_landscape_k_lms.png" + ) + init_image = init_image.resize((768, 512)) + expected_image = np.array(expected_image, dtype=np.float32) / 255.0 + + model_id = "CompVis/stable-diffusion-v1-4" + lms = LMSDiscreteScheduler.from_config(model_id, subfolder="scheduler") + pipe = StableDiffusionImg2ImgPipeline.from_pretrained( + model_id, + scheduler=lms, + safety_checker=None, + device_map="auto", + ) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing(1) + 
pipe.enable_sequential_cpu_offload() + + prompt = "A fantasy landscape, trending on artstation" + + generator = torch.Generator(device=torch_device).manual_seed(0) + _ = pipe( + prompt=prompt, + init_image=init_image, + strength=0.75, + guidance_scale=7.5, + generator=generator, + output_type="np", + num_inference_steps=5, + ) + + mem_bytes = torch.cuda.max_memory_allocated() + # make sure that less than 1.5 GB is allocated + assert mem_bytes < 1.5 * 10**9 diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py index 44a7a3249a..e8dcb43163 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py @@ -378,4 +378,49 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): image = output.images[0] assert image.shape == (512, 512, 3) - assert np.abs(expected_image - image).max() < 1e-3 + assert np.abs(expected_image - image).max() < 1e-2 + + @unittest.skipIf(torch_device == "cpu", "This test is supposed to run on GPU") + def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self): + torch.cuda.empty_cache() + torch.cuda.reset_max_memory_allocated() + + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/in_paint/overture-creations-5sI6fQgYIuo.png" + ) + mask_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" + ) + expected_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/in_paint/yellow_cat_sitting_on_a_park_bench_pndm.png" + ) + expected_image = np.array(expected_image, dtype=np.float32) / 255.0 + + model_id = "runwayml/stable-diffusion-inpainting" + pndm = PNDMScheduler.from_config(model_id, subfolder="scheduler") + pipe = StableDiffusionInpaintPipeline.from_pretrained( + model_id, safety_checker=None, scheduler=pndm, device_map="auto" + ) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing(1) + pipe.enable_sequential_cpu_offload() + + prompt = "Face of a yellow cat, high resolution, sitting on a park bench" + + generator = torch.Generator(device=torch_device).manual_seed(0) + _ = pipe( + prompt=prompt, + image=init_image, + mask_image=mask_image, + generator=generator, + num_inference_steps=5, + output_type="np", + ) + + mem_bytes = torch.cuda.max_memory_allocated() + # make sure that less than 1.5 GB is allocated + assert mem_bytes < 1.5 * 10**9 From 9d8943b7e7361e8527fc662d9769707087c4bad6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Chen=20Wu=20=28=E5=90=B4=E5=B0=98=29?= Date: Fri, 4 Nov 2022 15:51:06 -0400 Subject: [PATCH 38/88] Add CycleDiffusion pipeline using Stable Diffusion (#888) * Add CycleDiffusion pipeline for Stable Diffusion * Add the option of passing noise to DDIMScheduler Add the option of providing the noise itself to DDIMScheduler, instead of the random seed generator. 
* Update README.md * Update README.md * Update pipeline_stable_diffusion_cycle_diffusion.py * Update pipeline_stable_diffusion_cycle_diffusion.py * Update pipeline_stable_diffusion_cycle_diffusion.py * Update pipeline_stable_diffusion_cycle_diffusion.py * Update scheduling_ddim.py * Update import format * Update pipeline_stable_diffusion_cycle_diffusion.py * Update scheduling_ddim.py * Update src/diffusers/schedulers/scheduling_ddim.py Co-authored-by: Patrick von Platen * Update src/diffusers/schedulers/scheduling_ddim.py Co-authored-by: Patrick von Platen * Update src/diffusers/schedulers/scheduling_ddim.py Co-authored-by: Patrick von Platen * Update src/diffusers/schedulers/scheduling_ddim.py Co-authored-by: Patrick von Platen * Update src/diffusers/schedulers/scheduling_ddim.py Co-authored-by: Patrick von Platen * Update scheduling_ddim.py * Update scheduling_ddim.py * Update scheduling_ddim.py * add two tests * Update pipeline_stable_diffusion_cycle_diffusion.py * Update pipeline_stable_diffusion_cycle_diffusion.py * Update README.md * Rename pipeline name as suggested in the latest reviewer comment * Update test_pipelines.py * Update test_pipelines.py * Update test_pipelines.py * Update pipeline_stable_diffusion_cycle_diffusion.py * Remove the generator This generator does not control all randomness during sampling, which can be misleading. * Update optimal hyperparameters * Update src/diffusers/pipelines/stable_diffusion/README.md Co-authored-by: Suraj Patil * Update src/diffusers/pipelines/stable_diffusion/README.md Co-authored-by: Suraj Patil * Update src/diffusers/pipelines/stable_diffusion/README.md Co-authored-by: Suraj Patil * Apply suggestions from code review * uP * Update src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_cycle_diffusion.py Co-authored-by: Suraj Patil * up * up * Replace assert with ValueError * finish docs Co-authored-by: Patrick von Platen Co-authored-by: Suraj Patil --- docs/source/_toctree.yml | 2 + docs/source/api/pipelines/cycle_diffusion.mdx | 99 ++++ docs/source/api/pipelines/overview.mdx | 29 +- docs/source/index.mdx | 1 + src/diffusers/__init__.py | 1 + src/diffusers/pipelines/__init__.py | 1 + .../pipelines/stable_diffusion/README.md | 71 +++ .../pipelines/stable_diffusion/__init__.py | 1 + .../pipeline_cycle_diffusion.py | 527 ++++++++++++++++++ src/diffusers/schedulers/scheduling_ddim.py | 17 +- .../dummy_torch_and_transformers_objects.py | 15 + .../stable_diffusion/test_cycle_diffusion.py | 348 ++++++++++++ 12 files changed, 1097 insertions(+), 15 deletions(-) create mode 100644 docs/source/api/pipelines/cycle_diffusion.mdx create mode 100644 src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py create mode 100644 tests/pipelines/stable_diffusion/test_cycle_diffusion.py diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index 70d64b80de..d8efb5eee3 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -78,6 +78,8 @@ - sections: - local: api/pipelines/overview title: "Overview" + - local: api/pipelines/cycle_diffusion + title: "Cycle Diffusion" - local: api/pipelines/ddim title: "DDIM" - local: api/pipelines/ddpm diff --git a/docs/source/api/pipelines/cycle_diffusion.mdx b/docs/source/api/pipelines/cycle_diffusion.mdx new file mode 100644 index 0000000000..50d2a5c87e --- /dev/null +++ b/docs/source/api/pipelines/cycle_diffusion.mdx @@ -0,0 +1,99 @@ + + +# Cycle Diffusion + +## Overview + +Cycle Diffusion is a Text-Guided Image-to-Image Generation model proposed in [Unifying Diffusion 
Models' Latent Space, with Applications to CycleDiffusion and Guidance](https://arxiv.org/abs/2210.05559) by Chen Henry Wu, Fernando De la Torre. + +The abstract of the paper is the following: + +*Diffusion models have achieved unprecedented performance in generative modeling. The commonly-adopted formulation of the latent code of diffusion models is a sequence of gradually denoised samples, as opposed to the simpler (e.g., Gaussian) latent space of GANs, VAEs, and normalizing flows. This paper provides an alternative, Gaussian formulation of the latent space of various diffusion models, as well as an invertible DPM-Encoder that maps images into the latent space. While our formulation is purely based on the definition of diffusion models, we demonstrate several intriguing consequences. (1) Empirically, we observe that a common latent space emerges from two diffusion models trained independently on related domains. In light of this finding, we propose CycleDiffusion, which uses DPM-Encoder for unpaired image-to-image translation. Furthermore, applying CycleDiffusion to text-to-image diffusion models, we show that large-scale text-to-image diffusion models can be used as zero-shot image-to-image editors. (2) One can guide pre-trained diffusion models and GANs by controlling the latent codes in a unified, plug-and-play formulation based on energy-based models. Using the CLIP model and a face recognition model as guidance, we demonstrate that diffusion models have better coverage of low-density sub-populations and individuals than GANs.* + +*Tips*: +- The Cycle Diffusion pipeline is fully compatible with any [Stable Diffusion](./stable_diffusion) checkpoints +- Currently Cycle Diffusion only works with the [`DDIMScheduler`]. + +*Example*: + +In the following we should how to best use the [`CycleDiffusionPipeline`] + +```python +import requests +import torch +from PIL import Image +from io import BytesIO + +from diffusers import CycleDiffusionPipeline, DDIMScheduler + +# load the pipeline +# make sure you're logged in with `huggingface-cli login` +model_id_or_path = "CompVis/stable-diffusion-v1-4" +scheduler = DDIMScheduler.from_config(model_id_or_path, subfolder="scheduler") +pipe = CycleDiffusionPipeline.from_pretrained(model_id_or_path, scheduler=scheduler).to("cuda") + +# let's download an initial image +url = "https://raw.githubusercontent.com/ChenWu98/cycle-diffusion/main/data/dalle2/An%20astronaut%20riding%20a%20horse.png" +response = requests.get(url) +init_image = Image.open(BytesIO(response.content)).convert("RGB") +init_image = init_image.resize((512, 512)) +init_image.save("horse.png") + +# let's specify a prompt +source_prompt = "An astronaut riding a horse" +prompt = "An astronaut riding an elephant" + +# call the pipeline +image = pipe( + prompt=prompt, + source_prompt=source_prompt, + init_image=init_image, + num_inference_steps=100, + eta=0.1, + strength=0.8, + guidance_scale=2, + source_guidance_scale=1, +).images[0] + +image.save("horse_to_elephant.png") + +# let's try another example +# See more samples at the original repo: https://github.com/ChenWu98/cycle-diffusion +url = "https://raw.githubusercontent.com/ChenWu98/cycle-diffusion/main/data/dalle2/A%20black%20colored%20car.png" +response = requests.get(url) +init_image = Image.open(BytesIO(response.content)).convert("RGB") +init_image = init_image.resize((512, 512)) +init_image.save("black.png") + +source_prompt = "A black colored car" +prompt = "A blue colored car" + +# call the pipeline +torch.manual_seed(0) +image = 
pipe( + prompt=prompt, + source_prompt=source_prompt, + init_image=init_image, + num_inference_steps=100, + eta=0.1, + strength=0.85, + guidance_scale=3, + source_guidance_scale=1, +).images[0] + +image.save("black_to_blue.png") +``` + +## CycleDiffusionPipeline +[[autodoc]] CycleDiffusionPipeline + - __call__ diff --git a/docs/source/api/pipelines/overview.mdx b/docs/source/api/pipelines/overview.mdx index 5a15473cf1..d68961a2fc 100644 --- a/docs/source/api/pipelines/overview.mdx +++ b/docs/source/api/pipelines/overview.mdx @@ -41,21 +41,24 @@ If you are looking for *official* training examples, please have a look at [exam The following table summarizes all officially supported pipelines, their corresponding paper, and if available a colab notebook to directly try them out. + | Pipeline | Paper | Tasks | Colab |---|---|:---:|:---:| -| [ddpm](./ddpm) | [**Denoising Diffusion Probabilistic Models**](https://arxiv.org/abs/2006.11239) | Unconditional Image Generation | -| [ddim](./ddim) | [**Denoising Diffusion Implicit Models**](https://arxiv.org/abs/2010.02502) | Unconditional Image Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/training_example.ipynb) -| [latent_diffusion](./latent_diffusion) | [**High-Resolution Image Synthesis with Latent Diffusion Models**](https://arxiv.org/abs/2112.10752)| Text-to-Image Generation | -| [latent_diffusion_uncond](./latent_diffusion_uncond) | [**High-Resolution Image Synthesis with Latent Diffusion Models**](https://arxiv.org/abs/2112.10752) | Unconditional Image Generation | -| [pndm](./pndm) | [**Pseudo Numerical Methods for Diffusion Models on Manifolds**](https://arxiv.org/abs/2202.09778) | Unconditional Image Generation | -| [score_sde_ve](./score_sde_ve) | [**Score-Based Generative Modeling through Stochastic Differential Equations**](https://openreview.net/forum?id=PxTIG12RRHS) | Unconditional Image Generation | -| [score_sde_vp](./score_sde_vp) | [**Score-Based Generative Modeling through Stochastic Differential Equations**](https://openreview.net/forum?id=PxTIG12RRHS) | Unconditional Image Generation | -| [stable_diffusion](./stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Text-to-Image Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/training_example.ipynb) -| [stable_diffusion](./stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Image-to-Image Text-Guided Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/image_2_image_using_diffusers.ipynb) -| [stable_diffusion](./stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Text-Guided Image Inpainting | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/in_painting_with_stable_diffusion_using_diffusers.ipynb) -| [stochastic_karras_ve](./stochastic_karras_ve) | [**Elucidating the Design Space of Diffusion-Based Generative Models**](https://arxiv.org/abs/2206.00364) | Unconditional Image Generation | -| [vq_diffusion](./vq_diffusion) | [**Vector Quantized Diffusion Model 
for Text-to-Image Synthesis**](https://arxiv.org/abs/2111.14822) | Text-to-Image Generation | -| [repaint](./repaint) | [**RePaint: Inpainting using Denoising Diffusion Probabilistic Models**](https://arxiv.org/abs/2201.09865) | Image Inpainting | +| [cycle_diffusion](./api/pipelines/cycle_diffusion) | [**Cycle Diffusion**](https://arxiv.org/abs/2210.05559) | Image-to-Image Text-Guided Generation | +| [dance_diffusion](./api/pipelines/dance_diffusion) | [**Dance Diffusion**](https://github.com/williamberman/diffusers.git) | Unconditional Audio Generation | +| [ddpm](./api/pipelines/ddpm) | [**Denoising Diffusion Probabilistic Models**](https://arxiv.org/abs/2006.11239) | Unconditional Image Generation | +| [ddim](./api/pipelines/ddim) | [**Denoising Diffusion Implicit Models**](https://arxiv.org/abs/2010.02502) | Unconditional Image Generation | +| [latent_diffusion](./api/pipelines/latent_diffusion) | [**High-Resolution Image Synthesis with Latent Diffusion Models**](https://arxiv.org/abs/2112.10752)| Text-to-Image Generation | +| [latent_diffusion_uncond](./api/pipelines/latent_diffusion_uncond) | [**High-Resolution Image Synthesis with Latent Diffusion Models**](https://arxiv.org/abs/2112.10752) | Unconditional Image Generation | +| [pndm](./api/pipelines/pndm) | [**Pseudo Numerical Methods for Diffusion Models on Manifolds**](https://arxiv.org/abs/2202.09778) | Unconditional Image Generation | +| [score_sde_ve](./api/pipelines/score_sde_ve) | [**Score-Based Generative Modeling through Stochastic Differential Equations**](https://openreview.net/forum?id=PxTIG12RRHS) | Unconditional Image Generation | +| [score_sde_vp](./api/pipelines/score_sde_vp) | [**Score-Based Generative Modeling through Stochastic Differential Equations**](https://openreview.net/forum?id=PxTIG12RRHS) | Unconditional Image Generation | +| [stable_diffusion](./api/pipelines/stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Text-to-Image Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/training_example.ipynb) +| [stable_diffusion](./api/pipelines/stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Image-to-Image Text-Guided Generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/image_2_image_using_diffusers.ipynb) +| [stable_diffusion](./api/pipelines/stable_diffusion) | [**Stable Diffusion**](https://stability.ai/blog/stable-diffusion-public-release) | Text-Guided Image Inpainting | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/in_painting_with_stable_diffusion_using_diffusers.ipynb) +| [stochastic_karras_ve](./api/pipelines/stochastic_karras_ve) | [**Elucidating the Design Space of Diffusion-Based Generative Models**](https://arxiv.org/abs/2206.00364) | Unconditional Image Generation | +| [vq_diffusion](./api/pipelines/vq_diffusion) | [Vector Quantized Diffusion Model for Text-to-Image Synthesis](https://arxiv.org/abs/2111.14822) | Text-to-Image Generation | + **Note**: Pipelines are simple examples of how to play around with the diffusion systems as described in the corresponding papers. 
diff --git a/docs/source/index.mdx b/docs/source/index.mdx index 62a3e88f17..bae507ac11 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -34,6 +34,7 @@ available a colab notebook to directly try them out. | Pipeline | Paper | Tasks | Colab |---|---|:---:|:---:| +| [cycle_diffusion](./api/pipelines/cycle_diffusion) | [**Cycle Diffusion**](https://arxiv.org/abs/2210.05559) | Image-to-Image Text-Guided Generation | | [dance_diffusion](./api/pipelines/dance_diffusion) | [**Dance Diffusion**](https://github.com/williamberman/diffusers.git) | Unconditional Audio Generation | | [ddpm](./api/pipelines/ddpm) | [**Denoising Diffusion Probabilistic Models**](https://arxiv.org/abs/2006.11239) | Unconditional Image Generation | | [ddim](./api/pipelines/ddim) | [**Denoising Diffusion Implicit Models**](https://arxiv.org/abs/2010.02502) | Unconditional Image Generation | diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index 9c954c3816..3f3f3b56a2 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -63,6 +63,7 @@ else: if is_torch_available() and is_transformers_available(): from .pipelines import ( + CycleDiffusionPipeline, LDMTextToImagePipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipeline, diff --git a/src/diffusers/pipelines/__init__.py b/src/diffusers/pipelines/__init__.py index bb3440b2bf..eb0635f6ee 100644 --- a/src/diffusers/pipelines/__init__.py +++ b/src/diffusers/pipelines/__init__.py @@ -16,6 +16,7 @@ else: if is_torch_available() and is_transformers_available(): from .latent_diffusion import LDMTextToImagePipeline from .stable_diffusion import ( + CycleDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipeline, StableDiffusionInpaintPipelineLegacy, diff --git a/src/diffusers/pipelines/stable_diffusion/README.md b/src/diffusers/pipelines/stable_diffusion/README.md index eaa441329a..a76e4c6682 100644 --- a/src/diffusers/pipelines/stable_diffusion/README.md +++ b/src/diffusers/pipelines/stable_diffusion/README.md @@ -103,3 +103,74 @@ image = pipe(prompt).sample[0] image.save("astronaut_rides_horse.png") ``` + +### CycleDiffusion using Stable Diffusion and DDIM scheduler + +```python +import requests +import torch +from PIL import Image +from io import BytesIO + +from diffusers import CycleDiffusionPipeline, DDIMScheduler + + +# load the scheduler. CycleDiffusion only supports stochastic schedulers. 
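+# ("stochastic" means the DDIM update keeps its noise term: CycleDiffusion recovers that
+#  per-step noise and reuses it, so the scheduler's `step()` must accept `eta` and `eta`
+#  must be strictly positive; otherwise the pipeline raises a ValueError)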
+ +# load the pipeline +# make sure you're logged in with `huggingface-cli login` +model_id_or_path = "CompVis/stable-diffusion-v1-4" +scheduler = DDIMScheduler.from_config(model_id_or_path, subfolder="scheduler") +pipe = CycleDiffusionPipeline.from_pretrained(model_id_or_path, scheduler=scheduler).to("cuda") + +# let's download an initial image +url = "https://raw.githubusercontent.com/ChenWu98/cycle-diffusion/main/data/dalle2/An%20astronaut%20riding%20a%20horse.png" +response = requests.get(url) +init_image = Image.open(BytesIO(response.content)).convert("RGB") +init_image = init_image.resize((512, 512)) +init_image.save("horse.png") + +# let's specify a prompt +source_prompt = "An astronaut riding a horse" +prompt = "An astronaut riding an elephant" + +# call the pipeline +image = pipe( + prompt=prompt, + source_prompt=source_prompt, + init_image=init_image, + num_inference_steps=100, + eta=0.1, + strength=0.8, + guidance_scale=2, + source_guidance_scale=1, +).images[0] + +image.save("horse_to_elephant.png") + +# let's try another example +# See more samples at the original repo: https://github.com/ChenWu98/cycle-diffusion +url = "https://raw.githubusercontent.com/ChenWu98/cycle-diffusion/main/data/dalle2/A%20black%20colored%20car.png" +response = requests.get(url) +init_image = Image.open(BytesIO(response.content)).convert("RGB") +init_image = init_image.resize((512, 512)) +init_image.save("black.png") + +source_prompt = "A black colored car" +prompt = "A blue colored car" + +# call the pipeline +torch.manual_seed(0) +image = pipe( + prompt=prompt, + source_prompt=source_prompt, + init_image=init_image, + num_inference_steps=100, + eta=0.1, + strength=0.85, + guidance_scale=3, + source_guidance_scale=1, +).images[0] + +image.save("black_to_blue.png") +``` diff --git a/src/diffusers/pipelines/stable_diffusion/__init__.py b/src/diffusers/pipelines/stable_diffusion/__init__.py index 5a452138b7..6623929f86 100644 --- a/src/diffusers/pipelines/stable_diffusion/__init__.py +++ b/src/diffusers/pipelines/stable_diffusion/__init__.py @@ -28,6 +28,7 @@ class StableDiffusionPipelineOutput(BaseOutput): if is_transformers_available() and is_torch_available(): + from .pipeline_cycle_diffusion import CycleDiffusionPipeline from .pipeline_stable_diffusion import StableDiffusionPipeline from .pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipeline from .pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipeline diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py new file mode 100644 index 0000000000..f38e57983f --- /dev/null +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py @@ -0,0 +1,527 @@ +import inspect +from typing import Callable, List, Optional, Union + +import numpy as np +import torch + +import PIL +from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer + +from ...configuration_utils import FrozenDict +from ...models import AutoencoderKL, UNet2DConditionModel +from ...pipeline_utils import DiffusionPipeline +from ...schedulers import DDIMScheduler +from ...utils import deprecate, logging +from . 
import StableDiffusionPipelineOutput +from .safety_checker import StableDiffusionSafetyChecker + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def preprocess(image): + w, h = image.size + w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32 + image = image.resize((w, h), resample=PIL.Image.LANCZOS) + image = np.array(image).astype(np.float32) / 255.0 + image = image[None].transpose(0, 3, 1, 2) + image = torch.from_numpy(image) + return 2.0 * image - 1.0 + + +def posterior_sample(scheduler, latents, timestep, clean_latents, eta): + # 1. get previous step value (=t-1) + prev_timestep = timestep - scheduler.config.num_train_timesteps // scheduler.num_inference_steps + + if prev_timestep <= 0: + return clean_latents + + # 2. compute alphas, betas + alpha_prod_t = scheduler.alphas_cumprod[timestep] + alpha_prod_t_prev = ( + scheduler.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else scheduler.final_alpha_cumprod + ) + + variance = scheduler._get_variance(timestep, prev_timestep) + std_dev_t = eta * variance ** (0.5) + + # direction pointing to x_t + e_t = (latents - alpha_prod_t ** (0.5) * clean_latents) / (1 - alpha_prod_t) ** (0.5) + dir_xt = (1.0 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * e_t + noise = std_dev_t * torch.randn(clean_latents.shape, dtype=clean_latents.dtype, device=clean_latents.device) + prev_latents = alpha_prod_t_prev ** (0.5) * clean_latents + dir_xt + noise + + return prev_latents + + +def compute_noise(scheduler, prev_latents, latents, timestep, noise_pred, eta): + # 1. get previous step value (=t-1) + prev_timestep = timestep - scheduler.config.num_train_timesteps // scheduler.num_inference_steps + + # 2. compute alphas, betas + alpha_prod_t = scheduler.alphas_cumprod[timestep] + alpha_prod_t_prev = ( + scheduler.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else scheduler.final_alpha_cumprod + ) + + beta_prod_t = 1 - alpha_prod_t + + # 3. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf + pred_original_sample = (latents - beta_prod_t ** (0.5) * noise_pred) / alpha_prod_t ** (0.5) + + # 4. Clip "predicted x_0" + if scheduler.config.clip_sample: + pred_original_sample = torch.clamp(pred_original_sample, -1, 1) + + # 5. compute variance: "sigma_t(η)" -> see formula (16) + # σ_t = sqrt((1 − α_t−1)/(1 − α_t)) * sqrt(1 − α_t/α_t−1) + variance = scheduler._get_variance(timestep, prev_timestep) + std_dev_t = eta * variance ** (0.5) + + # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf + pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * noise_pred + + noise = (prev_latents - (alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction)) / ( + variance ** (0.5) * eta + ) + return noise + + +class CycleDiffusionPipeline(DiffusionPipeline): + r""" + Pipeline for text-guided image to image generation using Stable Diffusion. + + This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the + library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) + + Args: + vae ([`AutoencoderKL`]): + Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations. + text_encoder ([`CLIPTextModel`]): + Frozen text-encoder. 
Stable Diffusion uses the text portion of + [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically + the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant. + tokenizer (`CLIPTokenizer`): + Tokenizer of class + [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). + unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. + scheduler ([`SchedulerMixin`]): + A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. + safety_checker ([`StableDiffusionSafetyChecker`]): + Classification module that estimates whether generated images could be considered offensive or harmful. + Please, refer to the [model card](https://huggingface.co/CompVis/stable-diffusion-v1-4) for details. + feature_extractor ([`CLIPFeatureExtractor`]): + Model that extracts features from generated images to be used as inputs for the `safety_checker`. + """ + + def __init__( + self, + vae: AutoencoderKL, + text_encoder: CLIPTextModel, + tokenizer: CLIPTokenizer, + unet: UNet2DConditionModel, + scheduler: DDIMScheduler, + safety_checker: StableDiffusionSafetyChecker, + feature_extractor: CLIPFeatureExtractor, + ): + super().__init__() + + if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + deprecation_message = ( + f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" + f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " + "to update the config accordingly as leaving `steps_offset` might led to incorrect results" + " in future versions. If you have downloaded this checkpoint from the Hugging Face Hub," + " it would be very nice if you could open a Pull request for the `scheduler/scheduler_config.json`" + " file" + ) + deprecate("steps_offset!=1", "1.0.0", deprecation_message, standard_warn=False) + new_config = dict(scheduler.config) + new_config["steps_offset"] = 1 + scheduler._internal_dict = FrozenDict(new_config) + + if safety_checker is None: + logger.warn( + f"You have disabled the safety checker for {self.__class__} by passing `safety_checker=None`. Ensure" + " that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered" + " results in services or applications open to the public. Both the diffusers team and Hugging Face" + " strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling" + " it only for use-cases that involve analyzing network behavior or auditing its results. For more" + " information, please have a look at https://github.com/huggingface/diffusers/pull/254 ." + ) + + self.register_modules( + vae=vae, + text_encoder=text_encoder, + tokenizer=tokenizer, + unet=unet, + scheduler=scheduler, + safety_checker=safety_checker, + feature_extractor=feature_extractor, + ) + + def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): + r""" + Enable sliced attention computation. + + When this option is enabled, the attention module will split the input tensor in slices, to compute attention + in several steps. This is useful to save some memory in exchange for a small speed decrease. 
+ + Args: + slice_size (`str` or `int`, *optional*, defaults to `"auto"`): + When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If + a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case, + `attention_head_dim` must be a multiple of `slice_size`. + """ + if slice_size == "auto": + # half the attention head size is usually a good trade-off between + # speed and memory + slice_size = self.unet.config.attention_head_dim // 2 + self.unet.set_attention_slice(slice_size) + + def disable_attention_slicing(self): + r""" + Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go + back to computing attention in one step. + """ + # set slice_size = `None` to disable `set_attention_slice` + self.enable_attention_slicing(None) + + @torch.no_grad() + def __call__( + self, + prompt: Union[str, List[str]], + source_prompt: Union[str, List[str]], + init_image: Union[torch.FloatTensor, PIL.Image.Image], + strength: float = 0.8, + num_inference_steps: Optional[int] = 50, + guidance_scale: Optional[float] = 7.5, + source_guidance_scale: Optional[float] = 1, + num_images_per_prompt: Optional[int] = 1, + eta: Optional[float] = 0.1, + generator: Optional[torch.Generator] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, + callback_steps: Optional[int] = 1, + **kwargs, + ): + r""" + Function invoked when calling the pipeline for generation. + + Args: + prompt (`str` or `List[str]`): + The prompt or prompts to guide the image generation. + init_image (`torch.FloatTensor` or `PIL.Image.Image`): + `Image`, or tensor representing an image batch, that will be used as the starting point for the + process. + strength (`float`, *optional*, defaults to 0.8): + Conceptually, indicates how much to transform the reference `init_image`. Must be between 0 and 1. + `init_image` will be used as a starting point, adding more noise to it the larger the `strength`. The + number of denoising steps depends on the amount of noise initially added. When `strength` is 1, added + noise will be maximum and the denoising process will run for the full number of iterations specified in + `num_inference_steps`. A value of 1, therefore, essentially ignores `init_image`. + num_inference_steps (`int`, *optional*, defaults to 50): + The number of denoising steps. More denoising steps usually lead to a higher quality image at the + expense of slower inference. This parameter will be modulated by `strength`. + guidance_scale (`float`, *optional*, defaults to 7.5): + Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). + `guidance_scale` is defined as `w` of equation 2. of [Imagen + Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > + 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, + usually at the expense of lower image quality. + source_guidance_scale (`float`, *optional*, defaults to 1): + Guidance scale for the source prompt. This is useful to control the amount of influence the source + prompt for encoding. + num_images_per_prompt (`int`, *optional*, defaults to 1): + The number of images to generate per prompt. + eta (`float`, *optional*, defaults to 0.1): + Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. 
Only applies to + [`schedulers.DDIMScheduler`], will be ignored for others. + generator (`torch.Generator`, *optional*): + A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation + deterministic. + output_type (`str`, *optional*, defaults to `"pil"`): + The output format of the generate image. Choose between + [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a + plain tuple. + callback (`Callable`, *optional*): + A function that will be called every `callback_steps` steps during inference. The function will be + called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. + callback_steps (`int`, *optional*, defaults to 1): + The frequency at which the `callback` function will be called. If not specified, the callback will be + called at every step. + + Returns: + [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: + [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple. + When returning a tuple, the first element is a list with the generated images, and the second element is a + list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" + (nsfw) content, according to the `safety_checker`. + """ + if isinstance(prompt, str): + batch_size = 1 + elif isinstance(prompt, list): + batch_size = len(prompt) + else: + raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") + + if batch_size != 1: + raise ValueError( + "At the moment only `batch_size=1` is supported for prompts, but you seem to have passed multiple" + f" prompts: {prompt}. Please make sure to pass only a single prompt." + ) + + if strength < 0 or strength > 1: + raise ValueError(f"The value of strength should in [0.0, 1.0] but is {strength}") + + if (callback_steps is None) or ( + callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) + ): + raise ValueError( + f"`callback_steps` has to be a positive integer but is {callback_steps} of type" + f" {type(callback_steps)}." 
+ ) + + # set timesteps + self.scheduler.set_timesteps(num_inference_steps) + + if isinstance(init_image, PIL.Image.Image): + init_image = preprocess(init_image) + + # get prompt text embeddings + text_inputs = self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + return_tensors="pt", + ) + source_text_inputs = self.tokenizer( + source_prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + return_tensors="pt", + ) + text_input_ids = text_inputs.input_ids + source_text_input_ids = source_text_inputs.input_ids + + if text_input_ids.shape[-1] > self.tokenizer.model_max_length: + removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + logger.warning( + "The following part of your input was truncated because CLIP can only handle sequences up to" + f" {self.tokenizer.model_max_length} tokens: {removed_text}" + ) + text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] + if source_text_input_ids.shape[-1] > self.tokenizer.model_max_length: + removed_text = self.tokenizer.batch_decode(source_text_input_ids[:, self.tokenizer.model_max_length :]) + logger.warning( + "The following part of your input was truncated because CLIP can only handle sequences up to" + f" {self.tokenizer.model_max_length} tokens: {removed_text}" + ) + source_text_input_ids = source_text_input_ids[:, : self.tokenizer.model_max_length] + text_embeddings = self.text_encoder(text_input_ids.to(self.device))[0] + source_text_embeddings = self.text_encoder(source_text_input_ids.to(self.device))[0] + + # duplicate text embeddings for each generation per prompt + text_embeddings = text_embeddings.repeat_interleave(num_images_per_prompt, dim=0) + source_text_embeddings = source_text_embeddings.repeat_interleave(num_images_per_prompt, dim=0) + + # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) + # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` + # corresponds to doing no classifier free guidance. + + # get unconditional embeddings for classifier free guidance + uncond_tokens = [""] + + max_length = text_input_ids.shape[-1] + uncond_input = self.tokenizer( + uncond_tokens, + padding="max_length", + max_length=max_length, + truncation=True, + return_tensors="pt", + ) + uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0] + + # duplicate unconditional embeddings for each generation per prompt + uncond_embeddings = uncond_embeddings.repeat_interleave(batch_size * num_images_per_prompt, dim=0) + + # For classifier free guidance, we need to do two forward passes. + # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) + + source_uncond_tokens = [""] + + max_length = source_text_input_ids.shape[-1] + source_uncond_input = self.tokenizer( + source_uncond_tokens, + padding="max_length", + max_length=max_length, + truncation=True, + return_tensors="pt", + ) + source_uncond_embeddings = self.text_encoder(source_uncond_input.input_ids.to(self.device))[0] + + # duplicate unconditional embeddings for each generation per prompt + source_uncond_embeddings = source_uncond_embeddings.repeat_interleave( + batch_size * num_images_per_prompt, dim=0 + ) + + # For classifier free guidance, we need to do two forward passes. 
+ # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + source_text_embeddings = torch.cat([source_uncond_embeddings, source_text_embeddings]) + + # encode the init image into latents and scale the latents + latents_dtype = text_embeddings.dtype + init_image = init_image.to(device=self.device, dtype=latents_dtype) + init_latent_dist = self.vae.encode(init_image).latent_dist + init_latents = init_latent_dist.sample(generator=generator) + init_latents = 0.18215 * init_latents + + if isinstance(prompt, str): + prompt = [prompt] + if len(prompt) > init_latents.shape[0] and len(prompt) % init_latents.shape[0] == 0: + # expand init_latents for batch_size + deprecation_message = ( + f"You have passed {len(prompt)} text prompts (`prompt`), but only {init_latents.shape[0]} initial" + " images (`init_image`). Initial images are now duplicating to match the number of text prompts. Note" + " that this behavior is deprecated and will be removed in a version 1.0.0. Please make sure to update" + " your script to pass as many init images as text prompts to suppress this warning." + ) + deprecate("len(prompt) != len(init_image)", "1.0.0", deprecation_message, standard_warn=False) + additional_image_per_prompt = len(prompt) // init_latents.shape[0] + init_latents = torch.cat([init_latents] * additional_image_per_prompt * num_images_per_prompt, dim=0) + elif len(prompt) > init_latents.shape[0] and len(prompt) % init_latents.shape[0] != 0: + raise ValueError( + f"Cannot duplicate `init_image` of batch size {init_latents.shape[0]} to {len(prompt)} text prompts." + ) + else: + init_latents = torch.cat([init_latents] * num_images_per_prompt, dim=0) + + # get the original timestep using init_timestep + offset = self.scheduler.config.get("steps_offset", 0) + init_timestep = int(num_inference_steps * strength) + offset + init_timestep = min(init_timestep, num_inference_steps) + + timesteps = self.scheduler.timesteps[-init_timestep] + timesteps = torch.tensor([timesteps] * batch_size * num_images_per_prompt, device=self.device) + + # add noise to latents using the timesteps + noise = torch.randn(init_latents.shape, generator=generator, device=self.device, dtype=latents_dtype) + clean_latents = init_latents + init_latents = self.scheduler.add_noise(init_latents, noise, timesteps) + + # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature + # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. + # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 + # and should be between [0, 1] + accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) + extra_step_kwargs = {} + + if not (accepts_eta and (0 < eta <= 1)): + raise ValueError( + "Currently, only the DDIM scheduler is supported. Please make sure that `pipeline.scheduler` is of" + f" type {DDIMScheduler.__class__} and not {self.scheduler.__class__}." 
+ ) + + extra_step_kwargs["eta"] = eta + + latents = init_latents + source_latents = init_latents + + t_start = max(num_inference_steps - init_timestep + offset, 0) + + # Some schedulers like PNDM have timesteps as arrays + # It's more optimized to move all timesteps to correct device beforehand + timesteps = self.scheduler.timesteps[t_start:].to(self.device) + + for i, t in enumerate(self.progress_bar(timesteps)): + # expand the latents if we are doing classifier free guidance + latent_model_input = torch.cat([latents] * 2) + source_latent_model_input = torch.cat([source_latents] * 2) + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + source_latent_model_input = self.scheduler.scale_model_input(source_latent_model_input, t) + + # predict the noise residual + concat_latent_model_input = torch.stack( + [ + source_latent_model_input[0], + latent_model_input[0], + source_latent_model_input[1], + latent_model_input[1], + ], + dim=0, + ) + concat_text_embeddings = torch.stack( + [ + source_text_embeddings[0], + text_embeddings[0], + source_text_embeddings[1], + text_embeddings[1], + ], + dim=0, + ) + concat_noise_pred = self.unet( + concat_latent_model_input, t, encoder_hidden_states=concat_text_embeddings + ).sample + + # perform guidance + ( + source_noise_pred_uncond, + noise_pred_uncond, + source_noise_pred_text, + noise_pred_text, + ) = concat_noise_pred.chunk(4, dim=0) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + source_noise_pred = source_noise_pred_uncond + source_guidance_scale * ( + source_noise_pred_text - source_noise_pred_uncond + ) + + # Sample source_latents from the posterior distribution. + prev_source_latents = posterior_sample( + self.scheduler, source_latents, t, clean_latents, **extra_step_kwargs + ) + # Compute noise. 
+ noise = compute_noise( + self.scheduler, prev_source_latents, source_latents, t, source_noise_pred, **extra_step_kwargs + ) + source_latents = prev_source_latents + + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step( + noise_pred, t, latents, variance_noise=noise, **extra_step_kwargs + ).prev_sample + + # call the callback, if provided + if callback is not None and i % callback_steps == 0: + callback(i, t, latents) + + latents = 1 / 0.18215 * latents + image = self.vae.decode(latents).sample + + image = (image / 2 + 0.5).clamp(0, 1) + image = image.cpu().permute(0, 2, 3, 1).numpy() + + if self.safety_checker is not None: + safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to( + self.device + ) + image, has_nsfw_concept = self.safety_checker( + images=image, clip_input=safety_checker_input.pixel_values.to(text_embeddings.dtype) + ) + else: + has_nsfw_concept = None + + if output_type == "pil": + image = self.numpy_to_pil(image) + + if not return_dict: + return (image, has_nsfw_concept) + + return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept) diff --git a/src/diffusers/schedulers/scheduling_ddim.py b/src/diffusers/schedulers/scheduling_ddim.py index 23648d1bc3..62ee9c0244 100644 --- a/src/diffusers/schedulers/scheduling_ddim.py +++ b/src/diffusers/schedulers/scheduling_ddim.py @@ -208,6 +208,7 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin): eta: float = 0.0, use_clipped_model_output: bool = False, generator=None, + variance_noise: Optional[torch.FloatTensor] = None, return_dict: bool = True, ) -> Union[DDIMSchedulerOutput, Tuple]: """ @@ -225,6 +226,9 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin): `self.config.clip_sample` is `True`. If no clipping has happened, "corrected" `model_output` would coincide with the one provided as input and `use_clipped_model_output` will have not effect. generator: random number generator. + variance_noise (`torch.FloatTensor`): instead of generating noise for the variance using `generator`, we + can directly provide the noise for the variance itself. This is useful for methods such as + CycleDiffusion. (https://arxiv.org/abs/2210.05559) return_dict (`bool`): option for returning tuple rather than DDIMSchedulerOutput class Returns: @@ -284,8 +288,17 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin): if eta > 0: # randn_like does not support generator https://github.com/pytorch/pytorch/issues/27072 device = model_output.device if torch.is_tensor(model_output) else "cpu" - noise = torch.randn(model_output.shape, dtype=model_output.dtype, generator=generator).to(device) - variance = self._get_variance(timestep, prev_timestep) ** (0.5) * eta * noise + if variance_noise is not None and generator is not None: + raise ValueError( + "Cannot pass both generator and variance_noise. Please make sure that either `generator` or" + " `variance_noise` stays `None`." 
+ ) + + if variance_noise is None: + variance_noise = torch.randn(model_output.shape, dtype=model_output.dtype, generator=generator).to( + device + ) + variance = self._get_variance(timestep, prev_timestep) ** (0.5) * eta * variance_noise prev_sample = prev_sample + variance diff --git a/src/diffusers/utils/dummy_torch_and_transformers_objects.py b/src/diffusers/utils/dummy_torch_and_transformers_objects.py index ea85a8f57e..63e8a60f74 100644 --- a/src/diffusers/utils/dummy_torch_and_transformers_objects.py +++ b/src/diffusers/utils/dummy_torch_and_transformers_objects.py @@ -4,6 +4,21 @@ from ..utils import DummyObject, requires_backends +class CycleDiffusionPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + class LDMTextToImagePipeline(metaclass=DummyObject): _backends = ["torch", "transformers"] diff --git a/tests/pipelines/stable_diffusion/test_cycle_diffusion.py b/tests/pipelines/stable_diffusion/test_cycle_diffusion.py new file mode 100644 index 0000000000..0bddd63807 --- /dev/null +++ b/tests/pipelines/stable_diffusion/test_cycle_diffusion.py @@ -0,0 +1,348 @@ +# coding=utf-8 +# Copyright 2022 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
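+
+# Tests for the new CycleDiffusionPipeline: fast checks with tiny dummy models on CPU and slow
+# GPU integration tests against reference images. The pipeline denoises a source latent and an
+# edited latent in lockstep, reusing the DDIM variance noise recovered from the source
+# trajectory (see the `variance_noise` argument added to `DDIMScheduler.step` in this patch).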
+ +import gc +import random +import unittest + +import numpy as np +import torch + +from diffusers import AutoencoderKL, CycleDiffusionPipeline, DDIMScheduler, UNet2DConditionModel, UNet2DModel, VQModel +from diffusers.utils import floats_tensor, load_image, load_numpy, slow, torch_device +from diffusers.utils.testing_utils import require_torch_gpu +from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer + +from ...test_pipelines_common import PipelineTesterMixin + + +torch.backends.cuda.matmul.allow_tf32 = False + + +class CycleDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + def tearDown(self): + # clean up the VRAM after each test + super().tearDown() + gc.collect() + torch.cuda.empty_cache() + + @property + def dummy_image(self): + batch_size = 1 + num_channels = 3 + sizes = (32, 32) + + image = floats_tensor((batch_size, num_channels) + sizes, rng=random.Random(0)).to(torch_device) + return image + + @property + def dummy_uncond_unet(self): + torch.manual_seed(0) + model = UNet2DModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=3, + out_channels=3, + down_block_types=("DownBlock2D", "AttnDownBlock2D"), + up_block_types=("AttnUpBlock2D", "UpBlock2D"), + ) + return model + + @property + def dummy_cond_unet(self): + torch.manual_seed(0) + model = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=4, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + cross_attention_dim=32, + ) + return model + + @property + def dummy_cond_unet_inpaint(self): + torch.manual_seed(0) + model = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=9, + out_channels=4, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + cross_attention_dim=32, + ) + return model + + @property + def dummy_vq_model(self): + torch.manual_seed(0) + model = VQModel( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=3, + ) + return model + + @property + def dummy_vae(self): + torch.manual_seed(0) + model = AutoencoderKL( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=4, + ) + return model + + @property + def dummy_text_encoder(self): + torch.manual_seed(0) + config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=32, + intermediate_size=37, + layer_norm_eps=1e-05, + num_attention_heads=4, + num_hidden_layers=5, + pad_token_id=1, + vocab_size=1000, + ) + return CLIPTextModel(config) + + @property + def dummy_extractor(self): + def extract(*args, **kwargs): + class Out: + def __init__(self): + self.pixel_values = torch.ones([0]) + + def to(self, device): + self.pixel_values.to(device) + return self + + return Out() + + return extract + + def test_stable_diffusion_cycle(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + unet = self.dummy_cond_unet + scheduler = DDIMScheduler( + beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + num_train_timesteps=1000, + clip_sample=False, + set_alpha_to_one=False, + ) + vae = 
self.dummy_vae + bert = self.dummy_text_encoder + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + # make sure here that pndm scheduler skips prk + sd_pipe = CycleDiffusionPipeline( + unet=unet, + scheduler=scheduler, + vae=vae, + text_encoder=bert, + tokenizer=tokenizer, + safety_checker=None, + feature_extractor=self.dummy_extractor, + ) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + source_prompt = "An astronaut riding a horse" + prompt = "An astronaut riding an elephant" + init_image = self.dummy_image.to(device) + + generator = torch.Generator(device=device).manual_seed(0) + output = sd_pipe( + prompt=prompt, + source_prompt=source_prompt, + generator=generator, + num_inference_steps=2, + init_image=init_image, + eta=0.1, + strength=0.8, + guidance_scale=3, + source_guidance_scale=1, + output_type="np", + ) + images = output.images + + image_slice = images[0, -3:, -3:, -1] + + assert images.shape == (1, 32, 32, 3) + expected_slice = np.array([0.4459, 0.4943, 0.4544, 0.6643, 0.5474, 0.4327, 0.5701, 0.5959, 0.5179]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + @unittest.skipIf(torch_device != "cuda", "This test requires a GPU") + def test_stable_diffusion_cycle_fp16(self): + unet = self.dummy_cond_unet + scheduler = DDIMScheduler( + beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + num_train_timesteps=1000, + clip_sample=False, + set_alpha_to_one=False, + ) + vae = self.dummy_vae + bert = self.dummy_text_encoder + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + unet = unet.half() + vae = vae.half() + bert = bert.half() + + # make sure here that pndm scheduler skips prk + sd_pipe = CycleDiffusionPipeline( + unet=unet, + scheduler=scheduler, + vae=vae, + text_encoder=bert, + tokenizer=tokenizer, + safety_checker=None, + feature_extractor=self.dummy_extractor, + ) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + source_prompt = "An astronaut riding a horse" + prompt = "An astronaut riding an elephant" + init_image = self.dummy_image.to(torch_device) + + generator = torch.Generator(device=torch_device).manual_seed(0) + output = sd_pipe( + prompt=prompt, + source_prompt=source_prompt, + generator=generator, + num_inference_steps=2, + init_image=init_image, + eta=0.1, + strength=0.8, + guidance_scale=3, + source_guidance_scale=1, + output_type="np", + ) + images = output.images + + image_slice = images[0, -3:, -3:, -1] + + assert images.shape == (1, 32, 32, 3) + expected_slice = np.array([0.3506, 0.4543, 0.446, 0.4575, 0.5195, 0.4155, 0.5273, 0.518, 0.4116]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + +@slow +@require_torch_gpu +class CycleDiffusionPipelineIntegrationTests(unittest.TestCase): + def tearDown(self): + # clean up the VRAM after each test + super().tearDown() + gc.collect() + torch.cuda.empty_cache() + + def test_cycle_diffusion_pipeline_fp16(self): + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/cycle-diffusion/black_colored_car.png" + ) + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/cycle-diffusion/blue_colored_car_fp16.npy" + ) + init_image = init_image.resize((512, 512)) + + model_id = "CompVis/stable-diffusion-v1-4" + scheduler = DDIMScheduler.from_config(model_id, subfolder="scheduler") + pipe = 
CycleDiffusionPipeline.from_pretrained( + model_id, scheduler=scheduler, safety_checker=None, torch_dtype=torch.float16, revision="fp16" + ) + + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + source_prompt = "A black colored car" + prompt = "A blue colored car" + + torch.manual_seed(0) + output = pipe( + prompt=prompt, + source_prompt=source_prompt, + init_image=init_image, + num_inference_steps=100, + eta=0.1, + strength=0.85, + guidance_scale=3, + source_guidance_scale=1, + output_type="np", + ) + image = output.images + + # the values aren't exactly equal, but the images look the same visually + assert np.abs(image - expected_image).max() < 1e-2 + + def test_cycle_diffusion_pipeline(self): + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/cycle-diffusion/black_colored_car.png" + ) + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/cycle-diffusion/blue_colored_car.npy" + ) + init_image = init_image.resize((512, 512)) + + model_id = "CompVis/stable-diffusion-v1-4" + scheduler = DDIMScheduler.from_config(model_id, subfolder="scheduler") + pipe = CycleDiffusionPipeline.from_pretrained(model_id, scheduler=scheduler, safety_checker=None) + + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + source_prompt = "A black colored car" + prompt = "A blue colored car" + + torch.manual_seed(0) + output = pipe( + prompt=prompt, + source_prompt=source_prompt, + init_image=init_image, + num_inference_steps=100, + eta=0.1, + strength=0.85, + guidance_scale=3, + source_guidance_scale=1, + output_type="np", + ) + image = output.images + + assert np.abs(image - expected_image).max() < 1e-2 From 08a6dc8a5840e0cc09e65e71e9647321ab9bb254 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Sat, 5 Nov 2022 22:17:41 +0100 Subject: [PATCH 39/88] Flax: Flip sin to cos in time embeddings (#1149) Flip sin to cos in t embeddings. This was assumed in the previous implementation, but now the default is the opposite. Fixes #1145. 
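For context, the change only affects the ordering of the sine and cosine halves of the timestep
embedding. A rough numpy sketch of what the flag toggles (illustrative only — the helper below is
not the library's actual implementation):

```python
import numpy as np

def sinusoidal_embedding(timesteps, dim, flip_sin_to_cos=True, max_period=10000.0):
    # illustrative re-implementation, not the diffusers helper itself
    half = dim // 2
    freqs = np.exp(-np.log(max_period) * np.arange(half) / half)
    angles = timesteps[:, None] * freqs[None, :]
    sin, cos = np.sin(angles), np.cos(angles)
    # flip_sin_to_cos=True places the cosine half first, the layout the pretrained UNet expects
    if flip_sin_to_cos:
        return np.concatenate([cos, sin], axis=-1)
    return np.concatenate([sin, cos], axis=-1)

emb = sinusoidal_embedding(np.array([0.0, 10.0, 999.0]), dim=8)
```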
--- src/diffusers/models/embeddings_flax.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/diffusers/models/embeddings_flax.py b/src/diffusers/models/embeddings_flax.py index 1e2272c1fe..bf7d54b82e 100644 --- a/src/diffusers/models/embeddings_flax.py +++ b/src/diffusers/models/embeddings_flax.py @@ -88,4 +88,6 @@ class FlaxTimesteps(nn.Module): @nn.compact def __call__(self, timesteps): - return get_sinusoidal_embeddings(timesteps, embedding_dim=self.dim, freq_shift=self.freq_shift) + return get_sinusoidal_embeddings( + timesteps, embedding_dim=self.dim, freq_shift=self.freq_shift, flip_sin_to_cos=True + ) From b4a1ed85440d4d9c1cafbe118ca6c034000c85f9 Mon Sep 17 00:00:00 2001 From: Cheng Lu Date: Mon, 7 Nov 2022 05:49:55 +0800 Subject: [PATCH 40/88] Add multistep DPM-Solver discrete scheduler (#1132) * add dpmsolver discrete pytorch scheduler * fix some typos in dpm-solver pytorch * add dpm-solver pytorch in stable-diffusion pipeline * add jax/flax version dpm-solver * change code style * change code style * add docs * add `add_noise` method for dpmsolver * add pytorch unit test for dpmsolver * add dummy object for pytorch dpmsolver * Update src/diffusers/schedulers/scheduling_dpmsolver_discrete.py Co-authored-by: Suraj Patil * Update tests/test_config.py Co-authored-by: Suraj Patil * Update tests/test_config.py Co-authored-by: Suraj Patil * resolve the code comments * rename the file * change class name * fix code style * add auto docs for dpmsolver multistep * add more explanations for the stabilizing trick (for steps < 15) * delete the dummy file * change the API name of predict_epsilon, algorithm_type and solver_type * add compatible lists Co-authored-by: Suraj Patil --- docs/source/api/schedulers.mdx | 6 + src/diffusers/__init__.py | 2 + .../pipeline_flax_stable_diffusion.py | 14 +- .../pipeline_stable_diffusion.py | 8 +- src/diffusers/schedulers/__init__.py | 2 + src/diffusers/schedulers/scheduling_ddim.py | 1 + src/diffusers/schedulers/scheduling_ddpm.py | 1 + .../scheduling_dpmsolver_multistep.py | 506 +++++++++++++++ .../scheduling_dpmsolver_multistep_flax.py | 590 ++++++++++++++++++ .../scheduling_euler_ancestral_discrete.py | 1 + .../schedulers/scheduling_euler_discrete.py | 1 + .../schedulers/scheduling_lms_discrete.py | 1 + src/diffusers/schedulers/scheduling_pndm.py | 1 + src/diffusers/utils/dummy_flax_objects.py | 15 + src/diffusers/utils/dummy_pt_objects.py | 15 + tests/test_config.py | 21 +- tests/test_scheduler.py | 182 ++++++ 17 files changed, 1362 insertions(+), 5 deletions(-) create mode 100644 src/diffusers/schedulers/scheduling_dpmsolver_multistep.py create mode 100644 src/diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py diff --git a/docs/source/api/schedulers.mdx b/docs/source/api/schedulers.mdx index f073f6b379..12575a5eca 100644 --- a/docs/source/api/schedulers.mdx +++ b/docs/source/api/schedulers.mdx @@ -70,6 +70,12 @@ Original paper can be found [here](https://arxiv.org/abs/2010.02502). [[autodoc]] DDPMScheduler +#### Multistep DPM-Solver + +Original paper can be found [here](https://arxiv.org/abs/2206.00927) and the [improved version](https://arxiv.org/abs/2211.01095). The original implementation can be found [here](https://github.com/LuChengTHU/dpm-solver). + +[[autodoc]] DPMSolverMultistepScheduler + #### Variance exploding, stochastic sampling from Karras et. al Original paper can be found [here](https://arxiv.org/abs/2006.11239). 
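For readers of the docs entry above, a minimal usage sketch (not part of the patch; checkpoint id,
prompt, and step count are placeholders) showing how the multistep DPM-Solver slots into the
Stable Diffusion pipeline, mirroring the `from_config(model_id, subfolder="scheduler")` pattern
used in the tests earlier in this series:

```python
from diffusers import DPMSolverMultistepScheduler, StableDiffusionPipeline

model_id = "CompVis/stable-diffusion-v1-4"
# load the scheduler config from the checkpoint and swap in the new solver
scheduler = DPMSolverMultistepScheduler.from_config(model_id, subfolder="scheduler")
pipe = StableDiffusionPipeline.from_pretrained(model_id, scheduler=scheduler).to("cuda")

# DPM-Solver++ typically reaches good quality in ~20 steps instead of the usual ~50
image = pipe("a photograph of an astronaut riding a horse", num_inference_steps=20).images[0]
image.save("astronaut.png")
```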
diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index 3f3f3b56a2..da56dc8881 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -42,6 +42,7 @@ if is_torch_available(): from .schedulers import ( DDIMScheduler, DDPMScheduler, + DPMSolverMultistepScheduler, EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, IPNDMScheduler, @@ -92,6 +93,7 @@ if is_flax_available(): from .schedulers import ( FlaxDDIMScheduler, FlaxDDPMScheduler, + FlaxDPMSolverMultistepScheduler, FlaxKarrasVeScheduler, FlaxLMSDiscreteScheduler, FlaxPNDMScheduler, diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py index fe0e284c67..5a910f8453 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py @@ -14,7 +14,12 @@ from transformers import CLIPFeatureExtractor, CLIPTokenizer, FlaxCLIPTextModel from ...models import FlaxAutoencoderKL, FlaxUNet2DConditionModel from ...pipeline_flax_utils import FlaxDiffusionPipeline -from ...schedulers import FlaxDDIMScheduler, FlaxLMSDiscreteScheduler, FlaxPNDMScheduler +from ...schedulers import ( + FlaxDDIMScheduler, + FlaxDPMSolverMultistepScheduler, + FlaxLMSDiscreteScheduler, + FlaxPNDMScheduler, +) from ...utils import logging from . import FlaxStableDiffusionPipelineOutput from .safety_checker_flax import FlaxStableDiffusionSafetyChecker @@ -43,7 +48,8 @@ class FlaxStableDiffusionPipeline(FlaxDiffusionPipeline): unet ([`FlaxUNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. scheduler ([`SchedulerMixin`]): A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of - [`FlaxDDIMScheduler`], [`FlaxLMSDiscreteScheduler`], or [`FlaxPNDMScheduler`]. + [`FlaxDDIMScheduler`], [`FlaxLMSDiscreteScheduler`], [`FlaxPNDMScheduler`], or + [`FlaxDPMSolverMultistepScheduler`]. safety_checker ([`FlaxStableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. Please, refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for details. 
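As a small, self-contained sketch (not part of the patch) of the functional state API the new
Flax scheduler exposes later in this series — only `create_state` and
`set_timesteps(state, num_inference_steps, shape)` from the added code are assumed here:

```python
from diffusers import FlaxDPMSolverMultistepScheduler

scheduler = FlaxDPMSolverMultistepScheduler()
state = scheduler.create_state()
# shape of the latents to be denoised, e.g. (batch, channels, height // 8, width // 8)
state = scheduler.set_timesteps(state, num_inference_steps=20, shape=(1, 4, 64, 64))
print(state.timesteps)  # the discrete timesteps the multistep solver will visit
```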
@@ -57,7 +63,9 @@ class FlaxStableDiffusionPipeline(FlaxDiffusionPipeline): text_encoder: FlaxCLIPTextModel, tokenizer: CLIPTokenizer, unet: FlaxUNet2DConditionModel, - scheduler: Union[FlaxDDIMScheduler, FlaxPNDMScheduler, FlaxLMSDiscreteScheduler], + scheduler: Union[ + FlaxDDIMScheduler, FlaxPNDMScheduler, FlaxLMSDiscreteScheduler, FlaxDPMSolverMultistepScheduler + ], safety_checker: FlaxStableDiffusionSafetyChecker, feature_extractor: CLIPFeatureExtractor, dtype: jnp.dtype = jnp.float32, diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 1ccc87804e..094841f977 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -11,6 +11,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...pipeline_utils import DiffusionPipeline from ...schedulers import ( DDIMScheduler, + DPMSolverMultistepScheduler, EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, LMSDiscreteScheduler, @@ -59,7 +60,12 @@ class StableDiffusionPipeline(DiffusionPipeline): tokenizer: CLIPTokenizer, unet: UNet2DConditionModel, scheduler: Union[ - DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler, EulerDiscreteScheduler, EulerAncestralDiscreteScheduler + DDIMScheduler, + PNDMScheduler, + LMSDiscreteScheduler, + EulerDiscreteScheduler, + EulerAncestralDiscreteScheduler, + DPMSolverMultistepScheduler, ], safety_checker: StableDiffusionSafetyChecker, feature_extractor: CLIPFeatureExtractor, diff --git a/src/diffusers/schedulers/__init__.py b/src/diffusers/schedulers/__init__.py index 1be541ba8b..6217bfcd69 100644 --- a/src/diffusers/schedulers/__init__.py +++ b/src/diffusers/schedulers/__init__.py @@ -19,6 +19,7 @@ from ..utils import is_flax_available, is_scipy_available, is_torch_available if is_torch_available(): from .scheduling_ddim import DDIMScheduler from .scheduling_ddpm import DDPMScheduler + from .scheduling_dpmsolver_multistep import DPMSolverMultistepScheduler from .scheduling_euler_ancestral_discrete import EulerAncestralDiscreteScheduler from .scheduling_euler_discrete import EulerDiscreteScheduler from .scheduling_ipndm import IPNDMScheduler @@ -35,6 +36,7 @@ else: if is_flax_available(): from .scheduling_ddim_flax import FlaxDDIMScheduler from .scheduling_ddpm_flax import FlaxDDPMScheduler + from .scheduling_dpmsolver_multistep_flax import FlaxDPMSolverMultistepScheduler from .scheduling_karras_ve_flax import FlaxKarrasVeScheduler from .scheduling_lms_discrete_flax import FlaxLMSDiscreteScheduler from .scheduling_pndm_flax import FlaxPNDMScheduler diff --git a/src/diffusers/schedulers/scheduling_ddim.py b/src/diffusers/schedulers/scheduling_ddim.py index 62ee9c0244..8d4407c16c 100644 --- a/src/diffusers/schedulers/scheduling_ddim.py +++ b/src/diffusers/schedulers/scheduling_ddim.py @@ -115,6 +115,7 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin): "LMSDiscreteScheduler", "EulerDiscreteScheduler", "EulerAncestralDiscreteScheduler", + "DPMSolverMultistepScheduler", ] @register_to_config diff --git a/src/diffusers/schedulers/scheduling_ddpm.py b/src/diffusers/schedulers/scheduling_ddpm.py index 114a86b432..171c9598eb 100644 --- a/src/diffusers/schedulers/scheduling_ddpm.py +++ b/src/diffusers/schedulers/scheduling_ddpm.py @@ -108,6 +108,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): "LMSDiscreteScheduler", "EulerDiscreteScheduler", "EulerAncestralDiscreteScheduler", + 
"DPMSolverMultistepScheduler", ] @register_to_config diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py b/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py new file mode 100644 index 0000000000..d166354809 --- /dev/null +++ b/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py @@ -0,0 +1,506 @@ +# Copyright 2022 TSAIL Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/LuChengTHU/dpm-solver + +import math +from typing import List, Optional, Tuple, Union + +import numpy as np +import torch + +from ..configuration_utils import ConfigMixin, register_to_config +from .scheduling_utils import SchedulerMixin, SchedulerOutput + + +def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + + Args: + num_diffusion_timesteps (`int`): the number of betas to produce. + max_beta (`float`): the maximum beta to use; use values lower than 1 to + prevent singularities. + + Returns: + betas (`np.ndarray`): the betas used by the scheduler to step the model outputs + """ + + def alpha_bar(time_step): + return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2 + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): + """ + DPM-Solver (and the improved version DPM-Solver++) is a fast dedicated high-order solver for diffusion ODEs with + the convergence order guarantee. Empirically, sampling by DPM-Solver with only 20 steps can generate high-quality + samples, and it can generate quite good samples even in only 10 steps. + + For more details, see the original paper: https://arxiv.org/abs/2206.00927 and https://arxiv.org/abs/2211.01095 + + Currently, we support the multistep DPM-Solver for both noise prediction models and data prediction models. We + recommend to use `solver_order=2` for guided sampling, and `solver_order=3` for unconditional sampling. + + We also support the "dynamic thresholding" method in Imagen (https://arxiv.org/abs/2205.11487). For pixel-space + diffusion models, you can set both `algorithm_type="dpmsolver++"` and `thresholding=True` to use the dynamic + thresholding. Note that the thresholding method is unsuitable for latent-space diffusion models (such as + stable-diffusion). + + [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` + function, such as `num_train_timesteps`. 
They can be accessed via `scheduler.config.num_train_timesteps`. + [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and + [`~ConfigMixin.from_config`] functions. + + Args: + num_train_timesteps (`int`): number of diffusion steps used to train the model. + beta_start (`float`): the starting `beta` value of inference. + beta_end (`float`): the final `beta` value. + beta_schedule (`str`): + the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray`, optional): + option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. + solver_order (`int`, default `2`): + the order of DPM-Solver; can be `1` or `2` or `3`. We recommend to use `solver_order=2` for guided + sampling, and `solver_order=3` for unconditional sampling. + predict_epsilon (`bool`, default `True`): + we currently support both the noise prediction model and the data prediction model. If the model predicts + the noise / epsilon, set `predict_epsilon` to `True`. If the model predicts the data / x0 directly, set + `predict_epsilon` to `False`. + thresholding (`bool`, default `False`): + whether to use the "dynamic thresholding" method (introduced by Imagen, https://arxiv.org/abs/2205.11487). + For pixel-space diffusion models, you can set both `algorithm_type=dpmsolver++` and `thresholding=True` to + use the dynamic thresholding. Note that the thresholding method is unsuitable for latent-space diffusion + models (such as stable-diffusion). + dynamic_thresholding_ratio (`float`, default `0.995`): + the ratio for the dynamic thresholding method. Default is `0.995`, the same as Imagen + (https://arxiv.org/abs/2205.11487). + sample_max_value (`float`, default `1.0`): + the threshold value for dynamic thresholding. Valid only when `thresholding=True` and + `algorithm_type="dpmsolver++`. + algorithm_type (`str`, default `dpmsolver++`): + the algorithm type for the solver. Either `dpmsolver` or `dpmsolver++`. The `dpmsolver` type implements the + algorithms in https://arxiv.org/abs/2206.00927, and the `dpmsolver++` type implements the algorithms in + https://arxiv.org/abs/2211.01095. We recommend to use `dpmsolver++` with `solver_order=2` for guided + sampling (e.g. stable-diffusion). + solver_type (`str`, default `midpoint`): + the solver type for the second-order solver. Either `midpoint` or `heun`. The solver type slightly affects + the sample quality, especially for small number of steps. We empirically find that `midpoint` solvers are + slightly better, so we recommend to use the `midpoint` type. + lower_order_final (`bool`, default `True`): + whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. We empirically + find this trick can stabilize the sampling of DPM-Solver for steps < 15, especially for steps <= 10. 
+ + """ + + _compatible_classes = [ + "DDIMScheduler", + "DDPMScheduler", + "PNDMScheduler", + "LMSDiscreteScheduler", + "EulerDiscreteScheduler", + "EulerAncestralDiscreteScheduler", + ] + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: Optional[np.ndarray] = None, + solver_order: int = 2, + predict_epsilon: bool = True, + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + sample_max_value: float = 1.0, + algorithm_type: str = "dpmsolver++", + solver_type: str = "midpoint", + lower_order_final: bool = True, + ): + if trained_betas is not None: + self.betas = torch.from_numpy(trained_betas) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = ( + torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + ) + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + # Currently we only support VP-type noise schedule + self.alpha_t = torch.sqrt(self.alphas_cumprod) + self.sigma_t = torch.sqrt(1 - self.alphas_cumprod) + self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t) + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # settings for DPM-Solver + if algorithm_type not in ["dpmsolver", "dpmsolver++"]: + raise NotImplementedError(f"{algorithm_type} does is not implemented for {self.__class__}") + if solver_type not in ["midpoint", "heun"]: + raise NotImplementedError(f"{solver_type} does is not implemented for {self.__class__}") + + # setable values + self.num_inference_steps = None + timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=np.float32)[::-1].copy() + self.timesteps = torch.from_numpy(timesteps) + self.model_outputs = [None] * solver_order + self.lower_order_nums = 0 + + def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None): + """ + Sets the timesteps used for the diffusion chain. Supporting function to be run before inference. + + Args: + num_inference_steps (`int`): + the number of diffusion steps used when generating samples with a pre-trained model. + device (`str` or `torch.device`, optional): + the device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + """ + self.num_inference_steps = num_inference_steps + timesteps = ( + np.linspace(0, self.num_train_timesteps - 1, num_inference_steps + 1) + .round()[::-1][:-1] + .copy() + .astype(np.int64) + ) + self.timesteps = torch.from_numpy(timesteps).to(device) + self.model_outputs = [ + None, + ] * self.config.solver_order + self.lower_order_nums = 0 + + def convert_model_output( + self, model_output: torch.FloatTensor, timestep: int, sample: torch.FloatTensor + ) -> torch.FloatTensor: + """ + Convert the model output to the corresponding type that the algorithm (DPM-Solver / DPM-Solver++) needs. 
+ + DPM-Solver is designed to discretize an integral of the noise prediciton model, and DPM-Solver++ is designed to + discretize an integral of the data prediction model. So we need to first convert the model output to the + corresponding type to match the algorithm. + + Note that the algorithm type and the model type is decoupled. That is to say, we can use either DPM-Solver or + DPM-Solver++ for both noise prediction model and data prediction model. + + Args: + model_output (`torch.FloatTensor`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + current instance of sample being created by diffusion process. + + Returns: + `torch.FloatTensor`: the converted model output. + """ + # DPM-Solver++ needs to solve an integral of the data prediction model. + if self.config.algorithm_type == "dpmsolver++": + if self.config.predict_epsilon: + alpha_t, sigma_t = self.alpha_t[timestep], self.sigma_t[timestep] + x0_pred = (sample - sigma_t * model_output) / alpha_t + else: + x0_pred = model_output + if self.config.thresholding: + # Dynamic thresholding in https://arxiv.org/abs/2205.11487 + dynamic_max_val = torch.quantile( + torch.abs(x0_pred).reshape((x0_pred.shape[0], -1)), self.config.dynamic_thresholding_ratio, dim=1 + ) + dynamic_max_val = torch.maximum( + dynamic_max_val, + self.config.sample_max_value * torch.ones_like(dynamic_max_val).to(dynamic_max_val.device), + )[(...,) + (None,) * (x0_pred.ndim - 1)] + x0_pred = torch.clamp(x0_pred, -dynamic_max_val, dynamic_max_val) / dynamic_max_val + return x0_pred + # DPM-Solver needs to solve an integral of the noise prediction model. + elif self.config.algorithm_type == "dpmsolver": + if self.config.predict_epsilon: + return model_output + else: + alpha_t, sigma_t = self.alpha_t[timestep], self.sigma_t[timestep] + epsilon = (sample - alpha_t * model_output) / sigma_t + return epsilon + + def dpm_solver_first_order_update( + self, + model_output: torch.FloatTensor, + timestep: int, + prev_timestep: int, + sample: torch.FloatTensor, + ) -> torch.FloatTensor: + """ + One step for the first-order DPM-Solver (equivalent to DDIM). + + See https://arxiv.org/abs/2206.00927 for the detailed derivation. + + Args: + model_output (`torch.FloatTensor`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + prev_timestep (`int`): previous discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + current instance of sample being created by diffusion process. + + Returns: + `torch.FloatTensor`: the sample tensor at the previous timestep. + """ + lambda_t, lambda_s = self.lambda_t[prev_timestep], self.lambda_t[timestep] + alpha_t, alpha_s = self.alpha_t[prev_timestep], self.alpha_t[timestep] + sigma_t, sigma_s = self.sigma_t[prev_timestep], self.sigma_t[timestep] + h = lambda_t - lambda_s + if self.config.algorithm_type == "dpmsolver++": + x_t = (sigma_t / sigma_s) * sample - (alpha_t * (torch.exp(-h) - 1.0)) * model_output + elif self.config.algorithm_type == "dpmsolver": + x_t = (alpha_t / alpha_s) * sample - (sigma_t * (torch.exp(h) - 1.0)) * model_output + return x_t + + def multistep_dpm_solver_second_order_update( + self, + model_output_list: List[torch.FloatTensor], + timestep_list: List[int], + prev_timestep: int, + sample: torch.FloatTensor, + ) -> torch.FloatTensor: + """ + One step for the second-order multistep DPM-Solver. 
+ + Args: + model_output_list (`List[torch.FloatTensor]`): + direct outputs from learned diffusion model at current and latter timesteps. + timestep (`int`): current and latter discrete timestep in the diffusion chain. + prev_timestep (`int`): previous discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + current instance of sample being created by diffusion process. + + Returns: + `torch.FloatTensor`: the sample tensor at the previous timestep. + """ + t, s0, s1 = prev_timestep, timestep_list[-1], timestep_list[-2] + m0, m1 = model_output_list[-1], model_output_list[-2] + lambda_t, lambda_s0, lambda_s1 = self.lambda_t[t], self.lambda_t[s0], self.lambda_t[s1] + alpha_t, alpha_s0 = self.alpha_t[t], self.alpha_t[s0] + sigma_t, sigma_s0 = self.sigma_t[t], self.sigma_t[s0] + h, h_0 = lambda_t - lambda_s0, lambda_s0 - lambda_s1 + r0 = h_0 / h + D0, D1 = m0, (1.0 / r0) * (m0 - m1) + if self.config.algorithm_type == "dpmsolver++": + # See https://arxiv.org/abs/2211.01095 for detailed derivations + if self.config.solver_type == "midpoint": + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (torch.exp(-h) - 1.0)) * D0 + - 0.5 * (alpha_t * (torch.exp(-h) - 1.0)) * D1 + ) + elif self.config.solver_type == "heun": + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (torch.exp(-h) - 1.0)) * D0 + + (alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0)) * D1 + ) + elif self.config.algorithm_type == "dpmsolver": + # See https://arxiv.org/abs/2206.00927 for detailed derivations + if self.config.solver_type == "midpoint": + x_t = ( + (alpha_t / alpha_s0) * sample + - (sigma_t * (torch.exp(h) - 1.0)) * D0 + - 0.5 * (sigma_t * (torch.exp(h) - 1.0)) * D1 + ) + elif self.config.solver_type == "heun": + x_t = ( + (alpha_t / alpha_s0) * sample + - (sigma_t * (torch.exp(h) - 1.0)) * D0 + - (sigma_t * ((torch.exp(h) - 1.0) / h - 1.0)) * D1 + ) + return x_t + + def multistep_dpm_solver_third_order_update( + self, + model_output_list: List[torch.FloatTensor], + timestep_list: List[int], + prev_timestep: int, + sample: torch.FloatTensor, + ) -> torch.FloatTensor: + """ + One step for the third-order multistep DPM-Solver. + + Args: + model_output_list (`List[torch.FloatTensor]`): + direct outputs from learned diffusion model at current and latter timesteps. + timestep (`int`): current and latter discrete timestep in the diffusion chain. + prev_timestep (`int`): previous discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + current instance of sample being created by diffusion process. + + Returns: + `torch.FloatTensor`: the sample tensor at the previous timestep. 
+ """ + t, s0, s1, s2 = prev_timestep, timestep_list[-1], timestep_list[-2], timestep_list[-3] + m0, m1, m2 = model_output_list[-1], model_output_list[-2], model_output_list[-3] + lambda_t, lambda_s0, lambda_s1, lambda_s2 = ( + self.lambda_t[t], + self.lambda_t[s0], + self.lambda_t[s1], + self.lambda_t[s2], + ) + alpha_t, alpha_s0 = self.alpha_t[t], self.alpha_t[s0] + sigma_t, sigma_s0 = self.sigma_t[t], self.sigma_t[s0] + h, h_0, h_1 = lambda_t - lambda_s0, lambda_s0 - lambda_s1, lambda_s1 - lambda_s2 + r0, r1 = h_0 / h, h_1 / h + D0 = m0 + D1_0, D1_1 = (1.0 / r0) * (m0 - m1), (1.0 / r1) * (m1 - m2) + D1 = D1_0 + (r0 / (r0 + r1)) * (D1_0 - D1_1) + D2 = (1.0 / (r0 + r1)) * (D1_0 - D1_1) + if self.config.algorithm_type == "dpmsolver++": + # See https://arxiv.org/abs/2206.00927 for detailed derivations + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (torch.exp(-h) - 1.0)) * D0 + + (alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0)) * D1 + - (alpha_t * ((torch.exp(-h) - 1.0 + h) / h**2 - 0.5)) * D2 + ) + elif self.config.algorithm_type == "dpmsolver": + # See https://arxiv.org/abs/2206.00927 for detailed derivations + x_t = ( + (alpha_t / alpha_s0) * sample + - (sigma_t * (torch.exp(h) - 1.0)) * D0 + - (sigma_t * ((torch.exp(h) - 1.0) / h - 1.0)) * D1 + - (sigma_t * ((torch.exp(h) - 1.0 - h) / h**2 - 0.5)) * D2 + ) + return x_t + + def step( + self, + model_output: torch.FloatTensor, + timestep: int, + sample: torch.FloatTensor, + return_dict: bool = True, + ) -> Union[SchedulerOutput, Tuple]: + """ + Step function propagating the sample with the multistep DPM-Solver. + + Args: + model_output (`torch.FloatTensor`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + current instance of sample being created by diffusion process. + return_dict (`bool`): option for returning tuple rather than SchedulerOutput class + + Returns: + [`~scheduling_utils.SchedulerOutput`] or `tuple`: [`~scheduling_utils.SchedulerOutput`] if `return_dict` is + True, otherwise a `tuple`. When returning a tuple, the first element is the sample tensor. 
+ + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if isinstance(timestep, torch.Tensor): + timestep = timestep.to(self.timesteps.device) + step_index = (self.timesteps == timestep).nonzero() + if len(step_index) == 0: + step_index = len(self.timesteps) - 1 + else: + step_index = step_index.item() + prev_timestep = 0 if step_index == len(self.timesteps) - 1 else self.timesteps[step_index + 1] + lower_order_final = ( + (step_index == len(self.timesteps) - 1) and self.config.lower_order_final and len(self.timesteps) < 15 + ) + lower_order_second = ( + (step_index == len(self.timesteps) - 2) and self.config.lower_order_final and len(self.timesteps) < 15 + ) + + model_output = self.convert_model_output(model_output, timestep, sample) + for i in range(self.config.solver_order - 1): + self.model_outputs[i] = self.model_outputs[i + 1] + self.model_outputs[-1] = model_output + + if self.config.solver_order == 1 or self.lower_order_nums < 1 or lower_order_final: + prev_sample = self.dpm_solver_first_order_update(model_output, timestep, prev_timestep, sample) + elif self.config.solver_order == 2 or self.lower_order_nums < 2 or lower_order_second: + timestep_list = [self.timesteps[step_index - 1], timestep] + prev_sample = self.multistep_dpm_solver_second_order_update( + self.model_outputs, timestep_list, prev_timestep, sample + ) + else: + timestep_list = [self.timesteps[step_index - 2], self.timesteps[step_index - 1], timestep] + prev_sample = self.multistep_dpm_solver_third_order_update( + self.model_outputs, timestep_list, prev_timestep, sample + ) + + if self.lower_order_nums < self.config.solver_order: + self.lower_order_nums += 1 + + if not return_dict: + return (prev_sample,) + + return SchedulerOutput(prev_sample=prev_sample) + + def scale_model_input(self, sample: torch.FloatTensor, *args, **kwargs) -> torch.FloatTensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. 
+ + Args: + sample (`torch.FloatTensor`): input sample + + Returns: + `torch.FloatTensor`: scaled input sample + """ + return sample + + def add_noise( + self, + original_samples: torch.FloatTensor, + noise: torch.FloatTensor, + timesteps: torch.IntTensor, + ) -> torch.FloatTensor: + # Make sure alphas_cumprod and timestep have same device and dtype as original_samples + self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype) + timesteps = timesteps.to(original_samples.device) + + sqrt_alpha_prod = self.alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + while len(sqrt_alpha_prod.shape) < len(original_samples.shape): + sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1) + + sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps]) ** 0.5 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape): + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py b/src/diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py new file mode 100644 index 0000000000..c9a6d1cd5c --- /dev/null +++ b/src/diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py @@ -0,0 +1,590 @@ +# Copyright 2022 TSAIL Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: This file is strongly influenced by https://github.com/LuChengTHU/dpm-solver + +import math +from dataclasses import dataclass +from typing import List, Optional, Tuple, Union + +import flax +import jax +import jax.numpy as jnp + +from ..configuration_utils import ConfigMixin, register_to_config +from .scheduling_utils_flax import FlaxSchedulerMixin, FlaxSchedulerOutput, broadcast_to_shape_from_left + + +def betas_for_alpha_bar(num_diffusion_timesteps: int, max_beta=0.999) -> jnp.ndarray: + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + + Args: + num_diffusion_timesteps (`int`): the number of betas to produce. + max_beta (`float`): the maximum beta to use; use values lower than 1 to + prevent singularities. 
+ + Returns: + betas (`jnp.ndarray`): the betas used by the scheduler to step the model outputs + """ + + def alpha_bar(time_step): + return math.cos((time_step + 0.008) / 1.008 * math.pi / 2) ** 2 + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return jnp.array(betas, dtype=jnp.float32) + + +@flax.struct.dataclass +class DPMSolverMultistepSchedulerState: + # setable values + num_inference_steps: Optional[int] = None + timesteps: Optional[jnp.ndarray] = None + + # running values + model_outputs: Optional[jnp.ndarray] = None + lower_order_nums: Optional[int] = None + step_index: Optional[int] = None + prev_timestep: Optional[int] = None + cur_sample: Optional[jnp.ndarray] = None + + @classmethod + def create(cls, num_train_timesteps: int): + return cls(timesteps=jnp.arange(0, num_train_timesteps)[::-1]) + + +@dataclass +class FlaxDPMSolverMultistepSchedulerOutput(FlaxSchedulerOutput): + state: DPMSolverMultistepSchedulerState + + +class FlaxDPMSolverMultistepScheduler(FlaxSchedulerMixin, ConfigMixin): + """ + DPM-Solver (and the improved version DPM-Solver++) is a fast dedicated high-order solver for diffusion ODEs with + the convergence order guarantee. Empirically, sampling by DPM-Solver with only 20 steps can generate high-quality + samples, and it can generate quite good samples even in only 10 steps. + + For more details, see the original paper: https://arxiv.org/abs/2206.00927 and https://arxiv.org/abs/2211.01095 + + Currently, we support the multistep DPM-Solver for both noise prediction models and data prediction models. We + recommend to use `solver_order=2` for guided sampling, and `solver_order=3` for unconditional sampling. + + We also support the "dynamic thresholding" method in Imagen (https://arxiv.org/abs/2205.11487). For pixel-space + diffusion models, you can set both `algorithm_type="dpmsolver++"` and `thresholding=True` to use the dynamic + thresholding. Note that the thresholding method is unsuitable for latent-space diffusion models (such as + stable-diffusion). + + [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` + function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. + [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and + [`~ConfigMixin.from_config`] functions. + + For more details, see the original paper: https://arxiv.org/abs/2206.00927 and https://arxiv.org/abs/2211.01095 + + Args: + num_train_timesteps (`int`): number of diffusion steps used to train the model. + beta_start (`float`): the starting `beta` value of inference. + beta_end (`float`): the final `beta` value. + beta_schedule (`str`): + the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray`, optional): + option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. + solver_order (`int`, default `2`): + the order of DPM-Solver; can be `1` or `2` or `3`. We recommend to use `solver_order=2` for guided + sampling, and `solver_order=3` for unconditional sampling. + predict_epsilon (`bool`, default `True`): + we currently support both the noise prediction model and the data prediction model. 
If the model predicts + the noise / epsilon, set `predict_epsilon` to `True`. If the model predicts the data / x0 directly, set + `predict_epsilon` to `False`. + thresholding (`bool`, default `False`): + whether to use the "dynamic thresholding" method (introduced by Imagen, https://arxiv.org/abs/2205.11487). + For pixel-space diffusion models, you can set both `algorithm_type=dpmsolver++` and `thresholding=True` to + use the dynamic thresholding. Note that the thresholding method is unsuitable for latent-space diffusion + models (such as stable-diffusion). + dynamic_thresholding_ratio (`float`, default `0.995`): + the ratio for the dynamic thresholding method. Default is `0.995`, the same as Imagen + (https://arxiv.org/abs/2205.11487). + sample_max_value (`float`, default `1.0`): + the threshold value for dynamic thresholding. Valid only when `thresholding=True` and + `algorithm_type="dpmsolver++`. + algorithm_type (`str`, default `dpmsolver++`): + the algorithm type for the solver. Either `dpmsolver` or `dpmsolver++`. The `dpmsolver` type implements the + algorithms in https://arxiv.org/abs/2206.00927, and the `dpmsolver++` type implements the algorithms in + https://arxiv.org/abs/2211.01095. We recommend to use `dpmsolver++` with `solver_order=2` for guided + sampling (e.g. stable-diffusion). + solver_type (`str`, default `midpoint`): + the solver type for the second-order solver. Either `midpoint` or `heun`. The solver type slightly affects + the sample quality, especially for small number of steps. We empirically find that `midpoint` solvers are + slightly better, so we recommend to use the `midpoint` type. + lower_order_final (`bool`, default `True`): + whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. We empirically + find this trick can stabilize the sampling of DPM-Solver for steps < 15, especially for steps <= 10. + + """ + + @property + def has_state(self): + return True + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: Optional[jnp.ndarray] = None, + solver_order: int = 2, + predict_epsilon: bool = True, + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + sample_max_value: float = 1.0, + algorithm_type: str = "dpmsolver++", + solver_type: str = "midpoint", + lower_order_final: bool = True, + ): + if trained_betas is not None: + self.betas = jnp.asarray(trained_betas) + elif beta_schedule == "linear": + self.betas = jnp.linspace(beta_start, beta_end, num_train_timesteps, dtype=jnp.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. 
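+            # (i.e. sqrt(beta) is interpolated linearly between sqrt(beta_start) and sqrt(beta_end), then squared)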
+ self.betas = jnp.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=jnp.float32) ** 2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = jnp.cumprod(self.alphas, axis=0) + # Currently we only support VP-type noise schedule + self.alpha_t = jnp.sqrt(self.alphas_cumprod) + self.sigma_t = jnp.sqrt(1 - self.alphas_cumprod) + self.lambda_t = jnp.log(self.alpha_t) - jnp.log(self.sigma_t) + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + # settings for DPM-Solver + if algorithm_type not in ["dpmsolver", "dpmsolver++"]: + raise NotImplementedError(f"{algorithm_type} does is not implemented for {self.__class__}") + if solver_type not in ["midpoint", "heun"]: + raise NotImplementedError(f"{solver_type} does is not implemented for {self.__class__}") + + def create_state(self): + return DPMSolverMultistepSchedulerState.create(num_train_timesteps=self.config.num_train_timesteps) + + def set_timesteps( + self, state: DPMSolverMultistepSchedulerState, num_inference_steps: int, shape: Tuple + ) -> DPMSolverMultistepSchedulerState: + """ + Sets the discrete timesteps used for the diffusion chain. Supporting function to be run before inference. + + Args: + state (`DPMSolverMultistepSchedulerState`): + the `FlaxDPMSolverMultistepScheduler` state data class instance. + num_inference_steps (`int`): + the number of diffusion steps used when generating samples with a pre-trained model. + shape (`Tuple`): + the shape of the samples to be generated. + """ + timesteps = ( + jnp.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps + 1) + .round()[::-1][:-1] + .astype(jnp.int32) + ) + + return state.replace( + num_inference_steps=num_inference_steps, + timesteps=timesteps, + model_outputs=jnp.zeros((self.config.solver_order,) + shape), + lower_order_nums=0, + step_index=0, + prev_timestep=-1, + cur_sample=jnp.zeros(shape), + ) + + def convert_model_output( + self, + model_output: jnp.ndarray, + timestep: int, + sample: jnp.ndarray, + ) -> jnp.ndarray: + """ + Convert the model output to the corresponding type that the algorithm (DPM-Solver / DPM-Solver++) needs. + + DPM-Solver is designed to discretize an integral of the noise prediciton model, and DPM-Solver++ is designed to + discretize an integral of the data prediction model. So we need to first convert the model output to the + corresponding type to match the algorithm. + + Note that the algorithm type and the model type is decoupled. That is to say, we can use either DPM-Solver or + DPM-Solver++ for both noise prediction model and data prediction model. + + Args: + model_output (`jnp.ndarray`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + + Returns: + `jnp.ndarray`: the converted model output. + """ + # DPM-Solver++ needs to solve an integral of the data prediction model. 
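+ # For an epsilon-prediction model the clean sample is recovered as
+ #   x0 = (sample - sigma_t * eps) / alpha_t,
+ # while an x0-prediction model is used as-is; optional dynamic thresholding then
+ # clips and rescales x0 using a per-sample percentile (Imagen, https://arxiv.org/abs/2205.11487).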
+ if self.config.algorithm_type == "dpmsolver++": + if self.config.predict_epsilon: + alpha_t, sigma_t = self.alpha_t[timestep], self.sigma_t[timestep] + x0_pred = (sample - sigma_t * model_output) / alpha_t + else: + x0_pred = model_output + if self.config.thresholding: + # Dynamic thresholding in https://arxiv.org/abs/2205.11487 + dynamic_max_val = jnp.percentile( + jnp.abs(x0_pred), self.config.dynamic_thresholding_ratio, axis=tuple(range(1, x0_pred.ndim)) + ) + dynamic_max_val = jnp.maximum( + dynamic_max_val, self.config.sample_max_value * jnp.ones_like(dynamic_max_val) + ) + x0_pred = jnp.clip(x0_pred, -dynamic_max_val, dynamic_max_val) / dynamic_max_val + return x0_pred + # DPM-Solver needs to solve an integral of the noise prediction model. + elif self.config.algorithm_type == "dpmsolver": + if self.config.predict_epsilon: + return model_output + else: + alpha_t, sigma_t = self.alpha_t[timestep], self.sigma_t[timestep] + epsilon = (sample - alpha_t * model_output) / sigma_t + return epsilon + + def dpm_solver_first_order_update( + self, model_output: jnp.ndarray, timestep: int, prev_timestep: int, sample: jnp.ndarray + ) -> jnp.ndarray: + """ + One step for the first-order DPM-Solver (equivalent to DDIM). + + See https://arxiv.org/abs/2206.00927 for the detailed derivation. + + Args: + model_output (`jnp.ndarray`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + prev_timestep (`int`): previous discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + + Returns: + `jnp.ndarray`: the sample tensor at the previous timestep. + """ + t, s0 = prev_timestep, timestep + m0 = model_output + lambda_t, lambda_s = self.lambda_t[t], self.lambda_t[s0] + alpha_t, alpha_s = self.alpha_t[t], self.alpha_t[s0] + sigma_t, sigma_s = self.sigma_t[t], self.sigma_t[s0] + h = lambda_t - lambda_s + if self.config.algorithm_type == "dpmsolver++": + x_t = (sigma_t / sigma_s) * sample - (alpha_t * (jnp.exp(-h) - 1.0)) * m0 + elif self.config.algorithm_type == "dpmsolver": + x_t = (alpha_t / alpha_s) * sample - (sigma_t * (jnp.exp(h) - 1.0)) * m0 + return x_t + + def multistep_dpm_solver_second_order_update( + self, + model_output_list: jnp.ndarray, + timestep_list: List[int], + prev_timestep: int, + sample: jnp.ndarray, + ) -> jnp.ndarray: + """ + One step for the second-order multistep DPM-Solver. + + Args: + model_output_list (`List[jnp.ndarray]`): + direct outputs from learned diffusion model at current and latter timesteps. + timestep (`int`): current and latter discrete timestep in the diffusion chain. + prev_timestep (`int`): previous discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + + Returns: + `jnp.ndarray`: the sample tensor at the previous timestep. 
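+
+ Note: with r0 = h_0 / h, the two most recent outputs enter through D0 = m0 and
+ D1 = (1 / r0) * (m0 - m1), a finite-difference correction on top of the
+ first-order (DDIM-like) update.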
+ """ + t, s0, s1 = prev_timestep, timestep_list[-1], timestep_list[-2] + m0, m1 = model_output_list[-1], model_output_list[-2] + lambda_t, lambda_s0, lambda_s1 = self.lambda_t[t], self.lambda_t[s0], self.lambda_t[s1] + alpha_t, alpha_s0 = self.alpha_t[t], self.alpha_t[s0] + sigma_t, sigma_s0 = self.sigma_t[t], self.sigma_t[s0] + h, h_0 = lambda_t - lambda_s0, lambda_s0 - lambda_s1 + r0 = h_0 / h + D0, D1 = m0, (1.0 / r0) * (m0 - m1) + if self.config.algorithm_type == "dpmsolver++": + # See https://arxiv.org/abs/2211.01095 for detailed derivations + if self.config.solver_type == "midpoint": + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (jnp.exp(-h) - 1.0)) * D0 + - 0.5 * (alpha_t * (jnp.exp(-h) - 1.0)) * D1 + ) + elif self.config.solver_type == "heun": + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (jnp.exp(-h) - 1.0)) * D0 + + (alpha_t * ((jnp.exp(-h) - 1.0) / h + 1.0)) * D1 + ) + elif self.config.algorithm_type == "dpmsolver": + # See https://arxiv.org/abs/2206.00927 for detailed derivations + if self.config.solver_type == "midpoint": + x_t = ( + (alpha_t / alpha_s0) * sample + - (sigma_t * (jnp.exp(h) - 1.0)) * D0 + - 0.5 * (sigma_t * (jnp.exp(h) - 1.0)) * D1 + ) + elif self.config.solver_type == "heun": + x_t = ( + (alpha_t / alpha_s0) * sample + - (sigma_t * (jnp.exp(h) - 1.0)) * D0 + - (sigma_t * ((jnp.exp(h) - 1.0) / h - 1.0)) * D1 + ) + return x_t + + def multistep_dpm_solver_third_order_update( + self, + model_output_list: jnp.ndarray, + timestep_list: List[int], + prev_timestep: int, + sample: jnp.ndarray, + ) -> jnp.ndarray: + """ + One step for the third-order multistep DPM-Solver. + + Args: + model_output_list (`List[jnp.ndarray]`): + direct outputs from learned diffusion model at current and latter timesteps. + timestep (`int`): current and latter discrete timestep in the diffusion chain. + prev_timestep (`int`): previous discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + + Returns: + `jnp.ndarray`: the sample tensor at the previous timestep. 
+ """ + t, s0, s1, s2 = prev_timestep, timestep_list[-1], timestep_list[-2], timestep_list[-3] + m0, m1, m2 = model_output_list[-1], model_output_list[-2], model_output_list[-3] + lambda_t, lambda_s0, lambda_s1, lambda_s2 = ( + self.lambda_t[t], + self.lambda_t[s0], + self.lambda_t[s1], + self.lambda_t[s2], + ) + alpha_t, alpha_s0 = self.alpha_t[t], self.alpha_t[s0] + sigma_t, sigma_s0 = self.sigma_t[t], self.sigma_t[s0] + h, h_0, h_1 = lambda_t - lambda_s0, lambda_s0 - lambda_s1, lambda_s1 - lambda_s2 + r0, r1 = h_0 / h, h_1 / h + D0 = m0 + D1_0, D1_1 = (1.0 / r0) * (m0 - m1), (1.0 / r1) * (m1 - m2) + D1 = D1_0 + (r0 / (r0 + r1)) * (D1_0 - D1_1) + D2 = (1.0 / (r0 + r1)) * (D1_0 - D1_1) + if self.config.algorithm_type == "dpmsolver++": + # See https://arxiv.org/abs/2206.00927 for detailed derivations + x_t = ( + (sigma_t / sigma_s0) * sample + - (alpha_t * (jnp.exp(-h) - 1.0)) * D0 + + (alpha_t * ((jnp.exp(-h) - 1.0) / h + 1.0)) * D1 + - (alpha_t * ((jnp.exp(-h) - 1.0 + h) / h**2 - 0.5)) * D2 + ) + elif self.config.algorithm_type == "dpmsolver": + # See https://arxiv.org/abs/2206.00927 for detailed derivations + x_t = ( + (alpha_t / alpha_s0) * sample + - (sigma_t * (jnp.exp(h) - 1.0)) * D0 + - (sigma_t * ((jnp.exp(h) - 1.0) / h - 1.0)) * D1 + - (sigma_t * ((jnp.exp(h) - 1.0 - h) / h**2 - 0.5)) * D2 + ) + return x_t + + def step( + self, + state: DPMSolverMultistepSchedulerState, + model_output: jnp.ndarray, + timestep: int, + sample: jnp.ndarray, + return_dict: bool = True, + ) -> Union[FlaxDPMSolverMultistepSchedulerOutput, Tuple]: + """ + Predict the sample at the previous timestep by DPM-Solver. Core function to propagate the diffusion process + from the learned model outputs (most often the predicted noise). + + Args: + state (`DPMSolverMultistepSchedulerState`): + the `FlaxDPMSolverMultistepScheduler` state data class instance. + model_output (`jnp.ndarray`): direct output from learned diffusion model. + timestep (`int`): current discrete timestep in the diffusion chain. + sample (`jnp.ndarray`): + current instance of sample being created by diffusion process. + return_dict (`bool`): option for returning tuple rather than FlaxDPMSolverMultistepSchedulerOutput class + + Returns: + [`FlaxDPMSolverMultistepSchedulerOutput`] or `tuple`: [`FlaxDPMSolverMultistepSchedulerOutput`] if + `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is the sample tensor. 
+ + """ + prev_timestep = jax.lax.cond( + state.step_index == len(state.timesteps) - 1, + lambda _: 0, + lambda _: state.timesteps[state.step_index + 1], + (), + ) + + model_output = self.convert_model_output(model_output, timestep, sample) + + model_outputs_new = jnp.roll(state.model_outputs, -1, axis=0) + model_outputs_new = model_outputs_new.at[-1].set(model_output) + state = state.replace( + model_outputs=model_outputs_new, + prev_timestep=prev_timestep, + cur_sample=sample, + ) + + def step_1(state: DPMSolverMultistepSchedulerState) -> jnp.ndarray: + return self.dpm_solver_first_order_update( + state.model_outputs[-1], + state.timesteps[state.step_index], + state.prev_timestep, + state.cur_sample, + ) + + def step_23(state: DPMSolverMultistepSchedulerState) -> jnp.ndarray: + def step_2(state: DPMSolverMultistepSchedulerState) -> jnp.ndarray: + timestep_list = jnp.array([state.timesteps[state.step_index - 1], state.timesteps[state.step_index]]) + return self.multistep_dpm_solver_second_order_update( + state.model_outputs, + timestep_list, + state.prev_timestep, + state.cur_sample, + ) + + def step_3(state: DPMSolverMultistepSchedulerState) -> jnp.ndarray: + timestep_list = jnp.array( + [ + state.timesteps[state.step_index - 2], + state.timesteps[state.step_index - 1], + state.timesteps[state.step_index], + ] + ) + return self.multistep_dpm_solver_third_order_update( + state.model_outputs, + timestep_list, + state.prev_timestep, + state.cur_sample, + ) + + if self.config.solver_order == 2: + return step_2(state) + elif self.config.lower_order_final and len(state.timesteps) < 15: + return jax.lax.cond( + state.lower_order_nums < 2, + step_2, + lambda state: jax.lax.cond( + state.step_index == len(state.timesteps) - 2, + step_2, + step_3, + state, + ), + state, + ) + else: + return jax.lax.cond( + state.lower_order_nums < 2, + step_2, + step_3, + state, + ) + + if self.config.solver_order == 1: + prev_sample = step_1(state) + elif self.config.lower_order_final and len(state.timesteps) < 15: + prev_sample = jax.lax.cond( + state.lower_order_nums < 1, + step_1, + lambda state: jax.lax.cond( + state.step_index == len(state.timesteps) - 1, + step_1, + step_23, + state, + ), + state, + ) + else: + prev_sample = jax.lax.cond( + state.lower_order_nums < 1, + step_1, + step_23, + state, + ) + + state = state.replace( + lower_order_nums=jnp.minimum(state.lower_order_nums + 1, self.config.solver_order), + step_index=(state.step_index + 1), + ) + + if not return_dict: + return (prev_sample, state) + + return FlaxDPMSolverMultistepSchedulerOutput(prev_sample=prev_sample, state=state) + + def scale_model_input( + self, state: DPMSolverMultistepSchedulerState, sample: jnp.ndarray, timestep: Optional[int] = None + ) -> jnp.ndarray: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + state (`DPMSolverMultistepSchedulerState`): + the `FlaxDPMSolverMultistepScheduler` state data class instance. 
+ sample (`jnp.ndarray`): input sample + timestep (`int`, optional): current timestep + + Returns: + `jnp.ndarray`: scaled input sample + """ + return sample + + def add_noise( + self, + original_samples: jnp.ndarray, + noise: jnp.ndarray, + timesteps: jnp.ndarray, + ) -> jnp.ndarray: + sqrt_alpha_prod = self.alphas_cumprod[timesteps] ** 0.5 + sqrt_alpha_prod = sqrt_alpha_prod.flatten() + sqrt_alpha_prod = broadcast_to_shape_from_left(sqrt_alpha_prod, original_samples.shape) + + sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps]) ** 0.0 + sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten() + sqrt_one_minus_alpha_prod = broadcast_to_shape_from_left(sqrt_one_minus_alpha_prod, original_samples.shape) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py index fe45b3d591..7f44067325 100644 --- a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py @@ -73,6 +73,7 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin): "LMSDiscreteScheduler", "PNDMScheduler", "EulerDiscreteScheduler", + "DPMSolverMultistepScheduler", ] @register_to_config diff --git a/src/diffusers/schedulers/scheduling_euler_discrete.py b/src/diffusers/schedulers/scheduling_euler_discrete.py index 0cb31a4512..50a1bd89f8 100644 --- a/src/diffusers/schedulers/scheduling_euler_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_discrete.py @@ -74,6 +74,7 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin): "LMSDiscreteScheduler", "PNDMScheduler", "EulerAncestralDiscreteScheduler", + "DPMSolverMultistepScheduler", ] @register_to_config diff --git a/src/diffusers/schedulers/scheduling_lms_discrete.py b/src/diffusers/schedulers/scheduling_lms_discrete.py index 8d633267c6..d636fe6fe8 100644 --- a/src/diffusers/schedulers/scheduling_lms_discrete.py +++ b/src/diffusers/schedulers/scheduling_lms_discrete.py @@ -73,6 +73,7 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin): "PNDMScheduler", "EulerDiscreteScheduler", "EulerAncestralDiscreteScheduler", + "DPMSolverMultistepScheduler", ] @register_to_config diff --git a/src/diffusers/schedulers/scheduling_pndm.py b/src/diffusers/schedulers/scheduling_pndm.py index 0082ede787..eec18af8d3 100644 --- a/src/diffusers/schedulers/scheduling_pndm.py +++ b/src/diffusers/schedulers/scheduling_pndm.py @@ -94,6 +94,7 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin): "LMSDiscreteScheduler", "EulerDiscreteScheduler", "EulerAncestralDiscreteScheduler", + "DPMSolverMultistepScheduler", ] @register_to_config diff --git a/src/diffusers/utils/dummy_flax_objects.py b/src/diffusers/utils/dummy_flax_objects.py index 708022d85b..8e308bb41b 100644 --- a/src/diffusers/utils/dummy_flax_objects.py +++ b/src/diffusers/utils/dummy_flax_objects.py @@ -94,6 +94,21 @@ class FlaxDDPMScheduler(metaclass=DummyObject): requires_backends(cls, ["flax"]) +class FlaxDPMSolverMultistepScheduler(metaclass=DummyObject): + _backends = ["flax"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["flax"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["flax"]) + + class 
FlaxKarrasVeScheduler(metaclass=DummyObject): _backends = ["flax"] diff --git a/src/diffusers/utils/dummy_pt_objects.py b/src/diffusers/utils/dummy_pt_objects.py index 25aa82d6c5..9d296d2997 100644 --- a/src/diffusers/utils/dummy_pt_objects.py +++ b/src/diffusers/utils/dummy_pt_objects.py @@ -302,6 +302,21 @@ class DDPMScheduler(metaclass=DummyObject): requires_backends(cls, ["torch"]) +class DPMSolverMultistepScheduler(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + class EulerAncestralDiscreteScheduler(metaclass=DummyObject): _backends = ["torch"] diff --git a/tests/test_config.py b/tests/test_config.py index 7a9f270af3..5084769def 100755 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -19,7 +19,14 @@ import tempfile import unittest import diffusers -from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, PNDMScheduler, logging +from diffusers import ( + DDIMScheduler, + DPMSolverMultistepScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + PNDMScheduler, + logging, +) from diffusers.configuration_utils import ConfigMixin, register_to_config from diffusers.utils.testing_utils import CaptureLogger @@ -283,3 +290,15 @@ class ConfigTester(unittest.TestCase): assert pndm.__class__ == PNDMScheduler # no warning should be thrown assert cap_logger.out == "" + + def test_load_dpmsolver(self): + logger = logging.get_logger("diffusers.configuration_utils") + + with CaptureLogger(logger) as cap_logger: + dpm = DPMSolverMultistepScheduler.from_config( + "hf-internal-testing/tiny-stable-diffusion-torch", subfolder="scheduler" + ) + + assert dpm.__class__ == DPMSolverMultistepScheduler + # no warning should be thrown + assert cap_logger.out == "" diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 29186aaac9..056f723835 100755 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -24,6 +24,7 @@ import torch.nn.functional as F from diffusers import ( DDIMScheduler, DDPMScheduler, + DPMSolverMultistepScheduler, EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, IPNDMScheduler, @@ -549,6 +550,187 @@ class DDIMSchedulerTest(SchedulerCommonTest): assert abs(result_mean.item() - 0.1941) < 1e-3 +class DPMSolverMultistepSchedulerTest(SchedulerCommonTest): + scheduler_classes = (DPMSolverMultistepScheduler,) + forward_default_kwargs = (("num_inference_steps", 25),) + + def get_scheduler_config(self, **kwargs): + config = { + "num_train_timesteps": 1000, + "beta_start": 0.0001, + "beta_end": 0.02, + "beta_schedule": "linear", + "solver_order": 2, + "predict_epsilon": True, + "thresholding": False, + "sample_max_value": 1.0, + "algorithm_type": "dpmsolver++", + "solver_type": "midpoint", + "lower_order_final": False, + } + + config.update(**kwargs) + return config + + def check_over_configs(self, time_step=0, **config): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + sample = self.dummy_sample + residual = 0.1 * sample + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10] + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + 
scheduler.set_timesteps(num_inference_steps) + # copy over dummy past residuals + scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order] + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_config(tmpdirname) + new_scheduler.set_timesteps(num_inference_steps) + # copy over dummy past residuals + new_scheduler.model_outputs = dummy_past_residuals[: new_scheduler.config.solver_order] + + output, new_output = sample, sample + for t in range(time_step, time_step + scheduler.config.solver_order + 1): + output = scheduler.step(residual, t, output, **kwargs).prev_sample + new_output = new_scheduler.step(residual, t, new_output, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + def test_from_pretrained_save_pretrained(self): + pass + + def check_over_forward(self, time_step=0, **forward_kwargs): + kwargs = dict(self.forward_default_kwargs) + num_inference_steps = kwargs.pop("num_inference_steps", None) + sample = self.dummy_sample + residual = 0.1 * sample + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10] + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + scheduler.set_timesteps(num_inference_steps) + + # copy over dummy past residuals (must be after setting timesteps) + scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order] + + with tempfile.TemporaryDirectory() as tmpdirname: + scheduler.save_config(tmpdirname) + new_scheduler = scheduler_class.from_config(tmpdirname) + # copy over dummy past residuals + new_scheduler.set_timesteps(num_inference_steps) + + # copy over dummy past residual (must be after setting timesteps) + new_scheduler.model_outputs = dummy_past_residuals[: new_scheduler.config.solver_order] + + output = scheduler.step(residual, time_step, sample, **kwargs).prev_sample + new_output = new_scheduler.step(residual, time_step, sample, **kwargs).prev_sample + + assert torch.sum(torch.abs(output - new_output)) < 1e-5, "Scheduler outputs are not identical" + + def full_loop(self, **config): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config(**config) + scheduler = scheduler_class(**scheduler_config) + + num_inference_steps = 10 + model = self.dummy_model() + sample = self.dummy_sample_deter + scheduler.set_timesteps(num_inference_steps) + + for i, t in enumerate(scheduler.timesteps): + residual = model(sample, t) + sample = scheduler.step(residual, t, sample).prev_sample + + return sample + + def test_step_shape(self): + kwargs = dict(self.forward_default_kwargs) + + num_inference_steps = kwargs.pop("num_inference_steps", None) + + for scheduler_class in self.scheduler_classes: + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + sample = self.dummy_sample + residual = 0.1 * sample + + if num_inference_steps is not None and hasattr(scheduler, "set_timesteps"): + scheduler.set_timesteps(num_inference_steps) + elif num_inference_steps is not None and not hasattr(scheduler, "set_timesteps"): + kwargs["num_inference_steps"] = num_inference_steps + + # copy over dummy past residuals (must be done after set_timesteps) + dummy_past_residuals = [residual + 0.2, residual + 0.15, residual + 0.10] + scheduler.model_outputs = dummy_past_residuals[: scheduler.config.solver_order] + + 
time_step_0 = scheduler.timesteps[5] + time_step_1 = scheduler.timesteps[6] + + output_0 = scheduler.step(residual, time_step_0, sample, **kwargs).prev_sample + output_1 = scheduler.step(residual, time_step_1, sample, **kwargs).prev_sample + + self.assertEqual(output_0.shape, sample.shape) + self.assertEqual(output_0.shape, output_1.shape) + + def test_timesteps(self): + for timesteps in [25, 50, 100, 999, 1000]: + self.check_over_configs(num_train_timesteps=timesteps) + + def test_thresholding(self): + self.check_over_configs(thresholding=False) + for order in [1, 2, 3]: + for solver_type in ["midpoint", "heun"]: + for threshold in [0.5, 1.0, 2.0]: + for predict_epsilon in [True, False]: + self.check_over_configs( + thresholding=True, + predict_epsilon=predict_epsilon, + sample_max_value=threshold, + algorithm_type="dpmsolver++", + solver_order=order, + solver_type=solver_type, + ) + + def test_solver_order_and_type(self): + for algorithm_type in ["dpmsolver", "dpmsolver++"]: + for solver_type in ["midpoint", "heun"]: + for order in [1, 2, 3]: + for predict_epsilon in [True, False]: + self.check_over_configs( + solver_order=order, + solver_type=solver_type, + predict_epsilon=predict_epsilon, + algorithm_type=algorithm_type, + ) + sample = self.full_loop( + solver_order=order, + solver_type=solver_type, + predict_epsilon=predict_epsilon, + algorithm_type=algorithm_type, + ) + assert not torch.isnan(sample).any(), "Samples have nan numbers" + + def test_lower_order_final(self): + self.check_over_configs(lower_order_final=True) + self.check_over_configs(lower_order_final=False) + + def test_inference_steps(self): + for num_inference_steps in [1, 2, 3, 5, 10, 50, 100, 999, 1000]: + self.check_over_forward(num_inference_steps=num_inference_steps, time_step=0) + + def test_full_loop_no_noise(self): + sample = self.full_loop() + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_mean.item() - 0.3301) < 1e-3 + + class PNDMSchedulerTest(SchedulerCommonTest): scheduler_classes = (PNDMScheduler,) forward_default_kwargs = (("num_inference_steps", 50),) From e86a280c455130d597e352a6fe90367b14bfe925 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 7 Nov 2022 12:27:17 +0100 Subject: [PATCH 41/88] Remove warning about half precision on MPS (#1163) Remove warning about half precision on MPS. --- src/diffusers/pipeline_utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/diffusers/pipeline_utils.py b/src/diffusers/pipeline_utils.py index 97e196e723..a708d0cfb5 100644 --- a/src/diffusers/pipeline_utils.py +++ b/src/diffusers/pipeline_utils.py @@ -209,13 +209,13 @@ class DiffusionPipeline(ConfigMixin): for name in module_names.keys(): module = getattr(self, name) if isinstance(module, torch.nn.Module): - if module.dtype == torch.float16 and str(torch_device) in ["cpu", "mps"]: + if module.dtype == torch.float16 and str(torch_device) in ["cpu"]: logger.warning( - "Pipelines loaded with `torch_dtype=torch.float16` cannot run with `cpu` or `mps` device. It" - " is not recommended to move them to `cpu` or `mps` as running them will fail. Please make" - " sure to use a `cuda` device to run the pipeline in inference. due to the lack of support for" - " `float16` operations on those devices in PyTorch. Please remove the" - " `torch_dtype=torch.float16` argument, or use a `cuda` device to run inference." + "Pipelines loaded with `torch_dtype=torch.float16` cannot run with `cpu` device. 
It" + " is not recommended to move them to `cpu` as running them will fail. Please make" + " sure to use an accelerator to run the pipeline in inference, due to the lack of" + " support for`float16` operations on this device in PyTorch. Please, remove the" + " `torch_dtype=torch.float16` argument, or use another device for inference." ) module.to(torch_device) return self From cd502b25cf0debac6f98d27a6638ef95208d1ea2 Mon Sep 17 00:00:00 2001 From: "Duong A. Nguyen" <38061659+duongna21@users.noreply.github.com> Date: Mon, 7 Nov 2022 19:34:45 +0700 Subject: [PATCH 42/88] Fix typo latens -> latents (#1171) fix typo --- examples/community/composable_stable_diffusion.py | 2 +- examples/community/imagic_stable_diffusion.py | 2 +- examples/community/interpolate_stable_diffusion.py | 2 +- examples/community/lpw_stable_diffusion.py | 2 +- examples/community/seed_resize_stable_diffusion.py | 2 +- examples/community/stable_diffusion_mega.py | 2 +- examples/community/wildcard_stable_diffusion.py | 2 +- .../pipelines/latent_diffusion/pipeline_latent_diffusion.py | 2 +- .../latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py | 2 +- .../pipelines/stable_diffusion/pipeline_cycle_diffusion.py | 2 +- .../stable_diffusion/pipeline_flax_stable_diffusion.py | 2 +- .../stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py | 2 +- .../stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py | 2 +- .../pipelines/stable_diffusion/pipeline_stable_diffusion.py | 2 +- .../stable_diffusion/pipeline_stable_diffusion_img2img.py | 2 +- .../stable_diffusion/pipeline_stable_diffusion_inpaint.py | 2 +- .../pipeline_stable_diffusion_inpaint_legacy.py | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) diff --git a/examples/community/composable_stable_diffusion.py b/examples/community/composable_stable_diffusion.py index 10d34d255a..eb207e1bdd 100644 --- a/examples/community/composable_stable_diffusion.py +++ b/examples/community/composable_stable_diffusion.py @@ -32,7 +32,7 @@ class ComposableStableDiffusionPipeline(DiffusionPipeline): [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. scheduler ([`SchedulerMixin`]): - A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offsensive or harmful. diff --git a/examples/community/imagic_stable_diffusion.py b/examples/community/imagic_stable_diffusion.py index 92aa677b46..0c95fb4358 100644 --- a/examples/community/imagic_stable_diffusion.py +++ b/examples/community/imagic_stable_diffusion.py @@ -54,7 +54,7 @@ class ImagicStableDiffusionPipeline(DiffusionPipeline): [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. scheduler ([`SchedulerMixin`]): - A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + A scheduler to be used in combination with `unet` to denoise the encoded image latents. 
Can be one of [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offsensive or harmful. diff --git a/examples/community/interpolate_stable_diffusion.py b/examples/community/interpolate_stable_diffusion.py index de1c6f687a..761aaeca69 100644 --- a/examples/community/interpolate_stable_diffusion.py +++ b/examples/community/interpolate_stable_diffusion.py @@ -65,7 +65,7 @@ class StableDiffusionWalkPipeline(DiffusionPipeline): [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. scheduler ([`SchedulerMixin`]): - A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. diff --git a/examples/community/lpw_stable_diffusion.py b/examples/community/lpw_stable_diffusion.py index 39a266dec9..e4ee7bf3c6 100644 --- a/examples/community/lpw_stable_diffusion.py +++ b/examples/community/lpw_stable_diffusion.py @@ -398,7 +398,7 @@ class StableDiffusionLongPromptWeightingPipeline(DiffusionPipeline): [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. scheduler ([`SchedulerMixin`]): - A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. diff --git a/examples/community/seed_resize_stable_diffusion.py b/examples/community/seed_resize_stable_diffusion.py index f912663a68..92cd1c04f9 100644 --- a/examples/community/seed_resize_stable_diffusion.py +++ b/examples/community/seed_resize_stable_diffusion.py @@ -37,7 +37,7 @@ class SeedResizeStableDiffusionPipeline(DiffusionPipeline): [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. scheduler ([`SchedulerMixin`]): - A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. 
diff --git a/examples/community/stable_diffusion_mega.py b/examples/community/stable_diffusion_mega.py index 7239519415..67112b282b 100644 --- a/examples/community/stable_diffusion_mega.py +++ b/examples/community/stable_diffusion_mega.py @@ -42,7 +42,7 @@ class StableDiffusionMegaPipeline(DiffusionPipeline): [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. scheduler ([`SchedulerMixin`]): - A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionMegaSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. diff --git a/examples/community/wildcard_stable_diffusion.py b/examples/community/wildcard_stable_diffusion.py index b0f6375d50..9ad0d8e9fa 100644 --- a/examples/community/wildcard_stable_diffusion.py +++ b/examples/community/wildcard_stable_diffusion.py @@ -99,7 +99,7 @@ class WildcardStableDiffusionPipeline(DiffusionPipeline): [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. scheduler ([`SchedulerMixin`]): - A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. diff --git a/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py b/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py index d894886284..cfa3994913 100644 --- a/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +++ b/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py @@ -32,7 +32,7 @@ class LDMTextToImagePipeline(DiffusionPipeline): [BertTokenizer](https://huggingface.co/docs/transformers/model_doc/bert#transformers.BertTokenizer). unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. scheduler ([`SchedulerMixin`]): - A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. """ diff --git a/src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py b/src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py index a7ffb4adc9..c8da6f193e 100644 --- a/src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +++ b/src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py @@ -18,7 +18,7 @@ class LDMPipeline(DiffusionPipeline): Vector-quantized (VQ) Model to encode and decode images to and from latent representations. unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image latents. 
scheduler ([`SchedulerMixin`]): - [`DDIMScheduler`] is to be used in combination with `unet` to denoise the encoded image latens. + [`DDIMScheduler`] is to be used in combination with `unet` to denoise the encoded image latents. """ def __init__(self, vqvae: VQModel, unet: UNet2DModel, scheduler: DDIMScheduler): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py index f38e57983f..3d2ec7d55b 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py @@ -107,7 +107,7 @@ class CycleDiffusionPipeline(DiffusionPipeline): [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. scheduler ([`SchedulerMixin`]): - A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py index 5a910f8453..73ec322c66 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py @@ -47,7 +47,7 @@ class FlaxStableDiffusionPipeline(FlaxDiffusionPipeline): [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). unet ([`FlaxUNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. scheduler ([`SchedulerMixin`]): - A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of [`FlaxDDIMScheduler`], [`FlaxLMSDiscreteScheduler`], [`FlaxPNDMScheduler`], or [`FlaxDPMSolverMultistepScheduler`]. safety_checker ([`FlaxStableDiffusionSafetyChecker`]): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py index 5e6b2e6f2f..04ecdbecc6 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py @@ -46,7 +46,7 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline): [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. scheduler ([`SchedulerMixin`]): - A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. 
safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py index 2ce9831a16..517242921d 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py @@ -59,7 +59,7 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline): [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. scheduler ([`SchedulerMixin`]): - A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 094841f977..9c7edabf69 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -44,7 +44,7 @@ class StableDiffusionPipeline(DiffusionPipeline): [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. scheduler ([`SchedulerMixin`]): - A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 08b14b36be..f6f38ab1d3 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -55,7 +55,7 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. scheduler ([`SchedulerMixin`]): - A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. 
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 34e8231c63..a7af1c9d33 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -56,7 +56,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. scheduler ([`SchedulerMixin`]): - A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py index 5c06b74bfa..91dcefc91d 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py @@ -62,7 +62,7 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline): [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. scheduler ([`SchedulerMixin`]): - A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. safety_checker ([`StableDiffusionSafetyChecker`]): Classification module that estimates whether generated images could be considered offensive or harmful. From 0dd8c6b4dbab4069de9ed1cafb53cbd495873879 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 7 Nov 2022 14:32:51 +0100 Subject: [PATCH 43/88] Fix community pipeline links (#1162) * Change title to match the sidebar in _toctree. * Fix custom pipe link, add link to contribute. * Fix community pipeline links. --- README.md | 2 +- docs/source/using-diffusers/contribute_pipeline.mdx | 2 +- docs/source/using-diffusers/custom_pipeline_overview.mdx | 2 +- src/diffusers/pipeline_utils.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 9c44cff2c8..d3ac5702cd 100644 --- a/README.md +++ b/README.md @@ -353,7 +353,7 @@ Textual Inversion is a technique for capturing novel concepts from a small numbe ## Stable Diffusion Community Pipelines -The release of Stable Diffusion as an open source model has fostered a lot of interesting ideas and experimentation. Our [Community Examples folder](https://github.com/huggingface/diffusers/tree/main/examples/community) contains many ideas worth exploring, like interpolating to create animated videos, using CLIP Guidance for additional prompt fidelity, term weighting, and much more! 
Take a look and [contribute your own](https://huggingface.co/docs/diffusers/using-diffusers/custom_pipelines). +The release of Stable Diffusion as an open source model has fostered a lot of interesting ideas and experimentation. Our [Community Examples folder](https://github.com/huggingface/diffusers/tree/main/examples/community) contains many ideas worth exploring, like interpolating to create animated videos, using CLIP Guidance for additional prompt fidelity, term weighting, and much more! [Take a look](https://huggingface.co/docs/diffusers/using-diffusers/custom_pipeline_overview) and [contribute your own](https://huggingface.co/docs/diffusers/using-diffusers/contribute_pipeline). ## Other Examples diff --git a/docs/source/using-diffusers/contribute_pipeline.mdx b/docs/source/using-diffusers/contribute_pipeline.mdx index 6122a996e1..18e84cdfbc 100644 --- a/docs/source/using-diffusers/contribute_pipeline.mdx +++ b/docs/source/using-diffusers/contribute_pipeline.mdx @@ -128,7 +128,7 @@ pipe = DiffusionPipeline.from_pretrained("google/ddpm-cifar10-32", custom_pipeli pipe() ``` -Another way to upload your custom_pipeline, besides sending a PR, is uploading the code that contains it to the Hugging Face Hub, [as exemplified here](https://huggingface.co/docs/diffusers/using-diffusers/custom_pipelines#loading-custom-pipelines-from-the-hub). +Another way to upload your custom_pipeline, besides sending a PR, is uploading the code that contains it to the Hugging Face Hub, [as exemplified here](https://huggingface.co/docs/diffusers/using-diffusers/custom_pipeline_overview#loading-custom-pipelines-from-the-hub). **Try it out now - it works!** diff --git a/docs/source/using-diffusers/custom_pipeline_overview.mdx b/docs/source/using-diffusers/custom_pipeline_overview.mdx index b1e9c0ce61..ae5bad2d7b 100644 --- a/docs/source/using-diffusers/custom_pipeline_overview.mdx +++ b/docs/source/using-diffusers/custom_pipeline_overview.mdx @@ -10,7 +10,7 @@ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express o specific language governing permissions and limitations under the License. --> -# Loading and Saving Custom Pipelines +# Loading and Adding Custom Pipelines Diffusers allows you to conveniently load any custom pipeline from the Hugging Face Hub as well as any [official community pipeline](https://github.com/huggingface/diffusers/tree/main/examples/community) via the [`DiffusionPipeline`] class. 
diff --git a/src/diffusers/pipeline_utils.py b/src/diffusers/pipeline_utils.py index a708d0cfb5..628e632012 100644 --- a/src/diffusers/pipeline_utils.py +++ b/src/diffusers/pipeline_utils.py @@ -303,8 +303,8 @@ class DiffusionPipeline(ConfigMixin): For more information on how to load and create custom pipelines, please have a look at [Loading and - Creating Custom - Pipelines](https://huggingface.co/docs/diffusers/main/en/using-diffusers/custom_pipelines) + Adding Custom + Pipelines](https://huggingface.co/docs/diffusers/using-diffusers/custom_pipeline_overview) torch_dtype (`str` or `torch.dtype`, *optional*): force_download (`bool`, *optional*, defaults to `False`): From b500df11559265857d6b51685affdc13822f625f Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 7 Nov 2022 17:15:41 +0100 Subject: [PATCH 44/88] [Docs] Add loading script (#1174) * add loading script * Apply suggestions from code review * Apply suggestions from code review Co-authored-by: Anton Lozhkov Co-authored-by: Suraj Patil * correct * Apply suggestions from code review Co-authored-by: Pedro Cuenca * uP Co-authored-by: Anton Lozhkov Co-authored-by: Suraj Patil Co-authored-by: Pedro Cuenca --- docs/source/using-diffusers/loading.mdx | 368 +++++++++++++++++++++++- 1 file changed, 364 insertions(+), 4 deletions(-) diff --git a/docs/source/using-diffusers/loading.mdx b/docs/source/using-diffusers/loading.mdx index 35f53b5664..35f1e0f928 100644 --- a/docs/source/using-diffusers/loading.mdx +++ b/docs/source/using-diffusers/loading.mdx @@ -12,7 +12,370 @@ specific language governing permissions and limitations under the License. # Loading -The core functionality for saving and loading systems in `Diffusers` is the HuggingFace Hub. +A core premise of the diffusers library is to make diffusion models **as accessible as possible**. +Accessibility is therefore achieved by providing an API to load complete diffusion pipelines as well as individual components with a single line of code. + +In the following we explain in-detail how to easily load: + +- *Complete Diffusion Pipelines* via the [`DiffusionPipeline.from_pretrained`] +- *Diffusion Models* via [`ModelMixin.from_pretrained`] +- *Schedulers* via [`ConfigMixin.from_config`] + +## Loading pipelines + +The [`DiffusionPipeline`] class is the easiest way to access any diffusion model that is [available on the Hub](https://huggingface.co/models?library=diffusers). Let's look at an example on how to download [CompVis' Latent Diffusion model](https://huggingface.co/CompVis/ldm-text2im-large-256). + +```python +from diffusers import DiffusionPipeline + +repo_id = "CompVis/ldm-text2im-large-256" +ldm = DiffusionPipeline.from_pretrained(repo_id) +``` + +Here [`DiffusionPipeline`] automatically detects the correct pipeline (*i.e.* [`LDMTextToImagePipeline`]), downloads and caches all required configuration and weight files (if not already done so), and finally returns a pipeline instance, called `ldm`. +The pipeline instance can then be called using [`LDMTextToImagePipeline.__call__`] (i.e., `ldm("image of a astronaut riding a horse")`) for text-to-image generation. + +Instead of using the generic [`DiffusionPipeline`] class for loading, you can also load the appropriate pipeline class directly. 
The code snippet above yields the same instance as when doing: + +```python +from diffusers import LDMTextToImagePipeline + +repo_id = "CompVis/ldm-text2im-large-256" +ldm = LDMTextToImagePipeline.from_pretrained(repo_id) +``` + +Diffusion pipelines like `LDMTextToImagePipeline` often consist of multiple components. These components can be both parameterized models, such as `"unet"`, `"vqvae"` and "bert", tokenizers or schedulers. These components can interact in complex ways with each other when using the pipeline in inference, *e.g.* for [`LDMTextToImagePipeline`] or [`StableDiffusionPipeline`] the inference call is explained [here](https://huggingface.co/blog/stable_diffusion#how-does-stable-diffusion-work). +The purpose of the [pipeline classes](./api/overview#diffusers-summary) is to wrap the complexity of these diffusion systems and give the user an easy-to-use API while staying flexible for customization, as will be shown later. + +### Loading pipelines that require access request + +Due to the capabilities of diffusion models to generate extremely realistic images, there is a certain danger that such models might be misused for unwanted applications, *e.g.* generating pornography or violent images. +In order to minimize the possibility of such unsolicited use cases, some of the most powerful diffusion models require users to acknowledge a license before being able to use the model. If the user does not agree to the license, the pipeline cannot be downloaded. +If you try to load [`runwayml/stable-diffusion-v1-5`](https://huggingface.co/runwayml/stable-diffusion-v1-5) the same way as done previously: + +```python +from diffusers import DiffusionPipeline + +repo_id = "runwayml/stable-diffusion-v1-5" +stable_diffusion = DiffusionPipeline.from_pretrained(repo_id) +``` + +it will only work if you have both *click-accepted* the license on [the model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) and are logged into the Hugging Face Hub. Otherwise you will get an error message +such as the following: + +``` +OSError: runwayml/stable-diffusion-v1-5 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models' +If this is a private repository, make sure to pass a token having permission to this repo with `use_auth_token` or log in with `huggingface-cli login` +``` + +Therefore, we need to make sure to *click-accept* the license. You can do this by simply visiting +the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) and clicking on "Agree and access repository": + +![access_request](https://github.com/patrickvonplaten/scientific_images/blob/master/access_request.png) + +Second, you need to login with your access token: + +``` +huggingface-cli login +``` + +before trying to load the model. Or alternatively, you can pass [your access token](https://huggingface.co/docs/hub/security-tokens#user-access-tokens) directly via the flag `use_auth_token`. In this case you do **not** need +to run `huggingface-cli login` before: + +```python +from diffusers import DiffusionPipeline + +repo_id = "runwayml/stable-diffusion-v1-5" +stable_diffusion = DiffusionPipeline.from_pretrained(repo_id, use_auth_token="") +``` + +The final option to use pipelines that require access without having to rely on the Hugging Face Hub is to load the pipeline locally as explained in the next section. 
+
+### Loading pipelines locally
+
+If you prefer to have complete control over the pipeline and its corresponding files or, as said before, if you want to use pipelines that require an access request without having to be connected to the Hugging Face Hub,
+we recommend loading pipelines locally.
+
+To load a diffusion pipeline locally, you first need to manually download the whole folder structure on your local disk and then pass a local path to [`DiffusionPipeline.from_pretrained`]. Let's again look at an example, this time for
+[Stable Diffusion v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5).
+
+First, you should make use of [`git-lfs`](https://git-lfs.github.com/) to download the whole folder structure that has been uploaded to the [model repository](https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main):
+
+```
+git lfs install
+git clone https://huggingface.co/runwayml/stable-diffusion-v1-5
+```
+
+The command above will create a local folder called `./stable-diffusion-v1-5` on your disk.
+Now, all you have to do is to simply pass the local folder path to `from_pretrained`:
+
+```python
+from diffusers import DiffusionPipeline
+
+repo_id = "./stable-diffusion-v1-5"
+stable_diffusion = DiffusionPipeline.from_pretrained(repo_id)
+```
+
+If `repo_id` is a local path, as is the case here, [`DiffusionPipeline.from_pretrained`] will automatically detect it and therefore not try to download any files from the Hub.
+While we usually recommend loading weights directly from the Hub to be certain to stay up to date with the newest changes, loading pipelines locally should be preferred if you
+want to stay anonymous, keep the application self-contained, etc.
+
+### Loading customized pipelines
+
+Advanced users that want to load customized versions of diffusion pipelines can do so by swapping any of the default components with compatible alternatives.
+A classic use case of this functionality is to swap the scheduler. [Stable Diffusion v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5) uses the [`PNDMScheduler`] by default, which is generally not the most performant scheduler. Since the release
+of Stable Diffusion, multiple improved schedulers have been published. To use those, the user has to manually load their preferred scheduler and pass it into [`DiffusionPipeline.from_pretrained`].
+
+*E.g.*, to use [`EulerDiscreteScheduler`] or [`DPMSolverMultistepScheduler`] for a better quality vs. generation speed trade-off at inference, one could load them as follows:
+
+```python
+from diffusers import DiffusionPipeline, EulerDiscreteScheduler, DPMSolverMultistepScheduler
+
+repo_id = "runwayml/stable-diffusion-v1-5"
+
+scheduler = EulerDiscreteScheduler.from_config(repo_id, subfolder="scheduler")
+# or
+# scheduler = DPMSolverMultistepScheduler.from_config(repo_id, subfolder="scheduler")
+
+stable_diffusion = DiffusionPipeline.from_pretrained(repo_id, scheduler=scheduler)
+```
+
+Three things are worth paying attention to here. 
+
+- First, the scheduler is loaded with [`ConfigMixin.from_config`] since it only depends on a configuration file and not on any parameterized weights.
+- Second, the scheduler is loaded with the function argument `subfolder="scheduler"`, as the configuration of Stable Diffusion's scheduler is defined in a [subfolder of the official pipeline repository](https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main/scheduler).
+- Third, the scheduler instance can simply be passed with the `scheduler` keyword argument to [`DiffusionPipeline.from_pretrained`]. This works because the [`StableDiffusionPipeline`] defines its scheduler with the `scheduler` attribute. It's not possible to use a different name, such as `sampler=scheduler`, since `sampler` is not a defined keyword argument of [`StableDiffusionPipeline.__init__`].
+
+The scheduler is not the only component of a diffusion pipeline that can be customized; in theory, all components of a pipeline can be customized. In practice, however, it often only makes sense to switch out a component that has **compatible** alternatives to what the pipeline expects.
+Many scheduler classes are compatible with each other as can be seen [here](https://github.com/huggingface/diffusers/blob/0dd8c6b4dbab4069de9ed1cafb53cbd495873879/src/diffusers/schedulers/scheduling_ddim.py#L112). This is not always the case for other components, such as the `"unet"`.
+
+One special case that can also be customized is the `"safety_checker"` of Stable Diffusion. If the safety checker is not useful for your use case, you can simply disable it by passing `None`:
+
+```python
+from diffusers import DiffusionPipeline
+
+repo_id = "runwayml/stable-diffusion-v1-5"
+stable_diffusion = DiffusionPipeline.from_pretrained(repo_id, safety_checker=None)
+```
+
+Another common use case is to reuse the same components in multiple pipelines, *e.g.* the weights and configurations of [`"runwayml/stable-diffusion-v1-5"`](https://huggingface.co/runwayml/stable-diffusion-v1-5) can be used for both [`StableDiffusionPipeline`] and [`StableDiffusionImg2ImgPipeline`], and we might not want to
+load the exact same weights into RAM twice. In this case, passing all the components explicitly lets us
+load the weights into RAM only once:
+
+```python
+from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
+
+model_id = "runwayml/stable-diffusion-v1-5"
+stable_diffusion_txt2img = StableDiffusionPipeline.from_pretrained(model_id)
+
+components = stable_diffusion_txt2img.components
+
+# weights are not reloaded into RAM
+stable_diffusion_img2img = StableDiffusionImg2ImgPipeline(**components)
+```
+
+Note how the above code snippet makes use of [`DiffusionPipeline.components`].
+
+### How does loading work?
+
+As a class method, [`DiffusionPipeline.from_pretrained`] is responsible for two things:
+- Download the latest version of the folder structure required to run the `repo_id` with `diffusers` and cache it (see the short sketch after this list). If the latest folder structure is available in the local cache, [`DiffusionPipeline.from_pretrained`] will simply reuse the cache and **not** re-download the files.
+- Load the cached weights into the _correct_ pipeline class - one of the [officially supported pipeline classes](./api/overview#diffusers-summary) - and return an instance of the class. The _correct_ pipeline class is thereby retrieved from the `model_index.json` file. 
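+
+As a small illustration of the first point, the sketch below calls [`DiffusionPipeline.from_pretrained`] twice with the same arguments; the second call finds the files in the local cache and does not download them again. The `cache_dir` argument only overrides where that cache lives and is not required; the path `./my-diffusers-cache` is just an example:
+
+```python
+from diffusers import DiffusionPipeline
+
+repo_id = "CompVis/ldm-text2im-large-256"
+
+# first call: downloads the folder structure from the Hub and caches it locally
+ldm = DiffusionPipeline.from_pretrained(repo_id, cache_dir="./my-diffusers-cache")
+
+# second call: nothing is re-downloaded, the cached files are reused
+ldm_again = DiffusionPipeline.from_pretrained(repo_id, cache_dir="./my-diffusers-cache")
+```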
+
+The underlying folder structure of a diffusion pipeline corresponds 1-to-1 to its pipeline class instance, *e.g.* [`LDMTextToImagePipeline`] for [`CompVis/ldm-text2im-large-256`](https://huggingface.co/CompVis/ldm-text2im-large-256).
+This can be understood better by looking at an example. Let's print out the pipeline class instance `ldm` we just defined:
+
+```python
+from diffusers import DiffusionPipeline
+
+repo_id = "CompVis/ldm-text2im-large-256"
+ldm = DiffusionPipeline.from_pretrained(repo_id)
+print(ldm)
+```
+
+*Output*:
+```
+LDMTextToImagePipeline {
+  "bert": [
+    "latent_diffusion",
+    "LDMBertModel"
+  ],
+  "scheduler": [
+    "diffusers",
+    "DDIMScheduler"
+  ],
+  "tokenizer": [
+    "transformers",
+    "BertTokenizer"
+  ],
+  "unet": [
+    "diffusers",
+    "UNet2DConditionModel"
+  ],
+  "vqvae": [
+    "diffusers",
+    "AutoencoderKL"
+  ]
+}
+```
+
+First, we see that the official pipeline is the [`LDMTextToImagePipeline`], and second we see that the `LDMTextToImagePipeline` consists of 5 components:
+- `"bert"` of class `LDMBertModel` as defined [in the pipeline](https://github.com/huggingface/diffusers/blob/cd502b25cf0debac6f98d27a6638ef95208d1ea2/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py#L664)
+- `"scheduler"` of class [`DDIMScheduler`]
+- `"tokenizer"` of class `BertTokenizer` as defined [in `transformers`](https://huggingface.co/docs/transformers/model_doc/bert#transformers.BertTokenizer)
+- `"unet"` of class [`UNet2DConditionModel`]
+- `"vqvae"` of class [`AutoencoderKL`]
+
+Let's now compare the pipeline instance to the folder structure of the model repository `CompVis/ldm-text2im-large-256`. Looking at the folder structure of [`CompVis/ldm-text2im-large-256`](https://huggingface.co/CompVis/ldm-text2im-large-256/tree/main) on the Hub, we can see it matches 1-to-1 the printed out instance of `LDMTextToImagePipeline` above:
+
+```
+.
+├── bert
+│   ├── config.json
+│   └── pytorch_model.bin
+├── model_index.json
+├── scheduler
+│   └── scheduler_config.json
+├── tokenizer
+│   ├── special_tokens_map.json
+│   ├── tokenizer_config.json
+│   └── vocab.txt
+├── unet
+│   ├── config.json
+│   └── diffusion_pytorch_model.bin
+└── vqvae
+    ├── config.json
+    └── diffusion_pytorch_model.bin
+```
+
+As we can see, each attribute of the instance of `LDMTextToImagePipeline` has its configuration and possibly weights defined in a subfolder that is called **exactly** like the class attribute (`"bert"`, `"scheduler"`, `"tokenizer"`, `"unet"`, `"vqvae"`). Importantly, every pipeline expects a `model_index.json` file that tells the `DiffusionPipeline` both:
+- which pipeline class should be loaded, and
+- what sub-classes from which library are stored in which subfolders.
+
+In the case of `CompVis/ldm-text2im-large-256`, the `model_index.json` is therefore defined as follows:
+
+```
+{
+  "_class_name": "LDMTextToImagePipeline",
+  "_diffusers_version": "0.0.4",
+  "bert": [
+    "latent_diffusion",
+    "LDMBertModel"
+  ],
+  "scheduler": [
+    "diffusers",
+    "DDIMScheduler"
+  ],
+  "tokenizer": [
+    "transformers",
+    "BertTokenizer"
+  ],
+  "unet": [
+    "diffusers",
+    "UNet2DConditionModel"
+  ],
+  "vqvae": [
+    "diffusers",
+    "AutoencoderKL"
+  ]
+}
+```
+
+- `_class_name` tells `DiffusionPipeline` which pipeline class should be loaded.
+- `_diffusers_version` can be useful to know under which `diffusers` version this model was created. 
+
+- Every component of the pipeline is then defined in the form:
+```
+"name" : [
+  "library",
+  "class"
+]
+```
+  - The `"name"` field corresponds both to the name of the subfolder in which the configuration and weights are stored and to the attribute name of the pipeline class (as can be seen [here](https://huggingface.co/CompVis/ldm-text2im-large-256/tree/main/bert) and [here](https://github.com/huggingface/diffusers/blob/cd502b25cf0debac6f98d27a6638ef95208d1ea2/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py#L42)).
+  - The `"library"` field corresponds to the name of the library, *e.g.* `diffusers` or `transformers`, from which the `"class"` should be loaded.
+  - The `"class"` field corresponds to the name of the class, *e.g.* [`BertTokenizer`](https://huggingface.co/docs/transformers/model_doc/bert#transformers.BertTokenizer) or [`UNet2DConditionModel`].
+
+
+## Loading models
+
+Models as defined under [src/diffusers/models](https://github.com/huggingface/diffusers/tree/main/src/diffusers/models) can be loaded via the [`ModelMixin.from_pretrained`] function. The API is very similar to [`DiffusionPipeline.from_pretrained`] and works in the same way:
+- Download the latest version of the model weights and configuration with `diffusers` and cache them. If the latest files are available in the local cache, [`ModelMixin.from_pretrained`] will simply reuse the cache and **not** re-download the files.
+- Load the cached weights into the _defined_ model class - one of [the existing model classes](./api/models) - and return an instance of the class.
+
+In contrast to [`DiffusionPipeline.from_pretrained`], models rely on fewer files and usually don't require a folder structure, but just a `diffusion_pytorch_model.bin` and a `config.json` file.
+
+Let's look at an example:
+
+```python
+from diffusers import UNet2DConditionModel
+
+repo_id = "CompVis/ldm-text2im-large-256"
+model = UNet2DConditionModel.from_pretrained(repo_id, subfolder="unet")
+```
+
+Note how we have to pass the `subfolder="unet"` argument to tell [`ModelMixin.from_pretrained`] that the model weights are located in a [subfolder of the repository](https://huggingface.co/CompVis/ldm-text2im-large-256/tree/main/unet).
+
+As explained in [Loading customized pipelines](./using-diffusers/loading#loading-customized-pipelines), one can pass a loaded model to a diffusion pipeline via [`DiffusionPipeline.from_pretrained`]:
+
+```python
+from diffusers import DiffusionPipeline
+
+repo_id = "CompVis/ldm-text2im-large-256"
+ldm = DiffusionPipeline.from_pretrained(repo_id, unet=model)
+```
+
+If the model files can be found directly at the root level, which is usually only the case for some very simple diffusion models, such as [`google/ddpm-cifar10-32`](https://huggingface.co/google/ddpm-cifar10-32), we don't
+need to pass a `subfolder` argument:
+
+```python
+from diffusers import UNet2DModel
+
+repo_id = "google/ddpm-cifar10-32"
+model = UNet2DModel.from_pretrained(repo_id)
+```
+
+## Loading schedulers
+
+Schedulers cannot be loaded via a `from_pretrained` method, but instead rely on [`ConfigMixin.from_config`]. Schedulers are **not parameterized** or **trained**, but instead are purely defined by a configuration file.
+Therefore, the loading method was given a different name here.
+
+In contrast to pipelines or models, loading schedulers does not consume any significant amount of memory and the same configuration file can often be used for a variety of different schedulers. 
+
+For example, all of:
+
+- [`DDPMScheduler`]
+- [`DDIMScheduler`]
+- [`PNDMScheduler`]
+- [`LMSDiscreteScheduler`]
+- [`EulerDiscreteScheduler`]
+- [`EulerAncestralDiscreteScheduler`]
+- [`DPMSolverMultistepScheduler`]
+
+are compatible with [`StableDiffusionPipeline`] and therefore the same scheduler configuration file can be loaded in any of those classes:
+
+```python
+from diffusers import StableDiffusionPipeline
+from diffusers import (
+    DDPMScheduler,
+    DDIMScheduler,
+    PNDMScheduler,
+    LMSDiscreteScheduler,
+    EulerDiscreteScheduler,
+    EulerAncestralDiscreteScheduler,
+    DPMSolverMultistepScheduler,
+)
+
+repo_id = "runwayml/stable-diffusion-v1-5"
+
+ddpm = DDPMScheduler.from_config(repo_id, subfolder="scheduler")
+ddim = DDIMScheduler.from_config(repo_id, subfolder="scheduler")
+pndm = PNDMScheduler.from_config(repo_id, subfolder="scheduler")
+lms = LMSDiscreteScheduler.from_config(repo_id, subfolder="scheduler")
+euler_anc = EulerAncestralDiscreteScheduler.from_config(repo_id, subfolder="scheduler")
+euler = EulerDiscreteScheduler.from_config(repo_id, subfolder="scheduler")
+dpm = DPMSolverMultistepScheduler.from_config(repo_id, subfolder="scheduler")
+
+# replace `dpm` with any of `ddpm`, `ddim`, `pndm`, `lms`, `euler`, `euler_anc`
+pipeline = StableDiffusionPipeline.from_pretrained(repo_id, scheduler=dpm)
+```
+
+## API [[autodoc]] modeling_utils.ModelMixin - from_pretrained @@ -29,6 +392,3 @@ The core functionality for saving and loading systems in `Diffusers` is the Hugg [[autodoc]] pipeline_flax_utils.FlaxDiffusionPipeline - from_pretrained - save_pretrained - - -Under further construction 🚧, open a [PR](https://github.com/huggingface/diffusers/compare) if you want to contribute! From de7536281a029167c34d30c9b5d822b19caabb69 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 7 Nov 2022 17:25:13 +0100 Subject: [PATCH 45/88] fix image docs --- docs/source/imgs/access_request.png | Bin 0 -> 104814 bytes docs/source/using-diffusers/loading.mdx | 6 +++++- 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 docs/source/imgs/access_request.png diff --git a/docs/source/imgs/access_request.png b/docs/source/imgs/access_request.png new file mode 100644 index 0000000000000000000000000000000000000000..33c6abc88dfb226e929b44c30c173c787b407045 GIT binary patch literal 104814
z>2~i7z#(XOF_0QiOJB7e-PvQ)fbej6_FE|YQQf1g^ns8(O+*BxJ)MyQ>>I((_mi3V z9UDFid!Al599h!l@La+fR~-Crkfwd6C0;CSz5aoaFlz-loTV@EG`CoYii>R@lt$^% z(yP2fK#L83tQ~_Ksp)=1-#ZD{D(= zp5nXkmI|JKQ35S^vaWT-t{vIzywS2KSO7xt_@s%{ulie4VA{dDN;f4o(lWPKQ^uHS zrIevOtsQj6;4}4CZ52|_>5=pjqlf%YpwY~&*Bse3#9jx&!*U>GosY^I3NCH^P7VX- z!kaD0iemK(edLnuR_95MZj?jmCyd<`UlKbD^+x4?xe4o}ks&yW2V<$`6Ka20Y+#pl zfw4JRi>i0xAZM-ooVmM|cA=nuVk?JaiS^{~qyGPp{CBvm5J1Olyk96Q7=Tu>n6~Sp z^&N?zD)-qB=KFnHq6Oaw+`p24^+kM^Bw*r8c0G}m@I@>Pm04@DCy=wGu&Tg1ecF+j z18APB^Q!%%@lhPYP7H$V8}2h9u0oZQCY|DiX;%B4n@0r0^h8u_`Iljx5y6GaUWdtq zIiw{z6ej|XI_|aJa1IoOhea!=(uJd&lbF@9Z~*bI1g$YCaGt0QNiP**zYXtp8(-0u zx$5zoMqccY8%5-{&*?`JljIYsns)FCIFQ=lTml&)sinRL6#KiyZtkV;pZsEHI_{Ke z7kA2a*8El_b%%DWe0?Y@@5x{Gn>kS513(gQzcwvuT5B0h0Qd#A?)QYH7(AkrFBd|o zX0qotB=vj|;WwUW(~8-z>)A06{IFDmA}7keScpL$++OIC#i_?}UnCD~j654n4cP5i zlITxZ!jgF7gT3d8+Fy*|fT-zac;dYupo<)}X=Wn^>CL+TpaieFxCB}5*y~#GnTesC z@_56H3zQ1&?UDB^_>G>_4q2_?hQU>x19DFyO>&Ddfi2JYXo$!@RWv+|&U&k@sYHl; zSG8RWU(W}|5BCE)8Upwlv=0}l9_dy$`HtGw-O7Zd{!KIz4{Unp-#TDiZR(Cfx#E5r zR9BVDL>gPfEO&^j4mHHDu*k zQ8>o2HA$TlOr88P^=L~H8z0AHmDj@=M8&h9i9j;T{nNNYG|+MSgV=<+S`x{a-+|Z< zW7Y=Ubp8eC%SDC**<>aJ&_BSZ5#jv}md}ddc0EQ%R+kPrxFrfx*3+j7O?OAr&;3pO zN{=}0GblR7?-}3Zg!gvmWSt?`v)wP@on4y6(p|BK&tto*7s31mpwZ-utQ^2MUBkv@ z2My(s(C}sMZX8>3lPwo?U~Q#~8_`i0!k0{l5$y|UUAF0Cpm1X>Vlh1B@rH`QhWP#l zC8K-qM*H?or?|dg{-y)%#~>1Peh*eSKYdb2erRcPS}-Yj4~v>q>{znv^ra6@vM2p* zOVz9??Eaft_dalUU7TtenS$`R5CQB5$=h~$8zdfHj6_70v30gn4v&dlLg;m-EJTgj z|1_3rbX%ibr=UOZrR?IZ!JM)Hj-W9^X}QFyLP8b!RQJ7O&4uML_#&@CSL#)0V`n{GQpJnU`Aj9LR0ta34wW^F*7zDX?-$Wh2qL;u{Rabr^<2l7J8Nc##C9{7&CHDN~IoOQ|mo1A|1f#p~aWMTzh&ro*}ZZKyp! z#-!%38L;~`zO-ZUZzPpg|AmXd+nUlkd;91%(ZcETWaD9p-RU|?hHGB5=o2^p>?VaO zgS0RSbPkzKXP<5eQC4djRhQ45>-0($sc-m0Sv?sN<hmwhoBjcP^4%{NvxNn!&z!vo zAVdB?Nl66JKqXU5e{Pdpf|xLw>%)`}^sR_Q)}X849B&%luSijh`%`78ec@7Gd(;vf z^B>tK`z-FnF$D`ZvURY(X1?O{9K^cdWNXaDqT)fRE2 zUm~lRH(<}w%kkmyLP$@LC&k6fiKqE1&Gi+igACm{=s>PTG;(2M$e0!bo9%UVactn| z=)hKKbo_!pZp2i36>xHWn~2Cas63V(8nE$u1l;)7{Booo?q)&0k`~FEU8wdRp)At; zCvP|zemuqX+z4I!1{aacT>@&l{)9o$H2{|iIi^>b-t`LTx;znw{WmYsK-*=ezG<37 zcX^@1nr#uaKy8;Wd=+J<%Nn*XP8D8*M&Ho5{YGqwZi6ffsvzXGi#}{09A;FW)^17G z;1KvRTygWSE zcyF4n1JH$6UIv0L5?Z0w7zEN>&}eiI^L65=yv?8k%2FO#lF&2NF}v@BcuF$zhYl73&B?0ewhP5sbOkF9p}E0cs98D1<{;_Uf-9toWw z-l@@hKk@DJ;3Qg;shITdI&At5{lA~#!Xduy0jg5|QK-R$j)4_;KRWbZt=2!oj`L0* zs8cbjuC!WQ(#`p7`;WMX&lhC_R{iS$2}yE}VVX>PKjMSpYCXt5?EqTIjWL(TR`!fy zjd%%1vx*p`W3|U57^(2zbaB2G5(BOizmV*#su20?*o<8CNVD*dQ0%_$(?D@(H?O&^ z9t-muT8i$}L#K4E%k6emy;EDPc5g^5 z8jXFC2gvL^d?Vgr=K-8;=UqcPD9zJkVter4xb!g~7TLhRcYf0XV9#l|w{sVSdMjRD zRWIhE@T?!h5kj~GM}@-rUYl%X!;#g=lfMZs-JIG#K_Cz)eR3!jM@y0M>;4F*;Oj2O zR4335134ulLm#a)D;}R~OopmViq{MNqVx&n;VVT7 zJHJ&P$O1_{M0josMcTKGe~;5n2~i~PPNI&$n$?fIOaHQ<3?DIKLA%z!52Nt;Q}z|H=)*BhO^=LJQGj3 z0$vk;wtKUTh)~I3oJ&zWVD}U+{srBXyC!amFV26?E1_&!7lhBM0Mf1akt%a-M%e<- z?7Kf)3{qr)m|}n;ao{iSlF0l6s13}Yt30F`e$U2m0pj{X4w|lJCL6D2ZYFCYuyN5b3Sj8)F|ah*xjp-i z^ZIb5y7+HQS6mHba5lWL-{s-Cc)7=Lg~Vg{qr@=x+8f#s*Jm26QvS=kUOk;m+M#QQ zJneW-VJvp`+o8LI>*u`GSj-5;wjEv@_W>GFIVJZoJ6r}<|IZTck)HLaOK*h!fKvoU z@K;>C7^A7jVsss=2;t<^5Fa0SsCyrb;tfC#8f4*-`Z`kkW=_1Mn9rp8NW9m^Vqbg@ zA*0!D#6bNDgFc9JXvD0*>LQ($YX9=|*!&t&?y@COZ23VF=K3NOJu_i2#EbsZe{d3; zGcu(*E0DwIkn&>fC)jk!Li{g_NK7q7b=J)-wZ4(^lZgN#eGpI2qli|>RG0h5=B znwYMlmIl?akl50l2i>w9cI=DsHEsWguHX31Bq>80L_GSNFU8j-Wi+OX*^gI{W=})7h-k>mZevUFduSWG@)y*G9AWT z1`FaVM1!KvZlc8gkcPm7Zwb}2^rWhNanF$4gGvQu*EQe$S><4O5VtcW!9a)M$0k>W zyHLlJXCm=PB;HICjwFV^fkSJlxhh4KkrJ4Bq~`UPRgR6@9|q}F zGXBDJ-}^r!<3CGSOq$k_F6&y2TVt;=oOl5}*nc5*}_k5(yo=rPJfFy%GW2!}p4 zySY+B_AJobf~6|j-P3~Fu}*GSB)o+1{} 
z>&|xO{U+SDY}B8uA>eWxeT-4({pR~WVj}Nw{z{2#rHhQo2jiiW{(p-k0{$25VaI{& zBCl=Q>)td=`X>)ziVPPSM}ipi^-T*Apdjha_D4zN%=5O45=P^&dlu2Le0N-%z28-m zocdYI3IoZbi@nxGz=jTG?I)n@5$k;2K@+4Vss}7`5|l3MeD%$p#~9SV=mc~pN#)mh zZ|+NG*gt=GkY8WkKKtCA zc>z2fm>I$K9|DhMiMhTf?8>jCnKs8^zWwtf2gF@@pz9Vk2|iW4Hy)L9l;6oIzM|MG zmRVZ=XpRK=!zG=3|7pbkLr7ck1w2GK&02TJX4A!PM{ylu>RAFL%w=#mcG0g}5HKP6 zR)X#BKSav9MY|0rQt+TvM7FdDpU>C5e})0@TOi4oi?1#}6Gv6ynOA$6f6`KLEJ)vJ zvel#VFrCqEB{0MgJ)u~DmjquX7YyZ{AW4AOCF$@;A|bPzNHw(iJr;oB8lQ593#R!x zy9yl?M4s#>W#F%mLYCN&chod5ugtBy;l7_Fs)o?)liINOcWMl>k+Eff`B`?F-0x|j zFOFf^ik~`9^d}OjMl2_$4IQ_fg7d)k%)vk=rupiVK>6APzItt?*fx8vtMxnOITwBV z7J=M9z&tPuf8=#ll4P&lg00#)F>+>|3aRTeg-EZXs|g*pSDm+0iYg<;RV1sdR)!Gq5rCRYsw;j+ zExecl|JOk9hsmo^fP~(OBP@+E12?Ty33vKX(%3X?FvJKs(T>o6AE%_${6`A7eGdSD8ro(u3aPyy z$9S@=AA;vGt1%mvBg8TJUqm{iNmsAr$V8y4tbwdgL~?T34^-Sw>zX85d{Dv`?)5Z* z|05dxKqruq%w280J{BFE#d}ue{G-jDrL{%Rwn9(N8VA@g`bqXIPfOvCqz0~4s{MBw zC6+1QCE-y4^pJNAgH?J3Q%-HJ2zq2qOO& zgZHLA62B?6@m~rFzeBX2%~HBRA%5nF()_f-)>0c0C-jpOUi=>thwg8>ce)yb$oPO85E^I=LCw>cdiE!(w1c(TO|AIPnElv>sQctqxm_ zTX2l8*SoUS&G7D;*Qu=Wc11i1Y15Z>cfX}1>q)U<62<+2HU++=u%G5=oKMU8A6}o!bs;__X#!H zJeov{NR9W^$TatAN1LdRiNFf$EU~_~v+=&X@ljRA@l2Lv&;kNr`!U&T$A;o~oay{x zrXthw{))}b2Q%!0lljaga0gsk@Wnhkkd~`#=(PTtuDY!gFKN%Hlg2i~@1o}PxMM|fZ)jDE zBJjsv_4^(Ze_VY{y^-vKjj!n#4uS=7!I+R>P5%;&#Z;5Ma(@>!qc_4O**RU4IIcE( zeeNF`F~tAFlLDvX=0nkO>pcMDlyrw3l_i<6p^0=##Y1B^5^4NLr`D?(7z$kq% z!ra33os9QQ14HDkL8PI716+LhDyxHS0&(IY!%}W;bgx&%DwF1A6lty)!Yb8|BpZ1Z?+^6-}_}hd_hb83xh%zX;f?8epsn;bP zGPZj4#;%iDI-C$Nl(QWOg=akYXj+3#;nm_VKL)m*8LW>Wa#b|(gdp> zC2K{+BAt&g{>*(Irm-X#&f`~np8au`SNutgJTe0EOlbmLD7u z5e*N2XtDp)>_O<#gF}iEn0<6V5@55qA^jqYY@*TiQP5M_VYf?Td(iAsSk&lyTD>A( zU^*bacvk-qP1KdS^sg{$GilQthq&S_>Va>1(%Q_UrbXjI%f>wSL3Y$A5*!RTzqNF; z3yT{@h@F-rA)d*UZn`3S2Pje}-)@P~mWbizqh{Dvt<-)(bp0$t*^t~k@Rt>XakR+; z#^2xHuaCZIZH6`5&W^$HWthH+b+c}dn>hmAh79p(mA$5TF7i+Tqc5n4YhCQ;nn&Dd zPhcu<>#}fHquIE4J8!RR2!K60Fr>QCRvrK2&U;lV5_ zI?JOE@#BI0=V*rQjb|UZV{)nC`q!mcNR@$R_r8LC?j`=!k=Yy7>c@t8I`zhYep{;p zy_)_SMNhxrDk2f^j_6ezi%H!}|2igny@rp3l#480TDxJt_;l^raj`-!!k{~E~e`f$DSS> zFKBh^)il~2=pdh~tGZs|W{O=myS}!-v{z?5iBq4R*0^HCbJmdmT)9q z4BjL9Cm;9WHroyB#rO{=fBM1zeL{84$8wz*3V!ZB*12x9hpi(41buXpF+x8@6N@#E z^=%y#*L2^CEp&mYyc6YP>$2s#9=?`m_;(RSTxF+X0z9d=pc3pkXZxy6^J5<%$V&~?7G>QsP!MDYZQ6FX9$xpFOX1wNtx`P;wWCFzn6O7?RY&%G5TtD@NTd+H)@hRbjN zcAB4Ux0vL&_?5_*D&3SJ(=62>23cJWUwgXsQ^RkBCUf*uc5bA?b%FaV7{b}qT;ezI z(IyrFWy>Fy1@A*-5<)`s85X`~!>2L}?|Wpgprf1*Q#7pwj9XBDw83NvwUmb{>J_ru z&=vR=h%-XWpK@uFitEiz(6pOdl~{8fH5;_ak*G{5uRC0-eNkJ}jBY5z^sBfqZcYeJ zZOVjL?;%{$>duANmSn6T)IgqX1BVs6+T9nyBtNO4O4|t1tUbCtmvZZ(RCv^GZGCB{ zKi^kE*tsLvU@`8>g(3H*=*MpuTq*HQ=L8I#)8AXa{FqA%f_AulkVULsH?_QWKvT&o z20t5fB-8rpIRQ+VKN{IEtOJo!#Z=*9q$qPk(;e=uv5HT|i$loR8EHRq$_}LFakKuS zj@#Dl&C*WID23(Wza?NxO3=|Ud#E(2WA<-R$}{6S{o-J9;N{cw%z5y67fHQ3I?*tI zv;FX)-Rmck_bxlWG#hSS_X>~wS8zMX(p z_8l6HX%_|_7^~T44*_Ky!_Ka;M#U!c_JhH-G?4Bnr)>~Ndf>S9J^W4$M@k1fNn}Ty z5_MRiE5B$WQ9>}rAHWUW%_OAq<@_%NT7=dWp zhFbLc4es=p04g;!%u}(eT(&G1dzqNJZ(iW8dedN$Za@*4k=Qk)n{V$mkDi`!+#QcI zD?%EnWQ#j`BW({9lu)k^0kyApSGRLS`+LEb5{nvNi3LY=u`dN3!XjUsw5d9?>5Zp) zB}49N9D0;TTWt3zApQ@>3l#(2#DP(U*=ZFm_E>d$*psRNad&+@$DIUaLhX_GDlf^X zhYFg6hwI*SU#iAPa-SBl(LcM`XYFp^b-b1bQaKlmN z&aaRbO)jO%UDW*9a8ED3Nhdki_$H`Cqe`E+Olrw#Rl&`pQ8!1A6Cl)p+=ju%Nhqf& z^c%SAvA6@=eT!?4Bg96!jL9;&3b&`wnOThH^_t_gtYo+A-Wx*S_Bal3X6V(4qE3b{ zA(!2jBQ3D(X{_s(?&RVT$9n^>uh@L&sy{XvgMoMQ%2&9JVUsa^Pt#CKSK^=^p}iG# zJG_7LOJ_16zvf7evFfAH#e0X&%H1({U6Y?0gWvfXW~SE;Dtf!JT;>OuwHLo(6mPnR z;guh@@f4#H z=HmgZpB`$-wep9lB;sT~uY;26_wywOudMcFI#|if!BW 
z&V4`cdwke`_qX@{cdeuQ=&QQAuUg%8)>$>rxzlRAtuxbEgLg`w>t&!_Z3ZdAAcBH`A#M*lqr7l72b$8Hi_;zfAggzMM^}-9Rc~;n9$?;b$|z z+4zks;aQApH2||($g2bRi*q~k5O((v>``b^Jb1@_Tgm0Ix|k+|RntXX#i@NUoG{jq zG(98!W@{kn&~+7l_i(H&fh)6-rtg|sL+_VTPua1i^`~KG|D|!XMQyE`Blq8qe>J`D zO98m2V@0tX9MZ3yPQ`81656$Ie^j*@qy}e_XKE)I8pjbE>YT3tuKR8^R)9w(i2;J4Q7bh;^iys7dcU7*4FkWwi9^?Z?9{8plzbTU&!<(K=@ z9rJ5dXqDwsOk#ec@Dv6ROtZ<{b`?@h`W>xp)}H6JBuG|B<8^iF^NF|a_K?lgI*>`# z9T%$Y8W&Nu#U+tur|9FQkWPHoYjMBGr+nyrjaH?ooy98d-7s8xGJXO*bL7TtvE(6B zmWc<~cXNzP<#hmm984-T6Gb-Dtz=WgBIlJv%I7t;9LVn{g?%)h{2MPpa$2EKhV5%R z42sMoBeWCZIj33yWk?G5&@Au-I_ZAN>p2!5^qYjxu+{{+ z_UwA2-u0Pw1iUdiwZV#gyRl?+1+AS@2gpO|j-hv%7j5UfD%VEpn%|$i#Op|}q4m2s zQc0u*^(N+(zg`H2T5B{C61_j2BJLW_t=A)SZu3kNX=iFNk_xAB@XQ>I#zF|i)^t6wbmMwo-|8`?w`}7k2-O@)Xm6UmhpFwk<(=66H)g02sZTza}UFVQ= zx5qD`UF6^)sbV~+D`#WE_xYtKvo+-7bU;BB7o!IDNt1E2P5>y@@qdFh>EDXJ}^)Y^S3?7*8qk-yrCFCanSBpL5Pei20lv`)` zO^Pt@axj-nMGm&yt#;cb#c4!(C@T z#55eif+(UMwH31t%!gG2o@y`&(WzNgQMm{fdHE)OpdYm9a5rM=Ax3Xs2&TZ90d0UO zKSr^<=?&u$iB_jaoP{%>V-k-)-eIS|LgP=>5SK1rPyVSVUpf8XYd( zbzn)lsq@>Lpo4RJhasze&)UF`;p^M`0c%!cGDI0UI&lvM&SBw5oGn}46Gc@Z?{U?g zkpD>VJiIQmar@o@Ud@)lZYCgiJ~Q4${1u#ydulcBl_Fv!Hhp-s4Y!GSZyhXg^_I)E z{ufKwZHD7;1{^w!2z8)&!t*}vXyNM8Cu3zM%Vx;_pY;Kvb;2Ahf}LTJ2~Tna-JX*I zGu{oFLN)(ZdK8d1pXt?S*I_1<$5K%S((ka^pt&XU4Y?4P|ea>3Fjb0 zWM$)}I!kt#&YWR_L0C8b9dK0t;bVS;D`6iC487u4*N-wYujg`+t2Vw+O}4aMWvDesKkq|ML(sl7? z$+=IA$317aW$x4W3KB8km!K7NBz3|bYR&nLKj&-T9UV0;2GH^w1*_AS)o)y?t%4am zwbQUq;2Jj5W=6o<(N)4ykXLXy^MP)+ulc^lYfTB&tB{jEr(1KiSL|WELVSueG@ijk z`J152h-w+QOC@V5LUMulwj4cN`l#biWknr~kIxG3r9I>@``d+Lj} z@a%v5@%=N3!{Qw~aA`9^g8vXLhqeWcrptVK=NiwmYn%FcYNYV(lRpwhc07#Y>1c@>iGrL72attun&mYqi+eKmQ(!XR0v+ z7vOd5Gu!0!2lOGH2L>@%3*PgE9M4xx20uSGx;*!Ks>VG%@RHdaUks79rCe}VL&auF z_DEai0}4w%-N*oVc6%8`=oPZ@7=#3@v7yg8T!r^!09VRnNZq#F_RZYquGs56{H5M< zDulXQhRa%JGDO)7+;W0ZV~h4ay`m;D$wd{3YQx&m#5U{6u8iTAfVh%-l}eY{4HNra zgjbA0lA+V$J4p33CtHCI4&jl%;F(wpwj(X4@O^S^FXQj{)iX1AUlAlstN74PT?K?t z+0jog%a4Lc_(9rfV&1!H)0J%LW6lzi0FdVfVk@WP(8 z)0~>^Ka50QEJwzfId3MkLpD6WU4N5394jq<6h}#uwa&s=nUQYVUt?NCLil7fev=-4 z`ZM0L5j=84$#C1k%iMML#D{s+ySOSUH=CWf&;?qln)N=)G*!kqnChn<4i^JzgH-|} z_w;~08VjC)tl^(EJ8#I_kV`j&YoM3^hDqRtCGDJ6VdUpsAKMw#bs@MGe09&Fq__ul zy7nnUQF2@?g~jtxlNzD#mwa!tQ0w0#*_*vPJIgb3o!yXh@6b8I;{a^=K*0B{OYZB) z+0W+K+{?a?@p`)u#K1{Oyp4(?3cyqd7MBw9$A9r2!rlym?ngT_-L(Mn(tbv}+#``v z^aAI3xDIidu*cxy1*qczW)}S7g!E2?XlZQla# z*_Od%jE7(+GhMbB_9uez-nv0zCvEnQ5HzeS69zN-flIug@zD5o(I#Y)OD__^ZY@IX zhF^_K@ELiBvjM7F%G^?36?ZU8tc$h;9+Q7#7+%pgDf`%|8r;w84j)XXXC1G56brpp z(?7jx7L>1SPSfHu9t1;BhA+Wu3}2+ZPEb&xCqI?Zv@{SEejXN?%$*bFj)|uYzS!nT zkGk5a+V-6*bogXJXP?Un*D1FqnkL;6GJ38cmrt)4*TWF#g&*GrGf~`x5dBGaPO zOU3Q@MF-1zWKS{1>PH%EH@o7)*s*eMs_$^PzMK!Wq`vJ5`(yGbh^VpTQ)BiYn2J5dI7_qkPAE2XR`aEo~V`AEn z5i5zhH?{88cQV-XlO}M~e^yDfX%kHP5uz;QW|H) zH(dRVc9u5s)2q!ZsFP~N2o^7CEZmKTa~oqkFb~NRn_+V#80uVUB=OTail9JfeKz3^ zftf1C`eDi7w%y?aiyHy0uY??6@yd=K0G8uS(3>otctY>LsGIFzI#f7e^jID8mB9ZN zihSP)?#X8Dm&*`7z$H_mYIi$tCy1(`x!r_8<^wv18H@T5lF;>8%T)Il()v(M2!h=A zq&G)q9E`aAjrzp)J7AW2*XQ?Pro$Su(IaA@+UPOQyILw-ipaA@VU2$#Mg?8^Y4xAq z55!dKe>`WkQv+TpsYFEn;`4o-T*fqN3RZ79D`U11I=GQJ$fyFahk#xtQO_QfBFaN7 zm6Jw?t%N%-YBv#12z6pqkjquBi?GInn+J#<8k8-!_`vc^!SAIv~SmbhJ)2i*ouiYt)%9 z!)jsc#;$+FZbuEoV11rR>uc6WmOXaC>Fqs$Kmwc`6T^8_yk@!rLNyBzk*fsPj+Cntjzb8e^tOmqUD^tsx!Vwk0pQ z#2iGCvp^@KlGzAa)Zb3rb}+1jd_I4?=?}gpOm0d9!l&YaH077_500rcDZj-*KW}kW z29(#H?ZN8aEDZm5EWq12Vb1S2qP|90XQzOn(U!OD_I68X^;fjq z01&#mW5&%JYbZ8wSr#*OYfVP`nc1L+%Uo4+DWFo@ zFKYy55bz=Q?e>E!RS2C-U$5FfJ&%m5@8f}*OMzi$gUGpc9bY>51>%D94rdZY3E*iu z%;YT&XJWo1*bZz;+?dQf$KlQ4FyLUkY^jB9H65sUCCpi7j95B-8_p~@Al1KJs529< 
z;1OC}u3p=Kc27n(c#dgT`9NlnZ_nL#CcC-#=4HO@0vuuh7dncyFLU@=SWE!?WT$H> zx^9}Yt#aw8a03e9q+!8Ad`Rq{l$U&6QRK)QWjk`mrMM~;Iyd~_Y`tuu^=0(Ue0MQv zEvQrkq_(q5x^^iN$7a*7OvxD9IfkyAGdp7GW`~kKR@wUBEzmk0?99a(152ekV^Eq~8K4kO>o^d62a{rww%2~Bho--}S`v&6(D z(s-F2Av`|s92lInTG8W~Kg^j{j!8*2T-!lmk*$AN-{{Z4A%vY%S~92(-1v8!O; zQuLfdxA;CMzx)-)BjNY5E+Qu8lVIAcu9q{r(-d$8$1783A&^k7dBy%wv!>{p<@AF5 z2ASwQM69314YNt#1g7y*qH~;Px37%x`RZnN_DS+MB#TBf<+1LgCLUVO=emaeGbtwu zT5}O-F5HTsuMg5G>%XgIY{rrPajCcZtflv17PBefi~Ul24aq(8n7u&P6Ju}f*{hIy zaVFL3+HyV=A5I9xA-MJhfNdX4;hR{v!R}sKx67Fjdnn+q55l5pv^hlQdisAsXzxF> z_I??B27YFaTuKa3ygoDAl0xq&kqRM(Uan8Hs*wB#_xT5rdZV;oC=~a7m;Dz@{_605 z0Cf5X=K4PkZ2iCSj_o1mvdB9UZCe3%-q74y?7GK8<%KJgArZt*1A$DbxN|@iFJAz7 zjDgO?jv0;S(~9~7>VjXBU<2gWv(g<%+Rf6_qZ{sA6W|?(z-fXUZ}~C8n8A4WO%AuL zn#5&(%$C>8L04$Iai914aUFGK6?WUA>a5%Pqp@h1sk^z5jHl%60G@sA;B)>y)4(@v zeB~5+PWYjIIxzFOP9yoim;%~?dtEj61$TzFI(_$n`#~sQ+jdfth zS84-CX@|I!0zC0xJggiz>wE3SwEX+Uv>E{y6eU6mFLh{VT#qIad#?u(;!2|88y>hQ zZcC3&@-Ie{E{*L~yd6X}WfNaFvmqor-rOUYI;u+hH!|Qg-aQ~=4rGcZ@$ln@^Y)N$ z;FhF)WHT-7!5ogdRg}^B#B!(Yz58{;7GIwd^@qKRr_-vPKIInrR zvhEZ7G1b@zY1fLF-(!ODOr;X+m6cDxYFdVA`s?jn*73Nc;<~4(eP9)9L@WRu+oa|w zdAJ}Ta-V0YSHHIw5Cj|l)cFc3U_2B{){y%s2VGUQaP=+^48n?{c@yVP*>2^6may0-udmcWTaZnad$n_TFT$_0VNanhMitNk%*Ut_qb90oG(2bjl z@Ast*kxw7@4e#o<{`AeMF z&c*0wPg4$an#$f@c>?Y6ajk~Jrs!t}F_^a2;h#p|r#l1xLHjyzxpX4n80NkuN#PYc z?cHy$+m6+MD>Ta`57$o{=>olSCyGr!oSJgtbZ({=M>U@Zt$HsrrL6`2f$cmR;Z?p- z&O628VI+ueH{WEaFJd%o!?^0bu<+MMx*3Sq8fv5d2y}_iY}e?CT}Gn6?FtLmj{a=R z*0m!m=2qO~0VXEIndK@4_t-;*g>R!|AKNNv=#^+WS@XaESb0A!X-6UWj>p)Qa6dU+ zP~N()kj6IIS&ZzNiK@8eXRoVhoib91O3P!G0K1gW^X@<0$h@B@2SuNS?C5FItfiiO zsHd56-CPnd5B;OOVU4RaV_~3kA^oQ_Nu_PW%*2UoFx)U)!KI$&w#Bv)k5ejRB!1#TZ10jh5nD;5 zwe5;#EW&97tB1ZJ`&#NKQ4cA!VR|o#$G*-jBiHt zh}thNL&j@?w;Cm);*_LP#7236KlVMBm|6apqBdUtm4E_i9*!aX?r`eH{|Eq7zb7rYjm3Cm4$pz++QE- z1*YLUe0FziPtcU2i2n;{JgPyz?*Q08aXrqwM9eT@uT3hPmh>zyEj;H<)A&{1%pT|a zkqf~VX@p>ZMgF)nu`e`SV3f$kcJI|ywQ>`9wwx8D$h{Z7*0%}_1y3k;=RC$I8PJFv zZ=y}oAg)kT*p@WjHqpYt;Kzv_%MNL)Fn?l}^r(79PC!aGpHcMzWH3!iNb?ihPBZCD z9hO9`EHMv^@$S5SCO&l7R(B?EsN=Q%j#2dzWwi1p-u|KcNRhZP0X%m*f=xM+?IDF* zR7}wb1uBm0<45tb6OATv*qnMK8+rF@NZ{AH8rmS^@ciAn6jC*KqNFl;{) zuVLIakJFI5|NCx<+Y>&efi2#0*LP+K9_G>3_;DHA7Gog~ragIN)?~e;ql)km- zD{giFN-`^9O#ld(R8|%&{uUSWyjO68il?-UV6Leu5;_?34@y=E#3~S%9?%6}Qj-}I z1*Bt>A=CPTFM0Rbj++YB0Woh|PUC)*hze~vcWjr*UWcyU1e2mT-0{yOYKGN)w2;w& z;nL%=wXS}YDZ?I9w`mZDl&h8@T5|!UNQ;o15SzN6aXG+S>x|vRl!*+>L(&3}*!vz{ zZn9L6jNR|`%pH-#BSx09v%`PMnIBsyf3fo&pUy0;nQ;RyXOgaw);<@tU=^2`=7W*H zcwn|_yebl@E(e;7fZRw4=yU};i@uT#S)U!M-Zo9jjfN<=wl_^iF0jBl25KyR3+^&K zkP41O0@GxhSmGXxdI}lEhuqK~0$|l6p9^Qh7n0@=m;$_CXP(zV{rK?7ogf_-sj4c`!y_YLq)ijEfusuyIxP#Q9Xu>2x3)>u9zoWmn9EW zaj?HtUgm4Qm-vDLhSay0(4o<~?41p9AwmGpKVuEsx-x@yO^;-54^UvH?xPPiCKHg_ z5WIy>dY9ODxV}CFEB?vppA=|02i&sw`spz4fb&d7{7e?~qKNDrz0)NZ;$Vr_ZyK~~ z3l7L-9qrw^{Rd_~Tq_pnRI?~k^wh>zXHl@08!)Yu9`%CfV%2xK{cldp^*)QWJPOe@cya zj3FohzyY|?kHv}GOu;1wmWt^zl0N5z8BDWM%Bd>a`XPMGu~IA%gk!6toASqh2gDLt zMMVy3=6D}UFj1GN#+Lfs{cu?h2LJR5zLd*-hlO! z-oHlb&@J3p4Uw!oxD_=k_51qLBB+QtxcBnpGT)`+7`)}G$HLQLI)j*;&xJ#?3Z2s&gK`b=_TjS?#WMYa? zqnTqy^$=%MYJkdJA;adtVYn$PUv=?@s!=zimw? 
znOo)d%LK^sr|^ug+qf(t;{v-Y)rC6ZvCfq|7OBfuawUWJa_*-tV^(;QP*D2HhI6tU3Z4^>neNO z?G)sV1Gm+8swc_V{pY|62Bw^2>`s8ys0}k5bi)2}5RQE_34Z-nF!CMbrB6`}x|W54 zSzKS@U}YZn*_d(L!>i?7L3PU2J{Jk)~XH ztNwVp?vyu|b2t*0B|h(PqleKLJ-HW@_GqnWy{3w}E57`l<@)ybTqrrSk2Y#fU=?wK z2YXTC1xL*Gqve+`8bqawGFm|`zs1IOR!tbh#R=&juyKwPVj-u~-);lf*5_6Ghld?W zNY5L|;ChumRyogHJf|lF(w0e&E3~4w6An84YLI~zW9IpiHs8gxt8GKU(XV$1l#JS# zw_8qfHfgl±aX5B_vFbldv&<>Tc6t{_EEuGGvJPNg%6Jt&8(xGfIP|JNucJM<7`9guqV_3p@3D z0f-zoy!lNE}@;v9#fr1{>(j@-*MjUbX(mfn>r+)Lmc{t1A1-nl9%ZMKPCh zE0*mCP+xRb;hdP+uCVBZmTXV00*k-jY4JxGhxDH-RcN(+jOBLvNu*LV-jJ-xc%P>~CmZRT*xnwS za-BeU4|iIBxi{8+?o%I0NsaN6tn*P0#DQfnc>mIZ_WZ0dHX&8EsX-Gd{X3U=0yT@a z$+3rW^IP|s*cQTM+kHdywf#z&9}K7sEvCL@ViqD#i^I#gTM=4CQof2VS zjCysus|_?cf%+Qp!hB1$2`!PalfIbzLr(K45IpWWjCn-Ni$mY1PN%@jrR@9$ttcft zh@jjM4mZOMk(CQ@w-+*MpIynizx_=usDLKsM=YIi$%L5o;u9rfsY`gAzsfd(82&0p z+_yjj{6bciPr_yD<-PxSw+rkv1@h2w;H`C!`%@cp&p0DBbcURwQ)+kJk)%*Xbs@GoY*iV3dS_O?V| zE`Si{?qZQGe7Vwq)@@OpL@yg*<+NZ`x+2qsR-3;@J(;zCS>HDpN>SHmeVR{z_!Tm_ z#e-&*W=AR#P9$u%{jAd=b^5iM66|-FvyP*QmN&{4!|n}P49SMwJ;m3#FX&_A>7^9! zJhb{dH67+DNX9F7XhiJlo@2}W?Hv89_K~CRS{#dDIJN37CxX}a8HU~j6N5C92gh8E zxYssZ7S29Z32{!~ym{Yrm8Qd^AArL5;wP|NA`bYGZl1Jy%%Zr(4 z_KgC$67EgkcdEDw1F!}P`OkA@O*whgHYs;}a`uMf&$Iq6E8P(cX=;SL$KM3TyDlCu z{NOaH5^AzyTMn#Rh^MN7a*l{qrB<(l_nU_{%{#( zO4Wi#^%8d#;%lmBh3X{YK4ukmbh?exztT7=%k-9~FE&NB-3*i^NRy8ehekO@SJ&5B z*2DtlE_~)6PCpFA)V#=~-HgXto%6D&KMm*NmM!TdG*(kKf($>D6Qs~&JPxm2H&yea z6}Ov1jGZ>*dYFW+=NXG@`3+|B0FcP%y9eol^NahQ4@a9f(maB3_}Q`pTlBy#KGlBi z%lu3?@A01N_@2m4T)&?z8+L_49_`2}yAl>ffy-OC=oOPNwQQ;u!G2Rpt z94}P$!u9ivEh}QL@9w_)0Qg%&{B&b_5+nd|6}L;x+oYf)2+X`}i0jp8rmlB?_nk^plX)FGoKI_w*&SGl8{m?t%*+J_2@y5^j^CAJYm7AJ?+%ovCuY zl~nyVc&`WE&C?%y*k3oGzzK*Py41E)n;H^&JY0(kp{;|pCiWRo z43+q^V3w$V#;c@l!`P6c=~PWUGQVtKJ;Z?QD;E-eolLHJRNp1Ww!33Al>&tJuW zbKe0q?YF+j>&2vOlkV`{3rqJhg085$Z!H|s3DT|@XH$w2MxE#?r;H3%RFzP~{6t_W z3E=087}+$si#&h;%tU@|W&rL`-xpecM4OgXu*XC5b3)UF?E|jih=KF=#umN>0xq$` zG`=Dg-<;l{mHkyG)ko)lWhvDZ*T!AxJxJK?lj&2oYEA{{d|N7`rW9Tc@)s++>rvIe z7>2yM@bA4Ky6vHxs+-2_df&5$qvI^ePi{Rj>W1Bi z$m4eWOzHD<)E2prpgLyn!v+_yjMSMQh(8v1ZT$P>3 zw3gTzn0T5oyXrP^9J~4X=Reu}t$kK@Km(CdVHx)zz&LfirJk?bm0+Pw+<;C?FyF$f!W_2W`nydxwdiOWVqk0(bcYx2ijf1%EUlZ0j z&e6igNRtv;(T%bTMg10BmioNi`;B)_F}!kd1IHw0i!Nnd?`but^IQ>~w;!ol&Y_FibE})seJ-DRw&j| ztDW&-aLl}DJ*0fRsZaldTxAN`2>Fvnakv{`L0s0M4tNhtJyhn#3<&$Jm>^QArQK`C z+&vo;nDdU6yyH9mty>_~3Slv+(S!}AYHL}n?@EBYpk+F#l8*TE)DS{o&bAu~bHGpy zy#?H@9|3=0{KU^z=PTV9GgjT7580=V%)1gqO3gALoVu~r+zNX8 zQOZ9VuonSk!lj#?%PLc)9M5ym4{dN3-B&H zE5&FQ%T=4bez?)rk6iuoDB6@9f(bgw@h=ScDW#>F|H}I9djyiTZ`zwa%JBtgP6z9g z{b~dgQ4mPCJ9|vMM?*jVgK7nh7X8f5Kl374>a@d&Nm%YFph+yC;3P=<@xRmIUI1nM zNThc>BRPu;PW@dpd3?dYA>yMk7w_$#=1d=KHwSkVx>YVjJZC7CU;kIm-}80TSr|H^@UUeTd`_Qq8VR(2g#FFThsQ(>VA4rL*a2p&!uY) zKCA*CIMhj!$FATM_G-7WdCLk^8AR4MpO;cwvY1^Mr9J{92ogKa3hh=ztv(?=S*hZ; zPgvO*Hbn&=4T;-c{wVX1gXGXB97k~Lip&Se(F))U6^>|hVY>@0atZ}9>jBy*=l;+= z5}W=t3C^F@6P+L9s!(o`3TXgYCp#purt5WZw-`hebvOhk1jsMCT8z2~f=!pe*Y3sX zs`9ahdirq4`SbdO=Y3V<+qaQv4lpJjG=E0r<4t^{%hdE;gUKLH??F+CdW4J~EBm2d z;4Y6p+F2&*Hq?-7_s}j9atUd6H2ufAB;gcl@_&n2xi6Rl<(s=2k*K*}rT*_&0Mm^U zqCBUcr;U80k+N7~cMIQWy|uz;1kyJ?;cLhGqJKHaaU#8!G`St4xjIK0ypm+gnGfuUuS1EDT(yB{18*f` zCQ^oA_V9D@L9(un`zS0}DHX|4rz=utdu(cL+^Vj5zi_%fXgrZ!=v0zVfu0i~XQWaV z*EwW@j(!mapnONXnY|}n`12|>$ z7;XiqP5>DNh*dV+Q*);nJ1R`nqdTAa0J9AD9jQa@~4+GmT3o;7+`o^`Jfs#xwMdJ%er zKT^+in**d@FXf?oJq&@XKnM|H0FTTk-CMqI1qUJ~CY0ow^BA4nEi~jI^HA=Zv&=1@ zPW7wTdYRKewb^93-(CTc*^Pn^NS>kuqBn1H%Bge91WIWd5~D9`l(F#a^AW%C{z9@Y zJpTCJboL=>zJPz^7b@>+ZLIv?xj2MN$o~gi|Nn5qEQHR6dd|krh?c6j|0ZQR3?iLUzBPmR^*tgz 
zD8z-GxydYhj=~3ixt!52Wo4arF|ps=j!qerk~toIS#bF8w3aLge|dBw%0Pdea1O@DNECF} zUi?q4`l4l|tk>-S8NN!%d`kav^q+T2jmo;)x-$?5a74{XI+RYB?JLuL=63ib#FPUG z1U|5XjKo2O7=>!x)|6`<(W*joEx-6=`C@V~mF=1NscCLBBkXuRtQ57Oq*m7Dbb6*vb3-?<17LuV-$x{=QI_(&jnY6FOt zF~K*oXmEB|awi-eGv2&G(ErN_;tth#4X@84!legZ8ygJs^Jtw@E5Lm14L`Tx$NsaC zy(3qSSq)zP?KopFk8sc9bo+JJD-?kb2RMcCv+to4^Mv^n4kkKta88wj?K=!nD57xW zss;es8id|_g~@&elh-cR_rsi(=hE}H1qH+(0YFS7oPhBe57=X2RpoO}&^1eLkxYfK zysiG&?+IW#D-hXGxe)Ot&*0Z{Wm?`M7Xq<=>y)?{3NSy%>2rpB#U zFvhH(H85slxqIv9>y@pY%=LAZsO@KBtPhwqT(IT#2ubpbiSIq8KS1Qj1SUN}$ci1~ z{9DHRBH?&f|J6yFK+?(~5icOX*sngZI=MG}Z8|lpe^TqKr29nxRuCv9zguh;)@=GC zqz)&3U(7OJI>#c$;*79!wCg-oq_`<1iP0$Aqa(qaG8or8c0-S23jPhP;XhKdM>9WuAPZP5 z1Czf>5bV0mHy{c_eJRrHx0>Q?T?j?a1BjiIUDa-M4+@_k4o23GSY3KH7qXGaa=&V)oDwK`FPwt)nj{f_gI%ZjFZZuY(`jBF~7iD3)}Of7{S^ajAEg8^_oi! zHi&oHQ3f5rVgHNrrw}CT_QYRa>K|q=Ho6#y1a?QIW-G)vnJnf z#nQFV-_Stv#RXZ<*O?H=+$bhv$R zhPm!YC~iQ1a5-&3ap|=mt$}g7Ur3725Glui+;Ao)soi@B3_3s6nMq&opK({So4V|X z66w8N6vSl4XMGkWtE8G<2CBrsWN}%9&+z(l*;Y#07|D#GgX@mebdkQ$>toI!)PBa2 zy$wsfBpNhl=HOXB-4v)E!fPJtbG08Za-D3A5H3btIl$^P2&P_{^_ zQSy>vG>sNAeRkwezoL8MuhoGf62<#{p7ML;=o7X7myFJ{?SKy|$V>bqi`g#>Z^3|1 zDB($a^NGw$qr<)6Z3O!L$)ErB99w<1o%ivzvWck@S^joucf7U@=F`cvgJb(wpUAmD zp)0=fnKp?-yKT!q4X65HA;f#N4QuNY#YVkLA{YSa2jOY-l;dSzm=5e*VaW|yd+zzr7)RCQn3bc==J$t zA0JP=6ADv}523rNX^|8V)aTQPm<%}pZm#_b>6~ja@73mh3TAL#vY$?P1~N%+C#COD zc!>HK?HfO~f6Jh8>s(9c{y_hByq(DFoE9G+;Ap#SGo++Q>P(<#3t@sIewUklW9p_C%YzwZdO zoFOem`q!igvsxj~{imJiH!HODf7%h{et&2CPkSV}Trl=OU)$GuCX)-r{nxNk$^Rb> z+sMK5jYC*l{9pnY{2y5+qUO!4x|e#o(S$kAWK5;~qy@BHu&$;eE4cHEYJ(0_tE(5tX^bA84! zhxZ9Oirw_AB^#-P!WtXfd>>4jVJ@0Y@4#ii8 z;G6XJ={ed>jVmuVZsTCK#J=mKUCS<>j<4RA=h;?ogk&I|C}_OL8R%Yg(viCQ{W7w1 zcx$&=T>6SYPh}BG@JU8&sObp`h_mIzdRj7x?d-}nd-Hzv;9GqluX$k-Or~<+*U2Rq zyM48H=kGrseK5D!3ZHo}yIuh2wfv^bj{N>YtgpEcpHB9{xpCIc7QWFvH65Or9aGG3 z`J;twWhEJK8!$8;DbBQ?>_c+lRc66o-4Vgkns-Y*LQw*;vU%+FjoJO%oN6PyAJ%(j z7VOBusb1M##+KYU-$~PS>6e)LkDyO_Pmwh&5GXtTRV-Fv9)^e;W(5uRGMNy->QuYe z%XRhumiYwGljXE^?wjGU3L|HcQ?|6kZF85dYN1(j|e`cml zh}j&TGkbyKfF-baC?g{PosAY-2_#QnSU+4}IoW zFQyJMf8Dt@yq+Y?F&wA zjFj!l-zd53A*r$1oddosj6RX0*_Bq=U5-l4hLoCwoC3+qk?s=nI>zBZc7_bOiUpK+ zy&oC%c{YMjHRCrWe9QfInj_IQNiCW?f+hY$v|0NK6jg2=qFjh8_sGyCb;Iqh;2fJJ zUQXf$xF(xB*N+I!wIIc@9}q)1C{I3~VvaaXw%iPt{1WsINKe^#dLjlEUXDD@$sSi9 zqY3btvstoUkFSiukXu}Sn}MF-?cCbx9iMz(4Sh*cG;U9tWhHW zAhbFWT{!mhGJE%2d$2Rr8wcHqO{1LgVYa=&)yFl5W0bCwda}i6aX0Ct42cDJS zM*0`M_!du<%)qlBzaR4&FoxBjKY^50=5~hGZx=W|86iM-D#A3;+eJBGA@Iy`X%-Y{3=2yRezJDKdHTSPUZNy_VZU@;=pxer5y!LIrCZqtG zZ(}nibC^GAu87*bA3MvPTK@3c*)$$L;fd50$f0K!)N>1X#yN-=5Y5%f!CiL#Ig543 zgoVEwmlkrQ{4TAaj|^?mhNSv@GPiG7ZSBM9mj&Rvemud8q;g$(*B9B%x7tyb`RSzIeYmP~@=i?Nl9acjTRj=R-2>2AV^<^M{mi>)|Oy`yH?M5bl6drB!5zJALHCrxo zNnVzh`}vxGr}O{?*)wyp2T#~to=)~VEoJ}9JmtSh;!g8v2G z1*)}o7!izkRUIMmmMG(CoqBo5t-9Oj>GBW1giVX$@j_nsWi)ug3wdZKS+8%ZCyu@x zJ0x;3L(Qg6kfQQ-#}LGoMAXLn-6umH-DvX2Q)Cg{Dzg{Ee~cR-3g@z$o!7qM%Fd! 
zYoFeeqxtvgl4zr4Q80~$-P(Z~md*`tx&v2RR2ud(qHVBdEMKS$rj+S*Iy#HSs}(U&<8c`~NXg6Z9W4Dzr=_td;@!UTw2$Q0Xp0vXM^*F9 z&A;pAwA6|EESZFQwo+B;MCd-(;D&ZAmrp*<(9Cctt2*D7+;xDBt9Om zEJ06Q6wN%BnwF!KTmV$9Oha;+_Yu&%P4p4%c)x4D@!dc3RC`v4RsO%4d&{6Wo8Vs* zmk=OGfgL1zw%y>KyizXX+Ni_)^V|cWosw$oNHMV)#GP7G8p*QVaMv%wK)9UrP#W@Z6LT@f&4H z)0JPHZO1ngsLMw?9E96>Z6X1+#OKFfZ%4UUU0e!I3mzlG0lWgd%{w{9yxB3TQUkns z8{7sh9=rghxfaUqk&jc_^U3H5iX-`NwNAgVUuNFIU34thRm$>?R|h1I&o%2C8#6eh zxP9p9gubT999yk%vAPQvw9uIp{+=a;F;lCUsJeo@<^Q7zsouewO3U;BDA zuUttbQ?)pVeyE7jv@2+3Q$>Hc=zVr{exim=<-Efq<5L9HDxXP7J5W}h`0bl57SI$Y zu69>>kDpYM6hnRCIW7=g%BQUUx@=^%t3%s()$tGUR~|CO*CMNz7niSh2dbmbX{9XZ znK<`l#7ATWKeL|9`)T%c7w*T!=2r6tN=W&CdhtH?nkK$)?o@QvTM@rMS@(ii$>|iK z-yhxU&qDV&hMRU}zP=0*)lUO_lsAhF->CtWwMQ5~6{<;nigD%C_zLzI#j4en!^Fl& z3-+odCBhWP57Mx~W(ATXec}nb4Y7EhjQN_kWQM+vE_cZG!pz*uI1Juqz&9(7Kx2nSpfu_6k#Eyf*mbgPypiKuZsy*`sxCaXW(U7d^;m zKf}fECj7^OYss&BqmW&NUu`NPI5T6r!lbs`Qjjq z?z>|z$Icd!XkD7_P9j{>gDXZ?R-I%{F7Utg1-No5IW2@%_kpV>#QmvFzD2KRjqpz> zVw#)!WR-C=Gnsz3=rkIDx6w<`-^%)fy6yZjpMp4P47+2w>Q2!;U0%h8Koq$uakbi$ zoom^R$u1wclFiRkwWW@8@ThhHHVbqpcWWWP?rO4RxL^!w-S#HWFUacj&`GHsvOfK# zx$JhuZpF14Vp{m9w&1wh5eQXBLRWbra3Kkr9_VU!q~!7(c*+pP zb6$nJy{%YT)#USRJjpW~a?8534p|t8zPo?cSp7jf-(YPv)KGqqoo45|M@@Lwbce6s z4uGy)Z)LO4oY`_&@M~*sun{>ccq!ZXwQvMZfSd4fL4!XfWmw&r093sU(KdRl>#smv z?%l=N%I>nsjduQ8@`ouM+Yr#J_fa0uGF?6s&s)$Gb#5K2Vj}_el`G|bec;U2lZVSc zcfs1@wez-*KF8|$a4ewrxvSePUh;TTbg9uMXt%&<2PwNZ-#WzgyzKl_u|mL;OTBf> zz1=Td)Q2arQR_vF&S(6*d$5<%Mn@dE;OM)-){3w?CCWdXO?tL3^hLm3f%g^AyhBD` z>}BMA11`BUHe1rAE1u~rl41Wj^?elAedmtLVw(;B?48Lr8Hkyc^f6=O-Zd+`HMH$s zhy2gP8_y2jc;S*G6g2j7`dk{UX(qza;S?`vvW5_=3 zs{y%m>631r7QU708KCC&ANf1iZz*)>j$9`2G3Qk2Dv=VFU6WAe2elMjEoqbq4JtI; zlUaO-Yj$SV;T0dZ?%bR$M^|k6$gP0+uKBD_Hkxwpx-QOhql#^AzjV%9E~W7ROrRgL&_K>pb=s73)M-fNmnn#C)J3F&7vFL6Nm$6_^a{J;J>#o)9 ziVl=v(+d#}hOJH|irD7&lU$=Cpi{)%yxHuHJ1Hp(i=#{{fSAs9Hpb2GsQ??i63zWb z8oT>))We4JEr?b+`|3c+$2$@udybXjC>j8ERKo$B$t0$>PpiOJ4cJlH5e=YoJw2GG zwN%DP^6b{t>hyuWZymyYEJFpg%_bhnq$>|^onDL6tFtDloU)Pq5o+E2x!Zhn(}uHp z?;6#LlT5fog!bf&FCJ_FJ1-X`x;~S>x*QqC)7m4>norJr@}jmgBDLY3wV=#(V{gYX z)Tl&!jq$>C_OX)fx^5CZ_euSn#s<}Ohh5fK8%eS(zmCFvAy3__^bbVXYK`Iyb5D>5 zYZ4fn^PDyD++t(ph@!h^ZJl8+q-x{tK$-FCk5_MX&{XM}b#hdL!fJeJP(@xQs}Z-M zVYAEymFsN|vZXkOa-?C8{b%p2Lb-IjHt!4E!chs5W>4nU!c&8QrQCV8@`}a24#iFD zlQ$KRd^N54O~*_5~pKmOBINRI_LkA62HoXW?otts~xk{5&ux{7cP0Vh{_suQi>b4>;3>}yOuBt67Q(M-uy+11#YrMtx3>i&VT+RCX z5!C(m8hT2-^H833?N`lpJ}mES6^H0w`l^cENpQzMG+5=c*HX$nj?Y%zv#$ixjj_vZ z<3l3m<3*C0Lzcz_fN+lDn5$PXEOgt`3&UXsNe=f8&)$wt?-mq|>EOu=oL5GM(ULa3|&M>G3SyerYR0Ync*`?5`= z1RpYAUGrtNtmtTRuIcz=mk*l7lIWowj%i|*}PR2LocA|3JgN^5J6)!>P z&ZR}(+69bzot9q&tabGkV??=L?Vvk{7hqd#)z%kM1y<_(>*gGP%==6gJ1>7Rb-lQ1 zE&2Sam$oPH5P!}n+OJu=9V~xD^uJ;OewDEy>@FKHu)LIC+_r2Rw@I$Qmhi1S9d|IA z>fqs4(GL-0*wLmp1|VOnPIRA_vRBMsu51A2lp7J}mpYV#5Ue$XRvyb%?PX4^b0qte zy?GbvtlgT=I@S(d8FWfrc_FPLn zOkIH2f|{@FD!f}?V+GvaEeSlMM7*BM*abRNv*AY8+v8JqxzU=L0_ilqnRu7xM^PIsF(O%$g<5v;eRn}}KR-ZEcg9G4! 
zXetyQ_PmI6BG%D2k~F$h#gt+Cq>U$>bi~xgAALDp9^FW+=XXm8I_t%<7ANzV2Oq6K zOi6gS*nMUAqws?3*MMhkn;^L6dccrYBk^={ZuL}O*qlDpo?**d2HRYORCTl(4w)ef z-g}f#o%uDJG(i)h5JAGLeKNG<+3wdF&&1Px=e85Ryj^rtW{7z0(fFah%SV({Rue+9 z$IvTl8d}nuoal4`iOWEC?@vYP6<5%sT{f=}!I^clOpXi=E`33~R~k0^m5sI55G%F> zb#=ZgHd6*4*1Ji+3UTufn=2I+tKpvW={IH9F?R{8a6Y}^vIfM{)rP?M;6QojMqrs4 zh~78I<+FDaGdv?`Iqjf#s>W2{uTJ-+`N!N9m>qcVy)fcX>>FHxcgT*|KD3m}SYl{d zPdM)UtiH9nB9-FL#gtwqv3d#)ta%Gj-;k@pySQQKINQJ zt>b)Of$^9d|mf*YU}w}I(AZ|%&M)Bm$qIYacW zf=c7^{iQXo)sc95_)%|!%pCNsqhgPB>y*2W_}Y#~A|+z@^%hJj4@o)WM~U_7RB^}J zmqn!w3+&j&*>~=kTt^w;d~Kd`Ur1!Km*BK=qvEEnMo4gAtcJI=(;81W=dZlBz zX~XN6vthd2V1ZoJ?rq6I2;S`en0%)NQ7AQRwc-#uR>QNrCP3~g&Se)HQD`u;lc(Me zR1~YWAE;o>&#bG=_5c-rg%rCBTAwbL8-WeyZ1=vvw$Szqd-k5@)5m^6K#qyT?VDrcA4ZxN(t(s)#Pmui#9x7bka(7W!ORY9e1X!} zWY+U6Pcd2Jmhf~uhngtgo9@mcwzni9AO~lBm(-qMW&T{6s$M$N-oJcgoYa5(3<*EF z&qQKJBUC6>LwnR#P}_(%Mr|9}V?U`G`5AvSwwk`mAf?)1cUsTI?b*{%i+4P}NZ$|( zl_!%Dg_}cR*1tP5z`gI41O5q|=cfzn;6OrbKLQ-n>*`eu^dN9sc(J5BLmzPk_le0~ zP*t`ZFtJq~5nAJ(X#Q%^GGFu+GW%1(=yoIA=Zv95=l6b3`Sb8q{59C?k$C=Hmo05k z7+#5sH8Ug~CIzG2;Rn)fsHBtcRwrt~k-CC6mr|+HE^cxq)==H0cWxohtWnT*bR zKYXNFxzxRqW*sZ4Zd6*>^B7eFdORt48lZxF z`<#zUO6gqg8?^ivJ!hXMcKfgpALdF?QD~%|3NU?Qi&=3LHBz8}t;rN-|fq(_|+bbQ%&4%7{eF8ds*@DTO4c3kQ z1s`!Z!V2UsY)=xuvNr`cAm45%-VL~eW!)0*LN*B*KGHK0GzFOFZY0@QTMZKD22^i8 z@&p7F)!yj6`ZwgAOD9iWkZf0&9ujcGRbgVby9wX{3xO3eY2w`O8Tm>jsB>!rETjdJ zNik|6drBybDcM{Biij94Ek%go-NC!pJ{SyAo{A3L4)L! z_5~*TBpG1ak2=DPR4n!*{>!v@bw+Pq*==A7tZiW=Bu&~Q%Gfh3@ImEWfKQhm&|ioT ztTnl*KZCMG*=+BRjXD4BP;q-(-^i_mDm4h~-roDl{!SniZbEK7JKC1P9mI-e6Zbw# z>_hO^@1j{{w|7X@h=@@6c8?&_U1FA{kvh)&p6aiw^svaj(-4~+7nYumlQU(;vUeF6 zV=TBpuV`r5$e|S9h`Dki!4y1X9&}7YSljq!CMfcmSc{3HE48CN?DF45^)MUY0zm)BwVe4r$NIq_q|M zbo2MEpC85n#6M(JH&zdI z{9%3!ZRZ+afDULx)^TmlYrT~E#vV}1t+TOy>EK!&scX9GaLF>YzKRe~>)dmHeN=uv z@iG9z*=dY}@d05UqTz>!-j4Xs9){NSxyG^WnEs`;yIp3QtjC6oT4g21HQ#1$TY8n* zB~G&?szP-F)d0v$^9g2bg4d1I^B461!tCsDOL77#ds)=qNVwk$fc_*veEiJjt*a^{ zP8fhY^vwG9ko$?z(QsKtVVR^7fqbr$n=abp2A?m|gLlKBY~_;l;fVF24WP!C-h-EL zS#_;z}$?9zJzEHFWl5=cN(+2?&u=Pcxu3$Ev3B9Eq8r^U&hbC`_4; zzQp6dq03XNvElP?=a~YP?cF+kz7|vaNQOxOaUkkgUWH;j#4S2jmVou=)L#cU*iRk% zxJtDmdLz#J6w;&qY(c??cbk0sYG5UzjgC+3(NM3A1;gm5c?zO5v?aL~dz$({w>=Nm zM@}SOtz!a#v%!WepiOIP)qKjVoS;|QV0^&~iq`OXHScgkMeZI0tpxbWn*S0O(ULd) zTz!)0?A>`9vM~`byjp&}%$+ddf-k+Hpmw`Z!?pS?wj=W*ZvE13Zg-kGv3?M!Ly%+} zz|wIVGCE{dGd`%cUna225uV!90>UUJCU7oK2Rs?>yO4PvUZ&v;#}~a^eAIMs{l3kO zy8L!b;l)E^&E}-tU*Wp!nZi(-Qn2pkhG?)45Vq+;to=|d;o5ncUw|hY&erM9vP!!_ zMWR|rI8F56Y|gA9j1*K`7>Yk?UPKQeD38M$L@%{r>gbYvu0I;Th~7@yTY+3?LYRX3 ztRqN!9nlHjxR^Rpg}0O}hp{oc_fzX5>(j62{`3tCXO)u6{TL|Hk4$Y({tRSLsZ>oCo2dEhDl$2zp1Y;*Z$(C zN)9=(FPsboi;92VogdRdWD$ovsDnUyGAk*!V>DQGlP4(J@l9k7mtdwbX|FyuRU|a;GX*6fAKw_sTx#lk=P{?_+VJt;%`KM6wsTPgSIl>!i^s`Sr!{#zcG$ zSWlEspl?mG#Mxk|&KG@ioP9TB)#eGAB%^PGD5Rn!+GbY<*S4@HY(s3%)Eqzd}Z71A+c``8e@hlX5RXG5MJ)vqE5;TQAejq2#LK<{4&BhO&Upt6cO@{%rPMt9`BJ79Y2u zCQ&g&=eVGXwb;EHTDh~4TDp0at1NbM^ix75AAY6Mgq)QM^q%{zB7$!X84h(yeV1a2 zO^3xsFh`P(`2r%HP}&;0Glfg7##5kxLPs0T*RSPDO)`qQ>Y%3Y9{Drd<$?`V1rF?G zUrZBih9s5Q7Jp3&QO#KX%&Cd)F9H5OC;F%;UXtU)l&2Vs+IV9+qt=hk)n2& zIVK%1aoRSF(U;PSuw!RUHa_|e1MPX0PGzm67>_3t#Mf#MPh~1?Zyw@cZp8^w|?D4@hq9t-?!bE&r}Hj&^LXzo@7h`jC@$;YW+=c=jpS%BU_woAc{< zR46GGO4YSWnva~mx;}z%%YDzD}M=&x*PWz5?|KJ7(F8q z2f?o{XB~Hk>hU}6e~vW$MESxA!HD}#7iFA^JT}) zCnrCNm-np<3bAQMEkONa?Lp`!rt>ciT$vLsNC;qf#2YIUBZD`6V;8-87lb$7qt+fD zKY}xSGez8s$w2q zJYkpBeLKCy%CmC3wWKhEkgF;LiR0LNeg|Z>T^#D2g&Y%& ze$|3|r9vWg&cf$d<=Va0z@-jRm0sj_+d`UqCW&#R?@>7=w6Z6fMi7M0#1VCU7m75~ z!-^ybWOURf)c1^{IV~dI4G`$VHQTxtnxNxY)qrUDTm{2cUw+VY8YrhFm~lRc`|O2! 
z*2i>xRw2qg7#2g8r(>Sa$%dQTMtu6X$Jo=!Hk=Q!G04Ef^hs9}3^I3C%z&dK`J$h(kAT^M#)o^_b9S0_E2nU=k3p29ul9<5bm?uD zF;e2P&nhzShpKMY=VWRTEXFY}s*HLNQ5wHJmVG^*ss}d1;U|eT?BNyqhJ+=*gkk6X z3YefGgRnQ2mY3Ssi}`~YPnpleU5}V?Xrd?`WPn50eP;J*mrt*~smn#NOO5+z&D75s1{E1vqje8WvwX9SNa>4 z*eN?re|#vA_i}RH%RjqYOyy$veA7RQD|S;fcyV>#yWSlGw)yeC3nx>+_~B%HprQSrm1;BlD4 zC4l{QEcMB&g{7_3k;Qlf%#s9BeSHd%fcC^w^4Z~uGJd(>BZAsTdhkOe8pHhvOU5TT zqB>1l0Y6U)l!y;F>yy*aOi{5m{nqA_8nKPebp*}C%^pqP4p_C0ny4ekS5;6g7*IJ~ zQT5AqhH3%?wdue4Tz*CQI0^7(K;bVpPR_I;?qVV&h`<}HgKs1}X*d`9WB>g2yVJZz zPYY|?XPw!U8oSWwC6R>G$GAAZ<%A213x6OpVcG9cxVx=G*z2@TDwN(iiJ!iO_+TL@ zf%MtJz}Yxy0Jd!TwX}&!3sZkPc;p2ODyt`Y00aypMHg~*EAQdU&(rs|x{1GFw6TNn z-+zuQp?i4r1fPIo}L^-?e1vZrVFU~X^=iuf+sM%JEf} z*|G&z`0K2HTJCT|^(Q7nF~lF9ao%1)!~d8RfyH5k`E@&A^{_!zUg3pSTFR33g^P(M zueDYhCMIO3>+3$PYnlbwt!^^5OtWSzZgq~Yhz=xu6}w9yzK8xuB08U3(qgJHy;CI} z=?%tT;~2j&DS`8-A5Hhk#Q%TV5@;&tmX@?M87psH1zAXsOlO8CcqFw1VV2#OF_C>m ztP}~)gdi>w{QikwAOBIyQR-^uZ0(w^3px)plZ5ytg0gLCG)xSmwc@eGu1^aEBz5V{ zhiO-r`MYo2QCXz1;fHKelo!|W0VgFbaNY!UV;T7EVdQVt*$1Z9=wU>gZ$-vnH;0W=3&rAGH!4UHjgnEe#+e zZ)hIr&e7;1YzhWEu8Z~D7AJ&DiHe`h|+wqzop*gtf@(KT7*6im_=t$YTCBXm(H3e#$ z6b;$Zt81wvCRyq~$2m!){O@vOl3`g)2?@QZ4^eHrbA%To97ClO=75 zJ}d{=e#f?Dv41i955DbFPOHx!&A2bkv1C)Tf|-@a6^ITPYyD5L9lf-)uf6sHMX|Gv z?@Ju^!IPu%uD(mgypKD(A^SJu#dy!giDLjb@CFY7OtiY~zRu~k~l zY7l&@=68M>=eERGkOLh36bf@Meq(wT0al7V)_eOPj?DTC&*s!6FL25-2=+ZPP0}?T zl&L#AJ@#QL{N16a^p|tr+M5iB{*Me@Ixo*jALrn{pBli&>3(DKai`-{JzjECN%uXj z#Pt|&8k6EL!-?mYu(dhDZrL=QWcF!j0yq2c^)RKIA}`Dg*fhWV70?r+R3{2{VhB&P zjgIV6>UXa*oY2WC^R}wD?-G1V8o6;0o$^BeBNu<$mo#)<9a6+2J6fiR^r!andQzjecK5)Hfj zIY`mrN+6eN5c7?{Gjv8tJl+New;~ue(s@R8iF+~ zpFF08B%92fMCVr5ifdbgk7)$?vozh9)h?0!YR zO#n(CN0=^;?udTJB-%BAt!xw+U*;$COi2pJqRe8XyX#pYpx&oRaO#Gat4%&i0v+fff|d#H zeBFZ@JE^w?OM?0D<}QyhX*27wITFP-wXw;ZX!b&C!x5Il8+e^)@bmg-MIjU8uY?F~ z&lxU-zSUXVjy@>Ry6?n%I6`0cKGA!d5LJVnXj{RB&&p_XifZC!DUj#i3d2DBB0z&8!uY;k zoi%i*?d5w+Btpy0=rn<<n8xT>kuOD5t=C zt&00P{F(RUaNlB3?;f=QkFvgy+Qru;X#DO@xP4#vW%K67wchwm5Z2^B}WDTAE~>W4>P)Bk6;j!=Gnk*|uF}!9JLT zJ(XW0kJQQ+u%3Jm>1eQeeI+0d97nhhkbGDN5&HCd<_ zIp`gAX{%id3qYYwJ6`|Q0B9L0TI#I#d&Rr4@@!?joCLW)AY<22+^HA&nrGraz#^_- zA9q^u`l9~ji52&aC_6=V>Ec+Bd`H)^f2d8c$2EO))O3V`u&5}j9szgxDL!jL3_-09 z2vva283JVR&P&V6jmGs_3{!rq;$!;AMxG-Xu6~~eZ=FMYY0F)GN=b{nmRoHE1r}tX zKYDVYc^Mc1^Da-`;XNK_?kYI3P0eGNc}@uOS`^VCe1h2PJXWTa96@*dAAqj1wpUk@BV+9Ba<15e#W*c4JUx$!R6+#-$=LXk zGksqnFKx?>ELGIGaMv$sBXxQ<`8S7>hoh~$$pN}!pOhT{zI^O4^=Zh}R#G$)+#4o4 zey#Sk{-dzTjE|Y`j9cRrZ`bQ&$+x`2)DfM31Ph$)+Cfbv=4=9pT6jt$O_8Gy<9NW+ z24=DL;9EyLv-6c-by5ZLy{upOM!Z&0&wpf~BJxyY3^<0K;xUp4)LQOeOe4V{T1d;GR^Z zJepmWO#ZYyog38i(5YBg!I~jMOV~)WUI>khJhQU^!E)Dda{^X!K)c8L91up3Q5Oii zvTah-J**`49$lZfsVo)h%t<+)obVJ728s0by4u!K%vT73g;K=pi618metvr|MkLRuN22KP#sTu~U``!KH+86IoXPUYm*0<%i+ zq#kbkuw)PrZIEK)wZ^J(?XK!}aU~ zkjV!_Pk-FU{CsAuAJGS-`mGoVRg_bN-Aa39|85>(X{ zfUMokV)!}NBi~q4r3To_+mQ|r^Q;6VgwJEV@7qk$5rUXQ4D;4f80@oiPZp1hcS}SA zp(EDfJouEuWNO^a6KB@Bqpj5=Al}6BIjzl5E10S6Hh4G*BB`lBR@#j{yDp-@i>c_V zBqdo#x}34|-7Y$sZYDk9b%m9RW)TK>E1f00OU<@F9X1;QQ`~W0KWlH_*){FOeO*?R z;-`vsn89x&9&DwFW251TOJ4Yo9R`mg4YY^0RC_(f{l13R@TJN7wu%(m1zvk;>hD0o zj**$E+6csG3JL{usvtvk^+e+|Fbz$q7RB(faz#690Z$o`mQt7Ca>7xGahf3R(s5Xd z5h_w8VE5KfXRbMAz6T}wviHr6Wtd@wHPe@)<@L1o_En)8S<+Z5v|*UhFR`dJu`&_^ zemM|{C2)37qNJaLBslk&V5pluBYma7Wf+Q$g&_XS_zi9cP8{ybpd>^b^*1(uxT3^- zhTl0is7ajdf6koPnOnItkw3oZd25+HJ#9TF&wFoO_}6*YYm1cm@6WUPUx5I(o9EsK z8(sWGt&ITZ2oWoHy^QT=o*g~Axo&zNmPkBj-z{;qb}pK8Z*Zese2J!Dj69?_xt3im zC%}Fy0Yt?9N2sj*cVloI8;WkX$mc?P7L>$Elb`p_q%*N%o`c361$ zUa+U*b6CF)W9V|9iN4KF)jrjVPcE3yJU7$(p5LB5M-wS$DJ5zdvRI8CoA|bdzWXR8 
zrKTv&*H+&X;P>NcZh$3c_-|@k^bV=V&hH=SyuMSyg8`7bg3BdSVc#*HN62@2uK~{M zKCqYF0yyvWw>V8Vw&~ZHOg;z)skf6aTc(!W*Xia?Q6(kJ^S*d`S?>GkU* zPp_`P<%cpH&&%^pm#B$)WZVdzTbh#2Mjc>}+H3?n-MA3M7rTo!NaBYgZ6qJx`rrc7 zJ;sZsDE?(5eK>5D3hHgZ?(P#BCI^yQ&F{b>x-s|HdHaiUImjnYT3JE!6zdtM{25N?~@-~EEZu5EQpR|gmou8MH^}>J^jHOZp6_omW*kP`^c>VRrG5XWmq~&yGYDY=u?#}3otRwT+kYLkg z`Z@XfM^8*_ba$Jax4PiaBzL$@Ig|4YVE&JT-K^Fdg%IGd&&U#E{L|?YzWAz@qlW9o zNN?^SMz2>&ME+M|UM~Fm)Sr59&oFqQ113Rwog4#l{kBA_S!zVy>Q+Zh0w?sP45Hs!e6z+Msh$9>7Rk-kOni=lNk>oUaS8HAqlfdPCJpMxdjdl0( z-irB>-gcQ|Hy^j-HLp4}m-GI{%j>-%3d0w=oQw5fmNmn@G+4Jyr#mNcsk;5w4kzzJ z_W+a6OB8BHi`I42WV>jsum82`SB8i1FoLtoiM`0VDoY?FJfh|C3U{=oZ&SGTvyqWk zZ{Ex+dJ?M8vlUPKXo)VI|ML97?kK&EPFFl6+nbjuqRM!8$2wV-H}}s?9N$;2nbryB z4d;BQZ`lRAV|i?6uY**<7O4BVR60I+pB$^B>k%y6{tSNGz)Ol%LFD$aG0aY2`N#%k z#eP}4$r1YnQ!S_2XNY=pJ84q)d)w8;GXb_Q--j0jDr=Q37%!yllAwI2Glm7UD#Y4$ zQ${5%X0Tbw$n!vEH*9(}mAn0-484^?#@4y}L(pJyQijLjdx*2?!S}(^r6)8T&jGMM z{z2Dk;Lw}4KP<5K@4=t%RrMv|kH!f)SuTgTZO{+2eLJ`BIv#0Woxu0rSuVG1{IB_w z(yx4V=Sq}RL1|c%nBRc?QL138Y#YIYI^M_xpJVhceBZtzuivn)y%!>K@g5K+->>dX@?d-h$>@Q zO-Z@m$}J6h&cf~{79*KT=>@Rzsy(6Pw2jR+1lNgho<~NnPP$`!t$qz-^g1Kcn;t}H z(ig#}KSNqeopSbm3=&!E)Y-W^5VgBwhKNW$R%?`YoV0@4r{4h*gN-rUO%g!d^KyqH z_!8;g98`HYqml2dqO0~&$-VcFi2}`r)GuD-j4=m+qe|uFZ;w!tA zjx_!3t>%8Mt9Sid?D7Iqx3||*%+DX~a(T3JkUiz-#~IU_+huywZs}&PkfZDXr>`9& z-9w)zqY&b4Hb2Uu&$*7;{R?S_*YqnURi@u2rz@n5I`P!|GSKk)Ft2I1l74#|l6dMM zBhs<56}~9rXK?im^Fy6C``MBgc-H%f#Ob7MFrkI)?94*Z6a~s{TcGc+H>)4lIt!yP zu&~t^5B^~|?SuXS0S{9dZ-4H;b3p&qnfSMLYqBga(l<&oKb!*htzgCH#m2HFCt%PU z{q2s2Eq#jf(7@@vdY7{C+Ux3RO!gDhv2FCu2?h(#-6>J>KKj$o82aB*ZEfyQa`Z?2 z$=9qweDk|`vYA|9xO_~B9^fDW~5bu81n%GVsfM-^wA*v^JSLK&wa_ybxXHJ}}E-cqbvjdxJ%0 za+w~VyzLsp2s*NRgOdP?*@qydBH4CCUPL9{ful+}uG7cJ%4MI0NK6ShI9qn2>oI12 zyek}fX1j?a=(Y<$5M&7W4}|dk#S=mUGb=rJ~T%By8QQRF*`-lE-#iMeW+Ah|7u1s^MN#aai;oXpQn5&Ov zr|3h}&c}1Dnzjk%tRdz(lPoPnio{Ug3m_i^<-q`}hi&bXhzB3C{UG{F-41f1G(Ln^ zAGSj*S->M-_0B-*6Sl~2;XungB*uI6!mkGe6t{ow#Q8tkRtC=F{<%JSH<@J3*8t8N z6+UzLd}QrWj8cJ{c5<4H{hixeLp8K_tn2hW?SGi8nQJJcz`oQa{t|2{?rgD|BzpxSDMaj%&gOB;TA_bz>~IpJ9IrxR(uk9j|56BYiKzI-t<2@0rNV^#`zSsskxy_>>LVThUx&mO z!J$a9?l}@$s|YR{hl@+XmQd1)2c9vWu>U0&r)GMDFY)4>I5l;m z`-kzs*f65Z%_%NpX$XBnJ{rIOy7`vvYj!X^QXPk_Y`RR>MS=7$3;AD_rAdY`+qI;pnZsOfSQSa}sdN^j~*$#%@OF z!TNy{b0pbC5==ZAWuJ~M^LbQ>p(G1*BGcHUXKlO@|LaxN5WqGdjZ>fq^Z==sU9)`J zj3ZJ^um+`C#(GU-gC~x9b9gz04g~)rr&#q!^OcX^aC;5}b$;1#)?WNFvyO0s+!w#O z?ufw7c_Pie&9%bK->V|jT~?}=s4(#G{L1r9AH z7j7xYuOx07O!hl{fHet3A(@e|S95T(nZ++>h?XMG=)Pk2&&lSwMQVZi=s6fy;Rj0p zm$0=vLU>(2Y(N@9NvB=5chddSqCA5VAV{j$N zK3f3Nk$xS_C>o3R13hQNPET-FOL8HAj9_hFQO8>Z-1j!;E z@^F}9`x@iot}d|9Q9h?ULs|kRq-d8!_!^nwb)|o$DlE>N=yUf!M`vH zsQ+fQ9}qRxslmb#2D#nN?US)k7F3oOQek4SNdAK^bM{0w9x{E)td8ny=pPjmGWms` zDT;bwq1{(;yaI*nBKULwkK!&?xZimgAV)3G&NdvKQD&_p@C)MgjN55*a}`V<7J&#J zB_VX8_3p++*J1u~^*lUTe$=KhACLcEnDcSWc9G|?(u)_Pj%6WC2JV$Y?IP-b7<4?OQAttzVlTN{87A`#uRR1nNTry>cQBaq`c8 znzkBE{rry_9kI;3Q;lD-^mC@yeSks+j8VTd?yhF0F~ZT{BIL!RPlN2$Ice{G6>9Q6fsupjv(j0slJBEZw256(oyVh za3AX7UlWa;z8{p3$Ce8&$|5WsT_X!MVEpge_%p6eMH!Ga^~5&cP?vfr-83PL2`E>< z2Ky4NgzO&JBFVyfDq=}wjT5!P&h}PQw@+3oWNwEF`gcAtiPU*%Xh;!5=Cd=8x8O0e zRC^o6%z%XzlKCJ2%EW?LrUBxsq5{;00jaf&!~0}v>^u=oT!dUVuZ%3B|LA7E;igP# zRYPQ_=vps?A^>aCxfTmMN!=XfLs%lHne{i)h&fsyS0W9x;&eS94XpGBpWgu~Ze4HZc2encj1HlDOx|#lO zEOP}pYpY)YQaJVS$z_PLO|r3*+071LNV=UNrJ)=4288K4`;}S%fVw!55 zD|LWJy0FqNj<4UsG(duh4XYtd+g@z(Y zO0yqL8ZeDVjM7C0V1KW#hJUETd}r`6+*P)iHm&8+;4cmm!zUfip*RPFWc^2uM!#d@ zFv6|v#wwC9J) k9w5q~%Gk#4`@Hd|$%TIV22mpm_VJMrkrl2K)cf`S0PfGvBLDyZ literal 0 HcmV?d00001 diff --git a/docs/source/using-diffusers/loading.mdx b/docs/source/using-diffusers/loading.mdx 
index 35f1e0f928..2cb980ea61 100644 --- a/docs/source/using-diffusers/loading.mdx +++ b/docs/source/using-diffusers/loading.mdx @@ -71,7 +71,11 @@ If this is a private repository, make sure to pass a token having permission to Therefore, we need to make sure to *click-accept* the license. You can do this by simply visiting the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) and clicking on "Agree and access repository": -![access_request](https://github.com/patrickvonplaten/scientific_images/blob/master/access_request.png) +


Second, you need to login with your access token: From 72eae64d67a4c8430aced38faaf8c09f79012469 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 7 Nov 2022 20:57:45 +0100 Subject: [PATCH 46/88] Fix dtype safety checker inpaint legacy (#1137) * [Stable Diffusion Inpaint Legacy] Fiix some things * uP --- .../pipeline_stable_diffusion_inpaint_legacy.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py index 91dcefc91d..a92e23bfc1 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py @@ -414,7 +414,9 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline): safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to( self.device ) - image, has_nsfw_concept = self.safety_checker(images=image, clip_input=safety_checker_input.pixel_values) + image, has_nsfw_concept = self.safety_checker( + images=image, clip_input=safety_checker_input.pixel_values.to(text_embeddings.dtype) + ) else: has_nsfw_concept = None From bcdb3d594c7414f3465c083e2058148c1715ec2b Mon Sep 17 00:00:00 2001 From: Alex McKinney <44398246+vvvm23@users.noreply.github.com> Date: Mon, 7 Nov 2022 20:06:52 +0000 Subject: [PATCH 47/88] Community pipeline img2img inpainting (#1114) * adds image to image inpainting with `PIL.Image.Image` inputs the base implementation claims to support `torch.Tensor` but seems it would also fail in this case. * `make style` and `make quality` * updates community examples readme Co-authored-by: Patrick von Platen --- examples/community/README.md | 36 +- examples/community/img2img_inpainting.py | 463 +++++++++++++++++++++++ 2 files changed, 498 insertions(+), 1 deletion(-) create mode 100644 examples/community/img2img_inpainting.py diff --git a/examples/community/README.md b/examples/community/README.md index bb3964e1a7..a7a9ab807a 100644 --- a/examples/community/README.md +++ b/examples/community/README.md @@ -17,8 +17,8 @@ If a community doesn't work as expected, please open an issue and ping the autho | Wild Card Stable Diffusion | Stable Diffusion Pipeline that supports prompts that contain wildcard terms (indicated by surrounding double underscores), with values instantiated randomly from a corresponding txt file or a dictionary of possible values | [Wildcard Stable Diffusion](#wildcard-stable-diffusion) | - | [Shyam Sudhakaran](https://github.com/shyamsn97) | | Composable Stable Diffusion| Stable Diffusion Pipeline that supports prompts that contain "|" in prompts (as an AND condition) and weights (separated by "|" as well) to positively / negatively weight prompts. | [Composable Stable Diffusion](#composable-stable-diffusion) | - | [Mark Rich](https://github.com/MarkRich) | | Seed Resizing Stable Diffusion| Stable Diffusion Pipeline that supports resizing an image and retaining the concepts of the 512 by 512 generation. 
| [Seed Resizing](#seed-resizing) | - | [Mark Rich](https://github.com/MarkRich) | - | Imagic Stable Diffusion | Stable Diffusion Pipeline that enables writing a text prompt to edit an existing image| [Imagic Stable Diffusion](#imagic-stable-diffusion) | - | [Mark Rich](https://github.com/MarkRich) | +| Image to Image Inpainting Stable Diffusion | Stable Diffusion Pipeline that enables the overlaying of two images and subsequent inpainting | [Image to Image Inpainting Stable Diffusion](#image-to-image-inpainting-stable-diffusion) | - | [Alex McKinney](https://github.com/vvvm23) | To load a custom pipeline you just need to pass the `custom_pipeline` argument to `DiffusionPipeline`, as one of the files in `diffusers/examples/community`. Feel free to send a PR with your own pipelines, we will merge them quickly. @@ -501,3 +501,37 @@ res = pipe_compare( image = res.images[0] image.save('./seed_resize/seed_resize_{w}_{h}_image_compare.png'.format(w=width, h=height)) ``` + +### Image to Image Inpainting Stable Diffusion + +Similar to the standard stable diffusion inpainting example, except with the addition of an `inner_image` argument. + +`image`, `inner_image`, and `mask_image` should have the same dimensions. `inner_image` should have an alpha (transparency) channel. + +The aim is to overlay two images, then mask out the boundary between `image` and `inner_image` to allow stable diffusion to make the connection more seamless. +For example, this could be used to place a logo on a shirt and have it blend in naturally. + +```python +import PIL +import torch + +from diffusers import DiffusionPipeline + +image_path = "./path-to-image.png" +inner_image_path = "./path-to-inner-image.png" +mask_path = "./path-to-mask.png" + +init_image = PIL.Image.open(image_path).convert("RGB").resize((512, 512)) +inner_image = PIL.Image.open(inner_image_path).convert("RGBA").resize((512, 512)) +mask_image = PIL.Image.open(mask_path).convert("RGB").resize((512, 512)) + +pipe = DiffusionPipeline.from_pretrained( +    "runwayml/stable-diffusion-inpainting", +    custom_pipeline="img2img_inpainting", +    revision="fp16", +    torch_dtype=torch.float16, +) +pipe = pipe.to("cuda") + +prompt = "Your prompt here!"
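+# the custom pipeline overlays `inner_image` onto `image`, then repaints the regions that are white in `mask_image`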
+image = pipe(prompt=prompt, image=init_image, inner_image=inner_image, mask_image=mask_image).images[0] +``` \ No newline at end of file diff --git a/examples/community/img2img_inpainting.py b/examples/community/img2img_inpainting.py new file mode 100644 index 0000000000..f7a107136d --- /dev/null +++ b/examples/community/img2img_inpainting.py @@ -0,0 +1,463 @@ +import inspect +from typing import Callable, List, Optional, Tuple, Union + +import numpy as np +import torch + +import PIL +from diffusers.configuration_utils import FrozenDict +from diffusers.models import AutoencoderKL, UNet2DConditionModel +from diffusers.pipeline_utils import DiffusionPipeline +from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput +from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker +from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler +from diffusers.utils import deprecate, logging +from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def prepare_mask_and_masked_image(image, mask): + image = np.array(image.convert("RGB")) + image = image[None].transpose(0, 3, 1, 2) + image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0 + + mask = np.array(mask.convert("L")) + mask = mask.astype(np.float32) / 255.0 + mask = mask[None, None] + mask[mask < 0.5] = 0 + mask[mask >= 0.5] = 1 + mask = torch.from_numpy(mask) + + masked_image = image * (mask < 0.5) + + return mask, masked_image + + +def check_size(image, height, width): + if isinstance(image, PIL.Image.Image): + w, h = image.size + elif isinstance(image, torch.Tensor): + *_, h, w = image.shape + + if h != height or w != width: + raise ValueError(f"Image size should be {height}x{width}, but got {h}x{w}") + + +def overlay_inner_image(image, inner_image, paste_offset: Tuple[int] = (0, 0)): + inner_image = inner_image.convert("RGBA") + image = image.convert("RGB") + + image.paste(inner_image, paste_offset, inner_image) + image = image.convert("RGB") + + return image + + +class ImageToImageInpaintingPipeline(DiffusionPipeline): + r""" + Pipeline for text-guided image-to-image inpainting using Stable Diffusion. *This is an experimental feature*. + + This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the + library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) + + Args: + vae ([`AutoencoderKL`]): + Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations. + text_encoder ([`CLIPTextModel`]): + Frozen text-encoder. Stable Diffusion uses the text portion of + [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically + the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant. + tokenizer (`CLIPTokenizer`): + Tokenizer of class + [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). + unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. + scheduler ([`SchedulerMixin`]): + A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. 
+ safety_checker ([`StableDiffusionSafetyChecker`]): + Classification module that estimates whether generated images could be considered offensive or harmful. + Please, refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for details. + feature_extractor ([`CLIPFeatureExtractor`]): + Model that extracts features from generated images to be used as inputs for the `safety_checker`. + """ + + def __init__( + self, + vae: AutoencoderKL, + text_encoder: CLIPTextModel, + tokenizer: CLIPTokenizer, + unet: UNet2DConditionModel, + scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler], + safety_checker: StableDiffusionSafetyChecker, + feature_extractor: CLIPFeatureExtractor, + ): + super().__init__() + + if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + deprecation_message = ( + f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" + f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " + "to update the config accordingly as leaving `steps_offset` might led to incorrect results" + " in future versions. If you have downloaded this checkpoint from the Hugging Face Hub," + " it would be very nice if you could open a Pull request for the `scheduler/scheduler_config.json`" + " file" + ) + deprecate("steps_offset!=1", "1.0.0", deprecation_message, standard_warn=False) + new_config = dict(scheduler.config) + new_config["steps_offset"] = 1 + scheduler._internal_dict = FrozenDict(new_config) + + if safety_checker is None: + logger.warn( + f"You have disabled the safety checker for {self.__class__} by passing `safety_checker=None`. Ensure" + " that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered" + " results in services or applications open to the public. Both the diffusers team and Hugging Face" + " strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling" + " it only for use-cases that involve analyzing network behavior or auditing its results. For more" + " information, please have a look at https://github.com/huggingface/diffusers/pull/254 ." + ) + + self.register_modules( + vae=vae, + text_encoder=text_encoder, + tokenizer=tokenizer, + unet=unet, + scheduler=scheduler, + safety_checker=safety_checker, + feature_extractor=feature_extractor, + ) + + def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): + r""" + Enable sliced attention computation. + + When this option is enabled, the attention module will split the input tensor in slices, to compute attention + in several steps. This is useful to save some memory in exchange for a small speed decrease. + + Args: + slice_size (`str` or `int`, *optional*, defaults to `"auto"`): + When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If + a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case, + `attention_head_dim` must be a multiple of `slice_size`. + """ + if slice_size == "auto": + # half the attention head size is usually a good trade-off between + # speed and memory + slice_size = self.unet.config.attention_head_dim // 2 + self.unet.set_attention_slice(slice_size) + + def disable_attention_slicing(self): + r""" + Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go + back to computing attention in one step. 
+ """ + # set slice_size = `None` to disable `attention slicing` + self.enable_attention_slicing(None) + + @torch.no_grad() + def __call__( + self, + prompt: Union[str, List[str]], + image: Union[torch.FloatTensor, PIL.Image.Image], + inner_image: Union[torch.FloatTensor, PIL.Image.Image], + mask_image: Union[torch.FloatTensor, PIL.Image.Image], + height: int = 512, + width: int = 512, + num_inference_steps: int = 50, + guidance_scale: float = 7.5, + negative_prompt: Optional[Union[str, List[str]]] = None, + num_images_per_prompt: Optional[int] = 1, + eta: float = 0.0, + generator: Optional[torch.Generator] = None, + latents: Optional[torch.FloatTensor] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, + callback_steps: Optional[int] = 1, + **kwargs, + ): + r""" + Function invoked when calling the pipeline for generation. + + Args: + prompt (`str` or `List[str]`): + The prompt or prompts to guide the image generation. + image (`torch.Tensor` or `PIL.Image.Image`): + `Image`, or tensor representing an image batch which will be inpainted, *i.e.* parts of the image will + be masked out with `mask_image` and repainted according to `prompt`. + inner_image (`torch.Tensor` or `PIL.Image.Image`): + `Image`, or tensor representing an image batch which will be overlayed onto `image`. Non-transparent + regions of `inner_image` must fit inside white pixels in `mask_image`. Expects four channels, with + the last channel representing the alpha channel, which will be used to blend `inner_image` with + `image`. If not provided, it will be forcibly cast to RGBA. + mask_image (`PIL.Image.Image`): + `Image`, or tensor representing an image batch, to mask `image`. White pixels in the mask will be + repainted, while black pixels will be preserved. If `mask_image` is a PIL image, it will be converted + to a single channel (luminance) before use. If it's a tensor, it should contain one color channel (L) + instead of 3, so the expected shape would be `(B, H, W, 1)`. + height (`int`, *optional*, defaults to 512): + The height in pixels of the generated image. + width (`int`, *optional*, defaults to 512): + The width in pixels of the generated image. + num_inference_steps (`int`, *optional*, defaults to 50): + The number of denoising steps. More denoising steps usually lead to a higher quality image at the + expense of slower inference. + guidance_scale (`float`, *optional*, defaults to 7.5): + Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). + `guidance_scale` is defined as `w` of equation 2. of [Imagen + Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > + 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, + usually at the expense of lower image quality. + negative_prompt (`str` or `List[str]`, *optional*): + The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored + if `guidance_scale` is less than `1`). + num_images_per_prompt (`int`, *optional*, defaults to 1): + The number of images to generate per prompt. + eta (`float`, *optional*, defaults to 0.0): + Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to + [`schedulers.DDIMScheduler`], will be ignored for others. 
+ generator (`torch.Generator`, *optional*): + A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation + deterministic. + latents (`torch.FloatTensor`, *optional*): + Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image + generation. Can be used to tweak the same generation with different prompts. If not provided, a latents + tensor will ge generated by sampling using the supplied random `generator`. + output_type (`str`, *optional*, defaults to `"pil"`): + The output format of the generate image. Choose between + [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a + plain tuple. + callback (`Callable`, *optional*): + A function that will be called every `callback_steps` steps during inference. The function will be + called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. + callback_steps (`int`, *optional*, defaults to 1): + The frequency at which the `callback` function will be called. If not specified, the callback will be + called at every step. + + Returns: + [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: + [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple. + When returning a tuple, the first element is a list with the generated images, and the second element is a + list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" + (nsfw) content, according to the `safety_checker`. + """ + + if isinstance(prompt, str): + batch_size = 1 + elif isinstance(prompt, list): + batch_size = len(prompt) + else: + raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") + + if height % 8 != 0 or width % 8 != 0: + raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.") + + if (callback_steps is None) or ( + callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) + ): + raise ValueError( + f"`callback_steps` has to be a positive integer but is {callback_steps} of type" + f" {type(callback_steps)}." 
+ ) + + # check if input sizes are correct + check_size(image, height, width) + check_size(inner_image, height, width) + check_size(mask_image, height, width) + + # get prompt text embeddings + text_inputs = self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + return_tensors="pt", + ) + text_input_ids = text_inputs.input_ids + + if text_input_ids.shape[-1] > self.tokenizer.model_max_length: + removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + logger.warning( + "The following part of your input was truncated because CLIP can only handle sequences up to" + f" {self.tokenizer.model_max_length} tokens: {removed_text}" + ) + text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] + text_embeddings = self.text_encoder(text_input_ids.to(self.device))[0] + + # duplicate text embeddings for each generation per prompt, using mps friendly method + bs_embed, seq_len, _ = text_embeddings.shape + text_embeddings = text_embeddings.repeat(1, num_images_per_prompt, 1) + text_embeddings = text_embeddings.view(bs_embed * num_images_per_prompt, seq_len, -1) + + # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) + # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` + # corresponds to doing no classifier free guidance. + do_classifier_free_guidance = guidance_scale > 1.0 + # get unconditional embeddings for classifier free guidance + if do_classifier_free_guidance: + uncond_tokens: List[str] + if negative_prompt is None: + uncond_tokens = [""] + elif type(prompt) is not type(negative_prompt): + raise TypeError( + f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" + f" {type(prompt)}." + ) + elif isinstance(negative_prompt, str): + uncond_tokens = [negative_prompt] + elif batch_size != len(negative_prompt): + raise ValueError( + f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" + f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" + " the batch size of `prompt`." + ) + else: + uncond_tokens = negative_prompt + + max_length = text_input_ids.shape[-1] + uncond_input = self.tokenizer( + uncond_tokens, + padding="max_length", + max_length=max_length, + truncation=True, + return_tensors="pt", + ) + uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0] + + # duplicate unconditional embeddings for each generation per prompt, using mps friendly method + seq_len = uncond_embeddings.shape[1] + uncond_embeddings = uncond_embeddings.repeat(batch_size, num_images_per_prompt, 1) + uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) + + # For classifier free guidance, we need to do two forward passes. + # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) + + # get the initial random noise unless the user supplied it + # Unlike in other pipelines, latents need to be generated in the target device + # for 1-to-1 results reproducibility with the CompVis implementation. + # However this currently doesn't work in `mps`. 
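+ # (Editorial note: the shapes computed below assume the layout of the standard Stable Diffusion
+ # inpainting checkpoint — latents are 8x smaller than the image in each spatial dimension, and the
+ # UNet input is the concatenation of 4 noisy latent channels + 1 downsampled mask channel + 4
+ # masked-image latent channels = 9 channels. The channel-count check further below enforces that
+ # the loaded UNet actually expects this layout.)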
+ num_channels_latents = self.vae.config.latent_channels + latents_shape = (batch_size * num_images_per_prompt, num_channels_latents, height // 8, width // 8) + latents_dtype = text_embeddings.dtype + if latents is None: + if self.device.type == "mps": + # randn does not exist on mps + latents = torch.randn(latents_shape, generator=generator, device="cpu", dtype=latents_dtype).to( + self.device + ) + else: + latents = torch.randn(latents_shape, generator=generator, device=self.device, dtype=latents_dtype) + else: + if latents.shape != latents_shape: + raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}") + latents = latents.to(self.device) + + # overlay the inner image + image = overlay_inner_image(image, inner_image) + + # prepare mask and masked_image + mask, masked_image = prepare_mask_and_masked_image(image, mask_image) + mask = mask.to(device=self.device, dtype=text_embeddings.dtype) + masked_image = masked_image.to(device=self.device, dtype=text_embeddings.dtype) + + # resize the mask to latents shape as we concatenate the mask to the latents + mask = torch.nn.functional.interpolate(mask, size=(height // 8, width // 8)) + + # encode the mask image into latents space so we can concatenate it to the latents + masked_image_latents = self.vae.encode(masked_image).latent_dist.sample(generator=generator) + masked_image_latents = 0.18215 * masked_image_latents + + # duplicate mask and masked_image_latents for each generation per prompt, using mps friendly method + mask = mask.repeat(batch_size * num_images_per_prompt, 1, 1, 1) + masked_image_latents = masked_image_latents.repeat(batch_size * num_images_per_prompt, 1, 1, 1) + + mask = torch.cat([mask] * 2) if do_classifier_free_guidance else mask + masked_image_latents = ( + torch.cat([masked_image_latents] * 2) if do_classifier_free_guidance else masked_image_latents + ) + + num_channels_mask = mask.shape[1] + num_channels_masked_image = masked_image_latents.shape[1] + + if num_channels_latents + num_channels_mask + num_channels_masked_image != self.unet.config.in_channels: + raise ValueError( + f"Incorrect configuration settings! The config of `pipeline.unet`: {self.unet.config} expects" + f" {self.unet.config.in_channels} but received `num_channels_latents`: {num_channels_latents} +" + f" `num_channels_mask`: {num_channels_mask} + `num_channels_masked_image`: {num_channels_masked_image}" + f" = {num_channels_latents+num_channels_masked_image+num_channels_mask}. Please verify the config of" + " `pipeline.unet` or your `mask_image` or `image` input." + ) + + # set timesteps + self.scheduler.set_timesteps(num_inference_steps) + + # Some schedulers like PNDM have timesteps as arrays + # It's more optimized to move all timesteps to correct device beforehand + timesteps_tensor = self.scheduler.timesteps.to(self.device) + + # scale the initial noise by the standard deviation required by the scheduler + latents = latents * self.scheduler.init_noise_sigma + + # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature + # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. 
+ # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 + # and should be between [0, 1] + accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) + extra_step_kwargs = {} + if accepts_eta: + extra_step_kwargs["eta"] = eta + + for i, t in enumerate(self.progress_bar(timesteps_tensor)): + # expand the latents if we are doing classifier free guidance + latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents + + # concat latents, mask, masked_image_latents in the channel dimension + latent_model_input = torch.cat([latent_model_input, mask, masked_image_latents], dim=1) + + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + + # predict the noise residual + noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample + + # perform guidance + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample + + # call the callback, if provided + if callback is not None and i % callback_steps == 0: + callback(i, t, latents) + + latents = 1 / 0.18215 * latents + image = self.vae.decode(latents).sample + + image = (image / 2 + 0.5).clamp(0, 1) + + # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16 + image = image.cpu().permute(0, 2, 3, 1).float().numpy() + + if self.safety_checker is not None: + safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to( + self.device + ) + image, has_nsfw_concept = self.safety_checker( + images=image, clip_input=safety_checker_input.pixel_values.to(text_embeddings.dtype) + ) + else: + has_nsfw_concept = None + + if output_type == "pil": + image = self.numpy_to_pil(image) + + if not return_dict: + return (image, has_nsfw_concept) + + return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept) From 01733238a67d6a23a9c11349cf799172df60597d Mon Sep 17 00:00:00 2001 From: JuanCarlosPi Date: Mon, 7 Nov 2022 15:11:59 -0500 Subject: [PATCH 48/88] [Community Pipeline] Add multilingual stable diffusion to community pipelines (#1142) * Add multilingual_stable_diffusion.py file * Add multilingual stable diffusion to examples README file * Update examples/community/README.md Co-authored-by: Patrick von Platen --- examples/community/README.md | 72 ++- .../multilingual_stable_diffusion.py | 436 ++++++++++++++++++ 2 files changed, 507 insertions(+), 1 deletion(-) create mode 100644 examples/community/multilingual_stable_diffusion.py diff --git a/examples/community/README.md b/examples/community/README.md index a7a9ab807a..b83b2ff4d4 100644 --- a/examples/community/README.md +++ b/examples/community/README.md @@ -18,9 +18,11 @@ If a community doesn't work as expected, please open an issue and ping the autho | Composable Stable Diffusion| Stable Diffusion Pipeline that supports prompts that contain "|" in prompts (as an AND condition) and weights (separated by "|" as well) to positively / negatively weight prompts. | [Composable Stable Diffusion](#composable-stable-diffusion) | - | [Mark Rich](https://github.com/MarkRich) | | Seed Resizing Stable Diffusion| Stable Diffusion Pipeline that supports resizing an image and retaining the concepts of the 512 by 512 generation. 
| [Seed Resizing](#seed-resizing) | - | [Mark Rich](https://github.com/MarkRich) | | Imagic Stable Diffusion | Stable Diffusion Pipeline that enables writing a text prompt to edit an existing image| [Imagic Stable Diffusion](#imagic-stable-diffusion) | - | [Mark Rich](https://github.com/MarkRich) | +| Multilingual Stable Diffusion| Stable Diffusion Pipeline that supports prompts in 50 different languages. | [Multilingual Stable Diffusion](#multilingual-stable-diffusion-pipeline) | - | [Juan Carlos Piñeros](https://github.com/juancopi81) | | Image to Image Inpainting Stable Diffusion | Stable Diffusion Pipeline that enables the overlaying of two images and subsequent inpainting| [Image to Image Inpainting Stable Diffusion](#image-to-image-inpainting-stable-diffusion) | - | [Alex McKinney](https://github.com/vvvm23) | + To load a custom pipeline you just need to pass the `custom_pipeline` argument to `DiffusionPipeline`, as one of the files in `diffusers/examples/community`. Feel free to send a PR with your own pipelines, we will merge them quickly. ```py pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", custom_pipeline="filename_in_the_community_folder") @@ -502,6 +504,74 @@ image = res.images[0] image.save('./seed_resize/seed_resize_{w}_{h}_image_compare.png'.format(w=width, h=height)) ``` +### Multilingual Stable Diffusion Pipeline + +The following code can generate an images from texts in different languages using the pre-trained [mBART-50 many-to-one multilingual machine translation model](https://huggingface.co/facebook/mbart-large-50-many-to-one-mmt) and Stable Diffusion. + +```python +from PIL import Image + +import torch + +from diffusers import DiffusionPipeline +from transformers import ( + pipeline, + MBart50TokenizerFast, + MBartForConditionalGeneration, +) +device = "cuda" if torch.cuda.is_available() else "cpu" +device_dict = {"cuda": 0, "cpu": -1} + +# helper function taken from: https://huggingface.co/blog/stable_diffusion +def image_grid(imgs, rows, cols): + assert len(imgs) == rows*cols + + w, h = imgs[0].size + grid = Image.new('RGB', size=(cols*w, rows*h)) + grid_w, grid_h = grid.size + + for i, img in enumerate(imgs): + grid.paste(img, box=(i%cols*w, i//cols*h)) + return grid + +# Add language detection pipeline +language_detection_model_ckpt = "papluca/xlm-roberta-base-language-detection" +language_detection_pipeline = pipeline("text-classification", + model=language_detection_model_ckpt, + device=device_dict[device]) + +# Add model for language translation +trans_tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-one-mmt") +trans_model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-one-mmt").to(device) + +diffuser_pipeline = DiffusionPipeline.from_pretrained( + "CompVis/stable-diffusion-v1-4", + custom_pipeline="multilingual_stable_diffusion", + detection_pipeline=language_detection_pipeline, + translation_model=trans_model, + translation_tokenizer=trans_tokenizer, + revision="fp16", + torch_dtype=torch.float16, +) + +diffuser_pipeline.enable_attention_slicing() +diffuser_pipeline = diffuser_pipeline.to(device) + +prompt = ["a photograph of an astronaut riding a horse", + "Una casa en la playa", + "Ein Hund, der Orange isst", + "Un restaurant parisien"] + +output = diffuser_pipeline(prompt) + +images = output.images + +grid = image_grid(images, rows=2, cols=2) +``` + +This example produces the following images: 
+![image](https://user-images.githubusercontent.com/4313860/198328706-295824a4-9856-4ce5-8e66-278ceb42fd29.png) + ### Image to Image Inpainting Stable Diffusion Similar to the standard stable diffusion inpainting example, except with the addition of an `inner_image` argument. @@ -534,4 +604,4 @@ pipe = pipe.to("cuda") prompt = "Your prompt here!" image = pipe(prompt=prompt, image=init_image, inner_image=inner_image, mask_image=mask_image).images[0] -``` \ No newline at end of file +``` diff --git a/examples/community/multilingual_stable_diffusion.py b/examples/community/multilingual_stable_diffusion.py new file mode 100644 index 0000000000..c71c1f10c5 --- /dev/null +++ b/examples/community/multilingual_stable_diffusion.py @@ -0,0 +1,436 @@ +import inspect +from typing import Callable, List, Optional, Union + +import torch + +from diffusers.configuration_utils import FrozenDict +from diffusers.models import AutoencoderKL, UNet2DConditionModel +from diffusers.pipeline_utils import DiffusionPipeline +from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput +from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker +from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler +from diffusers.utils import deprecate, logging +from transformers import ( + CLIPFeatureExtractor, + CLIPTextModel, + CLIPTokenizer, + MBart50TokenizerFast, + MBartForConditionalGeneration, + pipeline, +) + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +def detect_language(pipe, prompt, batch_size): + """helper function to detect language(s) of prompt""" + + if batch_size == 1: + preds = pipe(prompt, top_k=1, truncation=True, max_length=128) + return preds[0]["label"] + else: + detected_languages = [] + for p in prompt: + preds = pipe(p, top_k=1, truncation=True, max_length=128) + detected_languages.append(preds[0]["label"]) + + return detected_languages + + +def translate_prompt(prompt, translation_tokenizer, translation_model, device): + """helper function to translate prompt to English""" + + encoded_prompt = translation_tokenizer(prompt, return_tensors="pt").to(device) + generated_tokens = translation_model.generate(**encoded_prompt, max_new_tokens=1000) + en_trans = translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True) + + return en_trans[0] + + +class MultilingualStableDiffusion(DiffusionPipeline): + r""" + Pipeline for text-to-image generation using Stable Diffusion in different languages. + + This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the + library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) + + Args: + detection_pipeline ([`pipeline`]): + Transformers pipeline to detect prompt's language. + translation_model ([`MBartForConditionalGeneration`]): + Model to translate prompt to English, if necessary. Please refer to the + [model card](https://huggingface.co/docs/transformers/model_doc/mbart) for details. + translation_tokenizer ([`MBart50TokenizerFast`]): + Tokenizer of the translation model. + vae ([`AutoencoderKL`]): + Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations. + text_encoder ([`CLIPTextModel`]): + Frozen text-encoder. 
Stable Diffusion uses the text portion of
+ [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically
+ the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant.
+ tokenizer (`CLIPTokenizer`):
+ Tokenizer of class
+ [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
+ unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents.
+ scheduler ([`SchedulerMixin`]):
+ A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
+ [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
+ safety_checker ([`StableDiffusionSafetyChecker`]):
+ Classification module that estimates whether generated images could be considered offensive or harmful.
+ Please, refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for details.
+ feature_extractor ([`CLIPFeatureExtractor`]):
+ Model that extracts features from generated images to be used as inputs for the `safety_checker`.
+ """
+
+ def __init__(
+ self,
+ detection_pipeline: pipeline,
+ translation_model: MBartForConditionalGeneration,
+ translation_tokenizer: MBart50TokenizerFast,
+ vae: AutoencoderKL,
+ text_encoder: CLIPTextModel,
+ tokenizer: CLIPTokenizer,
+ unet: UNet2DConditionModel,
+ scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler],
+ safety_checker: StableDiffusionSafetyChecker,
+ feature_extractor: CLIPFeatureExtractor,
+ ):
+ super().__init__()
+
+ if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1:
+ deprecation_message = (
+ f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`"
+ f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure "
+ "to update the config accordingly as leaving `steps_offset` might lead to incorrect results"
+ " in future versions. If you have downloaded this checkpoint from the Hugging Face Hub,"
+ " it would be very nice if you could open a Pull request for the `scheduler/scheduler_config.json`"
+ " file"
+ )
+ deprecate("steps_offset!=1", "1.0.0", deprecation_message, standard_warn=False)
+ new_config = dict(scheduler.config)
+ new_config["steps_offset"] = 1
+ scheduler._internal_dict = FrozenDict(new_config)
+
+ if safety_checker is None:
+ logger.warn(
+ f"You have disabled the safety checker for {self.__class__} by passing `safety_checker=None`. Ensure"
+ " that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered"
+ " results in services or applications open to the public. Both the diffusers team and Hugging Face"
+ " strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling"
+ " it only for use-cases that involve analyzing network behavior or auditing its results. For more"
+ " information, please have a look at https://github.com/huggingface/diffusers/pull/254 ."
+ )
+
+ self.register_modules(
+ detection_pipeline=detection_pipeline,
+ translation_model=translation_model,
+ translation_tokenizer=translation_tokenizer,
+ vae=vae,
+ text_encoder=text_encoder,
+ tokenizer=tokenizer,
+ unet=unet,
+ scheduler=scheduler,
+ safety_checker=safety_checker,
+ feature_extractor=feature_extractor,
+ )
+
+ def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"):
+ r"""
+ Enable sliced attention computation.
+ + When this option is enabled, the attention module will split the input tensor in slices, to compute attention + in several steps. This is useful to save some memory in exchange for a small speed decrease. + + Args: + slice_size (`str` or `int`, *optional*, defaults to `"auto"`): + When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If + a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case, + `attention_head_dim` must be a multiple of `slice_size`. + """ + if slice_size == "auto": + # half the attention head size is usually a good trade-off between + # speed and memory + slice_size = self.unet.config.attention_head_dim // 2 + self.unet.set_attention_slice(slice_size) + + def disable_attention_slicing(self): + r""" + Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go + back to computing attention in one step. + """ + # set slice_size = `None` to disable `attention slicing` + self.enable_attention_slicing(None) + + @torch.no_grad() + def __call__( + self, + prompt: Union[str, List[str]], + height: int = 512, + width: int = 512, + num_inference_steps: int = 50, + guidance_scale: float = 7.5, + negative_prompt: Optional[Union[str, List[str]]] = None, + num_images_per_prompt: Optional[int] = 1, + eta: float = 0.0, + generator: Optional[torch.Generator] = None, + latents: Optional[torch.FloatTensor] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, + callback_steps: Optional[int] = 1, + **kwargs, + ): + r""" + Function invoked when calling the pipeline for generation. + + Args: + prompt (`str` or `List[str]`): + The prompt or prompts to guide the image generation. Can be in different languages. + height (`int`, *optional*, defaults to 512): + The height in pixels of the generated image. + width (`int`, *optional*, defaults to 512): + The width in pixels of the generated image. + num_inference_steps (`int`, *optional*, defaults to 50): + The number of denoising steps. More denoising steps usually lead to a higher quality image at the + expense of slower inference. + guidance_scale (`float`, *optional*, defaults to 7.5): + Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). + `guidance_scale` is defined as `w` of equation 2. of [Imagen + Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > + 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, + usually at the expense of lower image quality. + negative_prompt (`str` or `List[str]`, *optional*): + The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored + if `guidance_scale` is less than `1`). + num_images_per_prompt (`int`, *optional*, defaults to 1): + The number of images to generate per prompt. + eta (`float`, *optional*, defaults to 0.0): + Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to + [`schedulers.DDIMScheduler`], will be ignored for others. + generator (`torch.Generator`, *optional*): + A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation + deterministic. 
+ latents (`torch.FloatTensor`, *optional*):
+ Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
+ generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
+ tensor will be generated by sampling using the supplied random `generator`.
+ output_type (`str`, *optional*, defaults to `"pil"`):
+ The output format of the generated image. Choose between
+ [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
+ return_dict (`bool`, *optional*, defaults to `True`):
+ Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a
+ plain tuple.
+ callback (`Callable`, *optional*):
+ A function that will be called every `callback_steps` steps during inference. The function will be
+ called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+ callback_steps (`int`, *optional*, defaults to 1):
+ The frequency at which the `callback` function will be called. If not specified, the callback will be
+ called at every step.
+
+ Returns:
+ [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
+ [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple`.
+ When returning a tuple, the first element is a list with the generated images, and the second element is a
+ list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
+ (nsfw) content, according to the `safety_checker`.
+ """
+ if isinstance(prompt, str):
+ batch_size = 1
+ elif isinstance(prompt, list):
+ batch_size = len(prompt)
+ else:
+ raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
+
+ if height % 8 != 0 or width % 8 != 0:
+ raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
+
+ if (callback_steps is None) or (
+ callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0)
+ ):
+ raise ValueError(
+ f"`callback_steps` has to be a positive integer but is {callback_steps} of type"
+ f" {type(callback_steps)}."
+ ) + + # detect language and translate if necessary + prompt_language = detect_language(self.detection_pipeline, prompt, batch_size) + if batch_size == 1 and prompt_language != "en": + prompt = translate_prompt(prompt, self.translation_tokenizer, self.translation_model, self.device) + + if isinstance(prompt, list): + for index in range(batch_size): + if prompt_language[index] != "en": + p = translate_prompt( + prompt[index], self.translation_tokenizer, self.translation_model, self.device + ) + prompt[index] = p + + # get prompt text embeddings + text_inputs = self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + return_tensors="pt", + ) + text_input_ids = text_inputs.input_ids + + if text_input_ids.shape[-1] > self.tokenizer.model_max_length: + removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + logger.warning( + "The following part of your input was truncated because CLIP can only handle sequences up to" + f" {self.tokenizer.model_max_length} tokens: {removed_text}" + ) + text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] + text_embeddings = self.text_encoder(text_input_ids.to(self.device))[0] + + # duplicate text embeddings for each generation per prompt, using mps friendly method + bs_embed, seq_len, _ = text_embeddings.shape + text_embeddings = text_embeddings.repeat(1, num_images_per_prompt, 1) + text_embeddings = text_embeddings.view(bs_embed * num_images_per_prompt, seq_len, -1) + + # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) + # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` + # corresponds to doing no classifier free guidance. + do_classifier_free_guidance = guidance_scale > 1.0 + # get unconditional embeddings for classifier free guidance + if do_classifier_free_guidance: + uncond_tokens: List[str] + if negative_prompt is None: + uncond_tokens = [""] * batch_size + elif type(prompt) is not type(negative_prompt): + raise TypeError( + f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" + f" {type(prompt)}." + ) + elif isinstance(negative_prompt, str): + # detect language and translate it if necessary + negative_prompt_language = detect_language(self.detection_pipeline, negative_prompt, batch_size) + if negative_prompt_language != "en": + negative_prompt = translate_prompt( + negative_prompt, self.translation_tokenizer, self.translation_model, self.device + ) + if isinstance(negative_prompt, str): + uncond_tokens = [negative_prompt] + elif batch_size != len(negative_prompt): + raise ValueError( + f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" + f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" + " the batch size of `prompt`." 
+ ) + else: + # detect language and translate it if necessary + if isinstance(negative_prompt, list): + negative_prompt_languages = detect_language(self.detection_pipeline, negative_prompt, batch_size) + for index in range(batch_size): + if negative_prompt_languages[index] != "en": + p = translate_prompt( + negative_prompt[index], self.translation_tokenizer, self.translation_model, self.device + ) + negative_prompt[index] = p + uncond_tokens = negative_prompt + + max_length = text_input_ids.shape[-1] + uncond_input = self.tokenizer( + uncond_tokens, + padding="max_length", + max_length=max_length, + truncation=True, + return_tensors="pt", + ) + uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0] + + # duplicate unconditional embeddings for each generation per prompt, using mps friendly method + seq_len = uncond_embeddings.shape[1] + uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1) + uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) + + # For classifier free guidance, we need to do two forward passes. + # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) + + # get the initial random noise unless the user supplied it + + # Unlike in other pipelines, latents need to be generated in the target device + # for 1-to-1 results reproducibility with the CompVis implementation. + # However this currently doesn't work in `mps`. + latents_shape = (batch_size * num_images_per_prompt, self.unet.in_channels, height // 8, width // 8) + latents_dtype = text_embeddings.dtype + if latents is None: + if self.device.type == "mps": + # randn does not work reproducibly on mps + latents = torch.randn(latents_shape, generator=generator, device="cpu", dtype=latents_dtype).to( + self.device + ) + else: + latents = torch.randn(latents_shape, generator=generator, device=self.device, dtype=latents_dtype) + else: + if latents.shape != latents_shape: + raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}") + latents = latents.to(self.device) + + # set timesteps + self.scheduler.set_timesteps(num_inference_steps) + + # Some schedulers like PNDM have timesteps as arrays + # It's more optimized to move all timesteps to correct device beforehand + timesteps_tensor = self.scheduler.timesteps.to(self.device) + + # scale the initial noise by the standard deviation required by the scheduler + latents = latents * self.scheduler.init_noise_sigma + + # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature + # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. 
+ # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 + # and should be between [0, 1] + accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) + extra_step_kwargs = {} + if accepts_eta: + extra_step_kwargs["eta"] = eta + + for i, t in enumerate(self.progress_bar(timesteps_tensor)): + # expand the latents if we are doing classifier free guidance + latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + + # predict the noise residual + noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample + + # perform guidance + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample + + # call the callback, if provided + if callback is not None and i % callback_steps == 0: + callback(i, t, latents) + + latents = 1 / 0.18215 * latents + image = self.vae.decode(latents).sample + + image = (image / 2 + 0.5).clamp(0, 1) + + # we always cast to float32 as this does not cause significant overhead and is compatible with bfloa16 + image = image.cpu().permute(0, 2, 3, 1).float().numpy() + + if self.safety_checker is not None: + safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to( + self.device + ) + image, has_nsfw_concept = self.safety_checker( + images=image, clip_input=safety_checker_input.pixel_values.to(text_embeddings.dtype) + ) + else: + has_nsfw_concept = None + + if output_type == "pil": + image = self.numpy_to_pil(image) + + if not return_dict: + return (image, has_nsfw_concept) + + return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept) From ac4c695d97779cc4d0c92585f8e87ad39aa4aeb2 Mon Sep 17 00:00:00 2001 From: "Duong A. 
Nguyen" <38061659+duongna21@users.noreply.github.com> Date: Tue, 8 Nov 2022 03:26:59 +0700 Subject: [PATCH 49/88] [Flax examples] Load text encoder from subfolder (#1147) load text encoder from subfolder --- examples/dreambooth/train_dreambooth_flax.py | 4 +++- examples/text_to_image/train_text_to_image_flax.py | 4 +++- examples/textual_inversion/textual_inversion_flax.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_flax.py b/examples/dreambooth/train_dreambooth_flax.py index 84493b1d94..078a66e4ac 100644 --- a/examples/dreambooth/train_dreambooth_flax.py +++ b/examples/dreambooth/train_dreambooth_flax.py @@ -452,7 +452,9 @@ def main(): weight_dtype = jnp.bfloat16 # Load models and create wrapper for stable diffusion - text_encoder = FlaxCLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", dtype=weight_dtype) + text_encoder = FlaxCLIPTextModel.from_pretrained( + args.pretrained_model_name_or_path, subfolder="text_encoder", dtype=weight_dtype + ) vae, vae_params = FlaxAutoencoderKL.from_pretrained( args.pretrained_model_name_or_path, subfolder="vae", dtype=weight_dtype ) diff --git a/examples/text_to_image/train_text_to_image_flax.py b/examples/text_to_image/train_text_to_image_flax.py index cacfacef49..89a8dec728 100644 --- a/examples/text_to_image/train_text_to_image_flax.py +++ b/examples/text_to_image/train_text_to_image_flax.py @@ -379,7 +379,9 @@ def main(): # Load models and create wrapper for stable diffusion tokenizer = CLIPTokenizer.from_pretrained(args.pretrained_model_name_or_path, subfolder="tokenizer") - text_encoder = FlaxCLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", dtype=weight_dtype) + text_encoder = FlaxCLIPTextModel.from_pretrained( + args.pretrained_model_name_or_path, subfolder="text_encoder", dtype=weight_dtype + ) vae, vae_params = FlaxAutoencoderKL.from_pretrained( args.pretrained_model_name_or_path, subfolder="vae", dtype=weight_dtype ) diff --git a/examples/textual_inversion/textual_inversion_flax.py b/examples/textual_inversion/textual_inversion_flax.py index 84ff97c39a..be2b7ffb54 100644 --- a/examples/textual_inversion/textual_inversion_flax.py +++ b/examples/textual_inversion/textual_inversion_flax.py @@ -391,7 +391,7 @@ def main(): placeholder_token_id = tokenizer.convert_tokens_to_ids(args.placeholder_token) # Load models and create wrapper for stable diffusion - text_encoder = FlaxCLIPTextModel.from_pretrained("openai/clip-vit-large-patch14") + text_encoder = FlaxCLIPTextModel.from_pretrained(args.pretrained_model_name_or_path, subfolder="text_encoder") vae, vae_params = FlaxAutoencoderKL.from_pretrained(args.pretrained_model_name_or_path, subfolder="vae") unet, unet_params = FlaxUNet2DConditionModel.from_pretrained(args.pretrained_model_name_or_path, subfolder="unet") From fa6e5209a8d500893417d81e32c53aad1f8e9ecf Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 7 Nov 2022 21:59:36 +0100 Subject: [PATCH 50/88] Link to Dreambooth blog post instead of W&B report (#1180) Link to Dreambooth blog post instead of W&B report. --- README.md | 2 +- docs/source/training/dreambooth.mdx | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d3ac5702cd..5c7b911c2e 100644 --- a/README.md +++ b/README.md @@ -346,7 +346,7 @@ Textual Inversion is a technique for capturing novel concepts from a small numbe - Textual Inversion. 
Capture novel concepts from a small set of sample images, and associate them with new "words" in the embedding space of the text encoder. Please, refer to [our training examples](https://github.com/huggingface/diffusers/tree/main/examples/textual_inversion) or [documentation](https://huggingface.co/docs/diffusers/training/text_inversion) to try for yourself. -- Dreambooth. Another technique to capture new concepts in Stable Diffusion. This method fine-tunes the UNet (and, optionally, also the text encoder) of the pipeline to achieve impressive results. Please, refer to [our training examples](https://github.com/huggingface/diffusers/tree/main/examples/dreambooth) and [training report](https://wandb.ai/psuraj/dreambooth/reports/Dreambooth-Training-Analysis--VmlldzoyNzk0NDc3) for additional details and training recommendations. +- Dreambooth. Another technique to capture new concepts in Stable Diffusion. This method fine-tunes the UNet (and, optionally, also the text encoder) of the pipeline to achieve impressive results. Please, refer to [our training example](https://github.com/huggingface/diffusers/tree/main/examples/dreambooth) and [training report](https://huggingface.co/blog/dreambooth) for additional details and training recommendations. - Full Stable Diffusion fine-tuning. If you have a more sizable dataset with a specific look or style, you can fine-tune Stable Diffusion so that it outputs images following those examples. This was the approach taken to create [a Pokémon Stable Diffusion model](https://huggingface.co/justinpinkney/pokemon-stable-diffusion) (by Justing Pinkney / Lambda Labs), [a Japanese specific version of Stable Diffusion](https://huggingface.co/spaces/rinna/japanese-stable-diffusion) (by [Rinna Co.](https://github.com/rinnakk/japanese-stable-diffusion/) and others. You can start at [our text-to-image fine-tuning example](https://github.com/huggingface/diffusers/tree/main/examples/text_to_image) and go from there. diff --git a/docs/source/training/dreambooth.mdx b/docs/source/training/dreambooth.mdx index cf7e5dbcec..238dcb24cf 100644 --- a/docs/source/training/dreambooth.mdx +++ b/docs/source/training/dreambooth.mdx @@ -23,7 +23,7 @@ The [Dreambooth training script](https://github.com/huggingface/diffusers/tree/m -Dreambooth fine-tuning is very sensitive to hyperparameters and easy to overfit. We recommend you take a look at our [in-depth analysis](https://wandb.ai/psuraj/dreambooth/reports/Dreambooth-Training-Analysis--VmlldzoyNzk0NDc3) with recommended settings for different subjects, and go from there. +Dreambooth fine-tuning is very sensitive to hyperparameters and easy to overfit. We recommend you take a look at our [in-depth analysis](https://huggingface.co/blog/dreambooth) with recommended settings for different subjects, and go from there.
@@ -148,7 +148,7 @@ accelerate launch train_dreambooth.py \ ### Fine-tune the text encoder in addition to the UNet -The script also allows to fine-tune the `text_encoder` along with the `unet`. It has been observed experimentally that this gives much better results, especially on faces. Please, refer to [our report](https://wandb.ai/psuraj/dreambooth/reports/Dreambooth-Training-Analysis--VmlldzoyNzk0NDc3) for more details. +The script also allows to fine-tune the `text_encoder` along with the `unet`. It has been observed experimentally that this gives much better results, especially on faces. Please, refer to [our blog](https://huggingface.co/blog/dreambooth) for more details. To enable this option, pass the `--train_text_encoder` argument to the training script. From c3dcb6749b744bbf3ebfd1329de6a50009b849ee Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 8 Nov 2022 11:31:15 +0100 Subject: [PATCH 51/88] Update config.yml --- .github/ISSUE_TEMPLATE/config.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index fd8eaa8d13..d8cf414a8c 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,7 +1,4 @@ contact_links: - - name: Forum - url: https://discuss.huggingface.co/c/discussion-related-to-httpsgithubcomhuggingfacediffusers/63 - about: General usage questions and community discussions - name: Blank issue url: https://github.com/huggingface/diffusers/issues/new - about: Please note that the Forum is in most places the right place for discussions + about: General usage questions and community discussions From 20a05d6a506e10fe82e998c5b2f22fce6a298229 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Tue, 8 Nov 2022 12:30:51 +0100 Subject: [PATCH 52/88] Fix small typo (#1178) Unless it's intentional, lol --- src/diffusers/configuration_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/configuration_utils.py b/src/diffusers/configuration_utils.py index d830857a30..39762e41d3 100644 --- a/src/diffusers/configuration_utils.py +++ b/src/diffusers/configuration_utils.py @@ -101,7 +101,7 @@ class ConfigMixin: output_config_file = os.path.join(save_directory, self.config_name) self.to_json_file(output_config_file) - logger.info(f"ConfigMixinuration saved in {output_config_file}") + logger.info(f"Configuration saved in {output_config_file}") @classmethod def from_config(cls, pretrained_model_name_or_path: Union[str, os.PathLike], return_unused_kwargs=False, **kwargs): From 5a8b3569226c47667113d8a55cfc10f1de0ae67a Mon Sep 17 00:00:00 2001 From: Suraj Patil Date: Tue, 8 Nov 2022 13:11:12 +0100 Subject: [PATCH 53/88] [DDIMScheduler] fix noise device in ddim step (#1189) * fix noise device in ddim sched * fix typo * self.device -> device * remove duplicated if * use str device * don't use str for device --- src/diffusers/schedulers/scheduling_ddim.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/diffusers/schedulers/scheduling_ddim.py b/src/diffusers/schedulers/scheduling_ddim.py index 8d4407c16c..1acb81764d 100644 --- a/src/diffusers/schedulers/scheduling_ddim.py +++ b/src/diffusers/schedulers/scheduling_ddim.py @@ -288,7 +288,7 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin): if eta > 0: # randn_like does not support generator https://github.com/pytorch/pytorch/issues/27072 - device = model_output.device if torch.is_tensor(model_output) else "cpu" + device = model_output.device if 
torch.is_tensor(model_output) else torch.device("cpu") if variance_noise is not None and generator is not None: raise ValueError( "Cannot pass both generator and variance_noise. Please make sure that either `generator` or" @@ -296,9 +296,14 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin): ) if variance_noise is None: - variance_noise = torch.randn(model_output.shape, dtype=model_output.dtype, generator=generator).to( - device - ) + if device.type == "mps": + # randn does not work reproducibly on mps + variance_noise = torch.randn(model_output.shape, dtype=model_output.dtype, generator=generator) + variance_noise = variance_noise.to(device) + else: + variance_noise = torch.randn( + model_output.shape, generator=generator, device=device, dtype=model_output.dtype + ) variance = self._get_variance(timestep, prev_timestep) ** (0.5) * eta * variance_noise prev_sample = prev_sample + variance From 813744e5f3af32a81cf31427940d1a2d3abdf578 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Tue, 8 Nov 2022 13:11:33 +0100 Subject: [PATCH 54/88] MPS schedulers: don't use float64 (#1169) * Schedulers: don't use float64 on mps * Test set_timesteps() on device (float schedulers). * SD pipeline: use device in set_timesteps. * SD in-painting pipeline: use device in set_timesteps. * Tests: fix mps crashes. * Skip test_load_pipeline_from_git on mps. Not compatible with float16. * Use device.type instead of str in Euler schedulers. --- .../pipeline_stable_diffusion.py | 9 +- .../pipeline_stable_diffusion_inpaint.py | 9 +- .../scheduling_euler_ancestral_discrete.py | 10 ++- .../schedulers/scheduling_euler_discrete.py | 10 ++- .../schedulers/scheduling_lms_discrete.py | 7 +- tests/models/test_models_unet_2d.py | 3 + tests/test_pipelines.py | 4 +- tests/test_scheduler.py | 87 ++++++++++++++++++- 8 files changed, 117 insertions(+), 22 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 9c7edabf69..30be4156f9 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -360,12 +360,9 @@ class StableDiffusionPipeline(DiffusionPipeline): raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}") latents = latents.to(self.device) - # set timesteps - self.scheduler.set_timesteps(num_inference_steps) - - # Some schedulers like PNDM have timesteps as arrays - # It's more optimized to move all timesteps to correct device beforehand - timesteps_tensor = self.scheduler.timesteps.to(self.device) + # set timesteps and move to the correct device + self.scheduler.set_timesteps(num_inference_steps, device=self.device) + timesteps_tensor = self.scheduler.timesteps # scale the initial noise by the standard deviation required by the scheduler latents = latents * self.scheduler.init_noise_sigma diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index a7af1c9d33..83d27926dc 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -416,12 +416,9 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): " `pipeline.unet` or your `mask_image` or `image` input." 
) - # set timesteps - self.scheduler.set_timesteps(num_inference_steps) - - # Some schedulers like PNDM have timesteps as arrays - # It's more optimized to move all timesteps to correct device beforehand - timesteps_tensor = self.scheduler.timesteps.to(self.device) + # set timesteps and move to the correct device + self.scheduler.set_timesteps(num_inference_steps, device=self.device) + timesteps_tensor = self.scheduler.timesteps # scale the initial noise by the standard deviation required by the scheduler latents = latents * self.scheduler.init_noise_sigma diff --git a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py index 7f44067325..33505c81c0 100644 --- a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py @@ -151,7 +151,11 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin): sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) self.sigmas = torch.from_numpy(sigmas).to(device=device) - self.timesteps = torch.from_numpy(timesteps).to(device=device) + if str(device).startswith("mps"): + # mps does not support float64 + self.timesteps = torch.from_numpy(timesteps).to(device, dtype=torch.float32) + else: + self.timesteps = torch.from_numpy(timesteps).to(device=device) def step( self, @@ -217,8 +221,8 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin): prev_sample = sample + derivative * dt - device = model_output.device if torch.is_tensor(model_output) else "cpu" - if str(device) == "mps": + device = model_output.device if torch.is_tensor(model_output) else torch.device("cpu") + if device.type == "mps": # randn does not work reproducibly on mps noise = torch.randn(model_output.shape, dtype=model_output.dtype, device="cpu", generator=generator).to( device diff --git a/src/diffusers/schedulers/scheduling_euler_discrete.py b/src/diffusers/schedulers/scheduling_euler_discrete.py index 50a1bd89f8..9f707c27a1 100644 --- a/src/diffusers/schedulers/scheduling_euler_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_discrete.py @@ -152,7 +152,11 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin): sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) self.sigmas = torch.from_numpy(sigmas).to(device=device) - self.timesteps = torch.from_numpy(timesteps).to(device=device) + if str(device).startswith("mps"): + # mps does not support float64 + self.timesteps = torch.from_numpy(timesteps).to(device, dtype=torch.float32) + else: + self.timesteps = torch.from_numpy(timesteps).to(device=device) def step( self, @@ -214,8 +218,8 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin): gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0.0 - device = model_output.device if torch.is_tensor(model_output) else "cpu" - if str(device) == "mps": + device = model_output.device if torch.is_tensor(model_output) else torch.device("cpu") + if device.type == "mps": # randn does not work reproducibly on mps noise = torch.randn(model_output.shape, dtype=model_output.dtype, device="cpu", generator=generator).to( device diff --git a/src/diffusers/schedulers/scheduling_lms_discrete.py b/src/diffusers/schedulers/scheduling_lms_discrete.py index d636fe6fe8..373c373ee0 100644 --- 
a/src/diffusers/schedulers/scheduling_lms_discrete.py +++ b/src/diffusers/schedulers/scheduling_lms_discrete.py @@ -173,8 +173,13 @@ class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin): sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) + self.sigmas = torch.from_numpy(sigmas).to(device=device) - self.timesteps = torch.from_numpy(timesteps).to(device=device) + if str(device).startswith("mps"): + # mps does not support float64 + self.timesteps = torch.from_numpy(timesteps).to(device, dtype=torch.float32) + else: + self.timesteps = torch.from_numpy(timesteps).to(device=device) self.derivatives = [] diff --git a/tests/models/test_models_unet_2d.py b/tests/models/test_models_unet_2d.py index 71ddf1a134..81437311c6 100644 --- a/tests/models/test_models_unet_2d.py +++ b/tests/models/test_models_unet_2d.py @@ -456,6 +456,7 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase): # fmt: on ] ) + @require_torch_gpu def test_compvis_sd_v1_4(self, seed, timestep, expected_slice): model = self.get_unet_model(model_id="CompVis/stable-diffusion-v1-4") latents = self.get_latents(seed) @@ -507,6 +508,7 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase): # fmt: on ] ) + @require_torch_gpu def test_compvis_sd_v1_5(self, seed, timestep, expected_slice): model = self.get_unet_model(model_id="runwayml/stable-diffusion-v1-5") latents = self.get_latents(seed) @@ -558,6 +560,7 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase): # fmt: on ] ) + @require_torch_gpu def test_compvis_sd_inpaint(self, seed, timestep, expected_slice): model = self.get_unet_model(model_id="runwayml/stable-diffusion-inpainting") latents = self.get_latents(seed, shape=(4, 9, 64, 64)) diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index b8316075fa..2b19b08b37 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -41,7 +41,7 @@ from diffusers import ( from diffusers.pipeline_utils import DiffusionPipeline from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from diffusers.utils import CONFIG_NAME, WEIGHTS_NAME, floats_tensor, slow, torch_device -from diffusers.utils.testing_utils import CaptureLogger, get_tests_dir +from diffusers.utils.testing_utils import CaptureLogger, get_tests_dir, require_torch_gpu from parameterized import parameterized from PIL import Image from transformers import CLIPFeatureExtractor, CLIPModel, CLIPTextConfig, CLIPTextModel, CLIPTokenizer @@ -124,7 +124,7 @@ class CustomPipelineTests(unittest.TestCase): assert output_str == "This is a local test" @slow - @unittest.skipIf(torch_device == "cpu", "Stable diffusion is supposed to run on GPU") + @require_torch_gpu def test_load_pipeline_from_git(self): clip_model_id = "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 056f723835..70201d1e67 100755 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -83,8 +83,8 @@ class SchedulerCommonTest(unittest.TestCase): num_inference_steps = kwargs.pop("num_inference_steps", None) - # TODO(Suraj) - delete the following two lines once DDPM, DDIM, and PNDM have timesteps casted to float by default for scheduler_class in self.scheduler_classes: + # TODO(Suraj) - delete the following two lines once DDPM, DDIM, and PNDM have timesteps casted to float by default if scheduler_class in (EulerAncestralDiscreteScheduler, 
EulerDiscreteScheduler, LMSDiscreteScheduler): time_step = float(time_step) @@ -1192,6 +1192,31 @@ class LMSDiscreteSchedulerTest(SchedulerCommonTest): assert abs(result_sum.item() - 1006.388) < 1e-2 assert abs(result_mean.item() - 1.31) < 1e-3 + def test_full_loop_device(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for i, t in enumerate(scheduler.timesteps): + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + + assert abs(result_sum.item() - 1006.388) < 1e-2 + assert abs(result_mean.item() - 1.31) < 1e-3 + class EulerDiscreteSchedulerTest(SchedulerCommonTest): scheduler_classes = (EulerDiscreteScheduler,) @@ -1248,6 +1273,34 @@ class EulerDiscreteSchedulerTest(SchedulerCommonTest): assert abs(result_sum.item() - 10.0807) < 1e-2 assert abs(result_mean.item() - 0.0131) < 1e-3 + def test_full_loop_device(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + + generator = torch.Generator().manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for t in scheduler.timesteps: + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + print(result_sum, result_mean) + + assert abs(result_sum.item() - 10.0807) < 1e-2 + assert abs(result_mean.item() - 0.0131) < 1e-3 + class EulerAncestralDiscreteSchedulerTest(SchedulerCommonTest): scheduler_classes = (EulerAncestralDiscreteScheduler,) @@ -1303,6 +1356,38 @@ class EulerAncestralDiscreteSchedulerTest(SchedulerCommonTest): assert abs(result_sum.item() - 152.3192) < 1e-2 assert abs(result_mean.item() - 0.1983) < 1e-3 + def test_full_loop_device(self): + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + scheduler = scheduler_class(**scheduler_config) + + scheduler.set_timesteps(self.num_inference_steps, device=torch_device) + + generator = torch.Generator().manual_seed(0) + + model = self.dummy_model() + sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) + + for t in scheduler.timesteps: + sample = scheduler.scale_model_input(sample, t) + + model_output = model(sample, t) + + output = scheduler.step(model_output, t, sample, generator=generator) + sample = output.prev_sample + + result_sum = torch.sum(torch.abs(sample)) + result_mean = torch.mean(torch.abs(sample)) + print(result_sum, result_mean) + if not str(torch_device).startswith("mps"): + # The following sum varies between 148 and 156 on mps. Why? 
+ assert abs(result_sum.item() - 152.3192) < 1e-2 + assert abs(result_mean.item() - 0.1983) < 1e-3 + else: + # Larger tolerance on mps + assert abs(result_mean.item() - 0.1983) < 1e-2 + class IPNDMSchedulerTest(SchedulerCommonTest): scheduler_classes = (IPNDMScheduler,) From 555203e1faa32cfa07c6128c09a8352031d7a969 Mon Sep 17 00:00:00 2001 From: Yuta Hayashibe Date: Tue, 8 Nov 2022 22:31:13 +0900 Subject: [PATCH 55/88] Warning for invalid options without "--with_prior_preservation" (#1065) * Make errors for invalid options without "--with_prior_preservation" * Make --instance_prompt required * Removed needless check because --instance_data_dir is marked with required * Updated messages * Use logger.warning instead of raise errors Co-authored-by: Patrick von Platen --- examples/dreambooth/train_dreambooth.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/examples/dreambooth/train_dreambooth.py b/examples/dreambooth/train_dreambooth.py index 9c512ef571..610c18533b 100644 --- a/examples/dreambooth/train_dreambooth.py +++ b/examples/dreambooth/train_dreambooth.py @@ -66,6 +66,7 @@ def parse_args(input_args=None): "--instance_prompt", type=str, default=None, + required=True, help="The prompt with identifier specifying the instance", ) parser.add_argument( @@ -205,14 +206,16 @@ def parse_args(input_args=None): if env_local_rank != -1 and env_local_rank != args.local_rank: args.local_rank = env_local_rank - if args.instance_data_dir is None: - raise ValueError("You must specify a train data directory.") - if args.with_prior_preservation: if args.class_data_dir is None: raise ValueError("You must specify a data directory for class images.") if args.class_prompt is None: raise ValueError("You must specify prompt for class images.") + else: + if args.class_data_dir is not None: + logger.warning("You need not use --class_data_dir without --with_prior_preservation.") + if args.class_prompt is not None: + logger.warning("You need not use --class_prompt without --with_prior_preservation.") return args From 11f7d6f3cc07ed305c162d96bcdddb2ee6802832 Mon Sep 17 00:00:00 2001 From: Anton Lozhkov Date: Tue, 8 Nov 2022 14:39:11 +0100 Subject: [PATCH 56/88] [ONNX] Improve ONNXPipeline scheduler compatibility, fix safety_checker (#1173) * [ONNX] Improve ONNX scheduler compatibility, fix safety_checker * typo --- ...ert_stable_diffusion_checkpoint_to_onnx.py | 73 +++++++---- src/diffusers/onnx_utils.py | 28 ++++- src/diffusers/pipeline_utils.py | 2 +- .../pipeline_onnx_stable_diffusion.py | 23 ++-- .../pipeline_onnx_stable_diffusion_img2img.py | 17 ++- .../pipeline_onnx_stable_diffusion_inpaint.py | 18 ++- src/diffusers/utils/__init__.py | 1 + .../test_onnx_stable_diffusion.py | 116 ++++++++++++++++-- .../test_onnx_stable_diffusion_img2img.py | 81 +++++++++--- .../test_onnx_stable_diffusion_inpaint.py | 82 ++++++++++--- 10 files changed, 349 insertions(+), 92 deletions(-) diff --git a/scripts/convert_stable_diffusion_checkpoint_to_onnx.py b/scripts/convert_stable_diffusion_checkpoint_to_onnx.py index 8e0b58c56d..f0e0b178af 100644 --- a/scripts/convert_stable_diffusion_checkpoint_to_onnx.py +++ b/scripts/convert_stable_diffusion_checkpoint_to_onnx.py @@ -81,6 +81,8 @@ def convert_models(model_path: str, output_path: str, opset: int, fp16: bool = F output_path = Path(output_path) # TEXT ENCODER + num_tokens = pipeline.text_encoder.config.max_position_embeddings + text_hidden_size = pipeline.text_encoder.config.hidden_size text_input = pipeline.tokenizer( "A sample prompt", 
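For the `train_dreambooth.py` validation introduced in the previous patch, the intended behaviour is: hard errors when `--with_prior_preservation` is set without its companion options, and warnings when the companions are passed without it. A standalone sketch with only the three relevant flags modelled (everything else from the real script is omitted):

```python
import argparse
import logging

logger = logging.getLogger(__name__)

parser = argparse.ArgumentParser()
parser.add_argument("--with_prior_preservation", action="store_true")
parser.add_argument("--class_data_dir", type=str, default=None)
parser.add_argument("--class_prompt", type=str, default=None)
args = parser.parse_args(["--class_prompt", "a photo of a dog"])  # example invocation

if args.with_prior_preservation:
    if args.class_data_dir is None:
        raise ValueError("You must specify a data directory for class images.")
    if args.class_prompt is None:
        raise ValueError("You must specify prompt for class images.")
else:
    # companion options without the main flag are tolerated but flagged
    if args.class_data_dir is not None:
        logger.warning("You need not use --class_data_dir without --with_prior_preservation.")
    if args.class_prompt is not None:
        logger.warning("You need not use --class_prompt without --with_prior_preservation.")
```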
padding="max_length", @@ -103,13 +105,15 @@ def convert_models(model_path: str, output_path: str, opset: int, fp16: bool = F del pipeline.text_encoder # UNET + unet_in_channels = pipeline.unet.config.in_channels + unet_sample_size = pipeline.unet.config.sample_size unet_path = output_path / "unet" / "model.onnx" onnx_export( pipeline.unet, model_args=( - torch.randn(2, pipeline.unet.in_channels, 64, 64).to(device=device, dtype=dtype), - torch.LongTensor([0, 1]).to(device=device), - torch.randn(2, 77, 768).to(device=device, dtype=dtype), + torch.randn(2, unet_in_channels, unet_sample_size, unet_sample_size).to(device=device, dtype=dtype), + torch.randn(2).to(device=device, dtype=dtype), + torch.randn(2, num_tokens, text_hidden_size).to(device=device, dtype=dtype), False, ), output_path=unet_path, @@ -142,11 +146,16 @@ def convert_models(model_path: str, output_path: str, opset: int, fp16: bool = F # VAE ENCODER vae_encoder = pipeline.vae + vae_in_channels = vae_encoder.config.in_channels + vae_sample_size = vae_encoder.config.sample_size # need to get the raw tensor output (sample) from the encoder vae_encoder.forward = lambda sample, return_dict: vae_encoder.encode(sample, return_dict)[0].sample() onnx_export( vae_encoder, - model_args=(torch.randn(1, 3, 512, 512).to(device=device, dtype=dtype), False), + model_args=( + torch.randn(1, vae_in_channels, vae_sample_size, vae_sample_size).to(device=device, dtype=dtype), + False, + ), output_path=output_path / "vae_encoder" / "model.onnx", ordered_input_names=["sample", "return_dict"], output_names=["latent_sample"], @@ -158,11 +167,16 @@ def convert_models(model_path: str, output_path: str, opset: int, fp16: bool = F # VAE DECODER vae_decoder = pipeline.vae + vae_latent_channels = vae_decoder.config.latent_channels + vae_out_channels = vae_decoder.config.out_channels # forward only through the decoder part vae_decoder.forward = vae_encoder.decode onnx_export( vae_decoder, - model_args=(torch.randn(1, 4, 64, 64).to(device=device, dtype=dtype), False), + model_args=( + torch.randn(1, vae_latent_channels, unet_sample_size, unet_sample_size).to(device=device, dtype=dtype), + False, + ), output_path=output_path / "vae_decoder" / "model.onnx", ordered_input_names=["latent_sample", "return_dict"], output_names=["sample"], @@ -174,24 +188,35 @@ def convert_models(model_path: str, output_path: str, opset: int, fp16: bool = F del pipeline.vae # SAFETY CHECKER - safety_checker = pipeline.safety_checker - safety_checker.forward = safety_checker.forward_onnx - onnx_export( - pipeline.safety_checker, - model_args=( - torch.randn(1, 3, 224, 224).to(device=device, dtype=dtype), - torch.randn(1, 512, 512, 3).to(device=device, dtype=dtype), - ), - output_path=output_path / "safety_checker" / "model.onnx", - ordered_input_names=["clip_input", "images"], - output_names=["out_images", "has_nsfw_concepts"], - dynamic_axes={ - "clip_input": {0: "batch", 1: "channels", 2: "height", 3: "width"}, - "images": {0: "batch", 1: "height", 2: "width", 3: "channels"}, - }, - opset=opset, - ) - del pipeline.safety_checker + if pipeline.safety_checker is not None: + safety_checker = pipeline.safety_checker + clip_num_channels = safety_checker.config.vision_config.num_channels + clip_image_size = safety_checker.config.vision_config.image_size + safety_checker.forward = safety_checker.forward_onnx + onnx_export( + pipeline.safety_checker, + model_args=( + torch.randn( + 1, + clip_num_channels, + clip_image_size, + clip_image_size, + ).to(device=device, dtype=dtype), + 
torch.randn(1, vae_sample_size, vae_sample_size, vae_out_channels).to(device=device, dtype=dtype), + ), + output_path=output_path / "safety_checker" / "model.onnx", + ordered_input_names=["clip_input", "images"], + output_names=["out_images", "has_nsfw_concepts"], + dynamic_axes={ + "clip_input": {0: "batch", 1: "channels", 2: "height", 3: "width"}, + "images": {0: "batch", 1: "height", 2: "width", 3: "channels"}, + }, + opset=opset, + ) + del pipeline.safety_checker + safety_checker = OnnxRuntimeModel.from_pretrained(output_path / "safety_checker") + else: + safety_checker = None onnx_pipeline = OnnxStableDiffusionPipeline( vae_encoder=OnnxRuntimeModel.from_pretrained(output_path / "vae_encoder"), @@ -200,7 +225,7 @@ def convert_models(model_path: str, output_path: str, opset: int, fp16: bool = F tokenizer=pipeline.tokenizer, unet=OnnxRuntimeModel.from_pretrained(output_path / "unet"), scheduler=pipeline.scheduler, - safety_checker=OnnxRuntimeModel.from_pretrained(output_path / "safety_checker"), + safety_checker=safety_checker, feature_extractor=pipeline.feature_extractor, ) diff --git a/src/diffusers/onnx_utils.py b/src/diffusers/onnx_utils.py index 142174f6e1..b2c533ed74 100644 --- a/src/diffusers/onnx_utils.py +++ b/src/diffusers/onnx_utils.py @@ -24,7 +24,7 @@ import numpy as np from huggingface_hub import hf_hub_download -from .utils import ONNX_WEIGHTS_NAME, is_onnx_available, logging +from .utils import ONNX_EXTERNAL_WEIGHTS_NAME, ONNX_WEIGHTS_NAME, is_onnx_available, logging if is_onnx_available(): @@ -33,13 +33,28 @@ if is_onnx_available(): logger = logging.get_logger(__name__) +ORT_TO_NP_TYPE = { + "tensor(bool)": np.bool_, + "tensor(int8)": np.int8, + "tensor(uint8)": np.uint8, + "tensor(int16)": np.int16, + "tensor(uint16)": np.uint16, + "tensor(int32)": np.int32, + "tensor(uint32)": np.uint32, + "tensor(int64)": np.int64, + "tensor(uint64)": np.uint64, + "tensor(float16)": np.float16, + "tensor(float)": np.float32, + "tensor(double)": np.float64, +} + class OnnxRuntimeModel: def __init__(self, model=None, **kwargs): logger.info("`diffusers.OnnxRuntimeModel` is experimental and might change in the future.") self.model = model self.model_save_dir = kwargs.get("model_save_dir", None) - self.latest_model_name = kwargs.get("latest_model_name", "model.onnx") + self.latest_model_name = kwargs.get("latest_model_name", ONNX_WEIGHTS_NAME) def __call__(self, **kwargs): inputs = {k: np.array(v) for k, v in kwargs.items()} @@ -84,6 +99,15 @@ class OnnxRuntimeModel: except shutil.SameFileError: pass + # copy external weights (for models >2GB) + src_path = self.model_save_dir.joinpath(ONNX_EXTERNAL_WEIGHTS_NAME) + if src_path.exists(): + dst_path = Path(save_directory).joinpath(ONNX_EXTERNAL_WEIGHTS_NAME) + try: + shutil.copyfile(src_path, dst_path) + except shutil.SameFileError: + pass + def save_pretrained( self, save_directory: Union[str, os.PathLike], diff --git a/src/diffusers/pipeline_utils.py b/src/diffusers/pipeline_utils.py index 628e632012..4e7aeb8c91 100644 --- a/src/diffusers/pipeline_utils.py +++ b/src/diffusers/pipeline_utils.py @@ -541,7 +541,7 @@ class DiffusionPipeline(ConfigMixin): # if the model is in a pipeline module, then we load it from the pipeline if name in passed_class_obj: # 1. 
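The `ORT_TO_NP_TYPE` table added to `onnx_utils.py` exists so the `timestep` array can be built in whatever dtype the exported UNet actually declares, rather than a hard-coded type. A rough sketch of the lookup against an `onnxruntime` session (the model path is hypothetical, and the table is abbreviated here):

```python
import numpy as np
import onnxruntime as ort

# abbreviated copy of the mapping; the full table covers every ONNX tensor type
ORT_TO_NP_TYPE = {
    "tensor(int64)": np.int64,
    "tensor(float16)": np.float16,
    "tensor(float)": np.float32,
    "tensor(double)": np.float64,
}

session = ort.InferenceSession("unet/model.onnx")  # hypothetical path to an exported UNet
timestep_type = next(
    (inp.type for inp in session.get_inputs() if inp.name == "timestep"),
    "tensor(float)",  # fall back to float32 when no timestep input is found
)
timestep = np.array([999], dtype=ORT_TO_NP_TYPE[timestep_type])
```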
check that passed_class_obj has correct parent class - if not is_pipeline_module: + if not is_pipeline_module and passed_class_obj[name] is not None: library = importlib.import_module(library_name) class_obj = getattr(library, class_name) importable_classes = LOADABLE_CLASSES[library_name] diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py index 22f5bf6c43..0c50e424e2 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py @@ -2,11 +2,12 @@ import inspect from typing import Callable, List, Optional, Union import numpy as np +import torch from transformers import CLIPFeatureExtractor, CLIPTokenizer from ...configuration_utils import FrozenDict -from ...onnx_utils import OnnxRuntimeModel +from ...onnx_utils import ORT_TO_NP_TYPE, OnnxRuntimeModel from ...pipeline_utils import DiffusionPipeline from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler from ...utils import deprecate, logging @@ -186,7 +187,7 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline): # set timesteps self.scheduler.set_timesteps(num_inference_steps) - latents = latents * self.scheduler.init_noise_sigma + latents = latents * np.float(self.scheduler.init_noise_sigma) # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. @@ -197,15 +198,20 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline): if accepts_eta: extra_step_kwargs["eta"] = eta + timestep_dtype = next( + (input.type for input in self.unet.model.get_inputs() if input.name == "timestep"), "tensor(float)" + ) + timestep_dtype = ORT_TO_NP_TYPE[timestep_dtype] + for i, t in enumerate(self.progress_bar(self.scheduler.timesteps)): # expand the latents if we are doing classifier free guidance latent_model_input = np.concatenate([latents] * 2) if do_classifier_free_guidance else latents - latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + latent_model_input = self.scheduler.scale_model_input(torch.from_numpy(latent_model_input), t) + latent_model_input = latent_model_input.cpu().numpy() # predict the noise residual - noise_pred = self.unet( - sample=latent_model_input, timestep=np.array([t]), encoder_hidden_states=text_embeddings - ) + timestep = np.array([t], dtype=timestep_dtype) + noise_pred = self.unet(sample=latent_model_input, timestep=timestep, encoder_hidden_states=text_embeddings) noise_pred = noise_pred[0] # perform guidance @@ -214,7 +220,7 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline): noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) # compute the previous noisy sample x_t -> x_t-1 - latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample + latents = self.scheduler.step(noise_pred, t, torch.from_numpy(latents), **extra_step_kwargs).prev_sample latents = np.array(latents) # call the callback, if provided @@ -235,6 +241,9 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline): safety_checker_input = self.feature_extractor( self.numpy_to_pil(image), return_tensors="np" ).pixel_values.astype(image.dtype) + + image, has_nsfw_concepts = self.safety_checker(clip_input=safety_checker_input, images=image) + # There will throw an error if use safety_checker batchsize>1 images, 
has_nsfw_concept = [], [] for i in range(image.shape[0]): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py index 04ecdbecc6..f85069b969 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py @@ -8,7 +8,7 @@ import PIL from transformers import CLIPFeatureExtractor, CLIPTokenizer from ...configuration_utils import FrozenDict -from ...onnx_utils import OnnxRuntimeModel +from ...onnx_utils import ORT_TO_NP_TYPE, OnnxRuntimeModel from ...pipeline_utils import DiffusionPipeline from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler from ...utils import deprecate, logging @@ -338,14 +338,21 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline): t_start = max(num_inference_steps - init_timestep + offset, 0) timesteps = self.scheduler.timesteps[t_start:].numpy() + timestep_dtype = next( + (input.type for input in self.unet.model.get_inputs() if input.name == "timestep"), "tensor(float)" + ) + timestep_dtype = ORT_TO_NP_TYPE[timestep_dtype] + for i, t in enumerate(self.progress_bar(timesteps)): # expand the latents if we are doing classifier free guidance latent_model_input = np.concatenate([latents] * 2) if do_classifier_free_guidance else latents - latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + latent_model_input = self.scheduler.scale_model_input(torch.from_numpy(latent_model_input), t) + latent_model_input = latent_model_input.cpu().numpy() # predict the noise residual + timestep = np.array([t], dtype=timestep_dtype) noise_pred = self.unet( - sample=latent_model_input, timestep=np.array([t]), encoder_hidden_states=text_embeddings + sample=latent_model_input, timestep=timestep, encoder_hidden_states=text_embeddings )[0] # perform guidance @@ -354,7 +361,7 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline): noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) # compute the previous noisy sample x_t -> x_t-1 - latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample + latents = self.scheduler.step(noise_pred, t, torch.from_numpy(latents), **extra_step_kwargs).prev_sample latents = latents.numpy() # call the callback, if provided @@ -375,7 +382,7 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline): safety_checker_input = self.feature_extractor( self.numpy_to_pil(image), return_tensors="np" ).pixel_values.astype(image.dtype) - # There will throw an error if use safety_checker batchsize>1 + # safety_checker does not support batched inputs yet images, has_nsfw_concept = [], [] for i in range(image.shape[0]): image_i, has_nsfw_concept_i = self.safety_checker( diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py index 517242921d..f763291c77 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py @@ -8,7 +8,7 @@ import PIL from transformers import CLIPFeatureExtractor, CLIPTokenizer from ...configuration_utils import FrozenDict -from ...onnx_utils import OnnxRuntimeModel +from ...onnx_utils import ORT_TO_NP_TYPE, OnnxRuntimeModel from 
...pipeline_utils import DiffusionPipeline from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler from ...utils import deprecate, logging @@ -352,7 +352,7 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline): self.scheduler.set_timesteps(num_inference_steps) # scale the initial noise by the standard deviation required by the scheduler - latents = latents * self.scheduler.init_noise_sigma + latents = latents * np.float(self.scheduler.init_noise_sigma) # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. @@ -363,17 +363,23 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline): if accepts_eta: extra_step_kwargs["eta"] = eta + timestep_dtype = next( + (input.type for input in self.unet.model.get_inputs() if input.name == "timestep"), "tensor(float)" + ) + timestep_dtype = ORT_TO_NP_TYPE[timestep_dtype] + for i, t in enumerate(self.progress_bar(self.scheduler.timesteps)): # expand the latents if we are doing classifier free guidance latent_model_input = np.concatenate([latents] * 2) if do_classifier_free_guidance else latents # concat latents, mask, masked_image_latnets in the channel dimension latent_model_input = np.concatenate([latent_model_input, mask, masked_image_latents], axis=1) latent_model_input = self.scheduler.scale_model_input(torch.from_numpy(latent_model_input), t) - latent_model_input = latent_model_input.numpy() + latent_model_input = latent_model_input.cpu().numpy() # predict the noise residual + timestep = np.array([t], dtype=timestep_dtype) noise_pred = self.unet( - sample=latent_model_input, timestep=np.array([t]), encoder_hidden_states=text_embeddings + sample=latent_model_input, timestep=timestep, encoder_hidden_states=text_embeddings )[0] # perform guidance @@ -382,7 +388,7 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline): noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) # compute the previous noisy sample x_t -> x_t-1 - latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample + latents = self.scheduler.step(noise_pred, t, torch.from_numpy(latents), **extra_step_kwargs).prev_sample latents = latents.numpy() # call the callback, if provided @@ -403,7 +409,7 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline): safety_checker_input = self.feature_extractor( self.numpy_to_pil(image), return_tensors="np" ).pixel_values.astype(image.dtype) - # There will throw an error if use safety_checker batchsize>1 + # safety_checker does not support batched inputs yet images, has_nsfw_concept = [], [] for i in range(image.shape[0]): image_i, has_nsfw_concept_i = self.safety_checker( diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py index 3fa477e7dc..a00e1f4dcd 100644 --- a/src/diffusers/utils/__init__.py +++ b/src/diffusers/utils/__init__.py @@ -67,6 +67,7 @@ CONFIG_NAME = "config.json" WEIGHTS_NAME = "diffusion_pytorch_model.bin" FLAX_WEIGHTS_NAME = "diffusion_flax_model.msgpack" ONNX_WEIGHTS_NAME = "model.onnx" +ONNX_EXTERNAL_WEIGHTS_NAME = "weights.pb" HUGGINGFACE_CO_RESOLVE_ENDPOINT = "https://huggingface.co" DIFFUSERS_CACHE = default_cache_path DIFFUSERS_DYNAMIC_MODULE_NAME = "diffusers_modules" diff --git a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion.py index d8356675e9..a1946e39f9 100644 --- 
a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion.py @@ -13,11 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import tempfile import unittest import numpy as np -from diffusers import OnnxStableDiffusionPipeline +from diffusers import DDIMScheduler, LMSDiscreteScheduler, OnnxStableDiffusionPipeline from diffusers.utils.testing_utils import is_onnx_available, require_onnxruntime, require_torch_gpu, slow from ...test_pipelines_onnx_common import OnnxPipelineTesterMixin @@ -36,32 +37,87 @@ class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.Tes @require_onnxruntime @require_torch_gpu class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase): - def test_inference(self): - provider = ( + @property + def gpu_provider(self): + return ( "CUDAExecutionProvider", { - "gpu_mem_limit": "17179869184", # 16GB. + "gpu_mem_limit": "15000000000", # 15GB "arena_extend_strategy": "kSameAsRequested", }, ) + + @property + def gpu_options(self): options = ort.SessionOptions() options.enable_mem_pattern = False + return options + + def test_inference_default_pndm(self): + # using the PNDM scheduler by default sd_pipe = OnnxStableDiffusionPipeline.from_pretrained( "CompVis/stable-diffusion-v1-4", revision="onnx", - provider=provider, - sess_options=options, + provider=self.gpu_provider, + sess_options=self.gpu_options, ) + sd_pipe.set_progress_bar_config(disable=None) prompt = "A painting of a squirrel eating a burger" np.random.seed(0) - output = sd_pipe([prompt], guidance_scale=6.0, num_inference_steps=5, output_type="np") + output = sd_pipe([prompt], guidance_scale=6.0, num_inference_steps=10, output_type="np") image = output.images image_slice = image[0, -3:, -3:, -1] assert image.shape == (1, 512, 512, 3) - expected_slice = np.array([0.3602, 0.3688, 0.3652, 0.3895, 0.3782, 0.3747, 0.3927, 0.4241, 0.4327]) + expected_slice = np.array([0.0452, 0.0390, 0.0087, 0.0350, 0.0617, 0.0364, 0.0544, 0.0523, 0.0720]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_inference_ddim(self): + ddim_scheduler = DDIMScheduler.from_config( + "runwayml/stable-diffusion-v1-5", subfolder="scheduler", revision="onnx" + ) + sd_pipe = OnnxStableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + revision="onnx", + scheduler=ddim_scheduler, + provider=self.gpu_provider, + sess_options=self.gpu_options, + ) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "open neural network exchange" + generator = np.random.RandomState(0) + output = sd_pipe([prompt], guidance_scale=7.5, num_inference_steps=10, generator=generator, output_type="np") + image = output.images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.2867, 0.1974, 0.1481, 0.7294, 0.7251, 0.6667, 0.4194, 0.5642, 0.6486]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_inference_k_lms(self): + lms_scheduler = LMSDiscreteScheduler.from_config( + "runwayml/stable-diffusion-v1-5", subfolder="scheduler", revision="onnx" + ) + sd_pipe = OnnxStableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + revision="onnx", + scheduler=lms_scheduler, + provider=self.gpu_provider, + sess_options=self.gpu_options, + ) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "open neural network exchange" + generator = 
np.random.RandomState(0) + output = sd_pipe([prompt], guidance_scale=7.5, num_inference_steps=10, generator=generator, output_type="np") + image = output.images + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 512, 512, 3) + expected_slice = np.array([0.2306, 0.1959, 0.1593, 0.6549, 0.6394, 0.5408, 0.5065, 0.6010, 0.6161]) assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 def test_intermediate_state(self): @@ -75,27 +131,61 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase): assert latents.shape == (1, 4, 64, 64) latents_slice = latents[0, -3:, -3:, -1] expected_slice = np.array( - [-0.5950, -0.3039, -1.1672, 0.1594, -1.1572, 0.6719, -1.9712, -0.0403, 0.9592] + [-0.6772, -0.3835, -1.2456, 0.1905, -1.0974, 0.6967, -1.9353, 0.0178, 1.0167] ) assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3 elif step == 5: assert latents.shape == (1, 4, 64, 64) latents_slice = latents[0, -3:, -3:, -1] expected_slice = np.array( - [-0.4776, -0.0119, -0.8519, -0.0275, -0.9764, 0.9820, -0.3843, 0.3788, 1.2264] + [-0.3351, 0.2241, -0.1837, -0.2325, -0.6577, 0.3393, -0.0241, 0.5899, 1.3875] ) assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3 test_callback_fn.has_been_called = False pipe = OnnxStableDiffusionPipeline.from_pretrained( - "CompVis/stable-diffusion-v1-4", revision="onnx", provider="CUDAExecutionProvider" + "runwayml/stable-diffusion-v1-5", + revision="onnx", + provider=self.gpu_provider, + sess_options=self.gpu_options, ) pipe.set_progress_bar_config(disable=None) prompt = "Andromeda galaxy in a bottle" - np.random.seed(0) - pipe(prompt=prompt, num_inference_steps=5, guidance_scale=7.5, callback=test_callback_fn, callback_steps=1) + generator = np.random.RandomState(0) + pipe( + prompt=prompt, + num_inference_steps=5, + guidance_scale=7.5, + generator=generator, + callback=test_callback_fn, + callback_steps=1, + ) assert test_callback_fn.has_been_called assert number_of_steps == 6 + + def test_stable_diffusion_no_safety_checker(self): + pipe = OnnxStableDiffusionPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + revision="onnx", + provider=self.gpu_provider, + sess_options=self.gpu_options, + safety_checker=None, + ) + assert isinstance(pipe, OnnxStableDiffusionPipeline) + assert pipe.safety_checker is None + + image = pipe("example prompt", num_inference_steps=2).images[0] + assert image is not None + + # check that there's no error when saving a pipeline with one of the models being None + with tempfile.TemporaryDirectory() as tmpdirname: + pipe.save_pretrained(tmpdirname) + pipe = OnnxStableDiffusionPipeline.from_pretrained(tmpdirname) + + # sanity check that the pipeline still works + assert pipe.safety_checker is None + image = pipe("example prompt", num_inference_steps=2).images[0] + assert image is not None diff --git a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_img2img.py b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_img2img.py index 3ffbfc3d4f..61831c64c0 100644 --- a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_img2img.py +++ b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_img2img.py @@ -17,7 +17,7 @@ import unittest import numpy as np -from diffusers import OnnxStableDiffusionImg2ImgPipeline +from diffusers import LMSDiscreteScheduler, OnnxStableDiffusionImg2ImgPipeline from diffusers.utils.testing_utils import is_onnx_available, load_image, require_onnxruntime, require_torch_gpu, slow from ...test_pipelines_onnx_common 
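From the user side, the compatibility work covered by these tests comes down to being able to pass a torch-based scheduler, and optionally `safety_checker=None`, to the ONNX pipeline. A hedged usage sketch mirroring the tests (model id, revision and scheduler follow the test values; the CPU execution provider is substituted so the snippet does not require CUDA):

```python
import numpy as np
from diffusers import LMSDiscreteScheduler, OnnxStableDiffusionPipeline

scheduler = LMSDiscreteScheduler.from_config(
    "runwayml/stable-diffusion-v1-5", subfolder="scheduler", revision="onnx"
)
pipe = OnnxStableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    revision="onnx",
    scheduler=scheduler,
    safety_checker=None,
    provider="CPUExecutionProvider",
)

generator = np.random.RandomState(0)
image = pipe(
    "open neural network exchange",
    num_inference_steps=10,
    guidance_scale=7.5,
    generator=generator,
).images[0]
```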
import OnnxPipelineTesterMixin @@ -35,45 +35,92 @@ class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.Tes @slow @require_onnxruntime @require_torch_gpu -class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase): - def test_inference(self): +class OnnxStableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase): + @property + def gpu_provider(self): + return ( + "CUDAExecutionProvider", + { + "gpu_mem_limit": "15000000000", # 15GB + "arena_extend_strategy": "kSameAsRequested", + }, + ) + + @property + def gpu_options(self): + options = ort.SessionOptions() + options.enable_mem_pattern = False + return options + + def test_inference_default_pndm(self): init_image = load_image( "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/img2img/sketch-mountains-input.jpg" ) init_image = init_image.resize((768, 512)) - provider = ( - "CUDAExecutionProvider", - { - "gpu_mem_limit": "17179869184", # 16GB. - "arena_extend_strategy": "kSameAsRequested", - }, - ) - options = ort.SessionOptions() - options.enable_mem_pattern = False + # using the PNDM scheduler by default pipe = OnnxStableDiffusionImg2ImgPipeline.from_pretrained( "CompVis/stable-diffusion-v1-4", revision="onnx", - provider=provider, - sess_options=options, + provider=self.gpu_provider, + sess_options=self.gpu_options, ) pipe.set_progress_bar_config(disable=None) prompt = "A fantasy landscape, trending on artstation" - np.random.seed(0) + generator = np.random.RandomState(0) output = pipe( prompt=prompt, init_image=init_image, strength=0.75, guidance_scale=7.5, - num_inference_steps=8, + num_inference_steps=10, + generator=generator, output_type="np", ) images = output.images image_slice = images[0, 255:258, 383:386, -1] assert images.shape == (1, 512, 768, 3) - expected_slice = np.array([0.4830, 0.5242, 0.5603, 0.5016, 0.5131, 0.5111, 0.4928, 0.5025, 0.5055]) + expected_slice = np.array([0.4909, 0.5059, 0.5372, 0.4623, 0.4876, 0.5049, 0.4820, 0.4956, 0.5019]) + # TODO: lower the tolerance after finding the cause of onnxruntime reproducibility issues + assert np.abs(image_slice.flatten() - expected_slice).max() < 2e-2 + + def test_inference_k_lms(self): + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/img2img/sketch-mountains-input.jpg" + ) + init_image = init_image.resize((768, 512)) + lms_scheduler = LMSDiscreteScheduler.from_config( + "runwayml/stable-diffusion-v1-5", subfolder="scheduler", revision="onnx" + ) + pipe = OnnxStableDiffusionImg2ImgPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + revision="onnx", + scheduler=lms_scheduler, + provider=self.gpu_provider, + sess_options=self.gpu_options, + ) + pipe.set_progress_bar_config(disable=None) + + prompt = "A fantasy landscape, trending on artstation" + + generator = np.random.RandomState(0) + output = pipe( + prompt=prompt, + init_image=init_image, + strength=0.75, + guidance_scale=7.5, + num_inference_steps=10, + generator=generator, + output_type="np", + ) + images = output.images + image_slice = images[0, 255:258, 383:386, -1] + + assert images.shape == (1, 512, 768, 3) + expected_slice = np.array([0.7950, 0.7923, 0.7903, 0.5516, 0.5501, 0.5476, 0.4965, 0.4933, 0.4910]) # TODO: lower the tolerance after finding the cause of onnxruntime reproducibility issues assert np.abs(image_slice.flatten() - expected_slice).max() < 2e-2 diff --git a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_inpaint.py 
b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_inpaint.py index 81cbed4e51..4ba8e273b4 100644 --- a/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_inpaint.py +++ b/tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_inpaint.py @@ -17,7 +17,7 @@ import unittest import numpy as np -from diffusers import OnnxStableDiffusionInpaintPipeline +from diffusers import LMSDiscreteScheduler, OnnxStableDiffusionInpaintPipeline from diffusers.utils.testing_utils import is_onnx_available, load_image, require_onnxruntime, require_torch_gpu, slow from ...test_pipelines_onnx_common import OnnxPipelineTesterMixin @@ -35,8 +35,24 @@ class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.Tes @slow @require_onnxruntime @require_torch_gpu -class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase): - def test_stable_diffusion_inpaint_onnx(self): +class OnnxStableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): + @property + def gpu_provider(self): + return ( + "CUDAExecutionProvider", + { + "gpu_mem_limit": "15000000000", # 15GB + "arena_extend_strategy": "kSameAsRequested", + }, + ) + + @property + def gpu_options(self): + options = ort.SessionOptions() + options.enable_mem_pattern = False + return options + + def test_inference_default_pndm(self): init_image = load_image( "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/in_paint/overture-creations-5sI6fQgYIuo.png" @@ -45,37 +61,69 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase): "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" ) - provider = ( - "CUDAExecutionProvider", - { - "gpu_mem_limit": "17179869184", # 16GB. 
- "arena_extend_strategy": "kSameAsRequested", - }, - ) - options = ort.SessionOptions() - options.enable_mem_pattern = False pipe = OnnxStableDiffusionInpaintPipeline.from_pretrained( "runwayml/stable-diffusion-inpainting", revision="onnx", - provider=provider, - sess_options=options, + provider=self.gpu_provider, + sess_options=self.gpu_options, ) pipe.set_progress_bar_config(disable=None) prompt = "A red cat sitting on a park bench" - np.random.seed(0) + generator = np.random.RandomState(0) output = pipe( prompt=prompt, image=init_image, mask_image=mask_image, guidance_scale=7.5, - num_inference_steps=8, + num_inference_steps=10, + generator=generator, output_type="np", ) images = output.images image_slice = images[0, 255:258, 255:258, -1] assert images.shape == (1, 512, 512, 3) - expected_slice = np.array([0.2951, 0.2955, 0.2922, 0.2036, 0.1977, 0.2279, 0.1716, 0.1641, 0.1799]) + expected_slice = np.array([0.2514, 0.3007, 0.3517, 0.1790, 0.2382, 0.3167, 0.1944, 0.2273, 0.2464]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 + + def test_inference_k_lms(self): + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/in_paint/overture-creations-5sI6fQgYIuo.png" + ) + mask_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" + ) + lms_scheduler = LMSDiscreteScheduler.from_config( + "runwayml/stable-diffusion-inpainting", subfolder="scheduler", revision="onnx" + ) + pipe = OnnxStableDiffusionInpaintPipeline.from_pretrained( + "runwayml/stable-diffusion-inpainting", + revision="onnx", + scheduler=lms_scheduler, + provider=self.gpu_provider, + sess_options=self.gpu_options, + ) + pipe.set_progress_bar_config(disable=None) + + prompt = "A red cat sitting on a park bench" + + generator = np.random.RandomState(0) + output = pipe( + prompt=prompt, + image=init_image, + mask_image=mask_image, + guidance_scale=7.5, + num_inference_steps=10, + generator=generator, + output_type="np", + ) + images = output.images + image_slice = images[0, 255:258, 255:258, -1] + + assert images.shape == (1, 512, 512, 3) + expected_slice = np.array([0.2520, 0.2743, 0.2643, 0.2641, 0.2517, 0.2650, 0.2498, 0.2688, 0.2529]) assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 From 614c182f94279d50d584d1a60f2745748ec598b7 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Tue, 8 Nov 2022 15:08:35 +0100 Subject: [PATCH 57/88] Restore compatibility with deprecated `StableDiffusionOnnxPipeline` (#1191) * Restore compatibility with old ONNX pipeline. I think it broke in #552. 
* Add missing attribute `vae_encoder` --- .../stable_diffusion/pipeline_onnx_stable_diffusion.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py index 0c50e424e2..ecc0b31f34 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py @@ -18,6 +18,7 @@ logger = logging.get_logger(__name__) class OnnxStableDiffusionPipeline(DiffusionPipeline): + vae_encoder: OnnxRuntimeModel vae_decoder: OnnxRuntimeModel text_encoder: OnnxRuntimeModel tokenizer: CLIPTokenizer @@ -268,6 +269,7 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline): class StableDiffusionOnnxPipeline(OnnxStableDiffusionPipeline): def __init__( self, + vae_encoder: OnnxRuntimeModel, vae_decoder: OnnxRuntimeModel, text_encoder: OnnxRuntimeModel, tokenizer: CLIPTokenizer, @@ -279,6 +281,7 @@ class StableDiffusionOnnxPipeline(OnnxStableDiffusionPipeline): deprecation_message = "Please use `OnnxStableDiffusionPipeline` instead of `StableDiffusionOnnxPipeline`." deprecate("StableDiffusionOnnxPipeline", "1.0.0", deprecation_message) super().__init__( + vae_encoder=vae_encoder, vae_decoder=vae_decoder, text_encoder=text_encoder, tokenizer=tokenizer, From 32b0736d8ad7ec124affca3a00a266f5addcbd91 Mon Sep 17 00:00:00 2001 From: Mishig Date: Tue, 8 Nov 2022 16:38:09 +0100 Subject: [PATCH 58/88] Update pr docs actions (#1194) --- .github/workflows/build_pr_documentation.yml | 5 ++++- .github/workflows/delete_doc_comment.yml | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml index d51623e735..542920d7f6 100644 --- a/.github/workflows/build_pr_documentation.yml +++ b/.github/workflows/build_pr_documentation.yml @@ -9,8 +9,11 @@ concurrency: jobs: build: - uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main + uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@use_hf_hub with: commit_sha: ${{ github.event.pull_request.head.sha }} pr_number: ${{ github.event.number }} package: diffusers + secrets: + token: ${{ secrets.HF_DOC_PUSH }} + comment_bot_token: ${{ secrets.HUGGINGFACE_PUSH }} diff --git a/.github/workflows/delete_doc_comment.yml b/.github/workflows/delete_doc_comment.yml index 238dc0bdba..e1b2da9567 100644 --- a/.github/workflows/delete_doc_comment.yml +++ b/.github/workflows/delete_doc_comment.yml @@ -7,7 +7,10 @@ on: jobs: delete: - uses: huggingface/doc-builder/.github/workflows/delete_doc_comment.yml@main + uses: huggingface/doc-builder/.github/workflows/delete_doc_comment.yml@use_hf_hub with: pr_number: ${{ github.event.number }} package: diffusers + secrets: + token: ${{ secrets.HF_DOC_PUSH }} + comment_bot_token: ${{ secrets.HUGGINGFACE_PUSH }} From 5786b0e2f7a76229781e40671abee5000a315e88 Mon Sep 17 00:00:00 2001 From: Suraj Patil Date: Tue, 8 Nov 2022 17:15:23 +0100 Subject: [PATCH 59/88] handle dtype xformers attention (#1196) handle dtype xformers --- src/diffusers/models/attention.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/diffusers/models/attention.py b/src/diffusers/models/attention.py index bac85e2f39..e8ea37970e 100644 --- a/src/diffusers/models/attention.py +++ b/src/diffusers/models/attention.py @@ -492,6 +492,8 @@ class CrossAttention(nn.Module): # attention, what we cannot get 
enough of if self._use_memory_efficient_attention_xformers: hidden_states = self._memory_efficient_attention_xformers(query, key, value) + # Some versions of xformers return output in fp32, cast it back to the dtype of the input + hidden_states = hidden_states.to(query.dtype) else: if self._slice_size is None or query.shape[0] // self._slice_size == 1: hidden_states = self._attention(query, key, value) From 249d9bc0e76e55eb16c08d0e70b2d6057259c4a0 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 8 Nov 2022 18:08:08 +0100 Subject: [PATCH 60/88] [Scheduler] Move predict epsilon to init (#1155) * [Scheduler] Move predict epsilon to init * up * uP * uP * Apply suggestions from code review Co-authored-by: Pedro Cuenca * up Co-authored-by: Pedro Cuenca --- .../train_unconditional.py | 26 ++++--- src/diffusers/configuration_utils.py | 5 ++ src/diffusers/pipelines/ddpm/pipeline_ddpm.py | 13 +++- src/diffusers/schedulers/scheduling_ddpm.py | 23 ++++-- .../schedulers/scheduling_ddpm_flax.py | 22 ++++-- tests/fixtures/custom_pipeline/pipeline.py | 3 +- tests/pipelines/ddpm/test_ddpm.py | 71 ++++++++++++++++++- tests/test_config.py | 24 +++++++ tests/test_pipelines.py | 1 + tests/test_scheduler.py | 30 +++++++- 10 files changed, 193 insertions(+), 25 deletions(-) mode change 100755 => 100644 tests/test_config.py diff --git a/examples/unconditional_image_generation/train_unconditional.py b/examples/unconditional_image_generation/train_unconditional.py index 3f9ffb11ef..0eadecbd30 100644 --- a/examples/unconditional_image_generation/train_unconditional.py +++ b/examples/unconditional_image_generation/train_unconditional.py @@ -1,4 +1,5 @@ import argparse +import inspect import math import os from pathlib import Path @@ -190,10 +191,10 @@ def parse_args(): ) parser.add_argument( - "--predict_mode", - type=str, - default="eps", - help="What the model should predict. 'eps' to predict error, 'x0' to directly predict reconstruction", + "--predict_epsilon", + action="store_true", + default=True, + help="Whether the model should predict the 'epsilon'/noise error or directly the reconstructed image 'x0'.", ) parser.add_argument("--ddpm_num_steps", type=int, default=1000) @@ -252,7 +253,17 @@ def main(args): "UpBlock2D", ), ) - noise_scheduler = DDPMScheduler(num_train_timesteps=args.ddpm_num_steps, beta_schedule=args.ddpm_beta_schedule) + accepts_predict_epsilon = "predict_epsilon" in set(inspect.signature(DDPMScheduler.__init__).parameters.keys()) + + if accepts_predict_epsilon: + noise_scheduler = DDPMScheduler( + num_train_timesteps=args.ddpm_num_steps, + beta_schedule=args.ddpm_beta_schedule, + predict_epsilon=args.predict_epsilon, + ) + else: + noise_scheduler = DDPMScheduler(num_train_timesteps=args.ddpm_num_steps, beta_schedule=args.ddpm_beta_schedule) + optimizer = torch.optim.AdamW( model.parameters(), lr=args.learning_rate, @@ -351,9 +362,9 @@ def main(args): # Predict the noise residual model_output = model(noisy_images, timesteps).sample - if args.predict_mode == "eps": + if args.predict_epsilon: loss = F.mse_loss(model_output, noise) # this could have different weights! 
- elif args.predict_mode == "x0": + else: alpha_t = _extract_into_tensor( noise_scheduler.alphas_cumprod, timesteps, (clean_images.shape[0], 1, 1, 1) ) @@ -401,7 +412,6 @@ def main(args): generator=generator, batch_size=args.eval_batch_size, output_type="numpy", - predict_epsilon=args.predict_mode == "eps", ).images # denormalize the images and save to tensorboard diff --git a/src/diffusers/configuration_utils.py b/src/diffusers/configuration_utils.py index 39762e41d3..fc6ac9b5b9 100644 --- a/src/diffusers/configuration_utils.py +++ b/src/diffusers/configuration_utils.py @@ -334,6 +334,11 @@ class ConfigMixin: # 3. Create keyword arguments that will be passed to __init__ from expected keyword arguments init_dict = {} for key in expected_keys: + # if config param is passed to kwarg and is present in config dict + # it should overwrite existing config dict key + if key in kwargs and key in config_dict: + config_dict[key] = kwargs.pop(key) + if key in kwargs: # overwrite key init_dict[key] = kwargs.pop(key) diff --git a/src/diffusers/pipelines/ddpm/pipeline_ddpm.py b/src/diffusers/pipelines/ddpm/pipeline_ddpm.py index 811614ecbd..37d12f2f5d 100644 --- a/src/diffusers/pipelines/ddpm/pipeline_ddpm.py +++ b/src/diffusers/pipelines/ddpm/pipeline_ddpm.py @@ -18,7 +18,9 @@ from typing import Optional, Tuple, Union import torch +from ...configuration_utils import FrozenDict from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput +from ...utils import deprecate class DDPMPipeline(DiffusionPipeline): @@ -45,7 +47,6 @@ class DDPMPipeline(DiffusionPipeline): num_inference_steps: int = 1000, output_type: Optional[str] = "pil", return_dict: bool = True, - predict_epsilon: bool = True, **kwargs, ) -> Union[ImagePipelineOutput, Tuple]: r""" @@ -69,6 +70,16 @@ class DDPMPipeline(DiffusionPipeline): `return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the generated images. """ + message = ( + "Please make sure to instantiate your scheduler with `predict_epsilon` instead. E.g. `scheduler =" + " DDPMScheduler.from_config(, predict_epsilon=True)`." + ) + predict_epsilon = deprecate("predict_epsilon", "0.10.0", message, take_from=kwargs) + + if predict_epsilon is not None: + new_config = dict(self.scheduler.config) + new_config["predict_epsilon"] = predict_epsilon + self.scheduler._internal_dict = FrozenDict(new_config) # Sample gaussian noise to begin loop image = torch.randn( diff --git a/src/diffusers/schedulers/scheduling_ddpm.py b/src/diffusers/schedulers/scheduling_ddpm.py index 171c9598eb..08a73119e5 100644 --- a/src/diffusers/schedulers/scheduling_ddpm.py +++ b/src/diffusers/schedulers/scheduling_ddpm.py @@ -21,8 +21,8 @@ from typing import Optional, Tuple, Union import numpy as np import torch -from ..configuration_utils import ConfigMixin, register_to_config -from ..utils import BaseOutput +from ..configuration_utils import ConfigMixin, FrozenDict, register_to_config +from ..utils import BaseOutput, deprecate from .scheduling_utils import SchedulerMixin @@ -99,6 +99,8 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`. clip_sample (`bool`, default `True`): option to clip predicted sample between -1 and 1 for numerical stability. + predict_epsilon (`bool`): + optional flag to use when the model predicts the noise (epsilon), or the samples instead of the noise. 
""" @@ -121,6 +123,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): trained_betas: Optional[np.ndarray] = None, variance_type: str = "fixed_small", clip_sample: bool = True, + predict_epsilon: bool = True, ): if trained_betas is not None: self.betas = torch.from_numpy(trained_betas) @@ -221,9 +224,9 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): model_output: torch.FloatTensor, timestep: int, sample: torch.FloatTensor, - predict_epsilon=True, generator=None, return_dict: bool = True, + **kwargs, ) -> Union[DDPMSchedulerOutput, Tuple]: """ Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion @@ -234,8 +237,6 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): timestep (`int`): current discrete timestep in the diffusion chain. sample (`torch.FloatTensor`): current instance of sample being created by diffusion process. - predict_epsilon (`bool`): - optional flag to use when model predicts the samples directly instead of the noise, epsilon. generator: random number generator. return_dict (`bool`): option for returning tuple rather than DDPMSchedulerOutput class @@ -245,6 +246,16 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): returning a tuple, the first element is the sample tensor. """ + message = ( + "Please make sure to instantiate your scheduler with `predict_epsilon` instead. E.g. `scheduler =" + " DDPMScheduler.from_config(, predict_epsilon=True)`." + ) + predict_epsilon = deprecate("predict_epsilon", "0.10.0", message, take_from=kwargs) + if predict_epsilon is not None and predict_epsilon != self.config.predict_epsilon: + new_config = dict(self.config) + new_config["predict_epsilon"] = predict_epsilon + self._internal_dict = FrozenDict(new_config) + t = timestep if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in ["learned", "learned_range"]: @@ -260,7 +271,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): # 2. compute predicted original sample from predicted noise also called # "predicted x_0" of formula (15) from https://arxiv.org/pdf/2006.11239.pdf - if predict_epsilon: + if self.config.predict_epsilon: pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5) else: pred_original_sample = model_output diff --git a/src/diffusers/schedulers/scheduling_ddpm_flax.py b/src/diffusers/schedulers/scheduling_ddpm_flax.py index 7220a01454..f1b04a0417 100644 --- a/src/diffusers/schedulers/scheduling_ddpm_flax.py +++ b/src/diffusers/schedulers/scheduling_ddpm_flax.py @@ -22,7 +22,8 @@ import flax import jax.numpy as jnp from jax import random -from ..configuration_utils import ConfigMixin, register_to_config +from ..configuration_utils import ConfigMixin, FrozenDict, register_to_config +from ..utils import deprecate from .scheduling_utils_flax import FlaxSchedulerMixin, FlaxSchedulerOutput, broadcast_to_shape_from_left @@ -97,7 +98,8 @@ class FlaxDDPMScheduler(FlaxSchedulerMixin, ConfigMixin): `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`. clip_sample (`bool`, default `True`): option to clip predicted sample between -1 and 1 for numerical stability. - tensor_format (`str`): whether the scheduler expects pytorch or numpy arrays. + predict_epsilon (`bool`): + optional flag to use when the model predicts the noise (epsilon), or the samples instead of the noise. 
""" @@ -115,6 +117,7 @@ class FlaxDDPMScheduler(FlaxSchedulerMixin, ConfigMixin): trained_betas: Optional[jnp.ndarray] = None, variance_type: str = "fixed_small", clip_sample: bool = True, + predict_epsilon: bool = True, ): if trained_betas is not None: self.betas = jnp.asarray(trained_betas) @@ -196,6 +199,7 @@ class FlaxDDPMScheduler(FlaxSchedulerMixin, ConfigMixin): key: random.KeyArray, predict_epsilon: bool = True, return_dict: bool = True, + **kwargs, ) -> Union[FlaxDDPMSchedulerOutput, Tuple]: """ Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion @@ -208,8 +212,6 @@ class FlaxDDPMScheduler(FlaxSchedulerMixin, ConfigMixin): sample (`jnp.ndarray`): current instance of sample being created by diffusion process. key (`random.KeyArray`): a PRNG key. - predict_epsilon (`bool`): - optional flag to use when model predicts the samples directly instead of the noise, epsilon. return_dict (`bool`): option for returning tuple rather than FlaxDDPMSchedulerOutput class Returns: @@ -217,6 +219,16 @@ class FlaxDDPMScheduler(FlaxSchedulerMixin, ConfigMixin): `tuple`. When returning a tuple, the first element is the sample tensor. """ + message = ( + "Please make sure to instantiate your scheduler with `predict_epsilon` instead. E.g. `scheduler =" + " DDPMScheduler.from_config(, predict_epsilon=True)`." + ) + predict_epsilon = deprecate("predict_epsilon", "0.10.0", message, take_from=kwargs) + if predict_epsilon is not None and predict_epsilon != self.config.predict_epsilon: + new_config = dict(self.config) + new_config["predict_epsilon"] = predict_epsilon + self._internal_dict = FrozenDict(new_config) + t = timestep if model_output.shape[1] == sample.shape[1] * 2 and self.config.variance_type in ["learned", "learned_range"]: @@ -232,7 +244,7 @@ class FlaxDDPMScheduler(FlaxSchedulerMixin, ConfigMixin): # 2. compute predicted original sample from predicted noise also called # "predicted x_0" of formula (15) from https://arxiv.org/pdf/2006.11239.pdf - if predict_epsilon: + if self.config.predict_epsilon: pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5) else: pred_original_sample = model_output diff --git a/tests/fixtures/custom_pipeline/pipeline.py b/tests/fixtures/custom_pipeline/pipeline.py index 10a22edaa4..e7429d0a19 100644 --- a/tests/fixtures/custom_pipeline/pipeline.py +++ b/tests/fixtures/custom_pipeline/pipeline.py @@ -42,7 +42,6 @@ class CustomLocalPipeline(DiffusionPipeline): self, batch_size: int = 1, generator: Optional[torch.Generator] = None, - eta: float = 0.0, num_inference_steps: int = 50, output_type: Optional[str] = "pil", return_dict: bool = True, @@ -89,7 +88,7 @@ class CustomLocalPipeline(DiffusionPipeline): # 2. 
predict previous mean of image x_t-1 and add variance depending on eta # eta corresponds to η in paper and should be between [0, 1] # do x_t -> x_t-1 - image = self.scheduler.step(model_output, t, image, eta).prev_sample + image = self.scheduler.step(model_output, t, image).prev_sample image = (image / 2 + 0.5).clamp(0, 1) image = image.cpu().permute(0, 2, 3, 1).numpy() diff --git a/tests/pipelines/ddpm/test_ddpm.py b/tests/pipelines/ddpm/test_ddpm.py index c58e2db38f..a09f77d124 100644 --- a/tests/pipelines/ddpm/test_ddpm.py +++ b/tests/pipelines/ddpm/test_ddpm.py @@ -19,6 +19,7 @@ import numpy as np import torch from diffusers import DDPMPipeline, DDPMScheduler, UNet2DModel +from diffusers.utils import deprecate from diffusers.utils.testing_utils import require_torch, slow, torch_device from ...test_pipelines_common import PipelineTesterMixin @@ -28,8 +29,74 @@ torch.backends.cuda.matmul.allow_tf32 = False class DDPMPipelineFastTests(PipelineTesterMixin, unittest.TestCase): - # FIXME: add fast tests - pass + @property + def dummy_uncond_unet(self): + torch.manual_seed(0) + model = UNet2DModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=3, + out_channels=3, + down_block_types=("DownBlock2D", "AttnDownBlock2D"), + up_block_types=("AttnUpBlock2D", "UpBlock2D"), + ) + return model + + def test_inference(self): + unet = self.dummy_uncond_unet + scheduler = DDPMScheduler() + + ddpm = DDPMPipeline(unet=unet, scheduler=scheduler) + ddpm.to(torch_device) + ddpm.set_progress_bar_config(disable=None) + + # Warmup pass when using mps (see #372) + if torch_device == "mps": + _ = ddpm(num_inference_steps=1) + + generator = torch.manual_seed(0) + image = ddpm(generator=generator, num_inference_steps=2, output_type="numpy").images + + generator = torch.manual_seed(0) + image_from_tuple = ddpm(generator=generator, num_inference_steps=2, output_type="numpy", return_dict=False)[0] + + image_slice = image[0, -3:, -3:, -1] + image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + expected_slice = np.array( + [5.589e-01, 7.089e-01, 2.632e-01, 6.841e-01, 1.000e-04, 9.999e-01, 1.973e-01, 1.000e-04, 8.010e-02] + ) + tolerance = 1e-2 if torch_device != "mps" else 3e-2 + assert np.abs(image_slice.flatten() - expected_slice).max() < tolerance + assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < tolerance + + def test_inference_predict_epsilon(self): + deprecate("remove this test", "0.10.0", "remove") + unet = self.dummy_uncond_unet + scheduler = DDPMScheduler(predict_epsilon=False) + + ddpm = DDPMPipeline(unet=unet, scheduler=scheduler) + ddpm.to(torch_device) + ddpm.set_progress_bar_config(disable=None) + + # Warmup pass when using mps (see #372) + if torch_device == "mps": + _ = ddpm(num_inference_steps=1) + + generator = torch.manual_seed(0) + image = ddpm(generator=generator, num_inference_steps=2, output_type="numpy").images + + generator = torch.manual_seed(0) + image_eps = ddpm(generator=generator, num_inference_steps=2, output_type="numpy", predict_epsilon=False)[0] + + image_slice = image[0, -3:, -3:, -1] + image_eps_slice = image_eps[0, -3:, -3:, -1] + + assert image.shape == (1, 32, 32, 3) + tolerance = 1e-2 if torch_device != "mps" else 3e-2 + assert np.abs(image_slice.flatten() - image_eps_slice.flatten()).max() < tolerance @slow diff --git a/tests/test_config.py b/tests/test_config.py old mode 100755 new mode 100644 index 5084769def..8ae8e1d9e1 --- a/tests/test_config.py +++ 
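Following the deprecation message introduced above, `predict_epsilon` now travels with the scheduler configuration instead of being passed to every `step()` call. A small sketch of both ways of setting it (the tensors are stand-ins for real model outputs; the Hub id in the comment matches the one used in the new config test):

```python
import torch
from diffusers import DDPMScheduler

# configure the behaviour once, at construction time
scheduler = DDPMScheduler(num_train_timesteps=1000, predict_epsilon=False)

# the same override also works when loading a config from the Hub, e.g.
# scheduler = DDPMScheduler.from_config("google/ddpm-celebahq-256", predict_epsilon=False)

sample = torch.randn(1, 3, 32, 32)        # current noisy sample x_t
model_output = torch.randn(1, 3, 32, 32)  # with predict_epsilon=False this is read as a prediction of x_0

# step() now consults scheduler.config.predict_epsilon; no per-call flag is passed
prev_sample = scheduler.step(model_output, 999, sample).prev_sample
```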
b/tests/test_config.py @@ -21,6 +21,7 @@ import unittest import diffusers from diffusers import ( DDIMScheduler, + DDPMScheduler, DPMSolverMultistepScheduler, EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, @@ -291,6 +292,29 @@ class ConfigTester(unittest.TestCase): # no warning should be thrown assert cap_logger.out == "" + def test_overwrite_config_on_load(self): + logger = logging.get_logger("diffusers.configuration_utils") + + with CaptureLogger(logger) as cap_logger: + ddpm = DDPMScheduler.from_config( + "hf-internal-testing/tiny-stable-diffusion-torch", + subfolder="scheduler", + predict_epsilon=False, + beta_end=8, + ) + + with CaptureLogger(logger) as cap_logger_2: + ddpm_2 = DDPMScheduler.from_config("google/ddpm-celebahq-256", beta_start=88) + + assert ddpm.__class__ == DDPMScheduler + assert ddpm.config.predict_epsilon is False + assert ddpm.config.beta_end == 8 + assert ddpm_2.config.beta_start == 88 + + # no warning should be thrown + assert cap_logger.out == "" + assert cap_logger_2.out == "" + def test_load_dpmsolver(self): logger = logging.get_logger("diffusers.configuration_utils") diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index 2b19b08b37..db3e804e67 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -107,6 +107,7 @@ class CustomPipelineTests(unittest.TestCase): images, output_str = pipeline(num_inference_steps=2, output_type="np") assert images[0].shape == (1, 32, 32, 3) + # compare output to https://huggingface.co/hf-internal-testing/diffusers-dummy-pipeline/blob/main/pipeline.py#L102 assert output_str == "This is a test" diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 70201d1e67..234e1185b4 100755 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -33,7 +33,7 @@ from diffusers import ( ScoreSdeVeScheduler, VQDiffusionScheduler, ) -from diffusers.utils import torch_device +from diffusers.utils import deprecate, torch_device torch.backends.cuda.matmul.allow_tf32 = False @@ -393,6 +393,34 @@ class DDPMSchedulerTest(SchedulerCommonTest): for clip_sample in [True, False]: self.check_over_configs(clip_sample=clip_sample) + def test_predict_epsilon(self): + for predict_epsilon in [True, False]: + self.check_over_configs(predict_epsilon=predict_epsilon) + + def test_deprecated_epsilon(self): + deprecate("remove this test", "0.10.0", "remove") + scheduler_class = self.scheduler_classes[0] + scheduler_config = self.get_scheduler_config() + + sample = self.dummy_sample_deter + residual = 0.1 * self.dummy_sample_deter + time_step = 4 + + scheduler = scheduler_class(**scheduler_config) + scheduler_eps = scheduler_class(predict_epsilon=False, **scheduler_config) + + kwargs = {} + if "generator" in set(inspect.signature(scheduler.step).parameters.keys()): + kwargs["generator"] = torch.Generator().manual_seed(0) + output = scheduler.step(residual, time_step, sample, predict_epsilon=False, **kwargs).prev_sample + + kwargs = {} + if "generator" in set(inspect.signature(scheduler.step).parameters.keys()): + kwargs["generator"] = torch.Generator().manual_seed(0) + output_eps = scheduler_eps.step(residual, time_step, sample, predict_epsilon=False, **kwargs).prev_sample + + assert (output - output_eps).abs().sum() < 1e-5 + def test_time_indices(self): for t in [0, 500, 999]: self.check_over_forward(time_step=t) From 598ff76bbf55330c0ab7120c8bf3928ee5b624ef Mon Sep 17 00:00:00 2001 From: Nathan Lambert Date: Wed, 9 Nov 2022 01:06:49 -0800 Subject: [PATCH 61/88] add licenses to pipelines (#1201) add licenses --- 
.../dance_diffusion/pipeline_dance_diffusion.py | 1 - src/diffusers/pipelines/ddim/pipeline_ddim.py | 1 - src/diffusers/pipelines/ddpm/pipeline_ddpm.py | 1 - .../latent_diffusion/pipeline_latent_diffusion.py | 14 ++++++++++++++ .../pipeline_latent_diffusion_uncond.py | 14 ++++++++++++++ src/diffusers/pipelines/pndm/pipeline_pndm.py | 1 - .../score_sde_ve/pipeline_score_sde_ve.py | 15 ++++++++++++++- .../stable_diffusion/pipeline_cycle_diffusion.py | 14 ++++++++++++++ .../pipeline_flax_stable_diffusion.py | 14 ++++++++++++++ .../pipeline_onnx_stable_diffusion.py | 14 ++++++++++++++ .../pipeline_onnx_stable_diffusion_img2img.py | 14 ++++++++++++++ .../pipeline_onnx_stable_diffusion_inpaint.py | 14 ++++++++++++++ .../stable_diffusion/pipeline_stable_diffusion.py | 14 ++++++++++++++ .../pipeline_stable_diffusion_img2img.py | 14 ++++++++++++++ .../pipeline_stable_diffusion_inpaint.py | 14 ++++++++++++++ .../pipeline_stable_diffusion_inpaint_legacy.py | 14 ++++++++++++++ .../pipelines/stable_diffusion/safety_checker.py | 14 ++++++++++++++ .../stable_diffusion/safety_checker_flax.py | 14 ++++++++++++++ .../pipeline_stochastic_karras_ve.py | 15 ++++++++++++++- 19 files changed, 210 insertions(+), 6 deletions(-) diff --git a/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py b/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py index 86f01ec586..48d16889a0 100644 --- a/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +++ b/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py @@ -10,7 +10,6 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and - # limitations under the License. diff --git a/src/diffusers/pipelines/ddim/pipeline_ddim.py b/src/diffusers/pipelines/ddim/pipeline_ddim.py index 733a28c9f3..aab6e68613 100644 --- a/src/diffusers/pipelines/ddim/pipeline_ddim.py +++ b/src/diffusers/pipelines/ddim/pipeline_ddim.py @@ -10,7 +10,6 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and - # limitations under the License. import inspect diff --git a/src/diffusers/pipelines/ddpm/pipeline_ddpm.py b/src/diffusers/pipelines/ddpm/pipeline_ddpm.py index 37d12f2f5d..3665c68efe 100644 --- a/src/diffusers/pipelines/ddpm/pipeline_ddpm.py +++ b/src/diffusers/pipelines/ddpm/pipeline_ddpm.py @@ -10,7 +10,6 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and - # limitations under the License. diff --git a/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py b/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py index cfa3994913..feb5b00d74 100644 --- a/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +++ b/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py @@ -1,3 +1,17 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import inspect from typing import List, Optional, Tuple, Union diff --git a/src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py b/src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py index c8da6f193e..5345c4e562 100644 --- a/src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +++ b/src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py @@ -1,3 +1,17 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import inspect from typing import Optional, Tuple, Union diff --git a/src/diffusers/pipelines/pndm/pipeline_pndm.py b/src/diffusers/pipelines/pndm/pipeline_pndm.py index f360da09ac..ef7062dea1 100644 --- a/src/diffusers/pipelines/pndm/pipeline_pndm.py +++ b/src/diffusers/pipelines/pndm/pipeline_pndm.py @@ -10,7 +10,6 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and - # limitations under the License. diff --git a/src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py b/src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py index 7f63820eec..7eb6a5d3cb 100644 --- a/src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py +++ b/src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py @@ -1,4 +1,17 @@ -#!/usr/bin/env python3 +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Optional, Tuple, Union import torch diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py index 3d2ec7d55b..e6c8372c05 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py @@ -1,3 +1,17 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import inspect from typing import Callable, List, Optional, Union diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py index 73ec322c66..02943997d9 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py @@ -1,3 +1,17 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import warnings from functools import partial from typing import Dict, List, Optional, Union diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py index ecc0b31f34..3b4689086f 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py @@ -1,3 +1,17 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import inspect from typing import Callable, List, Optional, Union diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py index f85069b969..f167acf233 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py @@ -1,3 +1,17 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import inspect from typing import Callable, List, Optional, Union diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py index f763291c77..3aa1cc8299 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py @@ -1,3 +1,17 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import inspect from typing import Callable, List, Optional, Union diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 30be4156f9..23b4673563 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -1,3 +1,17 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import inspect from typing import Callable, List, Optional, Union diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index f6f38ab1d3..324878b5ae 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -1,3 +1,17 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + import inspect from typing import Callable, List, Optional, Union diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 83d27926dc..d762dde55b 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -1,3 +1,17 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import inspect from typing import Callable, List, Optional, Union diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py index a92e23bfc1..80d0879526 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py @@ -1,3 +1,17 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import inspect from typing import Callable, List, Optional, Union diff --git a/src/diffusers/pipelines/stable_diffusion/safety_checker.py b/src/diffusers/pipelines/stable_diffusion/safety_checker.py index 09d7a3bbf9..0477c983ea 100644 --- a/src/diffusers/pipelines/stable_diffusion/safety_checker.py +++ b/src/diffusers/pipelines/stable_diffusion/safety_checker.py @@ -1,3 +1,17 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import numpy as np import torch import torch.nn as nn diff --git a/src/diffusers/pipelines/stable_diffusion/safety_checker_flax.py b/src/diffusers/pipelines/stable_diffusion/safety_checker_flax.py index e4ea381a8c..e1f669d22b 100644 --- a/src/diffusers/pipelines/stable_diffusion/safety_checker_flax.py +++ b/src/diffusers/pipelines/stable_diffusion/safety_checker_flax.py @@ -1,3 +1,17 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Optional, Tuple import jax diff --git a/src/diffusers/pipelines/stochastic_karras_ve/pipeline_stochastic_karras_ve.py b/src/diffusers/pipelines/stochastic_karras_ve/pipeline_stochastic_karras_ve.py index 9e8864b4ca..739de8ebe6 100644 --- a/src/diffusers/pipelines/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +++ b/src/diffusers/pipelines/stochastic_karras_ve/pipeline_stochastic_karras_ve.py @@ -1,4 +1,17 @@ -#!/usr/bin/env python3 +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from typing import Optional, Tuple, Union import torch From 24895a1f494062d73028e31880c8848c6a674750 Mon Sep 17 00:00:00 2001 From: Anton Lozhkov Date: Wed, 9 Nov 2022 10:28:10 +0100 Subject: [PATCH 62/88] Fix cpu offloading (#1177) * Fix cpu offloading * get offloaded devices locally for SD pipelines --- src/diffusers/pipeline_utils.py | 2 - .../pipeline_stable_diffusion.py | 40 +++++++++++----- .../pipeline_stable_diffusion_img2img.py | 37 +++++++++++---- .../pipeline_stable_diffusion_inpaint.py | 47 +++++++++++++------ .../stable_diffusion/test_stable_diffusion.py | 10 ++-- .../test_stable_diffusion_img2img.py | 15 ++---- .../test_stable_diffusion_inpaint.py | 17 +++---- 7 files changed, 107 insertions(+), 61 deletions(-) diff --git a/src/diffusers/pipeline_utils.py b/src/diffusers/pipeline_utils.py index 4e7aeb8c91..5fa120fe2d 100644 --- a/src/diffusers/pipeline_utils.py +++ b/src/diffusers/pipeline_utils.py @@ -230,8 +230,6 @@ class DiffusionPipeline(ConfigMixin): for name in module_names.keys(): module = getattr(self, name) if isinstance(module, torch.nn.Module): - if module.device == torch.device("meta"): - return torch.device("cpu") return module.device return torch.device("cpu") diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 23b4673563..c64db52666 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -195,6 +195,24 @@ class StableDiffusionPipeline(DiffusionPipeline): if cpu_offloaded_model is not None: cpu_offload(cpu_offloaded_model, device) + @property + def _execution_device(self): + r""" + Returns the device on which the pipeline's models will be executed. After calling + `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module + hooks. + """ + if self.device != torch.device("meta") or not hasattr(self.unet, "_hf_hook"): + return self.device + for module in self.unet.modules(): + if ( + hasattr(module, "_hf_hook") + and hasattr(module._hf_hook, "execution_device") + and module._hf_hook.execution_device is not None + ): + return torch.device(module._hf_hook.execution_device) + return self.device + @torch.no_grad() def __call__( self, @@ -286,6 +304,8 @@ class StableDiffusionPipeline(DiffusionPipeline): f" {type(callback_steps)}." 
) + device = self._execution_device + # get prompt text embeddings text_inputs = self.tokenizer( prompt, @@ -302,7 +322,7 @@ class StableDiffusionPipeline(DiffusionPipeline): f" {self.tokenizer.model_max_length} tokens: {removed_text}" ) text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] - text_embeddings = self.text_encoder(text_input_ids.to(self.device))[0] + text_embeddings = self.text_encoder(text_input_ids.to(device))[0] # duplicate text embeddings for each generation per prompt, using mps friendly method bs_embed, seq_len, _ = text_embeddings.shape @@ -342,7 +362,7 @@ class StableDiffusionPipeline(DiffusionPipeline): truncation=True, return_tensors="pt", ) - uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0] + uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(device))[0] # duplicate unconditional embeddings for each generation per prompt, using mps friendly method seq_len = uncond_embeddings.shape[1] @@ -362,20 +382,18 @@ class StableDiffusionPipeline(DiffusionPipeline): latents_shape = (batch_size * num_images_per_prompt, self.unet.in_channels, height // 8, width // 8) latents_dtype = text_embeddings.dtype if latents is None: - if self.device.type == "mps": + if device.type == "mps": # randn does not work reproducibly on mps - latents = torch.randn(latents_shape, generator=generator, device="cpu", dtype=latents_dtype).to( - self.device - ) + latents = torch.randn(latents_shape, generator=generator, device="cpu", dtype=latents_dtype).to(device) else: - latents = torch.randn(latents_shape, generator=generator, device=self.device, dtype=latents_dtype) + latents = torch.randn(latents_shape, generator=generator, device=device, dtype=latents_dtype) else: if latents.shape != latents_shape: raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}") - latents = latents.to(self.device) + latents = latents.to(device) # set timesteps and move to the correct device - self.scheduler.set_timesteps(num_inference_steps, device=self.device) + self.scheduler.set_timesteps(num_inference_steps, device=device) timesteps_tensor = self.scheduler.timesteps # scale the initial noise by the standard deviation required by the scheduler @@ -424,9 +442,7 @@ class StableDiffusionPipeline(DiffusionPipeline): image = image.cpu().permute(0, 2, 3, 1).float().numpy() if self.safety_checker is not None: - safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to( - self.device - ) + safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(device) image, has_nsfw_concept = self.safety_checker( images=image, clip_input=safety_checker_input.pixel_values.to(text_embeddings.dtype) ) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 324878b5ae..f05819b0d9 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -183,6 +183,25 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): if cpu_offloaded_model is not None: cpu_offload(cpu_offloaded_model, device) + @property + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device + def _execution_device(self): + r""" + Returns the device on which the pipeline's models will be executed. 
After calling + `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module + hooks. + """ + if self.device != torch.device("meta") or not hasattr(self.unet, "_hf_hook"): + return self.device + for module in self.unet.modules(): + if ( + hasattr(module, "_hf_hook") + and hasattr(module._hf_hook, "execution_device") + and module._hf_hook.execution_device is not None + ): + return torch.device(module._hf_hook.execution_device) + return self.device + def enable_xformers_memory_efficient_attention(self): r""" Enable memory efficient attention as implemented in xformers. @@ -292,6 +311,8 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): f" {type(callback_steps)}." ) + device = self._execution_device + # set timesteps self.scheduler.set_timesteps(num_inference_steps) @@ -314,7 +335,7 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): f" {self.tokenizer.model_max_length} tokens: {removed_text}" ) text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] - text_embeddings = self.text_encoder(text_input_ids.to(self.device))[0] + text_embeddings = self.text_encoder(text_input_ids.to(device))[0] # duplicate text embeddings for each generation per prompt text_embeddings = text_embeddings.repeat_interleave(num_images_per_prompt, dim=0) @@ -348,7 +369,7 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): truncation=True, return_tensors="pt", ) - uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0] + uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(device))[0] # duplicate unconditional embeddings for each generation per prompt seq_len = uncond_embeddings.shape[1] @@ -362,7 +383,7 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): # encode the init image into latents and scale the latents latents_dtype = text_embeddings.dtype - init_image = init_image.to(device=self.device, dtype=latents_dtype) + init_image = init_image.to(device=device, dtype=latents_dtype) init_latent_dist = self.vae.encode(init_image).latent_dist init_latents = init_latent_dist.sample(generator=generator) init_latents = 0.18215 * init_latents @@ -393,10 +414,10 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): init_timestep = min(init_timestep, num_inference_steps) timesteps = self.scheduler.timesteps[-init_timestep] - timesteps = torch.tensor([timesteps] * batch_size * num_images_per_prompt, device=self.device) + timesteps = torch.tensor([timesteps] * batch_size * num_images_per_prompt, device=device) # add noise to latents using the timesteps - noise = torch.randn(init_latents.shape, generator=generator, device=self.device, dtype=latents_dtype) + noise = torch.randn(init_latents.shape, generator=generator, device=device, dtype=latents_dtype) init_latents = self.scheduler.add_noise(init_latents, noise, timesteps) # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature @@ -419,7 +440,7 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): # Some schedulers like PNDM have timesteps as arrays # It's more optimized to move all timesteps to correct device beforehand - timesteps = self.scheduler.timesteps[t_start:].to(self.device) + timesteps = self.scheduler.timesteps[t_start:].to(device) for i, t in enumerate(self.progress_bar(timesteps)): # expand the latents if we are doing classifier free guidance @@ -448,9 +469,7 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): image = image.cpu().permute(0, 2, 3, 1).numpy() if self.safety_checker 
is not None: - safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to( - self.device - ) + safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(device) image, has_nsfw_concept = self.safety_checker( images=image, clip_input=safety_checker_input.pixel_values.to(text_embeddings.dtype) ) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index d762dde55b..5ccdd07bb3 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -183,6 +183,25 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): if cpu_offloaded_model is not None: cpu_offload(cpu_offloaded_model, device) + @property + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device + def _execution_device(self): + r""" + Returns the device on which the pipeline's models will be executed. After calling + `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module + hooks. + """ + if self.device != torch.device("meta") or not hasattr(self.unet, "_hf_hook"): + return self.device + for module in self.unet.modules(): + if ( + hasattr(module, "_hf_hook") + and hasattr(module._hf_hook, "execution_device") + and module._hf_hook.execution_device is not None + ): + return torch.device(module._hf_hook.execution_device) + return self.device + def enable_xformers_memory_efficient_attention(self): r""" Enable memory efficient attention as implemented in xformers. @@ -303,6 +322,8 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): f" {type(callback_steps)}." 
) + device = self._execution_device + # get prompt text embeddings text_inputs = self.tokenizer( prompt, @@ -319,7 +340,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): f" {self.tokenizer.model_max_length} tokens: {removed_text}" ) text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] - text_embeddings = self.text_encoder(text_input_ids.to(self.device))[0] + text_embeddings = self.text_encoder(text_input_ids.to(device))[0] # duplicate text embeddings for each generation per prompt, using mps friendly method bs_embed, seq_len, _ = text_embeddings.shape @@ -359,7 +380,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): truncation=True, return_tensors="pt", ) - uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0] + uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(device))[0] # duplicate unconditional embeddings for each generation per prompt, using mps friendly method seq_len = uncond_embeddings.shape[1] @@ -379,17 +400,15 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): latents_shape = (batch_size * num_images_per_prompt, num_channels_latents, height // 8, width // 8) latents_dtype = text_embeddings.dtype if latents is None: - if self.device.type == "mps": + if device.type == "mps": # randn does not exist on mps - latents = torch.randn(latents_shape, generator=generator, device="cpu", dtype=latents_dtype).to( - self.device - ) + latents = torch.randn(latents_shape, generator=generator, device="cpu", dtype=latents_dtype).to(device) else: - latents = torch.randn(latents_shape, generator=generator, device=self.device, dtype=latents_dtype) + latents = torch.randn(latents_shape, generator=generator, device=device, dtype=latents_dtype) else: if latents.shape != latents_shape: raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}") - latents = latents.to(self.device) + latents = latents.to(device) # prepare mask and masked_image mask, masked_image = prepare_mask_and_masked_image(image, mask_image) @@ -398,9 +417,9 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): # we do that before converting to dtype to avoid breaking in case we're using cpu_offload # and half precision mask = torch.nn.functional.interpolate(mask, size=(height // 8, width // 8)) - mask = mask.to(device=self.device, dtype=text_embeddings.dtype) + mask = mask.to(device=device, dtype=text_embeddings.dtype) - masked_image = masked_image.to(device=self.device, dtype=text_embeddings.dtype) + masked_image = masked_image.to(device=device, dtype=text_embeddings.dtype) # encode the mask image into latents space so we can concatenate it to the latents masked_image_latents = self.vae.encode(masked_image).latent_dist.sample(generator=generator) @@ -416,7 +435,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): ) # aligning device to prevent device errors when concating it with the latent model input - masked_image_latents = masked_image_latents.to(device=self.device, dtype=text_embeddings.dtype) + masked_image_latents = masked_image_latents.to(device=device, dtype=text_embeddings.dtype) num_channels_mask = mask.shape[1] num_channels_masked_image = masked_image_latents.shape[1] @@ -431,7 +450,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): ) # set timesteps and move to the correct device - self.scheduler.set_timesteps(num_inference_steps, device=self.device) + self.scheduler.set_timesteps(num_inference_steps, device=device) timesteps_tensor = self.scheduler.timesteps # scale the 
initial noise by the standard deviation required by the scheduler @@ -484,9 +503,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): image = image.cpu().permute(0, 2, 3, 1).float().numpy() if self.safety_checker is not None: - safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to( - self.device - ) + safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(device) image, has_nsfw_concept = self.safety_checker( images=image, clip_input=safety_checker_input.pixel_values.to(text_embeddings.dtype) ) diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py index a83299eaf9..252b02806a 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py @@ -839,20 +839,22 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase): assert 2 * low_cpu_mem_usage_time < normal_load_time - @unittest.skipIf(torch_device == "cpu", "This test is supposed to run on GPU") def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self): torch.cuda.empty_cache() torch.cuda.reset_max_memory_allocated() + torch.cuda.reset_peak_memory_stats() pipeline_id = "CompVis/stable-diffusion-v1-4" prompt = "Andromeda galaxy in a bottle" pipeline = StableDiffusionPipeline.from_pretrained(pipeline_id, revision="fp16", torch_dtype=torch.float16) + pipeline = pipeline.to(torch_device) pipeline.enable_attention_slicing(1) pipeline.enable_sequential_cpu_offload() - _ = pipeline(prompt, num_inference_steps=5) + generator = torch.Generator(device=torch_device).manual_seed(0) + _ = pipeline(prompt, generator=generator, num_inference_steps=5) mem_bytes = torch.cuda.max_memory_allocated() - # make sure that less than 1.5 GB is allocated - assert mem_bytes < 1.5 * 10**9 + # make sure that less than 2.8 GB is allocated + assert mem_bytes < 2.8 * 10**9 diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py index 2d29e1b806..6d5c6feab5 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py @@ -603,25 +603,18 @@ class StableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase): def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self): torch.cuda.empty_cache() torch.cuda.reset_max_memory_allocated() + torch.cuda.reset_peak_memory_stats() init_image = load_image( "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/img2img/sketch-mountains-input.jpg" ) - expected_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/img2img/fantasy_landscape_k_lms.png" - ) init_image = init_image.resize((768, 512)) - expected_image = np.array(expected_image, dtype=np.float32) / 255.0 model_id = "CompVis/stable-diffusion-v1-4" lms = LMSDiscreteScheduler.from_config(model_id, subfolder="scheduler") pipe = StableDiffusionImg2ImgPipeline.from_pretrained( - model_id, - scheduler=lms, - safety_checker=None, - device_map="auto", + model_id, scheduler=lms, safety_checker=None, device_map="auto", revision="fp16", torch_dtype=torch.float16 ) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) @@ -642,5 +635,5 @@ class StableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase): ) mem_bytes = 
torch.cuda.max_memory_allocated() - # make sure that less than 1.5 GB is allocated - assert mem_bytes < 1.5 * 10**9 + # make sure that less than 2.2 GB is allocated + assert mem_bytes < 2.2 * 10**9 diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py index e8dcb43163..5fcdd71dd6 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py @@ -384,6 +384,7 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self): torch.cuda.empty_cache() torch.cuda.reset_max_memory_allocated() + torch.cuda.reset_peak_memory_stats() init_image = load_image( "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" @@ -393,16 +394,16 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" ) - expected_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/in_paint/yellow_cat_sitting_on_a_park_bench_pndm.png" - ) - expected_image = np.array(expected_image, dtype=np.float32) / 255.0 model_id = "runwayml/stable-diffusion-inpainting" pndm = PNDMScheduler.from_config(model_id, subfolder="scheduler") pipe = StableDiffusionInpaintPipeline.from_pretrained( - model_id, safety_checker=None, scheduler=pndm, device_map="auto" + model_id, + safety_checker=None, + scheduler=pndm, + device_map="auto", + revision="fp16", + torch_dtype=torch.float16, ) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) @@ -422,5 +423,5 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase): ) mem_bytes = torch.cuda.max_memory_allocated() - # make sure that less than 1.5 GB is allocated - assert mem_bytes < 1.5 * 10**9 + # make sure that less than 2.2 GB is allocated + assert mem_bytes < 2.2 * 10**9 From 6cf72a9b1ee3125c5ccefcf031401ef8d49d8fe8 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 9 Nov 2022 11:22:12 +0100 Subject: [PATCH 63/88] Fix slow tests (#1210) * fix tests * Fix more * more --- .../pipeline_cycle_diffusion.py | 8 +++-- src/diffusers/schedulers/scheduling_ddim.py | 2 +- .../scheduling_euler_ancestral_discrete.py | 2 +- .../schedulers/scheduling_euler_discrete.py | 2 +- .../stable_diffusion/test_cycle_diffusion.py | 8 +++-- .../stable_diffusion/test_stable_diffusion.py | 4 +-- tests/test_pipelines.py | 7 ++-- tests/test_scheduler.py | 33 ++++++++++++------- 8 files changed, 40 insertions(+), 26 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py index e6c8372c05..631ceda813 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py @@ -43,7 +43,7 @@ def preprocess(image): return 2.0 * image - 1.0 -def posterior_sample(scheduler, latents, timestep, clean_latents, eta): +def posterior_sample(scheduler, latents, timestep, clean_latents, generator, eta): # 1. 
get previous step value (=t-1) prev_timestep = timestep - scheduler.config.num_train_timesteps // scheduler.num_inference_steps @@ -62,7 +62,9 @@ def posterior_sample(scheduler, latents, timestep, clean_latents, eta): # direction pointing to x_t e_t = (latents - alpha_prod_t ** (0.5) * clean_latents) / (1 - alpha_prod_t) ** (0.5) dir_xt = (1.0 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * e_t - noise = std_dev_t * torch.randn(clean_latents.shape, dtype=clean_latents.dtype, device=clean_latents.device) + noise = std_dev_t * torch.randn( + clean_latents.shape, dtype=clean_latents.dtype, device=clean_latents.device, generator=generator + ) prev_latents = alpha_prod_t_prev ** (0.5) * clean_latents + dir_xt + noise return prev_latents @@ -499,7 +501,7 @@ class CycleDiffusionPipeline(DiffusionPipeline): # Sample source_latents from the posterior distribution. prev_source_latents = posterior_sample( - self.scheduler, source_latents, t, clean_latents, **extra_step_kwargs + self.scheduler, source_latents, t, clean_latents, generator=generator, **extra_step_kwargs ) # Compute noise. noise = compute_noise( diff --git a/src/diffusers/schedulers/scheduling_ddim.py b/src/diffusers/schedulers/scheduling_ddim.py index 1acb81764d..75cef635d0 100644 --- a/src/diffusers/schedulers/scheduling_ddim.py +++ b/src/diffusers/schedulers/scheduling_ddim.py @@ -288,7 +288,7 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin): if eta > 0: # randn_like does not support generator https://github.com/pytorch/pytorch/issues/27072 - device = model_output.device if torch.is_tensor(model_output) else torch.device("cpu") + device = model_output.device if variance_noise is not None and generator is not None: raise ValueError( "Cannot pass both generator and variance_noise. Please make sure that either `generator` or" diff --git a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py index 33505c81c0..621b5c17c0 100644 --- a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py @@ -221,7 +221,7 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin): prev_sample = sample + derivative * dt - device = model_output.device if torch.is_tensor(model_output) else torch.device("cpu") + device = model_output.device if device.type == "mps": # randn does not work reproducibly on mps noise = torch.randn(model_output.shape, dtype=model_output.dtype, device="cpu", generator=generator).to( diff --git a/src/diffusers/schedulers/scheduling_euler_discrete.py b/src/diffusers/schedulers/scheduling_euler_discrete.py index 9f707c27a1..2f9e938474 100644 --- a/src/diffusers/schedulers/scheduling_euler_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_discrete.py @@ -218,7 +218,7 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin): gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0.0 - device = model_output.device if torch.is_tensor(model_output) else torch.device("cpu") + device = model_output.device if device.type == "mps": # randn does not work reproducibly on mps noise = torch.randn(model_output.shape, dtype=model_output.dtype, device="cpu", generator=generator).to( diff --git a/tests/pipelines/stable_diffusion/test_cycle_diffusion.py b/tests/pipelines/stable_diffusion/test_cycle_diffusion.py index 0bddd63807..de918c7e5c 100644 --- a/tests/pipelines/stable_diffusion/test_cycle_diffusion.py +++ 
b/tests/pipelines/stable_diffusion/test_cycle_diffusion.py @@ -293,7 +293,7 @@ class CycleDiffusionPipelineIntegrationTests(unittest.TestCase): source_prompt = "A black colored car" prompt = "A blue colored car" - torch.manual_seed(0) + generator = torch.Generator(device=torch_device).manual_seed(0) output = pipe( prompt=prompt, source_prompt=source_prompt, @@ -303,12 +303,13 @@ class CycleDiffusionPipelineIntegrationTests(unittest.TestCase): strength=0.85, guidance_scale=3, source_guidance_scale=1, + generator=generator, output_type="np", ) image = output.images # the values aren't exactly equal, but the images look the same visually - assert np.abs(image - expected_image).max() < 1e-2 + assert np.abs(image - expected_image).max() < 5e-1 def test_cycle_diffusion_pipeline(self): init_image = load_image( @@ -331,7 +332,7 @@ class CycleDiffusionPipelineIntegrationTests(unittest.TestCase): source_prompt = "A black colored car" prompt = "A blue colored car" - torch.manual_seed(0) + generator = torch.Generator(device=torch_device).manual_seed(0) output = pipe( prompt=prompt, source_prompt=source_prompt, @@ -341,6 +342,7 @@ class CycleDiffusionPipelineIntegrationTests(unittest.TestCase): strength=0.85, guidance_scale=3, source_guidance_scale=1, + generator=generator, output_type="np", ) image = output.images diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py index 252b02806a..6e1071124c 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py @@ -755,7 +755,7 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase): def test_stable_diffusion_text2img_pipeline_default(self): expected_image = load_numpy( - "https://huggingface.co/datasets/lewington/expected-images/resolve/main/astronaut_riding_a_horse.npy" + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/text2img/astronaut_riding_a_horse.npy" ) model_id = "CompVis/stable-diffusion-v1-4" @@ -771,7 +771,7 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase): image = output.images[0] assert image.shape == (512, 512, 3) - assert np.abs(expected_image - image).max() < 1e-3 + assert np.abs(expected_image - image).max() < 5e-3 def test_stable_diffusion_text2img_intermediate_state(self): number_of_steps = 0 diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index db3e804e67..753c821dd3 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -442,7 +442,8 @@ class PipelineSlowTests(unittest.TestCase): def test_output_format(self): model_path = "google/ddpm-cifar10-32" - pipe = DDIMPipeline.from_pretrained(model_path) + scheduler = DDIMScheduler.from_config(model_path) + pipe = DDIMPipeline.from_pretrained(model_path, scheduler=scheduler) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) @@ -451,13 +452,13 @@ class PipelineSlowTests(unittest.TestCase): assert images.shape == (1, 32, 32, 3) assert isinstance(images, np.ndarray) - images = pipe(generator=generator, output_type="pil").images + images = pipe(generator=generator, output_type="pil", num_inference_steps=4).images assert isinstance(images, list) assert len(images) == 1 assert isinstance(images[0], PIL.Image.Image) # use PIL by default - images = pipe(generator=generator).images + images = pipe(generator=generator, num_inference_steps=4).images assert isinstance(images, list) assert isinstance(images[0], PIL.Image.Image) diff --git 
a/tests/test_scheduler.py b/tests/test_scheduler.py index 234e1185b4..ab52171511 100755 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -1281,10 +1281,11 @@ class EulerDiscreteSchedulerTest(SchedulerCommonTest): scheduler.set_timesteps(self.num_inference_steps) - generator = torch.Generator().manual_seed(0) + generator = torch.Generator(torch_device).manual_seed(0) model = self.dummy_model() sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) for i, t in enumerate(scheduler.timesteps): sample = scheduler.scale_model_input(sample, t) @@ -1296,7 +1297,6 @@ class EulerDiscreteSchedulerTest(SchedulerCommonTest): result_sum = torch.sum(torch.abs(sample)) result_mean = torch.mean(torch.abs(sample)) - print(result_sum, result_mean) assert abs(result_sum.item() - 10.0807) < 1e-2 assert abs(result_mean.item() - 0.0131) < 1e-3 @@ -1308,7 +1308,7 @@ class EulerDiscreteSchedulerTest(SchedulerCommonTest): scheduler.set_timesteps(self.num_inference_steps, device=torch_device) - generator = torch.Generator().manual_seed(0) + generator = torch.Generator(torch_device).manual_seed(0) model = self.dummy_model() sample = self.dummy_sample_deter * scheduler.init_noise_sigma @@ -1324,7 +1324,6 @@ class EulerDiscreteSchedulerTest(SchedulerCommonTest): result_sum = torch.sum(torch.abs(sample)) result_mean = torch.mean(torch.abs(sample)) - print(result_sum, result_mean) assert abs(result_sum.item() - 10.0807) < 1e-2 assert abs(result_mean.item() - 0.0131) < 1e-3 @@ -1365,10 +1364,11 @@ class EulerAncestralDiscreteSchedulerTest(SchedulerCommonTest): scheduler.set_timesteps(self.num_inference_steps) - generator = torch.Generator().manual_seed(0) + generator = torch.Generator(device=torch_device).manual_seed(0) model = self.dummy_model() sample = self.dummy_sample_deter * scheduler.init_noise_sigma + sample = sample.to(torch_device) for i, t in enumerate(scheduler.timesteps): sample = scheduler.scale_model_input(sample, t) @@ -1380,9 +1380,14 @@ class EulerAncestralDiscreteSchedulerTest(SchedulerCommonTest): result_sum = torch.sum(torch.abs(sample)) result_mean = torch.mean(torch.abs(sample)) - print(result_sum, result_mean) - assert abs(result_sum.item() - 152.3192) < 1e-2 - assert abs(result_mean.item() - 0.1983) < 1e-3 + + if str(torch_device).startswith("cpu"): + assert abs(result_sum.item() - 152.3192) < 1e-2 + assert abs(result_mean.item() - 0.1983) < 1e-3 + else: + # CUDA + assert abs(result_sum.item() - 144.8084) < 1e-2 + assert abs(result_mean.item() - 0.18855) < 1e-3 def test_full_loop_device(self): scheduler_class = self.scheduler_classes[0] @@ -1391,7 +1396,7 @@ class EulerAncestralDiscreteSchedulerTest(SchedulerCommonTest): scheduler.set_timesteps(self.num_inference_steps, device=torch_device) - generator = torch.Generator().manual_seed(0) + generator = torch.Generator(device=torch_device).manual_seed(0) model = self.dummy_model() sample = self.dummy_sample_deter * scheduler.init_noise_sigma @@ -1407,14 +1412,18 @@ class EulerAncestralDiscreteSchedulerTest(SchedulerCommonTest): result_sum = torch.sum(torch.abs(sample)) result_mean = torch.mean(torch.abs(sample)) - print(result_sum, result_mean) - if not str(torch_device).startswith("mps"): + + if str(torch_device).startswith("cpu"): # The following sum varies between 148 and 156 on mps. Why? 
assert abs(result_sum.item() - 152.3192) < 1e-2 assert abs(result_mean.item() - 0.1983) < 1e-3 - else: + elif str(torch_device).startswith("mps"): # Larger tolerance on mps assert abs(result_mean.item() - 0.1983) < 1e-2 + else: + # CUDA + assert abs(result_sum.item() - 144.8084) < 1e-2 + assert abs(result_mean.item() - 0.18855) < 1e-3 class IPNDMSchedulerTest(SchedulerCommonTest): From 663f0c19632a245dc09cff5b5a3ea088ad33e1f2 Mon Sep 17 00:00:00 2001 From: camenduru <54370274+camenduru@users.noreply.github.com> Date: Wed, 9 Nov 2022 13:34:15 +0300 Subject: [PATCH 64/88] =?UTF-8?q?[Flax]=20fix=20extra=20copy=20pasta=20?= =?UTF-8?q?=F0=9F=8D=9D=20(#1187)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/dreambooth/train_dreambooth_flax.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_flax.py b/examples/dreambooth/train_dreambooth_flax.py index 078a66e4ac..6606af4f17 100644 --- a/examples/dreambooth/train_dreambooth_flax.py +++ b/examples/dreambooth/train_dreambooth_flax.py @@ -327,22 +327,6 @@ def main(): if args.seed is not None: set_seed(args.seed) - if jax.process_index() == 0: - if args.push_to_hub: - if args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) - else: - repo_name = args.hub_model_id - repo = Repository(args.output_dir, clone_from=repo_name) - - with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: - if "step_*" not in gitignore: - gitignore.write("step_*\n") - if "epoch_*" not in gitignore: - gitignore.write("epoch_*\n") - elif args.output_dir is not None: - os.makedirs(args.output_dir, exist_ok=True) - rng = jax.random.PRNGKey(args.seed) if args.with_prior_preservation: From cd77a0365196a09a60bd80a4e7a85fbdb03c531f Mon Sep 17 00:00:00 2001 From: Suraj Patil Date: Wed, 9 Nov 2022 11:46:12 +0100 Subject: [PATCH 65/88] [CLIPGuidedStableDiffusion] support DDIM scheduler (#1190) add ddim in clip guided --- .../community/clip_guided_stable_diffusion.py | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/examples/community/clip_guided_stable_diffusion.py b/examples/community/clip_guided_stable_diffusion.py index 2c86e9130f..14d9ee6322 100644 --- a/examples/community/clip_guided_stable_diffusion.py +++ b/examples/community/clip_guided_stable_diffusion.py @@ -5,7 +5,14 @@ import torch from torch import nn from torch.nn import functional as F -from diffusers import AutoencoderKL, DiffusionPipeline, LMSDiscreteScheduler, PNDMScheduler, UNet2DConditionModel +from diffusers import ( + AutoencoderKL, + DDIMScheduler, + DiffusionPipeline, + LMSDiscreteScheduler, + PNDMScheduler, + UNet2DConditionModel, +) from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput from torchvision import transforms from transformers import CLIPFeatureExtractor, CLIPModel, CLIPTextModel, CLIPTokenizer @@ -56,7 +63,7 @@ class CLIPGuidedStableDiffusion(DiffusionPipeline): clip_model: CLIPModel, tokenizer: CLIPTokenizer, unet: UNet2DConditionModel, - scheduler: Union[PNDMScheduler, LMSDiscreteScheduler], + scheduler: Union[PNDMScheduler, LMSDiscreteScheduler, DDIMScheduler], feature_extractor: CLIPFeatureExtractor, ): super().__init__() @@ -123,7 +130,7 @@ class CLIPGuidedStableDiffusion(DiffusionPipeline): # predict the noise residual noise_pred = self.unet(latent_model_input, timestep, encoder_hidden_states=text_embeddings).sample - if 
isinstance(self.scheduler, PNDMScheduler): + if isinstance(self.scheduler, (PNDMScheduler, DDIMScheduler)): alpha_prod_t = self.scheduler.alphas_cumprod[timestep] beta_prod_t = 1 - alpha_prod_t # compute predicted original sample from predicted noise also called @@ -176,6 +183,7 @@ class CLIPGuidedStableDiffusion(DiffusionPipeline): num_inference_steps: Optional[int] = 50, guidance_scale: Optional[float] = 7.5, num_images_per_prompt: Optional[int] = 1, + eta: float = 0.0, clip_guidance_scale: Optional[float] = 100, clip_prompt: Optional[Union[str, List[str]]] = None, num_cutouts: Optional[int] = 4, @@ -275,6 +283,20 @@ class CLIPGuidedStableDiffusion(DiffusionPipeline): # scale the initial noise by the standard deviation required by the scheduler latents = latents * self.scheduler.init_noise_sigma + # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature + # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. + # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 + # and should be between [0, 1] + accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) + extra_step_kwargs = {} + if accepts_eta: + extra_step_kwargs["eta"] = eta + + # check if the scheduler accepts generator + accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys()) + if accepts_generator: + extra_step_kwargs["generator"] = generator + for i, t in enumerate(self.progress_bar(timesteps_tensor)): # expand the latents if we are doing classifier free guidance latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents @@ -306,7 +328,7 @@ class CLIPGuidedStableDiffusion(DiffusionPipeline): ) # compute the previous noisy sample x_t -> x_t-1 - latents = self.scheduler.step(noise_pred, t, latents).prev_sample + latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample # scale and decode the image latents with vae latents = 1 / 0.18215 * latents From 3f7edc5f724862cce8d43bca1b531d962e963a3a Mon Sep 17 00:00:00 2001 From: "Duong A. 
Nguyen" <38061659+duongna21@users.noreply.github.com> Date: Wed, 9 Nov 2022 18:08:30 +0700 Subject: [PATCH 66/88] Fix layer names convert LDM script (#1206) fix script convert LDM --- ...rt_ldm_original_checkpoint_to_diffusers.py | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/scripts/convert_ldm_original_checkpoint_to_diffusers.py b/scripts/convert_ldm_original_checkpoint_to_diffusers.py index 5286579225..f547e96f4e 100644 --- a/scripts/convert_ldm_original_checkpoint_to_diffusers.py +++ b/scripts/convert_ldm_original_checkpoint_to_diffusers.py @@ -112,9 +112,9 @@ def assign_to_checkpoint( continue # Global renaming happens here - new_path = new_path.replace("middle_block.0", "mid.resnets.0") - new_path = new_path.replace("middle_block.1", "mid.attentions.0") - new_path = new_path.replace("middle_block.2", "mid.resnets.1") + new_path = new_path.replace("middle_block.0", "mid_block.resnets.0") + new_path = new_path.replace("middle_block.1", "mid_block.attentions.0") + new_path = new_path.replace("middle_block.2", "mid_block.resnets.1") if additional_replacements is not None: for replacement in additional_replacements: @@ -175,15 +175,16 @@ def convert_ldm_checkpoint(checkpoint, config): attentions = [key for key in input_blocks[i] if f"input_blocks.{i}.1" in key] if f"input_blocks.{i}.0.op.weight" in checkpoint: - new_checkpoint[f"downsample_blocks.{block_id}.downsamplers.0.conv.weight"] = checkpoint[ + new_checkpoint[f"down_blocks.{block_id}.downsamplers.0.conv.weight"] = checkpoint[ f"input_blocks.{i}.0.op.weight" ] - new_checkpoint[f"downsample_blocks.{block_id}.downsamplers.0.conv.bias"] = checkpoint[ + new_checkpoint[f"down_blocks.{block_id}.downsamplers.0.conv.bias"] = checkpoint[ f"input_blocks.{i}.0.op.bias" ] + continue paths = renew_resnet_paths(resnets) - meta_path = {"old": f"input_blocks.{i}.0", "new": f"downsample_blocks.{block_id}.resnets.{layer_in_block_id}"} + meta_path = {"old": f"input_blocks.{i}.0", "new": f"down_blocks.{block_id}.resnets.{layer_in_block_id}"} resnet_op = {"old": "resnets.2.op", "new": "downsamplers.0.op"} assign_to_checkpoint( paths, new_checkpoint, checkpoint, additional_replacements=[meta_path, resnet_op], config=config @@ -193,18 +194,18 @@ def convert_ldm_checkpoint(checkpoint, config): paths = renew_attention_paths(attentions) meta_path = { "old": f"input_blocks.{i}.1", - "new": f"downsample_blocks.{block_id}.attentions.{layer_in_block_id}", + "new": f"down_blocks.{block_id}.attentions.{layer_in_block_id}", } to_split = { f"input_blocks.{i}.1.qkv.bias": { - "key": f"downsample_blocks.{block_id}.attentions.{layer_in_block_id}.key.bias", - "query": f"downsample_blocks.{block_id}.attentions.{layer_in_block_id}.query.bias", - "value": f"downsample_blocks.{block_id}.attentions.{layer_in_block_id}.value.bias", + "key": f"down_blocks.{block_id}.attentions.{layer_in_block_id}.key.bias", + "query": f"down_blocks.{block_id}.attentions.{layer_in_block_id}.query.bias", + "value": f"down_blocks.{block_id}.attentions.{layer_in_block_id}.value.bias", }, f"input_blocks.{i}.1.qkv.weight": { - "key": f"downsample_blocks.{block_id}.attentions.{layer_in_block_id}.key.weight", - "query": f"downsample_blocks.{block_id}.attentions.{layer_in_block_id}.query.weight", - "value": f"downsample_blocks.{block_id}.attentions.{layer_in_block_id}.value.weight", + "key": f"down_blocks.{block_id}.attentions.{layer_in_block_id}.key.weight", + "query": f"down_blocks.{block_id}.attentions.{layer_in_block_id}.query.weight", + "value": 
f"down_blocks.{block_id}.attentions.{layer_in_block_id}.value.weight", }, } assign_to_checkpoint( From b93fe085459ae0327e9177a6f871172f1cebceab Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 9 Nov 2022 12:28:56 +0100 Subject: [PATCH 67/88] [Loading] Make sure loading edge cases work (#1192) * [Loading] Make edge cases work * up * finish * up --- src/diffusers/pipeline_flax_utils.py | 14 +++++---- src/diffusers/pipeline_utils.py | 15 ++++++---- tests/test_pipelines.py | 44 ++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 12 deletions(-) diff --git a/src/diffusers/pipeline_flax_utils.py b/src/diffusers/pipeline_flax_utils.py index e63009b49c..3963c80cd1 100644 --- a/src/diffusers/pipeline_flax_utils.py +++ b/src/diffusers/pipeline_flax_utils.py @@ -55,6 +55,8 @@ LOADABLE_CLASSES = { "PreTrainedTokenizerFast": ["save_pretrained", "from_pretrained"], "FlaxPreTrainedModel": ["save_pretrained", "from_pretrained"], "FeatureExtractionMixin": ["save_pretrained", "from_pretrained"], + "ProcessorMixin": ["save_pretrained", "from_pretrained"], + "ImageProcessingMixin": ["save_pretrained", "from_pretrained"], }, } @@ -172,8 +174,8 @@ class FlaxDiffusionPipeline(ConfigMixin): for library_name, library_classes in LOADABLE_CLASSES.items(): library = importlib.import_module(library_name) for base_class, save_load_methods in library_classes.items(): - class_candidate = getattr(library, base_class) - if issubclass(model_cls, class_candidate): + class_candidate = getattr(library, base_class, None) + if class_candidate is not None and issubclass(model_cls, class_candidate): # if we found a suitable base class in LOADABLE_CLASSES then grab its save method save_method_name = save_load_methods[0] break @@ -387,11 +389,11 @@ class FlaxDiffusionPipeline(ConfigMixin): library = importlib.import_module(library_name) class_obj = getattr(library, class_name) importable_classes = LOADABLE_CLASSES[library_name] - class_candidates = {c: getattr(library, c) for c in importable_classes.keys()} + class_candidates = {c: getattr(library, c, None) for c in importable_classes.keys()} expected_class_obj = None for class_name, class_candidate in class_candidates.items(): - if issubclass(class_obj, class_candidate): + if class_candidate is not None and issubclass(class_obj, class_candidate): expected_class_obj = class_candidate if not issubclass(passed_class_obj[name].__class__, expected_class_obj): @@ -425,12 +427,12 @@ class FlaxDiffusionPipeline(ConfigMixin): class_obj = import_flax_or_no_model(library, class_name) importable_classes = LOADABLE_CLASSES[library_name] - class_candidates = {c: getattr(library, c) for c in importable_classes.keys()} + class_candidates = {c: getattr(library, c, None) for c in importable_classes.keys()} if loaded_sub_model is None and sub_model_should_be_defined: load_method_name = None for class_name, class_candidate in class_candidates.items(): - if issubclass(class_obj, class_candidate): + if class_candidate is not None and issubclass(class_obj, class_candidate): load_method_name = importable_classes[class_name][1] load_method = getattr(class_obj, load_method_name) diff --git a/src/diffusers/pipeline_utils.py b/src/diffusers/pipeline_utils.py index 5fa120fe2d..a194f3eb34 100644 --- a/src/diffusers/pipeline_utils.py +++ b/src/diffusers/pipeline_utils.py @@ -74,6 +74,8 @@ LOADABLE_CLASSES = { "PreTrainedTokenizerFast": ["save_pretrained", "from_pretrained"], "PreTrainedModel": ["save_pretrained", "from_pretrained"], "FeatureExtractionMixin": ["save_pretrained", 
"from_pretrained"], + "ProcessorMixin": ["save_pretrained", "from_pretrained"], + "ImageProcessingMixin": ["save_pretrained", "from_pretrained"], }, } @@ -190,8 +192,8 @@ class DiffusionPipeline(ConfigMixin): for library_name, library_classes in LOADABLE_CLASSES.items(): library = importlib.import_module(library_name) for base_class, save_load_methods in library_classes.items(): - class_candidate = getattr(library, base_class) - if issubclass(model_cls, class_candidate): + class_candidate = getattr(library, base_class, None) + if class_candidate is not None and issubclass(model_cls, class_candidate): # if we found a suitable base class in LOADABLE_CLASSES then grab its save method save_method_name = save_load_methods[0] break @@ -543,11 +545,11 @@ class DiffusionPipeline(ConfigMixin): library = importlib.import_module(library_name) class_obj = getattr(library, class_name) importable_classes = LOADABLE_CLASSES[library_name] - class_candidates = {c: getattr(library, c) for c in importable_classes.keys()} + class_candidates = {c: getattr(library, c, None) for c in importable_classes.keys()} expected_class_obj = None for class_name, class_candidate in class_candidates.items(): - if issubclass(class_obj, class_candidate): + if class_candidate is not None and issubclass(class_obj, class_candidate): expected_class_obj = class_candidate if not issubclass(passed_class_obj[name].__class__, expected_class_obj): @@ -577,14 +579,15 @@ class DiffusionPipeline(ConfigMixin): else: # else we just import it from the library. library = importlib.import_module(library_name) + class_obj = getattr(library, class_name) importable_classes = LOADABLE_CLASSES[library_name] - class_candidates = {c: getattr(library, c) for c in importable_classes.keys()} + class_candidates = {c: getattr(library, c, None) for c in importable_classes.keys()} if loaded_sub_model is None and sub_model_should_be_defined: load_method_name = None for class_name, class_candidate in class_candidates.items(): - if issubclass(class_obj, class_candidate): + if class_candidate is not None and issubclass(class_obj, class_candidate): load_method_name = importable_classes[class_name][1] if load_method_name is None: diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index 753c821dd3..da5bd3c244 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -88,6 +88,50 @@ class DownloadTests(unittest.TestCase): # https://huggingface.co/hf-internal-testing/tiny-stable-diffusion-pipe/blob/main/unet/diffusion_flax_model.msgpack assert not any(f.endswith(".msgpack") for f in files) + def test_download_no_safety_checker(self): + prompt = "hello" + pipe = StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None + ) + generator = torch.Generator(device=torch_device).manual_seed(0) + out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + + pipe_2 = StableDiffusionPipeline.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch") + generator_2 = torch.Generator(device=torch_device).manual_seed(0) + out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator_2, output_type="numpy").images + + assert np.max(np.abs(out - out_2)) < 1e-3 + + def test_load_no_safety_checker_explicit_locally(self): + prompt = "hello" + pipe = StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None + ) + generator = torch.Generator(device=torch_device).manual_seed(0) + out = pipe(prompt, 
num_inference_steps=2, generator=generator, output_type="numpy").images + + with tempfile.TemporaryDirectory() as tmpdirname: + pipe.save_pretrained(tmpdirname) + pipe_2 = StableDiffusionPipeline.from_pretrained(tmpdirname, safety_checker=None) + generator_2 = torch.Generator(device=torch_device).manual_seed(0) + out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator_2, output_type="numpy").images + + assert np.max(np.abs(out - out_2)) < 1e-3 + + def test_load_no_safety_checker_default_locally(self): + prompt = "hello" + pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch") + generator = torch.Generator(device=torch_device).manual_seed(0) + out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images + + with tempfile.TemporaryDirectory() as tmpdirname: + pipe.save_pretrained(tmpdirname) + pipe_2 = StableDiffusionPipeline.from_pretrained(tmpdirname) + generator_2 = torch.Generator(device=torch_device).manual_seed(0) + out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator_2, output_type="numpy").images + + assert np.max(np.abs(out - out_2)) < 1e-3 + class CustomPipelineTests(unittest.TestCase): def test_load_custom_pipeline(self): From 5a59f9b7179def4919dd852cd11b822dae76375b Mon Sep 17 00:00:00 2001 From: "Duong A. Nguyen" <38061659+duongna21@users.noreply.github.com> Date: Wed, 9 Nov 2022 19:42:16 +0700 Subject: [PATCH 68/88] Add LDM Super Resolution pipeline (#1116) * Add ldm super resolution pipeline * style * fix copies * style * fix doc * Update src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py Co-authored-by: Patrick von Platen * Update src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py Co-authored-by: Patrick von Platen * Update src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py Co-authored-by: Suraj Patil * Update src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py Co-authored-by: Suraj Patil * add doc * address comments * address comments * fix doc * minor * add tests * add tests * load text encoder from subfolder * fix test * fix test * style * style * handle mps latents * unfix typo * unfix typo * Update tests/pipelines/latent_diffusion/test_latent_diffusion_superresolution.py Co-authored-by: Pedro Cuenca * fix set_timesteps mps * fix set_timesteps mps * Update src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py Co-authored-by: Suraj Patil * Update src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py Co-authored-by: Suraj Patil * Update src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py Co-authored-by: Suraj Patil * Update src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py Co-authored-by: Suraj Patil * style * test 64x64 instead of 256x256 Co-authored-by: Patrick von Platen Co-authored-by: Suraj Patil Co-authored-by: Pedro Cuenca --- .../source/api/pipelines/latent_diffusion.mdx | 5 + src/diffusers/__init__.py | 1 + src/diffusers/pipelines/__init__.py | 1 + .../pipelines/latent_diffusion/__init__.py | 1 + ...peline_latent_diffusion_superresolution.py | 169 ++++++++++++++++++ src/diffusers/utils/dummy_pt_objects.py | 15 ++ .../test_latent_diffusion_superresolution.py | 118 ++++++++++++ 7 files changed, 310 insertions(+) create mode 100644 
src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py create mode 100644 tests/pipelines/latent_diffusion/test_latent_diffusion_superresolution.py diff --git a/docs/source/api/pipelines/latent_diffusion.mdx b/docs/source/api/pipelines/latent_diffusion.mdx index 6d63cd5cbe..4ade13e67b 100644 --- a/docs/source/api/pipelines/latent_diffusion.mdx +++ b/docs/source/api/pipelines/latent_diffusion.mdx @@ -33,6 +33,7 @@ The original codebase can be found [here](https://github.com/CompVis/latent-diff | Pipeline | Tasks | Colab |---|---|:---:| | [pipeline_latent_diffusion.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py) | *Text-to-Image Generation* | - | +| [pipeline_latent_diffusion_superresolution.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py) | *Super Resolution* | - | ## Examples: @@ -40,3 +41,7 @@ The original codebase can be found [here](https://github.com/CompVis/latent-diff ## LDMTextToImagePipeline [[autodoc]] pipelines.latent_diffusion.pipeline_latent_diffusion.LDMTextToImagePipeline - __call__ + +## LDMSuperResolutionPipeline +[[autodoc]] pipelines.latent_diffusion.pipeline_latent_diffusion_superresolution.LDMSuperResolutionPipeline + - __call__ diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index da56dc8881..86eda7371f 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -35,6 +35,7 @@ if is_torch_available(): DDPMPipeline, KarrasVePipeline, LDMPipeline, + LDMSuperResolutionPipeline, PNDMPipeline, RePaintPipeline, ScoreSdeVePipeline, diff --git a/src/diffusers/pipelines/__init__.py b/src/diffusers/pipelines/__init__.py index eb0635f6ee..ef4d23e5e6 100644 --- a/src/diffusers/pipelines/__init__.py +++ b/src/diffusers/pipelines/__init__.py @@ -5,6 +5,7 @@ if is_torch_available(): from .dance_diffusion import DanceDiffusionPipeline from .ddim import DDIMPipeline from .ddpm import DDPMPipeline + from .latent_diffusion import LDMSuperResolutionPipeline from .latent_diffusion_uncond import LDMPipeline from .pndm import PNDMPipeline from .repaint import RePaintPipeline diff --git a/src/diffusers/pipelines/latent_diffusion/__init__.py b/src/diffusers/pipelines/latent_diffusion/__init__.py index c481b38cf5..5544527ff5 100644 --- a/src/diffusers/pipelines/latent_diffusion/__init__.py +++ b/src/diffusers/pipelines/latent_diffusion/__init__.py @@ -1,5 +1,6 @@ # flake8: noqa from ...utils import is_transformers_available +from .pipeline_latent_diffusion_superresolution import LDMSuperResolutionPipeline if is_transformers_available(): diff --git a/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py b/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py new file mode 100644 index 0000000000..044ff359e3 --- /dev/null +++ b/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py @@ -0,0 +1,169 @@ +import inspect +from typing import Optional, Tuple, Union + +import numpy as np +import torch +import torch.utils.checkpoint + +import PIL + +from ...models import UNet2DModel, VQModel +from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput +from ...schedulers import ( + DDIMScheduler, + DPMSolverMultistepScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + LMSDiscreteScheduler, + PNDMScheduler, +) + + +def preprocess(image): + w, h = 
image.size + w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32 + image = image.resize((w, h), resample=PIL.Image.LANCZOS) + image = np.array(image).astype(np.float32) / 255.0 + image = image[None].transpose(0, 3, 1, 2) + image = torch.from_numpy(image) + return 2.0 * image - 1.0 + + +class LDMSuperResolutionPipeline(DiffusionPipeline): + r""" + A pipeline for image super-resolution using Latent + + This class inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the + library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) + + Parameters: + vqvae ([`VQModel`]): + Vector-quantized (VQ) VAE Model to encode and decode images to and from latent representations. + unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image. + scheduler ([`SchedulerMixin`]): + A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + [`DDIMScheduler`], [`LMSDiscreteScheduler`], [`EulerDiscreteScheduler`], + [`EulerAncestralDiscreteScheduler`], [`DPMSolverMultistepScheduler`], or [`PNDMScheduler`]. + """ + + def __init__( + self, + vqvae: VQModel, + unet: UNet2DModel, + scheduler: Union[ + DDIMScheduler, + PNDMScheduler, + LMSDiscreteScheduler, + EulerDiscreteScheduler, + EulerAncestralDiscreteScheduler, + DPMSolverMultistepScheduler, + ], + ): + super().__init__() + self.register_modules(vqvae=vqvae, unet=unet, scheduler=scheduler) + + @torch.no_grad() + def __call__( + self, + init_image: Union[torch.Tensor, PIL.Image.Image], + batch_size: Optional[int] = 1, + num_inference_steps: Optional[int] = 100, + eta: Optional[float] = 0.0, + generator: Optional[torch.Generator] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + **kwargs, + ) -> Union[Tuple, ImagePipelineOutput]: + r""" + Args: + init_image (`torch.Tensor` or `PIL.Image.Image`): + `Image`, or tensor representing an image batch, that will be used as the starting point for the + process. + batch_size (`int`, *optional*, defaults to 1): + Number of images to generate. + num_inference_steps (`int`, *optional*, defaults to 100): + The number of denoising steps. More denoising steps usually lead to a higher quality image at the + expense of slower inference. + eta (`float`, *optional*, defaults to 0.0): + Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to + [`schedulers.DDIMScheduler`], will be ignored for others. + generator (`torch.Generator`, *optional*): + A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation + deterministic. + output_type (`str`, *optional*, defaults to `"pil"`): + The output format of the generate image. Choose between + [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. + return_dict (`bool`, *optional*): + Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. + + Returns: + [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if + `return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the + generated images. 
+ """ + + if isinstance(init_image, PIL.Image.Image): + batch_size = 1 + elif isinstance(init_image, torch.Tensor): + batch_size = init_image.shape[0] + else: + raise ValueError( + f"`init_image` has to be of type `PIL.Image.Image` or `torch.Tensor` but is {type(init_image)}" + ) + + if isinstance(init_image, PIL.Image.Image): + init_image = preprocess(init_image) + + height, width = init_image.shape[-2:] + + # in_channels should be 6: 3 for latents, 3 for low resolution image + latents_shape = (batch_size, self.unet.in_channels // 2, height, width) + latents_dtype = next(self.unet.parameters()).dtype + + if self.device.type == "mps": + # randn does not work reproducibly on mps + latents = torch.randn(latents_shape, generator=generator, device="cpu", dtype=latents_dtype) + latents = latents.to(self.device) + else: + latents = torch.randn(latents_shape, generator=generator, device=self.device, dtype=latents_dtype) + + init_image = init_image.to(device=self.device, dtype=latents_dtype) + + # set timesteps and move to the correct device + self.scheduler.set_timesteps(num_inference_steps, device=self.device) + timesteps_tensor = self.scheduler.timesteps + + # scale the initial noise by the standard deviation required by the scheduler + latents = latents * self.scheduler.init_noise_sigma + + # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature. + # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. + # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 + # and should be between [0, 1] + accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) + extra_kwargs = {} + if accepts_eta: + extra_kwargs["eta"] = eta + + for t in self.progress_bar(timesteps_tensor): + # concat latents and low resolution image in the channel dimension. 
+ latents_input = torch.cat([latents, init_image], dim=1) + latents_input = self.scheduler.scale_model_input(latents_input, t) + # predict the noise residual + noise_pred = self.unet(latents_input, t).sample + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step(noise_pred, t, latents, **extra_kwargs).prev_sample + + # decode the image latents with the VQVAE + image = self.vqvae.decode(latents).sample + image = torch.clamp(image, -1.0, 1.0) + image = image / 2 + 0.5 + image = image.cpu().permute(0, 2, 3, 1).numpy() + + if output_type == "pil": + image = self.numpy_to_pil(image) + + if not return_dict: + return (image,) + + return ImagePipelineOutput(images=image) diff --git a/src/diffusers/utils/dummy_pt_objects.py b/src/diffusers/utils/dummy_pt_objects.py index 9d296d2997..af2e0c7c61 100644 --- a/src/diffusers/utils/dummy_pt_objects.py +++ b/src/diffusers/utils/dummy_pt_objects.py @@ -227,6 +227,21 @@ class LDMPipeline(metaclass=DummyObject): requires_backends(cls, ["torch"]) +class LDMSuperResolutionPipeline(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + class PNDMPipeline(metaclass=DummyObject): _backends = ["torch"] diff --git a/tests/pipelines/latent_diffusion/test_latent_diffusion_superresolution.py b/tests/pipelines/latent_diffusion/test_latent_diffusion_superresolution.py new file mode 100644 index 0000000000..f5ec56d1bd --- /dev/null +++ b/tests/pipelines/latent_diffusion/test_latent_diffusion_superresolution.py @@ -0,0 +1,118 @@ +# coding=utf-8 +# Copyright 2022 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
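For reference, a minimal usage sketch of the `LDMSuperResolutionPipeline` defined above. The checkpoint id and the test image mirror the integration test further down; any compatible LDM super-resolution checkpoint should behave the same way, and `eta` is only honored when the scheduler is a DDIM scheduler.

```python
from io import BytesIO

import requests
import torch
import PIL.Image

from diffusers import LDMSuperResolutionPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = LDMSuperResolutionPipeline.from_pretrained("duongna/ldm-super-resolution").to(device)

# fetch a low-resolution input image and shrink it to 64x64, as in the integration test
url = (
    "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
    "/vq_diffusion/teddy_bear_pool.png"
)
low_res_img = PIL.Image.open(BytesIO(requests.get(url).content)).convert("RGB")
low_res_img = low_res_img.resize((64, 64), resample=PIL.Image.LANCZOS)

# run the pipeline; for this checkpoint the output is 4x the input resolution (256x256 here)
upscaled = pipe(low_res_img, num_inference_steps=100, eta=1.0).images[0]
upscaled.save("ldm_super_resolution.png")
```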
+ +import random +import unittest + +import numpy as np +import torch + +import PIL +from diffusers import DDIMScheduler, LDMSuperResolutionPipeline, UNet2DModel, VQModel +from diffusers.utils import floats_tensor, load_image, slow, torch_device +from diffusers.utils.testing_utils import require_torch + +from ...test_pipelines_common import PipelineTesterMixin + + +torch.backends.cuda.matmul.allow_tf32 = False + + +class LDMSuperResolutionPipelineFastTests(PipelineTesterMixin, unittest.TestCase): + @property + def dummy_image(self): + batch_size = 1 + num_channels = 3 + sizes = (32, 32) + + image = floats_tensor((batch_size, num_channels) + sizes, rng=random.Random(0)).to(torch_device) + return image + + @property + def dummy_uncond_unet(self): + torch.manual_seed(0) + model = UNet2DModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=6, + out_channels=3, + down_block_types=("DownBlock2D", "AttnDownBlock2D"), + up_block_types=("AttnUpBlock2D", "UpBlock2D"), + ) + return model + + @property + def dummy_vq_model(self): + torch.manual_seed(0) + model = VQModel( + block_out_channels=[32, 64], + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"], + latent_channels=3, + ) + return model + + def test_inference_superresolution(self): + unet = self.dummy_uncond_unet + scheduler = DDIMScheduler() + vqvae = self.dummy_vq_model + + ldm = LDMSuperResolutionPipeline(unet=unet, vqvae=vqvae, scheduler=scheduler) + ldm.to(torch_device) + ldm.set_progress_bar_config(disable=None) + + init_image = self.dummy_image.to(torch_device) + + # Warmup pass when using mps (see #372) + if torch_device == "mps": + generator = torch.manual_seed(0) + _ = ldm(init_image, generator=generator, num_inference_steps=1, output_type="numpy").images + + generator = torch.manual_seed(0) + image = ldm(init_image, generator=generator, num_inference_steps=2, output_type="numpy").images + + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 64, 64, 3) + expected_slice = np.array([0.8634, 0.8186, 0.6416, 0.6846, 0.4427, 0.5676, 0.4679, 0.6247, 0.5176]) + tolerance = 1e-2 if torch_device != "mps" else 3e-2 + assert np.abs(image_slice.flatten() - expected_slice).max() < tolerance + + +@slow +@require_torch +class LDMSuperResolutionPipelineIntegrationTests(unittest.TestCase): + def test_inference_superresolution(self): + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/vq_diffusion/teddy_bear_pool.png" + ) + init_image = init_image.resize((64, 64), resample=PIL.Image.LANCZOS) + + ldm = LDMSuperResolutionPipeline.from_pretrained("duongna/ldm-super-resolution", device_map="auto") + ldm.to(torch_device) + ldm.set_progress_bar_config(disable=None) + + generator = torch.Generator(device=torch_device).manual_seed(0) + image = ldm(init_image, generator=generator, num_inference_steps=20, output_type="numpy").images + + image_slice = image[0, -3:, -3:, -1] + + assert image.shape == (1, 256, 256, 3) + expected_slice = np.array([0.7418, 0.7472, 0.7424, 0.7422, 0.7463, 0.726, 0.7382, 0.7248, 0.6828]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 From 0248541deadfa187150fe7f96a575ff905ecddd7 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 9 Nov 2022 15:46:08 +0100 Subject: [PATCH 69/88] [Conversion] Improve conversion script (#1218) up --- ...vert_original_stable_diffusion_to_diffusers.py | 15 
++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/scripts/convert_original_stable_diffusion_to_diffusers.py b/scripts/convert_original_stable_diffusion_to_diffusers.py index 46073001f1..375b12b6f8 100644 --- a/scripts/convert_original_stable_diffusion_to_diffusers.py +++ b/scripts/convert_original_stable_diffusion_to_diffusers.py @@ -30,6 +30,9 @@ except ImportError: from diffusers import ( AutoencoderKL, DDIMScheduler, + DPMSolverMultistepScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, LDMTextToImagePipeline, LMSDiscreteScheduler, PNDMScheduler, @@ -647,7 +650,7 @@ if __name__ == "__main__": "--scheduler_type", default="pndm", type=str, - help="Type of scheduler to use. Should be one of ['pndm', 'lms', 'ddim']", + help="Type of scheduler to use. Should be one of ['pndm', 'lms', 'ddim', 'euler', 'euler-ancest', 'dpm']", ) parser.add_argument( "--extract_ema", @@ -686,6 +689,16 @@ if __name__ == "__main__": ) elif args.scheduler_type == "lms": scheduler = LMSDiscreteScheduler(beta_start=beta_start, beta_end=beta_end, beta_schedule="scaled_linear") + elif args.scheduler_type == "euler": + scheduler = EulerDiscreteScheduler(beta_start=beta_start, beta_end=beta_end, beta_schedule="scaled_linear") + elif args.scheduler_type == "euler-ancestral": + scheduler = EulerAncestralDiscreteScheduler( + beta_start=beta_start, beta_end=beta_end, beta_schedule="scaled_linear" + ) + elif args.scheduler_type == "dpm": + scheduler = DPMSolverMultistepScheduler( + beta_start=beta_start, beta_end=beta_end, beta_schedule="scaled_linear" + ) elif args.scheduler_type == "ddim": scheduler = DDIMScheduler( beta_start=beta_start, From 6c0335c7f95b85481777445ac96d9817364ba6ae Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 9 Nov 2022 16:02:11 +0100 Subject: [PATCH 70/88] DDIM docs (#1219) --- docs/source/api/pipelines/ddim.mdx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/api/pipelines/ddim.mdx b/docs/source/api/pipelines/ddim.mdx index 2bb9def76e..a7a5421b36 100644 --- a/docs/source/api/pipelines/ddim.mdx +++ b/docs/source/api/pipelines/ddim.mdx @@ -20,7 +20,8 @@ The abstract of the paper is the following: Denoising diffusion probabilistic models (DDPMs) have achieved high quality image generation without adversarial training, yet they require simulating a Markov chain for many steps to produce a sample. To accelerate sampling, we present denoising diffusion implicit models (DDIMs), a more efficient class of iterative implicit probabilistic models with the same training procedure as DDPMs. In DDPMs, the generative process is defined as the reverse of a Markovian diffusion process. We construct a class of non-Markovian diffusion processes that lead to the same training objective, but whose reverse process can be much faster to sample from. We empirically demonstrate that DDIMs can produce high quality samples 10× to 50× faster in terms of wall-clock time compared to DDPMs, allow us to trade off computation for sample quality, and can perform semantically meaningful image interpolation directly in the latent space. -The original codebase of this paper can be found [here](https://github.com/ermongroup/ddim). +The original codebase of this paper can be found here: [ermongroup/ddim](https://github.com/ermongroup/ddim). +For questions, feel free to contact the author on [tsong.me](https://tsong.me/). 
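A condensed sketch of the scheduler selection the conversion-script change above enables. The beta values below are the Stable Diffusion v1 defaults and stand in for the ones the script reads from the original checkpoint config; the script's `pndm` and `ddim` branches also pass a few extra arguments that are omitted here. Note the accepted value is `euler-ancestral`, even though the help string abbreviates it to `euler-ancest`.

```python
from diffusers import (
    DDIMScheduler,
    DPMSolverMultistepScheduler,
    EulerAncestralDiscreteScheduler,
    EulerDiscreteScheduler,
    LMSDiscreteScheduler,
    PNDMScheduler,
)


def build_scheduler(scheduler_type: str, beta_start: float = 0.00085, beta_end: float = 0.012):
    # shared keyword arguments for every scheduler type
    common = {"beta_start": beta_start, "beta_end": beta_end, "beta_schedule": "scaled_linear"}
    schedulers = {
        "pndm": PNDMScheduler,
        "lms": LMSDiscreteScheduler,
        "ddim": DDIMScheduler,
        "euler": EulerDiscreteScheduler,
        "euler-ancestral": EulerAncestralDiscreteScheduler,
        "dpm": DPMSolverMultistepScheduler,
    }
    if scheduler_type not in schedulers:
        raise ValueError(f"Unsupported scheduler type: {scheduler_type}")
    return schedulers[scheduler_type](**common)


scheduler = build_scheduler("euler-ancestral")
```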
## Available Pipelines: From 4969f46511ef07e81b9b7429cd0fb88d1926849c Mon Sep 17 00:00:00 2001 From: Jesse Casey <31020859+jncasey@users.noreply.github.com> Date: Wed, 9 Nov 2022 14:01:31 -0500 Subject: [PATCH 71/88] apply `repeat_interleave` fix for `mps` to stable diffusion image2image pipeline (#1135) copy from other pipeline --- .../stable_diffusion/pipeline_stable_diffusion_img2img.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index f05819b0d9..6bf2babbfe 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -337,8 +337,10 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] text_embeddings = self.text_encoder(text_input_ids.to(device))[0] - # duplicate text embeddings for each generation per prompt - text_embeddings = text_embeddings.repeat_interleave(num_images_per_prompt, dim=0) + # duplicate text embeddings for each generation per prompt, using mps friendly method + bs_embed, seq_len, _ = text_embeddings.shape + text_embeddings = text_embeddings.repeat(1, num_images_per_prompt, 1) + text_embeddings = text_embeddings.view(bs_embed * num_images_per_prompt, seq_len, -1) # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` From af279434d03e6e3be7808ecd15c652338b31024b Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Wed, 9 Nov 2022 20:04:43 +0100 Subject: [PATCH 72/88] Flax tests: don't hardcode number of devices (#1175) Flax tests: don't hardcode number of devices. This makes it possible to test on CPU/GPU. However, expected slices are only checked when there are 8 devices. 
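A minimal, single-host sketch of the pattern this commit applies: derive the sample count from `jax.device_count()` instead of hardcoding 8, shard the inputs accordingly, and only compare against recorded reference values when the original 8-device setup is present (`shard` is assumed to come from `flax.training.common_utils`, as in the test file).

```python
import jax
import numpy as np
from flax.training.common_utils import shard

# one sample per device, whatever the hardware (CPU, GPU or TPU)
num_samples = jax.device_count()
prng_seeds = jax.random.split(jax.random.PRNGKey(0), num_samples)

# shard a dummy token batch across the available devices
prompt_ids = np.zeros((num_samples, 77), dtype=np.int32)
prompt_ids = shard(prompt_ids)
assert prompt_ids.shape == (num_samples, 1, 77)

# numerical reference slices were recorded on an 8-device TPU, so only
# check them when that exact configuration is available
if jax.device_count() == 8:
    pass  # compare generated images against the recorded expected slices here
```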
--- tests/test_pipelines_flax.py | 47 ++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/tests/test_pipelines_flax.py b/tests/test_pipelines_flax.py index ac5e2621a5..72316aad92 100644 --- a/tests/test_pipelines_flax.py +++ b/tests/test_pipelines_flax.py @@ -73,18 +73,19 @@ class FlaxPipelineTests(unittest.TestCase): # shard inputs and rng params = replicate(params) - prng_seed = jax.random.split(prng_seed, 8) + prng_seed = jax.random.split(prng_seed, num_samples) prompt_ids = shard(prompt_ids) images = p_sample(prompt_ids, params, prng_seed, num_inference_steps).images - assert images.shape == (8, 1, 128, 128, 3) - assert np.abs(np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 3.1111548) < 1e-3 - assert np.abs(np.abs(images, dtype=np.float32).sum() - 199746.95) < 5e-1 + assert images.shape == (num_samples, 1, 128, 128, 3) + if jax.device_count() == 8: + assert np.abs(np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 3.1111548) < 1e-3 + assert np.abs(np.abs(images, dtype=np.float32).sum() - 199746.95) < 5e-1 images_pil = pipeline.numpy_to_pil(np.asarray(images.reshape((num_samples,) + images.shape[-3:]))) - assert len(images_pil) == 8 + assert len(images_pil) == num_samples def test_stable_diffusion_v1_4(self): pipeline, params = FlaxStableDiffusionPipeline.from_pretrained( @@ -107,14 +108,15 @@ class FlaxPipelineTests(unittest.TestCase): # shard inputs and rng params = replicate(params) - prng_seed = jax.random.split(prng_seed, 8) + prng_seed = jax.random.split(prng_seed, num_samples) prompt_ids = shard(prompt_ids) images = p_sample(prompt_ids, params, prng_seed, num_inference_steps).images - assert images.shape == (8, 1, 512, 512, 3) - assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.05652401)) < 1e-3 - assert np.abs((np.abs(images, dtype=np.float32).sum() - 2383808.2)) < 5e-1 + assert images.shape == (num_samples, 1, 512, 512, 3) + if jax.device_count() == 8: + assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.05652401)) < 1e-3 + assert np.abs((np.abs(images, dtype=np.float32).sum() - 2383808.2)) < 5e-1 def test_stable_diffusion_v1_4_bfloat_16(self): pipeline, params = FlaxStableDiffusionPipeline.from_pretrained( @@ -137,14 +139,15 @@ class FlaxPipelineTests(unittest.TestCase): # shard inputs and rng params = replicate(params) - prng_seed = jax.random.split(prng_seed, 8) + prng_seed = jax.random.split(prng_seed, num_samples) prompt_ids = shard(prompt_ids) images = p_sample(prompt_ids, params, prng_seed, num_inference_steps).images - assert images.shape == (8, 1, 512, 512, 3) - assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.06652832)) < 1e-3 - assert np.abs((np.abs(images, dtype=np.float32).sum() - 2384849.8)) < 5e-1 + assert images.shape == (num_samples, 1, 512, 512, 3) + if jax.device_count() == 8: + assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.06652832)) < 1e-3 + assert np.abs((np.abs(images, dtype=np.float32).sum() - 2384849.8)) < 5e-1 def test_stable_diffusion_v1_4_bfloat_16_with_safety(self): pipeline, params = FlaxStableDiffusionPipeline.from_pretrained( @@ -165,14 +168,15 @@ class FlaxPipelineTests(unittest.TestCase): # shard inputs and rng params = replicate(params) - prng_seed = jax.random.split(prng_seed, 8) + prng_seed = jax.random.split(prng_seed, num_samples) prompt_ids = shard(prompt_ids) images = pipeline(prompt_ids, params, prng_seed, num_inference_steps, jit=True).images - assert 
images.shape == (8, 1, 512, 512, 3) - assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.06652832)) < 1e-3 - assert np.abs((np.abs(images, dtype=np.float32).sum() - 2384849.8)) < 5e-1 + assert images.shape == (num_samples, 1, 512, 512, 3) + if jax.device_count() == 8: + assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.06652832)) < 1e-3 + assert np.abs((np.abs(images, dtype=np.float32).sum() - 2384849.8)) < 5e-1 def test_stable_diffusion_v1_4_bfloat_16_ddim(self): scheduler = FlaxDDIMScheduler( @@ -210,11 +214,12 @@ class FlaxPipelineTests(unittest.TestCase): # shard inputs and rng params = replicate(params) - prng_seed = jax.random.split(prng_seed, 8) + prng_seed = jax.random.split(prng_seed, num_samples) prompt_ids = shard(prompt_ids) images = p_sample(prompt_ids, params, prng_seed, num_inference_steps).images - assert images.shape == (8, 1, 512, 512, 3) - assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.045043945)) < 1e-3 - assert np.abs((np.abs(images, dtype=np.float32).sum() - 2347693.5)) < 5e-1 + assert images.shape == (num_samples, 1, 512, 512, 3) + if jax.device_count() == 8: + assert np.abs((np.abs(images[0, 0, :2, :2, -2:], dtype=np.float32).sum() - 0.045043945)) < 1e-3 + assert np.abs((np.abs(images, dtype=np.float32).sum() - 2347693.5)) < 5e-1 From 13f388eeb29fc0fc64674031c042449622f5295d Mon Sep 17 00:00:00 2001 From: exo-pla-net <47930742+exo-pla-net@users.noreply.github.com> Date: Wed, 9 Nov 2022 12:39:27 -0800 Subject: [PATCH 73/88] Improve documentation for the LPW pipeline (#1182) --- examples/community/README.md | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/examples/community/README.md b/examples/community/README.md index b83b2ff4d4..fd6fff79c5 100644 --- a/examples/community/README.md +++ b/examples/community/README.md @@ -179,9 +179,20 @@ images = pipe.inpaint(prompt=prompt, init_image=init_image, mask_image=mask_imag As shown above this one pipeline can run all both "text-to-image", "image-to-image", and "inpainting" in one pipeline. ### Long Prompt Weighting Stable Diffusion +Features of this custom pipeline: +- Input a prompt without the 77 token length limit. +- Includes tx2img, img2img. and inpainting pipelines. +- Emphasize/weigh part of your prompt with parentheses as so: `a baby deer with (big eyes)` +- De-emphasize part of your prompt as so: `a [baby] deer with big eyes` +- Precisely weigh part of your prompt as so: `a baby deer with (big eyes:1.3)` -The Pipeline lets you input prompt without 77 token length limit. And you can increase words weighting by using "()" or decrease words weighting by using "[]" -The Pipeline also lets you use the main use cases of the stable diffusion pipeline in a single class. 
+Prompt weighting equivalents: +- `a baby deer with` == `(a baby deer with:1.0)` +- `(big eyes)` == `(big eyes:1.1)` +- `((big eyes))` == `(big eyes:1.21)` +- `[big eyes]` == `(big eyes:0.91)` + +You can run this custom pipeline as so: #### pytorch From 3d98dc763a645423f992b3546c4d9cdd97eb9d04 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 9 Nov 2022 22:18:57 +0100 Subject: [PATCH 74/88] Factor out encode text with Copied from (#1224) * up * more fixes * fix * finalize * Update src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py * upload models * up --- .../pipeline_cycle_diffusion.py | 186 ++++++++++-------- .../pipeline_onnx_stable_diffusion.py | 132 ++++++++----- .../pipeline_onnx_stable_diffusion_img2img.py | 134 +++++++------ .../pipeline_onnx_stable_diffusion_inpaint.py | 138 +++++++------ .../pipeline_stable_diffusion.py | 153 ++++++++------ .../pipeline_stable_diffusion_img2img.py | 146 ++++++++------ .../pipeline_stable_diffusion_inpaint.py | 150 ++++++++------ ...ipeline_stable_diffusion_inpaint_legacy.py | 175 ++++++++++------ 8 files changed, 722 insertions(+), 492 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py index 631ceda813..50f519c77f 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py @@ -205,6 +205,110 @@ class CycleDiffusionPipeline(DiffusionPipeline): # set slice_size = `None` to disable `set_attention_slice` self.enable_attention_slicing(None) + @property + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device + def _execution_device(self): + r""" + Returns the device on which the pipeline's models will be executed. After calling + `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module + hooks. + """ + if self.device != torch.device("meta") or not hasattr(self.unet, "_hf_hook"): + return self.device + for module in self.unet.modules(): + if ( + hasattr(module, "_hf_hook") + and hasattr(module._hf_hook, "execution_device") + and module._hf_hook.execution_device is not None + ): + return torch.device(module._hf_hook.execution_device) + return self.device + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt + def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt): + r""" + Encodes the prompt into text encoder hidden states. + + Args: + prompt (`str` or `list(int)`): + prompt to be encoded + device: (`torch.device`): + torch device + num_images_per_prompt (`int`): + number of images that should be generated per prompt + do_classifier_free_guidance (`bool`): + whether to use classifier free guidance or not + negative_prompt (`str` or `List[str]`): + The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored + if `guidance_scale` is less than `1`). 
+ """ + batch_size = len(prompt) if isinstance(prompt, list) else 1 + + text_inputs = self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + return_tensors="pt", + ) + text_input_ids = text_inputs.input_ids + + if text_input_ids.shape[-1] > self.tokenizer.model_max_length: + removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + logger.warning( + "The following part of your input was truncated because CLIP can only handle sequences up to" + f" {self.tokenizer.model_max_length} tokens: {removed_text}" + ) + text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] + text_embeddings = self.text_encoder(text_input_ids.to(device))[0] + + # duplicate text embeddings for each generation per prompt, using mps friendly method + bs_embed, seq_len, _ = text_embeddings.shape + text_embeddings = text_embeddings.repeat(1, num_images_per_prompt, 1) + text_embeddings = text_embeddings.view(bs_embed * num_images_per_prompt, seq_len, -1) + + # get unconditional embeddings for classifier free guidance + if do_classifier_free_guidance: + uncond_tokens: List[str] + if negative_prompt is None: + uncond_tokens = [""] * batch_size + elif type(prompt) is not type(negative_prompt): + raise TypeError( + f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" + f" {type(prompt)}." + ) + elif isinstance(negative_prompt, str): + uncond_tokens = [negative_prompt] + elif batch_size != len(negative_prompt): + raise ValueError( + f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" + f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" + " the batch size of `prompt`." + ) + else: + uncond_tokens = negative_prompt + + max_length = text_input_ids.shape[-1] + uncond_input = self.tokenizer( + uncond_tokens, + padding="max_length", + max_length=max_length, + truncation=True, + return_tensors="pt", + ) + uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(device))[0] + + # duplicate unconditional embeddings for each generation per prompt, using mps friendly method + seq_len = uncond_embeddings.shape[1] + uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1) + uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) + + # For classifier free guidance, we need to do two forward passes. 
+ # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) + + return text_embeddings + @torch.no_grad() def __call__( self, @@ -309,89 +413,17 @@ class CycleDiffusionPipeline(DiffusionPipeline): if isinstance(init_image, PIL.Image.Image): init_image = preprocess(init_image) - # get prompt text embeddings - text_inputs = self.tokenizer( - prompt, - padding="max_length", - max_length=self.tokenizer.model_max_length, - return_tensors="pt", - ) - source_text_inputs = self.tokenizer( - source_prompt, - padding="max_length", - max_length=self.tokenizer.model_max_length, - return_tensors="pt", - ) - text_input_ids = text_inputs.input_ids - source_text_input_ids = source_text_inputs.input_ids - - if text_input_ids.shape[-1] > self.tokenizer.model_max_length: - removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) - logger.warning( - "The following part of your input was truncated because CLIP can only handle sequences up to" - f" {self.tokenizer.model_max_length} tokens: {removed_text}" - ) - text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] - if source_text_input_ids.shape[-1] > self.tokenizer.model_max_length: - removed_text = self.tokenizer.batch_decode(source_text_input_ids[:, self.tokenizer.model_max_length :]) - logger.warning( - "The following part of your input was truncated because CLIP can only handle sequences up to" - f" {self.tokenizer.model_max_length} tokens: {removed_text}" - ) - source_text_input_ids = source_text_input_ids[:, : self.tokenizer.model_max_length] - text_embeddings = self.text_encoder(text_input_ids.to(self.device))[0] - source_text_embeddings = self.text_encoder(source_text_input_ids.to(self.device))[0] - - # duplicate text embeddings for each generation per prompt - text_embeddings = text_embeddings.repeat_interleave(num_images_per_prompt, dim=0) - source_text_embeddings = source_text_embeddings.repeat_interleave(num_images_per_prompt, dim=0) + device = self._execution_device # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` # corresponds to doing no classifier free guidance. + do_classifier_free_guidance = guidance_scale > 1.0 - # get unconditional embeddings for classifier free guidance - uncond_tokens = [""] - - max_length = text_input_ids.shape[-1] - uncond_input = self.tokenizer( - uncond_tokens, - padding="max_length", - max_length=max_length, - truncation=True, - return_tensors="pt", + text_embeddings = self._encode_prompt(prompt, device, num_images_per_prompt, do_classifier_free_guidance, None) + source_text_embeddings = self._encode_prompt( + source_prompt, device, num_images_per_prompt, do_classifier_free_guidance, None ) - uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0] - - # duplicate unconditional embeddings for each generation per prompt - uncond_embeddings = uncond_embeddings.repeat_interleave(batch_size * num_images_per_prompt, dim=0) - - # For classifier free guidance, we need to do two forward passes. 
- # Here we concatenate the unconditional and text embeddings into a single batch - # to avoid doing two forward passes - text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) - - source_uncond_tokens = [""] - - max_length = source_text_input_ids.shape[-1] - source_uncond_input = self.tokenizer( - source_uncond_tokens, - padding="max_length", - max_length=max_length, - truncation=True, - return_tensors="pt", - ) - source_uncond_embeddings = self.text_encoder(source_uncond_input.input_ids.to(self.device))[0] - - # duplicate unconditional embeddings for each generation per prompt - source_uncond_embeddings = source_uncond_embeddings.repeat_interleave( - batch_size * num_images_per_prompt, dim=0 - ) - - # For classifier free guidance, we need to do two forward passes. - # Here we concatenate the unconditional and text embeddings into a single batch - # to avoid doing two forward passes - source_text_embeddings = torch.cat([source_uncond_embeddings, source_text_embeddings]) # encode the init image into latents and scale the latents latents_dtype = text_embeddings.dtype diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py index 3b4689086f..d1e2704fce 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py @@ -92,6 +92,81 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline): feature_extractor=feature_extractor, ) + def _encode_prompt(self, prompt, num_images_per_prompt, do_classifier_free_guidance, negative_prompt): + r""" + Encodes the prompt into text encoder hidden states. + + Args: + prompt (`str` or `list(int)`): + prompt to be encoded + num_images_per_prompt (`int`): + number of images that should be generated per prompt + do_classifier_free_guidance (`bool`): + whether to use classifier free guidance or not + negative_prompt (`str` or `List[str]`): + The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored + if `guidance_scale` is less than `1`). + """ + batch_size = len(prompt) if isinstance(prompt, list) else 1 + + # get prompt text embeddings + text_inputs = self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + return_tensors="np", + ) + text_input_ids = text_inputs.input_ids + + if text_input_ids.shape[-1] > self.tokenizer.model_max_length: + removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + logger.warning( + "The following part of your input was truncated because CLIP can only handle sequences up to" + f" {self.tokenizer.model_max_length} tokens: {removed_text}" + ) + text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] + text_embeddings = self.text_encoder(input_ids=text_input_ids.astype(np.int32))[0] + text_embeddings = np.repeat(text_embeddings, num_images_per_prompt, axis=0) + + # get unconditional embeddings for classifier free guidance + if do_classifier_free_guidance: + uncond_tokens: List[str] + if negative_prompt is None: + uncond_tokens = [""] * batch_size + elif type(prompt) is not type(negative_prompt): + raise TypeError( + f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" + f" {type(prompt)}." 
+ ) + elif isinstance(negative_prompt, str): + uncond_tokens = [negative_prompt] * batch_size + elif batch_size != len(negative_prompt): + raise ValueError( + f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" + f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" + " the batch size of `prompt`." + ) + else: + uncond_tokens = negative_prompt + + max_length = text_input_ids.shape[-1] + uncond_input = self.tokenizer( + uncond_tokens, + padding="max_length", + max_length=max_length, + truncation=True, + return_tensors="np", + ) + uncond_embeddings = self.text_encoder(input_ids=uncond_input.input_ids.astype(np.int32))[0] + uncond_embeddings = np.repeat(uncond_embeddings, num_images_per_prompt, axis=0) + + # For classifier free guidance, we need to do two forward passes. + # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + text_embeddings = np.concatenate([uncond_embeddings, text_embeddings]) + + return text_embeddings + def __call__( self, prompt: Union[str, List[str]], @@ -131,65 +206,14 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline): if generator is None: generator = np.random - # get prompt text embeddings - text_inputs = self.tokenizer( - prompt, - padding="max_length", - max_length=self.tokenizer.model_max_length, - return_tensors="np", - ) - text_input_ids = text_inputs.input_ids - - if text_input_ids.shape[-1] > self.tokenizer.model_max_length: - removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) - logger.warning( - "The following part of your input was truncated because CLIP can only handle sequences up to" - f" {self.tokenizer.model_max_length} tokens: {removed_text}" - ) - text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] - text_embeddings = self.text_encoder(input_ids=text_input_ids.astype(np.int32))[0] - text_embeddings = np.repeat(text_embeddings, num_images_per_prompt, axis=0) - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` # corresponds to doing no classifier free guidance. do_classifier_free_guidance = guidance_scale > 1.0 - # get unconditional embeddings for classifier free guidance - if do_classifier_free_guidance: - uncond_tokens: List[str] - if negative_prompt is None: - uncond_tokens = [""] * batch_size - elif type(prompt) is not type(negative_prompt): - raise TypeError( - f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" - f" {type(prompt)}." - ) - elif isinstance(negative_prompt, str): - uncond_tokens = [negative_prompt] * batch_size - elif batch_size != len(negative_prompt): - raise ValueError( - f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" - f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" - " the batch size of `prompt`." 
- ) - else: - uncond_tokens = negative_prompt - max_length = text_input_ids.shape[-1] - uncond_input = self.tokenizer( - uncond_tokens, - padding="max_length", - max_length=max_length, - truncation=True, - return_tensors="np", - ) - uncond_embeddings = self.text_encoder(input_ids=uncond_input.input_ids.astype(np.int32))[0] - uncond_embeddings = np.repeat(uncond_embeddings, num_images_per_prompt, axis=0) - - # For classifier free guidance, we need to do two forward passes. - # Here we concatenate the unconditional and text embeddings into a single batch - # to avoid doing two forward passes - text_embeddings = np.concatenate([uncond_embeddings, text_embeddings]) + text_embeddings = self._encode_prompt( + prompt, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) # get the initial random noise unless the user supplied it latents_dtype = text_embeddings.dtype diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py index f167acf233..a09dfe751f 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py @@ -138,6 +138,82 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline): feature_extractor=feature_extractor, ) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_onnx_stable_diffusion.OnnxStableDiffusionPipeline._encode_prompt + def _encode_prompt(self, prompt, num_images_per_prompt, do_classifier_free_guidance, negative_prompt): + r""" + Encodes the prompt into text encoder hidden states. + + Args: + prompt (`str` or `list(int)`): + prompt to be encoded + num_images_per_prompt (`int`): + number of images that should be generated per prompt + do_classifier_free_guidance (`bool`): + whether to use classifier free guidance or not + negative_prompt (`str` or `List[str]`): + The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored + if `guidance_scale` is less than `1`). + """ + batch_size = len(prompt) if isinstance(prompt, list) else 1 + + # get prompt text embeddings + text_inputs = self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + return_tensors="np", + ) + text_input_ids = text_inputs.input_ids + + if text_input_ids.shape[-1] > self.tokenizer.model_max_length: + removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + logger.warning( + "The following part of your input was truncated because CLIP can only handle sequences up to" + f" {self.tokenizer.model_max_length} tokens: {removed_text}" + ) + text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] + text_embeddings = self.text_encoder(input_ids=text_input_ids.astype(np.int32))[0] + text_embeddings = np.repeat(text_embeddings, num_images_per_prompt, axis=0) + + # get unconditional embeddings for classifier free guidance + if do_classifier_free_guidance: + uncond_tokens: List[str] + if negative_prompt is None: + uncond_tokens = [""] * batch_size + elif type(prompt) is not type(negative_prompt): + raise TypeError( + f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" + f" {type(prompt)}." 
+ ) + elif isinstance(negative_prompt, str): + uncond_tokens = [negative_prompt] * batch_size + elif batch_size != len(negative_prompt): + raise ValueError( + f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" + f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" + " the batch size of `prompt`." + ) + else: + uncond_tokens = negative_prompt + + max_length = text_input_ids.shape[-1] + uncond_input = self.tokenizer( + uncond_tokens, + padding="max_length", + max_length=max_length, + truncation=True, + return_tensors="np", + ) + uncond_embeddings = self.text_encoder(input_ids=uncond_input.input_ids.astype(np.int32))[0] + uncond_embeddings = np.repeat(uncond_embeddings, num_images_per_prompt, axis=0) + + # For classifier free guidance, we need to do two forward passes. + # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + text_embeddings = np.concatenate([uncond_embeddings, text_embeddings]) + + return text_embeddings + def __call__( self, prompt: Union[str, List[str]], @@ -236,66 +312,14 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline): if isinstance(init_image, PIL.Image.Image): init_image = preprocess(init_image) - # get prompt text embeddings - text_inputs = self.tokenizer( - prompt, - padding="max_length", - max_length=self.tokenizer.model_max_length, - return_tensors="np", - ) - text_input_ids = text_inputs.input_ids - - if text_input_ids.shape[-1] > self.tokenizer.model_max_length: - removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) - logger.warning( - "The following part of your input was truncated because CLIP can only handle sequences up to" - f" {self.tokenizer.model_max_length} tokens: {removed_text}" - ) - text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] - text_embeddings = self.text_encoder(input_ids=text_input_ids.astype(np.int32))[0] - - # duplicate text embeddings for each generation per prompt - text_embeddings = np.repeat(text_embeddings, num_images_per_prompt, axis=0) - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` # corresponds to doing no classifier free guidance. do_classifier_free_guidance = guidance_scale > 1.0 - # get unconditional embeddings for classifier free guidance - if do_classifier_free_guidance: - uncond_tokens: List[str] - if negative_prompt is None: - uncond_tokens = [""] - elif type(prompt) is not type(negative_prompt): - raise TypeError( - f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" - f" {type(prompt)}." 
- ) - elif isinstance(negative_prompt, str): - uncond_tokens = [negative_prompt] * batch_size - elif batch_size != len(negative_prompt): - raise ValueError("The length of `negative_prompt` should be equal to batch_size.") - else: - uncond_tokens = negative_prompt - max_length = text_input_ids.shape[-1] - uncond_input = self.tokenizer( - uncond_tokens, - padding="max_length", - max_length=max_length, - truncation=True, - return_tensors="np", - ) - uncond_input_ids = uncond_input.input_ids - uncond_embeddings = self.text_encoder(input_ids=uncond_input_ids.astype(np.int32))[0] - - # duplicate unconditional embeddings for each generation per prompt - uncond_embeddings = np.repeat(uncond_embeddings, num_images_per_prompt, axis=0) - - # For classifier free guidance, we need to do two forward passes. - # Here we concatenate the unconditional and text embeddings into a single batch - # to avoid doing two forward passes - text_embeddings = np.concatenate([uncond_embeddings, text_embeddings]) + text_embeddings = self._encode_prompt( + prompt, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) latents_dtype = text_embeddings.dtype init_image = init_image.astype(latents_dtype) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py index 3aa1cc8299..6c226dd432 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py @@ -152,6 +152,82 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline): feature_extractor=feature_extractor, ) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_onnx_stable_diffusion.OnnxStableDiffusionPipeline._encode_prompt + def _encode_prompt(self, prompt, num_images_per_prompt, do_classifier_free_guidance, negative_prompt): + r""" + Encodes the prompt into text encoder hidden states. + + Args: + prompt (`str` or `list(int)`): + prompt to be encoded + num_images_per_prompt (`int`): + number of images that should be generated per prompt + do_classifier_free_guidance (`bool`): + whether to use classifier free guidance or not + negative_prompt (`str` or `List[str]`): + The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored + if `guidance_scale` is less than `1`). 
+ """ + batch_size = len(prompt) if isinstance(prompt, list) else 1 + + # get prompt text embeddings + text_inputs = self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + return_tensors="np", + ) + text_input_ids = text_inputs.input_ids + + if text_input_ids.shape[-1] > self.tokenizer.model_max_length: + removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + logger.warning( + "The following part of your input was truncated because CLIP can only handle sequences up to" + f" {self.tokenizer.model_max_length} tokens: {removed_text}" + ) + text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] + text_embeddings = self.text_encoder(input_ids=text_input_ids.astype(np.int32))[0] + text_embeddings = np.repeat(text_embeddings, num_images_per_prompt, axis=0) + + # get unconditional embeddings for classifier free guidance + if do_classifier_free_guidance: + uncond_tokens: List[str] + if negative_prompt is None: + uncond_tokens = [""] * batch_size + elif type(prompt) is not type(negative_prompt): + raise TypeError( + f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" + f" {type(prompt)}." + ) + elif isinstance(negative_prompt, str): + uncond_tokens = [negative_prompt] * batch_size + elif batch_size != len(negative_prompt): + raise ValueError( + f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" + f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" + " the batch size of `prompt`." + ) + else: + uncond_tokens = negative_prompt + + max_length = text_input_ids.shape[-1] + uncond_input = self.tokenizer( + uncond_tokens, + padding="max_length", + max_length=max_length, + truncation=True, + return_tensors="np", + ) + uncond_embeddings = self.text_encoder(input_ids=uncond_input.input_ids.astype(np.int32))[0] + uncond_embeddings = np.repeat(uncond_embeddings, num_images_per_prompt, axis=0) + + # For classifier free guidance, we need to do two forward passes. + # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + text_embeddings = np.concatenate([uncond_embeddings, text_embeddings]) + + return text_embeddings + @torch.no_grad() def __call__( self, @@ -258,70 +334,14 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline): # set timesteps self.scheduler.set_timesteps(num_inference_steps) - # get prompt text embeddings - text_inputs = self.tokenizer( - prompt, - padding="max_length", - max_length=self.tokenizer.model_max_length, - return_tensors="np", - ) - text_input_ids = text_inputs.input_ids - - if text_input_ids.shape[-1] > self.tokenizer.model_max_length: - removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) - logger.warning( - "The following part of your input was truncated because CLIP can only handle sequences up to" - f" {self.tokenizer.model_max_length} tokens: {removed_text}" - ) - text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] - text_embeddings = self.text_encoder(input_ids=text_input_ids.astype(np.int32))[0] - - # duplicate text embeddings for each generation per prompt - text_embeddings = np.repeat(text_embeddings, num_images_per_prompt, axis=0) - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . 
`guidance_scale = 1` # corresponds to doing no classifier free guidance. do_classifier_free_guidance = guidance_scale > 1.0 - # get unconditional embeddings for classifier free guidance - if do_classifier_free_guidance: - uncond_tokens: List[str] - if negative_prompt is None: - uncond_tokens = [""] - elif type(prompt) is not type(negative_prompt): - raise TypeError( - f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" - f" {type(prompt)}." - ) - elif isinstance(negative_prompt, str): - uncond_tokens = [negative_prompt] * batch_size - elif batch_size != len(negative_prompt): - raise ValueError( - f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" - f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" - " the batch size of `prompt`." - ) - else: - uncond_tokens = negative_prompt - max_length = text_input_ids.shape[-1] - uncond_input = self.tokenizer( - uncond_tokens, - padding="max_length", - max_length=max_length, - truncation=True, - return_tensors="np", - ) - uncond_input_ids = uncond_input.input_ids - uncond_embeddings = self.text_encoder(input_ids=uncond_input_ids.astype(np.int32))[0] - - # duplicate unconditional embeddings for each generation per prompt - uncond_embeddings = np.repeat(uncond_embeddings, num_images_per_prompt, axis=0) - - # For classifier free guidance, we need to do two forward passes. - # Here we concatenate the unconditional and text embeddings into a single batch - # to avoid doing two forward passes - text_embeddings = np.concatenate([uncond_embeddings, text_embeddings]) + text_embeddings = self._encode_prompt( + prompt, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) num_channels_latents = NUM_LATENT_CHANNELS latents_shape = (batch_size * num_images_per_prompt, num_channels_latents, height // 8, width // 8) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index c64db52666..ed5246fa91 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -213,6 +213,90 @@ class StableDiffusionPipeline(DiffusionPipeline): return torch.device(module._hf_hook.execution_device) return self.device + def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt): + r""" + Encodes the prompt into text encoder hidden states. + + Args: + prompt (`str` or `list(int)`): + prompt to be encoded + device: (`torch.device`): + torch device + num_images_per_prompt (`int`): + number of images that should be generated per prompt + do_classifier_free_guidance (`bool`): + whether to use classifier free guidance or not + negative_prompt (`str` or `List[str]`): + The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored + if `guidance_scale` is less than `1`). 
+ """ + batch_size = len(prompt) if isinstance(prompt, list) else 1 + + text_inputs = self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + return_tensors="pt", + ) + text_input_ids = text_inputs.input_ids + + if text_input_ids.shape[-1] > self.tokenizer.model_max_length: + removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + logger.warning( + "The following part of your input was truncated because CLIP can only handle sequences up to" + f" {self.tokenizer.model_max_length} tokens: {removed_text}" + ) + text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] + text_embeddings = self.text_encoder(text_input_ids.to(device))[0] + + # duplicate text embeddings for each generation per prompt, using mps friendly method + bs_embed, seq_len, _ = text_embeddings.shape + text_embeddings = text_embeddings.repeat(1, num_images_per_prompt, 1) + text_embeddings = text_embeddings.view(bs_embed * num_images_per_prompt, seq_len, -1) + + # get unconditional embeddings for classifier free guidance + if do_classifier_free_guidance: + uncond_tokens: List[str] + if negative_prompt is None: + uncond_tokens = [""] * batch_size + elif type(prompt) is not type(negative_prompt): + raise TypeError( + f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" + f" {type(prompt)}." + ) + elif isinstance(negative_prompt, str): + uncond_tokens = [negative_prompt] + elif batch_size != len(negative_prompt): + raise ValueError( + f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" + f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" + " the batch size of `prompt`." + ) + else: + uncond_tokens = negative_prompt + + max_length = text_input_ids.shape[-1] + uncond_input = self.tokenizer( + uncond_tokens, + padding="max_length", + max_length=max_length, + truncation=True, + return_tensors="pt", + ) + uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(device))[0] + + # duplicate unconditional embeddings for each generation per prompt, using mps friendly method + seq_len = uncond_embeddings.shape[1] + uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1) + uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) + + # For classifier free guidance, we need to do two forward passes. 
+ # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) + + return text_embeddings + @torch.no_grad() def __call__( self, @@ -306,79 +390,20 @@ class StableDiffusionPipeline(DiffusionPipeline): device = self._execution_device - # get prompt text embeddings - text_inputs = self.tokenizer( - prompt, - padding="max_length", - max_length=self.tokenizer.model_max_length, - return_tensors="pt", - ) - text_input_ids = text_inputs.input_ids - - if text_input_ids.shape[-1] > self.tokenizer.model_max_length: - removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) - logger.warning( - "The following part of your input was truncated because CLIP can only handle sequences up to" - f" {self.tokenizer.model_max_length} tokens: {removed_text}" - ) - text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] - text_embeddings = self.text_encoder(text_input_ids.to(device))[0] - - # duplicate text embeddings for each generation per prompt, using mps friendly method - bs_embed, seq_len, _ = text_embeddings.shape - text_embeddings = text_embeddings.repeat(1, num_images_per_prompt, 1) - text_embeddings = text_embeddings.view(bs_embed * num_images_per_prompt, seq_len, -1) - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` # corresponds to doing no classifier free guidance. do_classifier_free_guidance = guidance_scale > 1.0 - # get unconditional embeddings for classifier free guidance - if do_classifier_free_guidance: - uncond_tokens: List[str] - if negative_prompt is None: - uncond_tokens = [""] * batch_size - elif type(prompt) is not type(negative_prompt): - raise TypeError( - f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" - f" {type(prompt)}." - ) - elif isinstance(negative_prompt, str): - uncond_tokens = [negative_prompt] - elif batch_size != len(negative_prompt): - raise ValueError( - f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" - f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" - " the batch size of `prompt`." - ) - else: - uncond_tokens = negative_prompt - max_length = text_input_ids.shape[-1] - uncond_input = self.tokenizer( - uncond_tokens, - padding="max_length", - max_length=max_length, - truncation=True, - return_tensors="pt", - ) - uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(device))[0] - - # duplicate unconditional embeddings for each generation per prompt, using mps friendly method - seq_len = uncond_embeddings.shape[1] - uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1) - uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) - - # For classifier free guidance, we need to do two forward passes. 
- # Here we concatenate the unconditional and text embeddings into a single batch - # to avoid doing two forward passes - text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) - - # get the initial random noise unless the user supplied it + text_embeddings = self._encode_prompt( + prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) # Unlike in other pipelines, latents need to be generated in the target device # for 1-to-1 results reproducibility with the CompVis implementation. # However this currently doesn't work in `mps`. + + # get the initial random noise unless the user supplied it latents_shape = (batch_size * num_images_per_prompt, self.unet.in_channels, height // 8, width // 8) latents_dtype = text_embeddings.dtype if latents is None: diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 6bf2babbfe..b4ecf600b1 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -220,6 +220,91 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): """ self.unet.set_use_memory_efficient_attention_xformers(False) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt + def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt): + r""" + Encodes the prompt into text encoder hidden states. + + Args: + prompt (`str` or `list(int)`): + prompt to be encoded + device: (`torch.device`): + torch device + num_images_per_prompt (`int`): + number of images that should be generated per prompt + do_classifier_free_guidance (`bool`): + whether to use classifier free guidance or not + negative_prompt (`str` or `List[str]`): + The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored + if `guidance_scale` is less than `1`). + """ + batch_size = len(prompt) if isinstance(prompt, list) else 1 + + text_inputs = self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + return_tensors="pt", + ) + text_input_ids = text_inputs.input_ids + + if text_input_ids.shape[-1] > self.tokenizer.model_max_length: + removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + logger.warning( + "The following part of your input was truncated because CLIP can only handle sequences up to" + f" {self.tokenizer.model_max_length} tokens: {removed_text}" + ) + text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] + text_embeddings = self.text_encoder(text_input_ids.to(device))[0] + + # duplicate text embeddings for each generation per prompt, using mps friendly method + bs_embed, seq_len, _ = text_embeddings.shape + text_embeddings = text_embeddings.repeat(1, num_images_per_prompt, 1) + text_embeddings = text_embeddings.view(bs_embed * num_images_per_prompt, seq_len, -1) + + # get unconditional embeddings for classifier free guidance + if do_classifier_free_guidance: + uncond_tokens: List[str] + if negative_prompt is None: + uncond_tokens = [""] * batch_size + elif type(prompt) is not type(negative_prompt): + raise TypeError( + f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" + f" {type(prompt)}." 
+ ) + elif isinstance(negative_prompt, str): + uncond_tokens = [negative_prompt] + elif batch_size != len(negative_prompt): + raise ValueError( + f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" + f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" + " the batch size of `prompt`." + ) + else: + uncond_tokens = negative_prompt + + max_length = text_input_ids.shape[-1] + uncond_input = self.tokenizer( + uncond_tokens, + padding="max_length", + max_length=max_length, + truncation=True, + return_tensors="pt", + ) + uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(device))[0] + + # duplicate unconditional embeddings for each generation per prompt, using mps friendly method + seq_len = uncond_embeddings.shape[1] + uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1) + uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) + + # For classifier free guidance, we need to do two forward passes. + # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) + + return text_embeddings + @torch.no_grad() def __call__( self, @@ -319,69 +404,14 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): if isinstance(init_image, PIL.Image.Image): init_image = preprocess(init_image) - # get prompt text embeddings - text_inputs = self.tokenizer( - prompt, - padding="max_length", - max_length=self.tokenizer.model_max_length, - return_tensors="pt", - ) - text_input_ids = text_inputs.input_ids - - if text_input_ids.shape[-1] > self.tokenizer.model_max_length: - removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) - logger.warning( - "The following part of your input was truncated because CLIP can only handle sequences up to" - f" {self.tokenizer.model_max_length} tokens: {removed_text}" - ) - text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] - text_embeddings = self.text_encoder(text_input_ids.to(device))[0] - - # duplicate text embeddings for each generation per prompt, using mps friendly method - bs_embed, seq_len, _ = text_embeddings.shape - text_embeddings = text_embeddings.repeat(1, num_images_per_prompt, 1) - text_embeddings = text_embeddings.view(bs_embed * num_images_per_prompt, seq_len, -1) - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` # corresponds to doing no classifier free guidance. do_classifier_free_guidance = guidance_scale > 1.0 - # get unconditional embeddings for classifier free guidance - if do_classifier_free_guidance: - uncond_tokens: List[str] - if negative_prompt is None: - uncond_tokens = [""] * batch_size - elif type(prompt) is not type(negative_prompt): - raise TypeError( - f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" - f" {type(prompt)}." 
- ) - elif isinstance(negative_prompt, str): - uncond_tokens = [negative_prompt] - elif batch_size != len(negative_prompt): - raise ValueError("The length of `negative_prompt` should be equal to batch_size.") - else: - uncond_tokens = negative_prompt - max_length = text_input_ids.shape[-1] - uncond_input = self.tokenizer( - uncond_tokens, - padding="max_length", - max_length=max_length, - truncation=True, - return_tensors="pt", - ) - uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(device))[0] - - # duplicate unconditional embeddings for each generation per prompt - seq_len = uncond_embeddings.shape[1] - uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1) - uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) - - # For classifier free guidance, we need to do two forward passes. - # Here we concatenate the unconditional and text embeddings into a single batch - # to avoid doing two forward passes - text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) + text_embeddings = self._encode_prompt( + prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) # encode the init image into latents and scale the latents latents_dtype = text_embeddings.dtype diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 5ccdd07bb3..f42325261a 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -220,6 +220,91 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): """ self.unet.set_use_memory_efficient_attention_xformers(False) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt + def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt): + r""" + Encodes the prompt into text encoder hidden states. + + Args: + prompt (`str` or `list(int)`): + prompt to be encoded + device: (`torch.device`): + torch device + num_images_per_prompt (`int`): + number of images that should be generated per prompt + do_classifier_free_guidance (`bool`): + whether to use classifier free guidance or not + negative_prompt (`str` or `List[str]`): + The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored + if `guidance_scale` is less than `1`). 
+ """ + batch_size = len(prompt) if isinstance(prompt, list) else 1 + + text_inputs = self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + return_tensors="pt", + ) + text_input_ids = text_inputs.input_ids + + if text_input_ids.shape[-1] > self.tokenizer.model_max_length: + removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + logger.warning( + "The following part of your input was truncated because CLIP can only handle sequences up to" + f" {self.tokenizer.model_max_length} tokens: {removed_text}" + ) + text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] + text_embeddings = self.text_encoder(text_input_ids.to(device))[0] + + # duplicate text embeddings for each generation per prompt, using mps friendly method + bs_embed, seq_len, _ = text_embeddings.shape + text_embeddings = text_embeddings.repeat(1, num_images_per_prompt, 1) + text_embeddings = text_embeddings.view(bs_embed * num_images_per_prompt, seq_len, -1) + + # get unconditional embeddings for classifier free guidance + if do_classifier_free_guidance: + uncond_tokens: List[str] + if negative_prompt is None: + uncond_tokens = [""] * batch_size + elif type(prompt) is not type(negative_prompt): + raise TypeError( + f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" + f" {type(prompt)}." + ) + elif isinstance(negative_prompt, str): + uncond_tokens = [negative_prompt] + elif batch_size != len(negative_prompt): + raise ValueError( + f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" + f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" + " the batch size of `prompt`." + ) + else: + uncond_tokens = negative_prompt + + max_length = text_input_ids.shape[-1] + uncond_input = self.tokenizer( + uncond_tokens, + padding="max_length", + max_length=max_length, + truncation=True, + return_tensors="pt", + ) + uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(device))[0] + + # duplicate unconditional embeddings for each generation per prompt, using mps friendly method + seq_len = uncond_embeddings.shape[1] + uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1) + uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) + + # For classifier free guidance, we need to do two forward passes. 
+ # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) + + return text_embeddings + @torch.no_grad() def __call__( self, @@ -324,73 +409,14 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): device = self._execution_device - # get prompt text embeddings - text_inputs = self.tokenizer( - prompt, - padding="max_length", - max_length=self.tokenizer.model_max_length, - return_tensors="pt", - ) - text_input_ids = text_inputs.input_ids - - if text_input_ids.shape[-1] > self.tokenizer.model_max_length: - removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) - logger.warning( - "The following part of your input was truncated because CLIP can only handle sequences up to" - f" {self.tokenizer.model_max_length} tokens: {removed_text}" - ) - text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] - text_embeddings = self.text_encoder(text_input_ids.to(device))[0] - - # duplicate text embeddings for each generation per prompt, using mps friendly method - bs_embed, seq_len, _ = text_embeddings.shape - text_embeddings = text_embeddings.repeat(1, num_images_per_prompt, 1) - text_embeddings = text_embeddings.view(bs_embed * num_images_per_prompt, seq_len, -1) - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` # corresponds to doing no classifier free guidance. do_classifier_free_guidance = guidance_scale > 1.0 - # get unconditional embeddings for classifier free guidance - if do_classifier_free_guidance: - uncond_tokens: List[str] - if negative_prompt is None: - uncond_tokens = [""] * batch_size - elif type(prompt) is not type(negative_prompt): - raise TypeError( - f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" - f" {type(prompt)}." - ) - elif isinstance(negative_prompt, str): - uncond_tokens = [negative_prompt] - elif batch_size != len(negative_prompt): - raise ValueError( - f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" - f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" - " the batch size of `prompt`." - ) - else: - uncond_tokens = negative_prompt - max_length = text_input_ids.shape[-1] - uncond_input = self.tokenizer( - uncond_tokens, - padding="max_length", - max_length=max_length, - truncation=True, - return_tensors="pt", - ) - uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(device))[0] - - # duplicate unconditional embeddings for each generation per prompt, using mps friendly method - seq_len = uncond_embeddings.shape[1] - uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1) - uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) - - # For classifier free guidance, we need to do two forward passes. 
- # Here we concatenate the unconditional and text embeddings into a single batch - # to avoid doing two forward passes - text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) + text_embeddings = self._encode_prompt( + prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) # get the initial random noise unless the user supplied it # Unlike in other pipelines, latents need to be generated in the target device diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py index 80d0879526..89d40f7a79 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py @@ -170,6 +170,110 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline): # set slice_size = `None` to disable `set_attention_slice` self.enable_attention_slicing(None) + @property + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device + def _execution_device(self): + r""" + Returns the device on which the pipeline's models will be executed. After calling + `pipeline.enable_sequential_cpu_offload()` the execution device can only be inferred from Accelerate's module + hooks. + """ + if self.device != torch.device("meta") or not hasattr(self.unet, "_hf_hook"): + return self.device + for module in self.unet.modules(): + if ( + hasattr(module, "_hf_hook") + and hasattr(module._hf_hook, "execution_device") + and module._hf_hook.execution_device is not None + ): + return torch.device(module._hf_hook.execution_device) + return self.device + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt + def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt): + r""" + Encodes the prompt into text encoder hidden states. + + Args: + prompt (`str` or `list(int)`): + prompt to be encoded + device: (`torch.device`): + torch device + num_images_per_prompt (`int`): + number of images that should be generated per prompt + do_classifier_free_guidance (`bool`): + whether to use classifier free guidance or not + negative_prompt (`str` or `List[str]`): + The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored + if `guidance_scale` is less than `1`). 
+ """ + batch_size = len(prompt) if isinstance(prompt, list) else 1 + + text_inputs = self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + return_tensors="pt", + ) + text_input_ids = text_inputs.input_ids + + if text_input_ids.shape[-1] > self.tokenizer.model_max_length: + removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + logger.warning( + "The following part of your input was truncated because CLIP can only handle sequences up to" + f" {self.tokenizer.model_max_length} tokens: {removed_text}" + ) + text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] + text_embeddings = self.text_encoder(text_input_ids.to(device))[0] + + # duplicate text embeddings for each generation per prompt, using mps friendly method + bs_embed, seq_len, _ = text_embeddings.shape + text_embeddings = text_embeddings.repeat(1, num_images_per_prompt, 1) + text_embeddings = text_embeddings.view(bs_embed * num_images_per_prompt, seq_len, -1) + + # get unconditional embeddings for classifier free guidance + if do_classifier_free_guidance: + uncond_tokens: List[str] + if negative_prompt is None: + uncond_tokens = [""] * batch_size + elif type(prompt) is not type(negative_prompt): + raise TypeError( + f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" + f" {type(prompt)}." + ) + elif isinstance(negative_prompt, str): + uncond_tokens = [negative_prompt] + elif batch_size != len(negative_prompt): + raise ValueError( + f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" + f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" + " the batch size of `prompt`." + ) + else: + uncond_tokens = negative_prompt + + max_length = text_input_ids.shape[-1] + uncond_input = self.tokenizer( + uncond_tokens, + padding="max_length", + max_length=max_length, + truncation=True, + return_tensors="pt", + ) + uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(device))[0] + + # duplicate unconditional embeddings for each generation per prompt, using mps friendly method + seq_len = uncond_embeddings.shape[1] + uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1) + uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) + + # For classifier free guidance, we need to do two forward passes. + # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) + + return text_embeddings + @torch.no_grad() def __call__( self, @@ -266,78 +370,23 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline): f" {type(callback_steps)}." 
) + device = self._execution_device + # set timesteps self.scheduler.set_timesteps(num_inference_steps) - # get prompt text embeddings - text_inputs = self.tokenizer( - prompt, - padding="max_length", - max_length=self.tokenizer.model_max_length, - return_tensors="pt", - ) - text_input_ids = text_inputs.input_ids - - if text_input_ids.shape[-1] > self.tokenizer.model_max_length: - removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) - logger.warning( - "The following part of your input was truncated because CLIP can only handle sequences up to" - f" {self.tokenizer.model_max_length} tokens: {removed_text}" - ) - text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] - text_embeddings = self.text_encoder(text_input_ids.to(self.device))[0] - - # duplicate text embeddings for each generation per prompt - text_embeddings = text_embeddings.repeat_interleave(num_images_per_prompt, dim=0) + # preprocess image + if not isinstance(init_image, torch.FloatTensor): + init_image = preprocess_image(init_image) # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` # corresponds to doing no classifier free guidance. do_classifier_free_guidance = guidance_scale > 1.0 - # get unconditional embeddings for classifier free guidance - if do_classifier_free_guidance: - uncond_tokens: List[str] - if negative_prompt is None: - uncond_tokens = [""] * batch_size - elif type(prompt) is not type(negative_prompt): - raise TypeError( - f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" - f" {type(prompt)}." - ) - elif isinstance(negative_prompt, str): - uncond_tokens = [negative_prompt] - elif batch_size != len(negative_prompt): - raise ValueError( - f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" - f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" - " the batch size of `prompt`." - ) - else: - uncond_tokens = negative_prompt - max_length = text_input_ids.shape[-1] - uncond_input = self.tokenizer( - uncond_tokens, - padding="max_length", - max_length=max_length, - truncation=True, - return_tensors="pt", - ) - uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0] - - # duplicate unconditional embeddings for each generation per prompt - seq_len = uncond_embeddings.shape[1] - uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt, 1) - uncond_embeddings = uncond_embeddings.view(batch_size * num_images_per_prompt, seq_len, -1) - - # For classifier free guidance, we need to do two forward passes. 
- # Here we concatenate the unconditional and text embeddings into a single batch - # to avoid doing two forward passes - text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) - - # preprocess image - if not isinstance(init_image, torch.FloatTensor): - init_image = preprocess_image(init_image) + text_embeddings = self._encode_prompt( + prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) # encode the init image into latents and scale the latents latents_dtype = text_embeddings.dtype From 7d0c2729399c3ce019a30fc175b973e892fd5fc3 Mon Sep 17 00:00:00 2001 From: Anton Lozhkov Date: Wed, 9 Nov 2022 23:00:23 +0100 Subject: [PATCH 75/88] Match the generator device to the pipeline for DDPM and DDIM (#1222) * Match the generator device to the pipeline for DDPM and DDIM * style * fix * update values * fix fast tests * trigger slow tests * deprecate * last value fixes * mps fixes --- .../train_unconditional.py | 11 ++- src/diffusers/pipelines/ddim/pipeline_ddim.py | 29 ++++--- src/diffusers/pipelines/ddpm/pipeline_ddpm.py | 15 +++- src/diffusers/schedulers/scheduling_ddpm.py | 14 +++- tests/pipelines/ddim/test_ddim.py | 28 +++---- tests/pipelines/ddpm/test_ddpm.py | 28 +++---- .../test_latent_diffusion_superresolution.py | 17 ++-- tests/test_pipelines.py | 83 ++++++++----------- 8 files changed, 115 insertions(+), 110 deletions(-) diff --git a/examples/unconditional_image_generation/train_unconditional.py b/examples/unconditional_image_generation/train_unconditional.py index 0eadecbd30..54a94d98b5 100644 --- a/examples/unconditional_image_generation/train_unconditional.py +++ b/examples/unconditional_image_generation/train_unconditional.py @@ -11,10 +11,12 @@ import torch.nn.functional as F from accelerate import Accelerator from accelerate.logging import get_logger from datasets import load_dataset -from diffusers import DDPMPipeline, DDPMScheduler, UNet2DModel +from diffusers import DDPMPipeline, DDPMScheduler, UNet2DModel, __version__ from diffusers.optimization import get_scheduler from diffusers.training_utils import EMAModel +from diffusers.utils import deprecate from huggingface_hub import HfFolder, Repository, whoami +from packaging import version from torchvision.transforms import ( CenterCrop, Compose, @@ -28,6 +30,7 @@ from tqdm.auto import tqdm logger = get_logger(__name__) +diffusers_version = version.parse(version.parse(__version__).base_version) def _extract_into_tensor(arr, timesteps, broadcast_shape): @@ -406,7 +409,11 @@ def main(args): scheduler=noise_scheduler, ) - generator = torch.manual_seed(0) + deprecate("todo: remove this check", "0.10.0", "when the most used version is >= 0.8.0") + if diffusers_version < version.parse("0.8.0"): + generator = torch.manual_seed(0) + else: + generator = torch.Generator(device=pipeline.device).manual_seed(0) # run pipeline in inference (sample random noise and denoise) images = pipeline( generator=generator, diff --git a/src/diffusers/pipelines/ddim/pipeline_ddim.py b/src/diffusers/pipelines/ddim/pipeline_ddim.py index aab6e68613..d0bca8038e 100644 --- a/src/diffusers/pipelines/ddim/pipeline_ddim.py +++ b/src/diffusers/pipelines/ddim/pipeline_ddim.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
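The usage-side convention this patch moves toward (already applied in `train_unconditional.py` above and in the updated tests) is to seed the `generator` on the same device the pipeline runs on; the `pipeline_ddim.py` and `pipeline_ddpm.py` changes that follow deprecate a mismatched CPU generator and fall back to `generator=None`. A minimal sketch of that calling convention follows — the checkpoint and step count are illustrative only, and `torch.Generator` still has no `"mps"` support, which is why the CPU-seeded fallback is kept for that backend:

```python
import torch
from diffusers import DDIMPipeline

# Pick the execution device; torch.Generator does not support "mps",
# so the pipelines and tests keep a CPU-seeded fallback for that case.
device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32")
pipe = pipe.to(device)

# Seed the generator on the same device the pipeline runs on. A CPU generator
# passed to a CUDA pipeline now hits the deprecation branch below and is
# replaced with `generator=None`, so the run would no longer be reproducible.
generator = torch.Generator(device=device).manual_seed(0)

image = pipe(generator=generator, num_inference_steps=50, eta=0.0, output_type="numpy").images[0]
```

On `mps`, the updated tests instead keep `torch.manual_seed(0)`, since a device-local generator is not available for that backend.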
-import inspect from typing import Optional, Tuple, Union import torch from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput +from ...utils import deprecate class DDIMPipeline(DiffusionPipeline): @@ -75,24 +75,29 @@ class DDIMPipeline(DiffusionPipeline): generated images. """ + if generator is not None and generator.device.type != self.device.type and self.device.type != "mps": + message = ( + f"The `generator` device is `{generator.device}` and does not match the pipeline " + f"device `{self.device}`, so the `generator` will be set to `None`. " + f'Please use `generator=torch.Generator(device="{self.device}")` instead.' + ) + deprecate( + "generator.device == 'cpu'", + "0.11.0", + message, + ) + generator = None + # Sample gaussian noise to begin loop image = torch.randn( (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), generator=generator, + device=self.device, ) - image = image.to(self.device) # set step values self.scheduler.set_timesteps(num_inference_steps) - # Ignore use_clipped_model_output if the scheduler doesn't accept this argument - accepts_use_clipped_model_output = "use_clipped_model_output" in set( - inspect.signature(self.scheduler.step).parameters.keys() - ) - extra_kwargs = {} - if accepts_use_clipped_model_output: - extra_kwargs["use_clipped_model_output"] = use_clipped_model_output - for t in self.progress_bar(self.scheduler.timesteps): # 1. predict noise model_output model_output = self.unet(image, t).sample @@ -100,7 +105,9 @@ class DDIMPipeline(DiffusionPipeline): # 2. predict previous mean of image x_t-1 and add variance depending on eta # eta corresponds to η in paper and should be between [0, 1] # do x_t -> x_t-1 - image = self.scheduler.step(model_output, t, image, eta, **extra_kwargs).prev_sample + image = self.scheduler.step( + model_output, t, image, eta=eta, use_clipped_model_output=use_clipped_model_output, generator=generator + ).prev_sample image = (image / 2 + 0.5).clamp(0, 1) image = image.cpu().permute(0, 2, 3, 1).numpy() diff --git a/src/diffusers/pipelines/ddpm/pipeline_ddpm.py b/src/diffusers/pipelines/ddpm/pipeline_ddpm.py index 3665c68efe..d145c5d518 100644 --- a/src/diffusers/pipelines/ddpm/pipeline_ddpm.py +++ b/src/diffusers/pipelines/ddpm/pipeline_ddpm.py @@ -80,12 +80,25 @@ class DDPMPipeline(DiffusionPipeline): new_config["predict_epsilon"] = predict_epsilon self.scheduler._internal_dict = FrozenDict(new_config) + if generator is not None and generator.device.type != self.device.type and self.device.type != "mps": + message = ( + f"The `generator` device is `{generator.device}` and does not match the pipeline " + f"device `{self.device}`, so the `generator` will be set to `None`. " + f'Please use `torch.Generator(device="{self.device}")` instead.' + ) + deprecate( + "generator.device == 'cpu'", + "0.11.0", + message, + ) + generator = None + # Sample gaussian noise to begin loop image = torch.randn( (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), generator=generator, + device=self.device, ) - image = image.to(self.device) # set step values self.scheduler.set_timesteps(num_inference_steps) diff --git a/src/diffusers/schedulers/scheduling_ddpm.py b/src/diffusers/schedulers/scheduling_ddpm.py index 08a73119e5..a19d91879c 100644 --- a/src/diffusers/schedulers/scheduling_ddpm.py +++ b/src/diffusers/schedulers/scheduling_ddpm.py @@ -292,10 +292,16 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): # 6. 
Add noise variance = 0 if t > 0: - noise = torch.randn( - model_output.size(), dtype=model_output.dtype, layout=model_output.layout, generator=generator - ).to(model_output.device) - variance = (self._get_variance(t, predicted_variance=predicted_variance) ** 0.5) * noise + device = model_output.device + if device.type == "mps": + # randn does not work reproducibly on mps + variance_noise = torch.randn(model_output.shape, dtype=model_output.dtype, generator=generator) + variance_noise = variance_noise.to(device) + else: + variance_noise = torch.randn( + model_output.shape, generator=generator, device=device, dtype=model_output.dtype + ) + variance = (self._get_variance(t, predicted_variance=predicted_variance) ** 0.5) * variance_noise pred_prev_sample = pred_prev_sample + variance diff --git a/tests/pipelines/ddim/test_ddim.py b/tests/pipelines/ddim/test_ddim.py index 4445fe7fee..81c49912be 100644 --- a/tests/pipelines/ddim/test_ddim.py +++ b/tests/pipelines/ddim/test_ddim.py @@ -19,7 +19,7 @@ import numpy as np import torch from diffusers import DDIMPipeline, DDIMScheduler, UNet2DModel -from diffusers.utils.testing_utils import require_torch, slow, torch_device +from diffusers.utils.testing_utils import require_torch_gpu, slow, torch_device from ...test_pipelines_common import PipelineTesterMixin @@ -43,21 +43,18 @@ class DDIMPipelineFastTests(PipelineTesterMixin, unittest.TestCase): return model def test_inference(self): + device = "cpu" unet = self.dummy_uncond_unet scheduler = DDIMScheduler() ddpm = DDIMPipeline(unet=unet, scheduler=scheduler) - ddpm.to(torch_device) + ddpm.to(device) ddpm.set_progress_bar_config(disable=None) - # Warmup pass when using mps (see #372) - if torch_device == "mps": - _ = ddpm(num_inference_steps=1) - - generator = torch.manual_seed(0) + generator = torch.Generator(device=device).manual_seed(0) image = ddpm(generator=generator, num_inference_steps=2, output_type="numpy").images - generator = torch.manual_seed(0) + generator = torch.Generator(device=device).manual_seed(0) image_from_tuple = ddpm(generator=generator, num_inference_steps=2, output_type="numpy", return_dict=False)[0] image_slice = image[0, -3:, -3:, -1] @@ -67,13 +64,12 @@ class DDIMPipelineFastTests(PipelineTesterMixin, unittest.TestCase): expected_slice = np.array( [1.000e00, 5.717e-01, 4.717e-01, 1.000e00, 0.000e00, 1.000e00, 3.000e-04, 0.000e00, 9.000e-04] ) - tolerance = 1e-2 if torch_device != "mps" else 3e-2 - assert np.abs(image_slice.flatten() - expected_slice).max() < tolerance - assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < tolerance + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 @slow -@require_torch +@require_torch_gpu class DDIMPipelineIntegrationTests(unittest.TestCase): def test_inference_ema_bedroom(self): model_id = "google/ddpm-ema-bedroom-256" @@ -85,13 +81,13 @@ class DDIMPipelineIntegrationTests(unittest.TestCase): ddpm.to(torch_device) ddpm.set_progress_bar_config(disable=None) - generator = torch.manual_seed(0) + generator = torch.Generator(device=torch_device).manual_seed(0) image = ddpm(generator=generator, output_type="numpy").images image_slice = image[0, -3:, -3:, -1] assert image.shape == (1, 256, 256, 3) - expected_slice = np.array([0.00605, 0.0201, 0.0344, 0.00235, 0.00185, 0.00025, 0.00215, 0.0, 0.00685]) + expected_slice = np.array([0.1546, 0.1561, 0.1595, 0.1564, 0.1569, 0.1585, 0.1554, 0.1550, 0.1575]) assert 
np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 def test_inference_cifar10(self): @@ -104,11 +100,11 @@ class DDIMPipelineIntegrationTests(unittest.TestCase): ddim.to(torch_device) ddim.set_progress_bar_config(disable=None) - generator = torch.manual_seed(0) + generator = torch.Generator(device=torch_device).manual_seed(0) image = ddim(generator=generator, eta=0.0, output_type="numpy").images image_slice = image[0, -3:, -3:, -1] assert image.shape == (1, 32, 32, 3) - expected_slice = np.array([0.17235, 0.16175, 0.16005, 0.16255, 0.1497, 0.1513, 0.15045, 0.1442, 0.1453]) + expected_slice = np.array([0.2060, 0.2042, 0.2022, 0.2193, 0.2146, 0.2110, 0.2471, 0.2446, 0.2388]) assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 diff --git a/tests/pipelines/ddpm/test_ddpm.py b/tests/pipelines/ddpm/test_ddpm.py index a09f77d124..e16e0d6e8c 100644 --- a/tests/pipelines/ddpm/test_ddpm.py +++ b/tests/pipelines/ddpm/test_ddpm.py @@ -20,7 +20,7 @@ import torch from diffusers import DDPMPipeline, DDPMScheduler, UNet2DModel from diffusers.utils import deprecate -from diffusers.utils.testing_utils import require_torch, slow, torch_device +from diffusers.utils.testing_utils import require_torch_gpu, slow, torch_device from ...test_pipelines_common import PipelineTesterMixin @@ -44,21 +44,18 @@ class DDPMPipelineFastTests(PipelineTesterMixin, unittest.TestCase): return model def test_inference(self): + device = "cpu" unet = self.dummy_uncond_unet scheduler = DDPMScheduler() ddpm = DDPMPipeline(unet=unet, scheduler=scheduler) - ddpm.to(torch_device) + ddpm.to(device) ddpm.set_progress_bar_config(disable=None) - # Warmup pass when using mps (see #372) - if torch_device == "mps": - _ = ddpm(num_inference_steps=1) - - generator = torch.manual_seed(0) + generator = torch.Generator(device=device).manual_seed(0) image = ddpm(generator=generator, num_inference_steps=2, output_type="numpy").images - generator = torch.manual_seed(0) + generator = torch.Generator(device=device).manual_seed(0) image_from_tuple = ddpm(generator=generator, num_inference_steps=2, output_type="numpy", return_dict=False)[0] image_slice = image[0, -3:, -3:, -1] @@ -68,9 +65,8 @@ class DDPMPipelineFastTests(PipelineTesterMixin, unittest.TestCase): expected_slice = np.array( [5.589e-01, 7.089e-01, 2.632e-01, 6.841e-01, 1.000e-04, 9.999e-01, 1.973e-01, 1.000e-04, 8.010e-02] ) - tolerance = 1e-2 if torch_device != "mps" else 3e-2 - assert np.abs(image_slice.flatten() - expected_slice).max() < tolerance - assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < tolerance + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 def test_inference_predict_epsilon(self): deprecate("remove this test", "0.10.0", "remove") @@ -85,10 +81,10 @@ class DDPMPipelineFastTests(PipelineTesterMixin, unittest.TestCase): if torch_device == "mps": _ = ddpm(num_inference_steps=1) - generator = torch.manual_seed(0) + generator = torch.Generator(device=torch_device).manual_seed(0) image = ddpm(generator=generator, num_inference_steps=2, output_type="numpy").images - generator = torch.manual_seed(0) + generator = torch.Generator(device=torch_device).manual_seed(0) image_eps = ddpm(generator=generator, num_inference_steps=2, output_type="numpy", predict_epsilon=False)[0] image_slice = image[0, -3:, -3:, -1] @@ -100,7 +96,7 @@ class DDPMPipelineFastTests(PipelineTesterMixin, unittest.TestCase): @slow -@require_torch +@require_torch_gpu 
class DDPMPipelineIntegrationTests(unittest.TestCase): def test_inference_cifar10(self): model_id = "google/ddpm-cifar10-32" @@ -112,11 +108,11 @@ class DDPMPipelineIntegrationTests(unittest.TestCase): ddpm.to(torch_device) ddpm.set_progress_bar_config(disable=None) - generator = torch.manual_seed(0) + generator = torch.Generator(device=torch_device).manual_seed(0) image = ddpm(generator=generator, output_type="numpy").images image_slice = image[0, -3:, -3:, -1] assert image.shape == (1, 32, 32, 3) - expected_slice = np.array([0.41995, 0.35885, 0.19385, 0.38475, 0.3382, 0.2647, 0.41545, 0.3582, 0.33845]) + expected_slice = np.array([0.4454, 0.2025, 0.0315, 0.3023, 0.2575, 0.1031, 0.0953, 0.1604, 0.2020]) assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 diff --git a/tests/pipelines/latent_diffusion/test_latent_diffusion_superresolution.py b/tests/pipelines/latent_diffusion/test_latent_diffusion_superresolution.py index f5ec56d1bd..f402d2f2a7 100644 --- a/tests/pipelines/latent_diffusion/test_latent_diffusion_superresolution.py +++ b/tests/pipelines/latent_diffusion/test_latent_diffusion_superresolution.py @@ -68,30 +68,25 @@ class LDMSuperResolutionPipelineFastTests(PipelineTesterMixin, unittest.TestCase return model def test_inference_superresolution(self): + device = "cpu" unet = self.dummy_uncond_unet scheduler = DDIMScheduler() vqvae = self.dummy_vq_model ldm = LDMSuperResolutionPipeline(unet=unet, vqvae=vqvae, scheduler=scheduler) - ldm.to(torch_device) + ldm.to(device) ldm.set_progress_bar_config(disable=None) - init_image = self.dummy_image.to(torch_device) + init_image = self.dummy_image.to(device) - # Warmup pass when using mps (see #372) - if torch_device == "mps": - generator = torch.manual_seed(0) - _ = ldm(init_image, generator=generator, num_inference_steps=1, output_type="numpy").images - - generator = torch.manual_seed(0) + generator = torch.Generator(device=device).manual_seed(0) image = ldm(init_image, generator=generator, num_inference_steps=2, output_type="numpy").images image_slice = image[0, -3:, -3:, -1] assert image.shape == (1, 64, 64, 3) - expected_slice = np.array([0.8634, 0.8186, 0.6416, 0.6846, 0.4427, 0.5676, 0.4679, 0.6247, 0.5176]) - tolerance = 1e-2 if torch_device != "mps" else 3e-2 - assert np.abs(image_slice.flatten() - expected_slice).max() < tolerance + expected_slice = np.array([0.8678, 0.8245, 0.6381, 0.6830, 0.4385, 0.5599, 0.4641, 0.6201, 0.5150]) + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 @slow diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index da5bd3c244..775ab689bd 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -42,7 +42,6 @@ from diffusers.pipeline_utils import DiffusionPipeline from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from diffusers.utils import CONFIG_NAME, WEIGHTS_NAME, floats_tensor, slow, torch_device from diffusers.utils.testing_utils import CaptureLogger, get_tests_dir, require_torch_gpu -from parameterized import parameterized from PIL import Image from transformers import CLIPFeatureExtractor, CLIPModel, CLIPTextConfig, CLIPTextModel, CLIPTokenizer @@ -93,11 +92,17 @@ class DownloadTests(unittest.TestCase): pipe = StableDiffusionPipeline.from_pretrained( "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None ) - generator = torch.Generator(device=torch_device).manual_seed(0) + pipe = pipe.to(torch_device) + if torch_device == "mps": + # device type MPS is not supported for torch.Generator() api. 
+ generator = torch.manual_seed(0) + else: + generator = torch.Generator(device=torch_device).manual_seed(0) out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images pipe_2 = StableDiffusionPipeline.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch") - generator_2 = torch.Generator(device=torch_device).manual_seed(0) + pipe_2 = pipe_2.to(torch_device) + generator_2 = generator.manual_seed(0) out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator_2, output_type="numpy").images assert np.max(np.abs(out - out_2)) < 1e-3 @@ -107,13 +112,19 @@ class DownloadTests(unittest.TestCase): pipe = StableDiffusionPipeline.from_pretrained( "hf-internal-testing/tiny-stable-diffusion-torch", safety_checker=None ) - generator = torch.Generator(device=torch_device).manual_seed(0) + pipe = pipe.to(torch_device) + if torch_device == "mps": + # device type MPS is not supported for torch.Generator() api. + generator = torch.manual_seed(0) + else: + generator = torch.Generator(device=torch_device).manual_seed(0) out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images with tempfile.TemporaryDirectory() as tmpdirname: pipe.save_pretrained(tmpdirname) pipe_2 = StableDiffusionPipeline.from_pretrained(tmpdirname, safety_checker=None) - generator_2 = torch.Generator(device=torch_device).manual_seed(0) + pipe_2 = pipe_2.to(torch_device) + generator_2 = generator.manual_seed(0) out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator_2, output_type="numpy").images assert np.max(np.abs(out - out_2)) < 1e-3 @@ -121,13 +132,19 @@ class DownloadTests(unittest.TestCase): def test_load_no_safety_checker_default_locally(self): prompt = "hello" pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch") - generator = torch.Generator(device=torch_device).manual_seed(0) + pipe = pipe.to(torch_device) + if torch_device == "mps": + # device type MPS is not supported for torch.Generator() api. 
+ generator = torch.manual_seed(0) + else: + generator = torch.Generator(device=torch_device).manual_seed(0) out = pipe(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images with tempfile.TemporaryDirectory() as tmpdirname: pipe.save_pretrained(tmpdirname) pipe_2 = StableDiffusionPipeline.from_pretrained(tmpdirname) - generator_2 = torch.Generator(device=torch_device).manual_seed(0) + pipe_2 = pipe_2.to(torch_device) + generator_2 = generator.manual_seed(0) out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator_2, output_type="numpy").images assert np.max(np.abs(out - out_2)) < 1e-3 @@ -431,7 +448,7 @@ class PipelineSlowTests(unittest.TestCase): new_ddpm = DDPMPipeline.from_pretrained(tmpdirname) new_ddpm.to(torch_device) - generator = torch.manual_seed(0) + generator = torch.Generator(device=torch_device).manual_seed(0) image = ddpm(generator=generator, output_type="numpy").images generator = generator.manual_seed(0) @@ -452,7 +469,7 @@ class PipelineSlowTests(unittest.TestCase): ddpm_from_hub = ddpm_from_hub.to(torch_device) ddpm_from_hub.set_progress_bar_config(disable=None) - generator = torch.manual_seed(0) + generator = torch.Generator(device=torch_device).manual_seed(0) image = ddpm(generator=generator, output_type="numpy").images generator = generator.manual_seed(0) @@ -475,7 +492,7 @@ class PipelineSlowTests(unittest.TestCase): ddpm_from_hub = ddpm_from_hub.to(torch_device) ddpm_from_hub_custom_model.set_progress_bar_config(disable=None) - generator = torch.manual_seed(0) + generator = torch.Generator(device=torch_device).manual_seed(0) image = ddpm_from_hub_custom_model(generator=generator, output_type="numpy").images generator = generator.manual_seed(0) @@ -491,7 +508,7 @@ class PipelineSlowTests(unittest.TestCase): pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) - generator = torch.manual_seed(0) + generator = torch.Generator(device=torch_device).manual_seed(0) images = pipe(generator=generator, output_type="numpy").images assert images.shape == (1, 32, 32, 3) assert isinstance(images, np.ndarray) @@ -506,40 +523,8 @@ class PipelineSlowTests(unittest.TestCase): assert isinstance(images, list) assert isinstance(images[0], PIL.Image.Image) - # Make sure the test passes for different values of random seed - @parameterized.expand([(0,), (4,)]) - def test_ddpm_ddim_equality(self, seed): - model_id = "google/ddpm-cifar10-32" - - unet = UNet2DModel.from_pretrained(model_id) - ddpm_scheduler = DDPMScheduler() - ddim_scheduler = DDIMScheduler() - - ddpm = DDPMPipeline(unet=unet, scheduler=ddpm_scheduler) - ddpm.to(torch_device) - ddpm.set_progress_bar_config(disable=None) - ddim = DDIMPipeline(unet=unet, scheduler=ddim_scheduler) - ddim.to(torch_device) - ddim.set_progress_bar_config(disable=None) - - generator = torch.manual_seed(seed) - ddpm_image = ddpm(generator=generator, output_type="numpy").images - - generator = torch.manual_seed(seed) - ddim_image = ddim( - generator=generator, - num_inference_steps=1000, - eta=1.0, - output_type="numpy", - use_clipped_model_output=True, # Need this to make DDIM match DDPM - ).images - - # the values aren't exactly equal, but the images look the same visually - assert np.abs(ddpm_image - ddim_image).max() < 1e-1 - - # Make sure the test passes for different values of random seed - @parameterized.expand([(0,), (4,)]) - def test_ddpm_ddim_equality_batched(self, seed): + def test_ddpm_ddim_equality_batched(self): + seed = 0 model_id = "google/ddpm-cifar10-32" unet = 
UNet2DModel.from_pretrained(model_id) @@ -554,12 +539,12 @@ class PipelineSlowTests(unittest.TestCase): ddim.to(torch_device) ddim.set_progress_bar_config(disable=None) - generator = torch.manual_seed(seed) - ddpm_images = ddpm(batch_size=4, generator=generator, output_type="numpy").images + generator = torch.Generator(device=torch_device).manual_seed(seed) + ddpm_images = ddpm(batch_size=2, generator=generator, output_type="numpy").images - generator = torch.manual_seed(seed) + generator = torch.Generator(device=torch_device).manual_seed(seed) ddim_images = ddim( - batch_size=4, + batch_size=2, generator=generator, num_inference_steps=1000, eta=1.0, From 187de44352ce23acf00a9204a05a8a308aab7003 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 9 Nov 2022 22:18:14 +0000 Subject: [PATCH 76/88] Fix device on save/load tests --- tests/test_pipelines.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index 775ab689bd..4559d713ed 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -102,8 +102,12 @@ class DownloadTests(unittest.TestCase): pipe_2 = StableDiffusionPipeline.from_pretrained("hf-internal-testing/tiny-stable-diffusion-torch") pipe_2 = pipe_2.to(torch_device) - generator_2 = generator.manual_seed(0) - out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator_2, output_type="numpy").images + if torch_device == "mps": + # device type MPS is not supported for torch.Generator() api. + generator = torch.manual_seed(0) + else: + generator = torch.Generator(device=torch_device).manual_seed(0) + out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images assert np.max(np.abs(out - out_2)) < 1e-3 @@ -124,8 +128,14 @@ class DownloadTests(unittest.TestCase): pipe.save_pretrained(tmpdirname) pipe_2 = StableDiffusionPipeline.from_pretrained(tmpdirname, safety_checker=None) pipe_2 = pipe_2.to(torch_device) - generator_2 = generator.manual_seed(0) - out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator_2, output_type="numpy").images + + if torch_device == "mps": + # device type MPS is not supported for torch.Generator() api. + generator = torch.manual_seed(0) + else: + generator = torch.Generator(device=torch_device).manual_seed(0) + + out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images assert np.max(np.abs(out - out_2)) < 1e-3 @@ -144,8 +154,14 @@ class DownloadTests(unittest.TestCase): pipe.save_pretrained(tmpdirname) pipe_2 = StableDiffusionPipeline.from_pretrained(tmpdirname) pipe_2 = pipe_2.to(torch_device) - generator_2 = generator.manual_seed(0) - out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator_2, output_type="numpy").images + + if torch_device == "mps": + # device type MPS is not supported for torch.Generator() api. + generator = torch.manual_seed(0) + else: + generator = torch.Generator(device=torch_device).manual_seed(0) + + out_2 = pipe_2(prompt, num_inference_steps=2, generator=generator, output_type="numpy").images assert np.max(np.abs(out - out_2)) < 1e-3 From 0feb21a18c44cfbf76a916afead986f04b339292 Mon Sep 17 00:00:00 2001 From: Anton Lozhkov Date: Thu, 10 Nov 2022 00:09:22 +0100 Subject: [PATCH 77/88] [Tests] Fix mps+generator fast tests (#1230) * [Tests] Fix mps+generator fast tests * mps for Euler * retry * warmup issue again? 
* fix reproducible initial noise * Revert "fix reproducible initial noise" This reverts commit f300d05cb9782ed320064a0c58577a32d4139e6d. * fix reproducible initial noise * fix device --- .github/workflows/pr_tests.yml | 2 +- src/diffusers/pipelines/ddim/pipeline_ddim.py | 14 +++++----- src/diffusers/pipelines/ddpm/pipeline_ddpm.py | 14 +++++----- tests/pipelines/ddpm/test_ddpm.py | 8 ++++-- tests/test_scheduler.py | 26 +++++++++++++++---- 5 files changed, 44 insertions(+), 20 deletions(-) diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml index c978efe3b7..dc1c482aa0 100644 --- a/.github/workflows/pr_tests.yml +++ b/.github/workflows/pr_tests.yml @@ -136,7 +136,7 @@ jobs: - name: Run fast PyTorch tests on M1 (MPS) shell: arch -arch arm64 bash {0} run: | - ${CONDA_RUN} python -m pytest -n 1 -s -v --make-reports=tests_torch_mps tests/ + ${CONDA_RUN} python -m pytest -n 0 -s -v --make-reports=tests_torch_mps tests/ - name: Failure short reports if: ${{ failure() }} diff --git a/src/diffusers/pipelines/ddim/pipeline_ddim.py b/src/diffusers/pipelines/ddim/pipeline_ddim.py index d0bca8038e..6db6298329 100644 --- a/src/diffusers/pipelines/ddim/pipeline_ddim.py +++ b/src/diffusers/pipelines/ddim/pipeline_ddim.py @@ -78,7 +78,7 @@ class DDIMPipeline(DiffusionPipeline): if generator is not None and generator.device.type != self.device.type and self.device.type != "mps": message = ( f"The `generator` device is `{generator.device}` and does not match the pipeline " - f"device `{self.device}`, so the `generator` will be set to `None`. " + f"device `{self.device}`, so the `generator` will be ignored. " f'Please use `generator=torch.Generator(device="{self.device}")` instead.' ) deprecate( @@ -89,11 +89,13 @@ class DDIMPipeline(DiffusionPipeline): generator = None # Sample gaussian noise to begin loop - image = torch.randn( - (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), - generator=generator, - device=self.device, - ) + image_shape = (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size) + if self.device.type == "mps": + # randn does not work reproducibly on mps + image = torch.randn(image_shape, generator=generator) + image = image.to(self.device) + else: + image = torch.randn(image_shape, generator=generator, device=self.device) # set step values self.scheduler.set_timesteps(num_inference_steps) diff --git a/src/diffusers/pipelines/ddpm/pipeline_ddpm.py b/src/diffusers/pipelines/ddpm/pipeline_ddpm.py index d145c5d518..b7194664f4 100644 --- a/src/diffusers/pipelines/ddpm/pipeline_ddpm.py +++ b/src/diffusers/pipelines/ddpm/pipeline_ddpm.py @@ -83,7 +83,7 @@ class DDPMPipeline(DiffusionPipeline): if generator is not None and generator.device.type != self.device.type and self.device.type != "mps": message = ( f"The `generator` device is `{generator.device}` and does not match the pipeline " - f"device `{self.device}`, so the `generator` will be set to `None`. " + f"device `{self.device}`, so the `generator` will be ignored. " f'Please use `torch.Generator(device="{self.device}")` instead.' 
) deprecate( @@ -94,11 +94,13 @@ class DDPMPipeline(DiffusionPipeline): generator = None # Sample gaussian noise to begin loop - image = torch.randn( - (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), - generator=generator, - device=self.device, - ) + image_shape = (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size) + if self.device.type == "mps": + # randn does not work reproducibly on mps + image = torch.randn(image_shape, generator=generator) + image = image.to(self.device) + else: + image = torch.randn(image_shape, generator=generator, device=self.device) # set step values self.scheduler.set_timesteps(num_inference_steps) diff --git a/tests/pipelines/ddpm/test_ddpm.py b/tests/pipelines/ddpm/test_ddpm.py index e16e0d6e8c..14bc094697 100644 --- a/tests/pipelines/ddpm/test_ddpm.py +++ b/tests/pipelines/ddpm/test_ddpm.py @@ -81,10 +81,14 @@ class DDPMPipelineFastTests(PipelineTesterMixin, unittest.TestCase): if torch_device == "mps": _ = ddpm(num_inference_steps=1) - generator = torch.Generator(device=torch_device).manual_seed(0) + if torch_device == "mps": + # device type MPS is not supported for torch.Generator() api. + generator = torch.manual_seed(0) + else: + generator = torch.Generator(device=torch_device).manual_seed(0) image = ddpm(generator=generator, num_inference_steps=2, output_type="numpy").images - generator = torch.Generator(device=torch_device).manual_seed(0) + generator = generator.manual_seed(0) image_eps = ddpm(generator=generator, num_inference_steps=2, output_type="numpy", predict_epsilon=False)[0] image_slice = image[0, -3:, -3:, -1] diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index ab52171511..a9770f0a54 100755 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -1281,7 +1281,11 @@ class EulerDiscreteSchedulerTest(SchedulerCommonTest): scheduler.set_timesteps(self.num_inference_steps) - generator = torch.Generator(torch_device).manual_seed(0) + if torch_device == "mps": + # device type MPS is not supported for torch.Generator() api. + generator = torch.manual_seed(0) + else: + generator = torch.Generator(device=torch_device).manual_seed(0) model = self.dummy_model() sample = self.dummy_sample_deter * scheduler.init_noise_sigma @@ -1308,7 +1312,11 @@ class EulerDiscreteSchedulerTest(SchedulerCommonTest): scheduler.set_timesteps(self.num_inference_steps, device=torch_device) - generator = torch.Generator(torch_device).manual_seed(0) + if torch_device == "mps": + # device type MPS is not supported for torch.Generator() api. + generator = torch.manual_seed(0) + else: + generator = torch.Generator(device=torch_device).manual_seed(0) model = self.dummy_model() sample = self.dummy_sample_deter * scheduler.init_noise_sigma @@ -1364,7 +1372,11 @@ class EulerAncestralDiscreteSchedulerTest(SchedulerCommonTest): scheduler.set_timesteps(self.num_inference_steps) - generator = torch.Generator(device=torch_device).manual_seed(0) + if torch_device == "mps": + # device type MPS is not supported for torch.Generator() api. 
+ generator = torch.manual_seed(0) + else: + generator = torch.Generator(device=torch_device).manual_seed(0) model = self.dummy_model() sample = self.dummy_sample_deter * scheduler.init_noise_sigma @@ -1381,7 +1393,7 @@ class EulerAncestralDiscreteSchedulerTest(SchedulerCommonTest): result_sum = torch.sum(torch.abs(sample)) result_mean = torch.mean(torch.abs(sample)) - if str(torch_device).startswith("cpu"): + if torch_device in ["cpu", "mps"]: assert abs(result_sum.item() - 152.3192) < 1e-2 assert abs(result_mean.item() - 0.1983) < 1e-3 else: @@ -1396,7 +1408,11 @@ class EulerAncestralDiscreteSchedulerTest(SchedulerCommonTest): scheduler.set_timesteps(self.num_inference_steps, device=torch_device) - generator = torch.Generator(device=torch_device).manual_seed(0) + if torch_device == "mps": + # device type MPS is not supported for torch.Generator() api. + generator = torch.manual_seed(0) + else: + generator = torch.Generator(device=torch_device).manual_seed(0) model = self.dummy_model() sample = self.dummy_sample_deter * scheduler.init_noise_sigma From 2e980ac9a0c03200cbe52e78e23f648d17e97b9c Mon Sep 17 00:00:00 2001 From: Anton Lozhkov Date: Thu, 10 Nov 2022 00:44:42 +0100 Subject: [PATCH 78/88] [Tests] Adjust TPU test values (#1233) * [Tests] Adjust TPU test values * slow tests * remaining refs --- tests/test_scheduler_flax.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_scheduler_flax.py b/tests/test_scheduler_flax.py index d29a8bfcc2..7928939f2d 100644 --- a/tests/test_scheduler_flax.py +++ b/tests/test_scheduler_flax.py @@ -859,7 +859,7 @@ class FlaxPNDMSchedulerTest(FlaxSchedulerCommonTest): result_mean = jnp.mean(jnp.abs(sample)) if jax_device == "tpu": - assert abs(result_sum - 198.1542) < 1e-2 + assert abs(result_sum - 198.1275) < 1e-2 assert abs(result_mean - 0.2580) < 1e-3 else: assert abs(result_sum - 198.1318) < 1e-2 @@ -872,8 +872,8 @@ class FlaxPNDMSchedulerTest(FlaxSchedulerCommonTest): result_mean = jnp.mean(jnp.abs(sample)) if jax_device == "tpu": - assert abs(result_sum - 185.4352) < 1e-2 - assert abs(result_mean - 0.24145) < 1e-3 + assert abs(result_sum - 186.83226) < 1e-2 + assert abs(result_mean - 0.24327) < 1e-3 else: assert abs(result_sum - 186.9466) < 1e-2 assert abs(result_mean - 0.24342) < 1e-3 @@ -885,8 +885,8 @@ class FlaxPNDMSchedulerTest(FlaxSchedulerCommonTest): result_mean = jnp.mean(jnp.abs(sample)) if jax_device == "tpu": - assert abs(result_sum - 185.4352) < 1e-2 - assert abs(result_mean - 0.2414) < 1e-3 + assert abs(result_sum - 186.83226) < 1e-2 + assert abs(result_mean - 0.24327) < 1e-3 else: assert abs(result_sum - 186.9482) < 1e-2 assert abs(result_mean - 0.2434) < 1e-3 From a09d47532d791bec28926e21b3b73124175be595 Mon Sep 17 00:00:00 2001 From: apolinario Date: Thu, 10 Nov 2022 14:37:42 +0100 Subject: [PATCH 79/88] Add a reference to the name 'Sampler' (#1172) * Add a reference to the name 'Sampler' - Facilitate people that are familiar with the name samplers to understand that we call that schedulers - Better SEO if people are googling for samplers to find our library as well * Update README.md with a reference to 'Sampler' --- README.md | 2 +- docs/source/api/schedulers.mdx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5c7b911c2e..64cbd15aab 100644 --- a/README.md +++ b/README.md @@ -428,7 +428,7 @@ If you just want to play around with some web demos, you can try out the followi

 **Schedulers**: Algorithm class for both **inference** and **training**.
-The class provides functionality to compute previous image according to alpha, beta schedule as well as predict noise for training.
+The class provides functionality to compute previous image according to alpha, beta schedule as well as predict noise for training. Also known as **Samplers**.
 *Examples*: [DDPM](https://arxiv.org/abs/2006.11239), [DDIM](https://arxiv.org/abs/2010.02502), [PNDM](https://arxiv.org/abs/2202.09778), [DEIS](https://arxiv.org/abs/2204.13902)

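For anyone who knows these components as "samplers" from other Stable Diffusion codebases, the rename above is purely terminology: choosing a different sampler just means passing a different `scheduler` to the pipeline. A minimal, illustrative sketch (the checkpoint id and prompt are placeholders, and it reuses the `from_config(..., subfolder="scheduler")` loading pattern shown in the docstrings elsewhere in this patch series):

```python
import torch
from diffusers import DDIMScheduler, StableDiffusionPipeline

model_id = "runwayml/stable-diffusion-v1-5"  # placeholder checkpoint

# Load the "sampler" (scheduler) explicitly and hand it to the pipeline.
scheduler = DDIMScheduler.from_config(model_id, subfolder="scheduler")
pipe = StableDiffusionPipeline.from_pretrained(model_id, scheduler=scheduler)
pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")

image = pipe("a photo of an astronaut riding a horse", num_inference_steps=50).images[0]
image.save("astronaut_ddim.png")
```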
diff --git a/docs/source/api/schedulers.mdx b/docs/source/api/schedulers.mdx index 12575a5eca..7ed527bedf 100644 --- a/docs/source/api/schedulers.mdx +++ b/docs/source/api/schedulers.mdx @@ -16,7 +16,7 @@ Diffusers contains multiple pre-built schedule functions for the diffusion proce ## What is a scheduler? -The schedule functions, denoted *Schedulers* in the library take in the output of a trained model, a sample which the diffusion process is iterating on, and a timestep to return a denoised sample. +The schedule functions, denoted *Schedulers* in the library take in the output of a trained model, a sample which the diffusion process is iterating on, and a timestep to return a denoised sample. That's why schedulers may also be called *Samplers* in other diffusion models implementations. - Schedulers define the methodology for iteratively adding noise to an image or for updating a sample based on model outputs. - adding noise in different manners represent the algorithmic processes to train a diffusion model by adding noise to images. From 045157a46fb16a21a5f37a5f3f3ad710895b680b Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Thu, 10 Nov 2022 16:00:17 +0100 Subject: [PATCH 80/88] Fix Flax usage comments (#1211) * Fix Flax usage comments (they didn't work). * Spell out dtype * make style --- src/diffusers/pipeline_flax_utils.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/diffusers/pipeline_flax_utils.py b/src/diffusers/pipeline_flax_utils.py index 3963c80cd1..4c34e64f78 100644 --- a/src/diffusers/pipeline_flax_utils.py +++ b/src/diffusers/pipeline_flax_utils.py @@ -268,18 +268,27 @@ class FlaxDiffusionPipeline(ConfigMixin): >>> from diffusers import FlaxDiffusionPipeline >>> # Download pipeline from huggingface.co and cache. - >>> pipeline = FlaxDiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256") + >>> # Requires to be logged in to Hugging Face hub, + >>> # see more in [the documentation](https://huggingface.co/docs/hub/security-tokens) + >>> pipeline, params = FlaxDiffusionPipeline.from_pretrained( + ... "runwayml/stable-diffusion-v1-5", + ... revision="bf16", + ... dtype=jnp.bfloat16, + ... ) - >>> # Download pipeline that requires an authorization token - >>> # For more information on access tokens, please refer to this section - >>> # of the documentation](https://huggingface.co/docs/hub/security-tokens) - >>> pipeline = FlaxDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5") + >>> # Download pipeline, but use a different scheduler + >>> from diffusers import FlaxDPMSolverMultistepScheduler - >>> # Download pipeline, but overwrite scheduler - >>> from diffusers import LMSDiscreteScheduler + >>> model_id = "runwayml/stable-diffusion-v1-5" + >>> sched, sched_state = FlaxDPMSolverMultistepScheduler.from_config( + ... model_id, + ... subfolder="scheduler", + ... ) - >>> scheduler = LMSDiscreteScheduler.from_config("runwayml/stable-diffusion-v1-5", subfolder="scheduler") - >>> pipeline = FlaxDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", scheduler=scheduler) + >>> dpm_pipe, dpm_params = FlaxStableDiffusionPipeline.from_pretrained( + ... model_id, revision="bf16", dtype=jnp.bfloat16, scheduler=dpmpp + ... 
) + >>> dpm_params["scheduler"] = dpmpp_state ``` """ cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE) From 8171566163f0b197282786bf39de95c130eb5fa0 Mon Sep 17 00:00:00 2001 From: ruanrz <45003574+ruanrz@users.noreply.github.com> Date: Fri, 11 Nov 2022 19:28:20 +0800 Subject: [PATCH 81/88] [Docs] improve img2img example (#1193) update img2img example --- docs/source/using-diffusers/img2img.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/using-diffusers/img2img.mdx b/docs/source/using-diffusers/img2img.mdx index defefffe06..911d7bd76a 100644 --- a/docs/source/using-diffusers/img2img.mdx +++ b/docs/source/using-diffusers/img2img.mdx @@ -33,7 +33,7 @@ url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/st response = requests.get(url) init_image = Image.open(BytesIO(response.content)).convert("RGB") -init_image = init_image.resize((768, 512)) +init_image.thumbnail((768, 768)) prompt = "A fantasy landscape, trending on artstation" From 4c660d16d0c4c3b3b413f8d71b12831ccdb039af Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Sun, 13 Nov 2022 20:19:55 +0100 Subject: [PATCH 82/88] [Stable Diffusion] Fix padding / truncation (#1226) * [Stable Diffusion] Fix padding / truncation * finish --- .../pipeline_cycle_diffusion.py | 7 +-- .../pipeline_onnx_stable_diffusion.py | 8 +-- .../pipeline_onnx_stable_diffusion_img2img.py | 8 +-- .../pipeline_onnx_stable_diffusion_inpaint.py | 8 +-- .../pipeline_stable_diffusion.py | 7 +-- .../pipeline_stable_diffusion_img2img.py | 7 +-- .../pipeline_stable_diffusion_inpaint.py | 7 +-- ...ipeline_stable_diffusion_inpaint_legacy.py | 7 +-- .../stable_diffusion/test_stable_diffusion.py | 54 ++++++++++++++++++- 9 files changed, 88 insertions(+), 25 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py index 50f519c77f..528dd33794 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py @@ -248,17 +248,18 @@ class CycleDiffusionPipeline(DiffusionPipeline): prompt, padding="max_length", max_length=self.tokenizer.model_max_length, + truncation=True, return_tensors="pt", ) text_input_ids = text_inputs.input_ids + untruncated_ids = self.tokenizer(prompt, padding="max_length", return_tensors="pt").input_ids - if text_input_ids.shape[-1] > self.tokenizer.model_max_length: - removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + if not torch.equal(text_input_ids, untruncated_ids): + removed_text = self.tokenizer.batch_decode(untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1]) logger.warning( "The following part of your input was truncated because CLIP can only handle sequences up to" f" {self.tokenizer.model_max_length} tokens: {removed_text}" ) - text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] text_embeddings = self.text_encoder(text_input_ids.to(device))[0] # duplicate text embeddings for each generation per prompt, using mps friendly method diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py index d1e2704fce..eceefea874 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py @@ -114,17 +114,19 @@ 
class OnnxStableDiffusionPipeline(DiffusionPipeline): prompt, padding="max_length", max_length=self.tokenizer.model_max_length, + truncation=True, return_tensors="np", ) text_input_ids = text_inputs.input_ids + untruncated_ids = self.tokenizer(prompt, padding="max_length", return_tensors="np").input_ids - if text_input_ids.shape[-1] > self.tokenizer.model_max_length: - removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + if not np.array_equal(text_input_ids, untruncated_ids): + removed_text = self.tokenizer.batch_decode(untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1]) logger.warning( "The following part of your input was truncated because CLIP can only handle sequences up to" f" {self.tokenizer.model_max_length} tokens: {removed_text}" ) - text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] + text_embeddings = self.text_encoder(input_ids=text_input_ids.astype(np.int32))[0] text_embeddings = np.repeat(text_embeddings, num_images_per_prompt, axis=0) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py index a09dfe751f..483b5fd2d3 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py @@ -161,17 +161,19 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline): prompt, padding="max_length", max_length=self.tokenizer.model_max_length, + truncation=True, return_tensors="np", ) text_input_ids = text_inputs.input_ids + untruncated_ids = self.tokenizer(prompt, padding="max_length", return_tensors="np").input_ids - if text_input_ids.shape[-1] > self.tokenizer.model_max_length: - removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + if not np.array_equal(text_input_ids, untruncated_ids): + removed_text = self.tokenizer.batch_decode(untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1]) logger.warning( "The following part of your input was truncated because CLIP can only handle sequences up to" f" {self.tokenizer.model_max_length} tokens: {removed_text}" ) - text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] + text_embeddings = self.text_encoder(input_ids=text_input_ids.astype(np.int32))[0] text_embeddings = np.repeat(text_embeddings, num_images_per_prompt, axis=0) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py index 6c226dd432..8e5c201319 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py @@ -175,17 +175,19 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline): prompt, padding="max_length", max_length=self.tokenizer.model_max_length, + truncation=True, return_tensors="np", ) text_input_ids = text_inputs.input_ids + untruncated_ids = self.tokenizer(prompt, padding="max_length", return_tensors="np").input_ids - if text_input_ids.shape[-1] > self.tokenizer.model_max_length: - removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + if not np.array_equal(text_input_ids, untruncated_ids): + removed_text = self.tokenizer.batch_decode(untruncated_ids[:, self.tokenizer.model_max_length - 1 : 
-1]) logger.warning( "The following part of your input was truncated because CLIP can only handle sequences up to" f" {self.tokenizer.model_max_length} tokens: {removed_text}" ) - text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] + text_embeddings = self.text_encoder(input_ids=text_input_ids.astype(np.int32))[0] text_embeddings = np.repeat(text_embeddings, num_images_per_prompt, axis=0) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index ed5246fa91..450fbbfb17 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -236,17 +236,18 @@ class StableDiffusionPipeline(DiffusionPipeline): prompt, padding="max_length", max_length=self.tokenizer.model_max_length, + truncation=True, return_tensors="pt", ) text_input_ids = text_inputs.input_ids + untruncated_ids = self.tokenizer(prompt, padding="max_length", return_tensors="pt").input_ids - if text_input_ids.shape[-1] > self.tokenizer.model_max_length: - removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + if not torch.equal(text_input_ids, untruncated_ids): + removed_text = self.tokenizer.batch_decode(untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1]) logger.warning( "The following part of your input was truncated because CLIP can only handle sequences up to" f" {self.tokenizer.model_max_length} tokens: {removed_text}" ) - text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] text_embeddings = self.text_encoder(text_input_ids.to(device))[0] # duplicate text embeddings for each generation per prompt, using mps friendly method diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index b4ecf600b1..98c813eed1 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -244,17 +244,18 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): prompt, padding="max_length", max_length=self.tokenizer.model_max_length, + truncation=True, return_tensors="pt", ) text_input_ids = text_inputs.input_ids + untruncated_ids = self.tokenizer(prompt, padding="max_length", return_tensors="pt").input_ids - if text_input_ids.shape[-1] > self.tokenizer.model_max_length: - removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + if not torch.equal(text_input_ids, untruncated_ids): + removed_text = self.tokenizer.batch_decode(untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1]) logger.warning( "The following part of your input was truncated because CLIP can only handle sequences up to" f" {self.tokenizer.model_max_length} tokens: {removed_text}" ) - text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] text_embeddings = self.text_encoder(text_input_ids.to(device))[0] # duplicate text embeddings for each generation per prompt, using mps friendly method diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index f42325261a..3f08f6edae 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ 
b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -244,17 +244,18 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): prompt, padding="max_length", max_length=self.tokenizer.model_max_length, + truncation=True, return_tensors="pt", ) text_input_ids = text_inputs.input_ids + untruncated_ids = self.tokenizer(prompt, padding="max_length", return_tensors="pt").input_ids - if text_input_ids.shape[-1] > self.tokenizer.model_max_length: - removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + if not torch.equal(text_input_ids, untruncated_ids): + removed_text = self.tokenizer.batch_decode(untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1]) logger.warning( "The following part of your input was truncated because CLIP can only handle sequences up to" f" {self.tokenizer.model_max_length} tokens: {removed_text}" ) - text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] text_embeddings = self.text_encoder(text_input_ids.to(device))[0] # duplicate text embeddings for each generation per prompt, using mps friendly method diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py index 89d40f7a79..612aa3c126 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py @@ -213,17 +213,18 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline): prompt, padding="max_length", max_length=self.tokenizer.model_max_length, + truncation=True, return_tensors="pt", ) text_input_ids = text_inputs.input_ids + untruncated_ids = self.tokenizer(prompt, padding="max_length", return_tensors="pt").input_ids - if text_input_ids.shape[-1] > self.tokenizer.model_max_length: - removed_text = self.tokenizer.batch_decode(text_input_ids[:, self.tokenizer.model_max_length :]) + if not torch.equal(text_input_ids, untruncated_ids): + removed_text = self.tokenizer.batch_decode(untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1]) logger.warning( "The following part of your input was truncated because CLIP can only handle sequences up to" f" {self.tokenizer.model_max_length} tokens: {removed_text}" ) - text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length] text_embeddings = self.text_encoder(text_input_ids.to(device))[0] # duplicate text embeddings for each generation per prompt, using mps friendly method diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py index 6e1071124c..87d238c869 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py @@ -33,9 +33,10 @@ from diffusers import ( UNet2DConditionModel, UNet2DModel, VQModel, + logging, ) from diffusers.utils import floats_tensor, load_numpy, slow, torch_device -from diffusers.utils.testing_utils import require_torch_gpu +from diffusers.utils.testing_utils import CaptureLogger, require_torch_gpu from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer from ...test_pipelines_common import PipelineTesterMixin @@ -619,6 +620,57 @@ class StableDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase): assert image.shape == (1, 128, 128, 3) + def test_stable_diffusion_long_prompt(self): + unet = self.dummy_cond_unet + 
scheduler = LMSDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear") + vae = self.dummy_vae + bert = self.dummy_text_encoder + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + # make sure here that pndm scheduler skips prk + sd_pipe = StableDiffusionPipeline( + unet=unet, + scheduler=scheduler, + vae=vae, + text_encoder=bert, + tokenizer=tokenizer, + safety_checker=None, + feature_extractor=self.dummy_extractor, + ) + sd_pipe = sd_pipe.to(torch_device) + sd_pipe.set_progress_bar_config(disable=None) + + do_classifier_free_guidance = True + negative_prompt = None + num_images_per_prompt = 1 + logger = logging.get_logger("diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion") + + prompt = 25 * "@" + with CaptureLogger(logger) as cap_logger_3: + text_embeddings_3 = sd_pipe._encode_prompt( + prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) + + prompt = 100 * "@" + with CaptureLogger(logger) as cap_logger: + text_embeddings = sd_pipe._encode_prompt( + prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) + + negative_prompt = "Hello" + with CaptureLogger(logger) as cap_logger_2: + text_embeddings_2 = sd_pipe._encode_prompt( + prompt, torch_device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) + + assert text_embeddings_3.shape == text_embeddings_2.shape == text_embeddings.shape + assert text_embeddings.shape[1] == 77 + + assert cap_logger.out == cap_logger_2.out + # 100 - 77 + 1 (BOS token) + 1 (EOS token) = 25 + assert cap_logger.out.count("@") == 25 + assert cap_logger_3.out == "" + @slow @require_torch_gpu From b3c5e086e5cd41712b7627894f814edf5fe38647 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Sun, 13 Nov 2022 23:54:30 +0100 Subject: [PATCH 83/88] Finalize stable diffusion refactor (#1269) * finish * cleaner * more fixes * refactor * make fix copies * refactor cycle diffusion * finish * finish2 * Apply suggestions from code review --- .../pipeline_cycle_diffusion.py | 273 ++++++++++------- .../pipeline_stable_diffusion.py | 162 +++++----- .../pipeline_stable_diffusion_img2img.py | 230 +++++++++------ .../pipeline_stable_diffusion_inpaint.py | 279 +++++++++++------- ...ipeline_stable_diffusion_inpaint_legacy.py | 262 ++++++++++------ .../test_stable_diffusion_img2img.py | 47 ++- .../test_stable_diffusion_inpaint_legacy.py | 1 - 7 files changed, 778 insertions(+), 476 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py index 528dd33794..dfdb58de4d 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py @@ -19,6 +19,7 @@ import numpy as np import torch import PIL +from diffusers.utils import is_accelerate_available from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer from ...configuration_utils import FrozenDict @@ -178,6 +179,7 @@ class CycleDiffusionPipeline(DiffusionPipeline): feature_extractor=feature_extractor, ) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_attention_slicing def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): r""" Enable sliced attention computation. 
@@ -197,14 +199,33 @@ class CycleDiffusionPipeline(DiffusionPipeline): slice_size = self.unet.config.attention_head_dim // 2 self.unet.set_attention_slice(slice_size) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_attention_slicing def disable_attention_slicing(self): r""" Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go back to computing attention in one step. """ - # set slice_size = `None` to disable `set_attention_slice` + # set slice_size = `None` to disable `attention slicing` self.enable_attention_slicing(None) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload + def enable_sequential_cpu_offload(self): + r""" + Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, + text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a + `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called. + """ + if is_accelerate_available(): + from accelerate import cpu_offload + else: + raise ImportError("Please install accelerate via `pip install accelerate`") + + device = torch.device("cuda") + + for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.safety_checker]: + if cpu_offloaded_model is not None: + cpu_offload(cpu_offloaded_model, device) + @property # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device def _execution_device(self): @@ -224,6 +245,26 @@ class CycleDiffusionPipeline(DiffusionPipeline): return torch.device(module._hf_hook.execution_device) return self.device + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention + def enable_xformers_memory_efficient_attention(self): + r""" + Enable memory efficient attention as implemented in xformers. + + When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference + time. Speed up at training time is not guaranteed. + + Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention + is used. + """ + self.unet.set_use_memory_efficient_attention_xformers(True) + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_xformers_memory_efficient_attention + def disable_xformers_memory_efficient_attention(self): + r""" + Disable memory efficient attention as implemented in xformers. 
+ """ + self.unet.set_use_memory_efficient_attention_xformers(False) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt): r""" @@ -310,6 +351,106 @@ class CycleDiffusionPipeline(DiffusionPipeline): return text_embeddings + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.check_inputs + def check_inputs(self, prompt, strength, callback_steps): + if not isinstance(prompt, str) and not isinstance(prompt, list): + raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") + + if strength < 0 or strength > 1: + raise ValueError(f"The value of strength should in [1.0, 1.0] but is {strength}") + + if (callback_steps is None) or ( + callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) + ): + raise ValueError( + f"`callback_steps` has to be a positive integer but is {callback_steps} of type" + f" {type(callback_steps)}." + ) + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs + def prepare_extra_step_kwargs(self, generator, eta): + # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature + # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. + # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 + # and should be between [0, 1] + + accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) + extra_step_kwargs = {} + if accepts_eta: + extra_step_kwargs["eta"] = eta + + # check if the scheduler accepts generator + accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys()) + if accepts_generator: + extra_step_kwargs["generator"] = generator + return extra_step_kwargs + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker + def run_safety_checker(self, image, device, dtype): + if self.safety_checker is not None: + safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(device) + image, has_nsfw_concept = self.safety_checker( + images=image, clip_input=safety_checker_input.pixel_values.to(dtype) + ) + else: + has_nsfw_concept = None + return image, has_nsfw_concept + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.decode_latents + def decode_latents(self, latents): + latents = 1 / 0.18215 * latents + image = self.vae.decode(latents).sample + image = (image / 2 + 0.5).clamp(0, 1) + # we always cast to float32 as this does not cause significant overhead and is compatible with bfloa16 + image = image.cpu().permute(0, 2, 3, 1).float().numpy() + return image + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.get_timesteps + def get_timesteps(self, num_inference_steps, strength, device): + # get the original timestep using init_timestep + offset = self.scheduler.config.get("steps_offset", 0) + init_timestep = int(num_inference_steps * strength) + offset + init_timestep = min(init_timestep, num_inference_steps) + + t_start = max(num_inference_steps - init_timestep + offset, 0) + timesteps = self.scheduler.timesteps[t_start:] + + return timesteps + 
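The `get_timesteps` helper copied in above is where the img2img `strength` argument becomes a slice of the denoising schedule: it decides how many of the `num_inference_steps` actually run, and the initial image is then noised to the first of those timesteps. A standalone sketch of that arithmetic with assumed values (a plain `range` stands in for `scheduler.timesteps`, and `steps_offset` is taken to be 0):

```python
# Hypothetical inputs, mirroring the logic of get_timesteps above.
num_inference_steps = 50
strength = 0.75
offset = 0  # scheduler.config steps_offset, assumed 0 here

init_timestep = min(int(num_inference_steps * strength) + offset, num_inference_steps)
t_start = max(num_inference_steps - init_timestep + offset, 0)

scheduler_timesteps = list(range(num_inference_steps))  # stand-in for self.scheduler.timesteps
timesteps = scheduler_timesteps[t_start:]

print(len(timesteps))  # 37 -> only the final 37 of the 50 steps are denoised
```

With `strength` close to 1.0 nearly the whole schedule runs and the output drifts far from the input image; with a small `strength` only the last few steps run and the result stays close to it.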
+ def prepare_latents(self, init_image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None): + init_image = init_image.to(device=device, dtype=dtype) + init_latent_dist = self.vae.encode(init_image).latent_dist + init_latents = init_latent_dist.sample(generator=generator) + init_latents = 0.18215 * init_latents + + if batch_size > init_latents.shape[0] and batch_size % init_latents.shape[0] == 0: + # expand init_latents for batch_size + deprecation_message = ( + f"You have passed {batch_size} text prompts (`prompt`), but only {init_latents.shape[0]} initial" + " images (`init_image`). Initial images are now duplicating to match the number of text prompts. Note" + " that this behavior is deprecated and will be removed in a version 1.0.0. Please make sure to update" + " your script to pass as many init images as text prompts to suppress this warning." + ) + deprecate("len(prompt) != len(init_image)", "1.0.0", deprecation_message, standard_warn=False) + additional_image_per_prompt = batch_size // init_latents.shape[0] + init_latents = torch.cat([init_latents] * additional_image_per_prompt * num_images_per_prompt, dim=0) + elif batch_size > init_latents.shape[0] and batch_size % init_latents.shape[0] != 0: + raise ValueError( + f"Cannot duplicate `init_image` of batch size {init_latents.shape[0]} to {batch_size} text prompts." + ) + else: + init_latents = torch.cat([init_latents] * num_images_per_prompt, dim=0) + + # add noise to latents using the timestep + noise = torch.randn(init_latents.shape, generator=generator, device=device, dtype=dtype) + + # get latents + clean_latents = init_latents + init_latents = self.scheduler.add_noise(init_latents, noise, timestep) + latents = init_latents + + return latents, clean_latents + @torch.no_grad() def __call__( self, @@ -384,112 +525,43 @@ class CycleDiffusionPipeline(DiffusionPipeline): list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" (nsfw) content, according to the `safety_checker`. """ - if isinstance(prompt, str): - batch_size = 1 - elif isinstance(prompt, list): - batch_size = len(prompt) - else: - raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") - - if batch_size != 1: - raise ValueError( - "At the moment only `batch_size=1` is supported for prompts, but you seem to have passed multiple" - f" prompts: {prompt}. Please make sure to pass only a single prompt." - ) - - if strength < 0 or strength > 1: - raise ValueError(f"The value of strength should in [0.0, 1.0] but is {strength}") - - if (callback_steps is None) or ( - callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) - ): - raise ValueError( - f"`callback_steps` has to be a positive integer but is {callback_steps} of type" - f" {type(callback_steps)}." - ) - - # set timesteps - self.scheduler.set_timesteps(num_inference_steps) - - if isinstance(init_image, PIL.Image.Image): - init_image = preprocess(init_image) + # 1. Check inputs + self.check_inputs(prompt, strength, callback_steps) + # 2. Define call parameters + batch_size = 1 if isinstance(prompt, str) else len(prompt) device = self._execution_device - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` # corresponds to doing no classifier free guidance. do_classifier_free_guidance = guidance_scale > 1.0 + # 3. 
Encode input prompt text_embeddings = self._encode_prompt(prompt, device, num_images_per_prompt, do_classifier_free_guidance, None) source_text_embeddings = self._encode_prompt( source_prompt, device, num_images_per_prompt, do_classifier_free_guidance, None ) - # encode the init image into latents and scale the latents - latents_dtype = text_embeddings.dtype - init_image = init_image.to(device=self.device, dtype=latents_dtype) - init_latent_dist = self.vae.encode(init_image).latent_dist - init_latents = init_latent_dist.sample(generator=generator) - init_latents = 0.18215 * init_latents + # 4. Preprocess image + if isinstance(init_image, PIL.Image.Image): + init_image = preprocess(init_image) - if isinstance(prompt, str): - prompt = [prompt] - if len(prompt) > init_latents.shape[0] and len(prompt) % init_latents.shape[0] == 0: - # expand init_latents for batch_size - deprecation_message = ( - f"You have passed {len(prompt)} text prompts (`prompt`), but only {init_latents.shape[0]} initial" - " images (`init_image`). Initial images are now duplicating to match the number of text prompts. Note" - " that this behavior is deprecated and will be removed in a version 1.0.0. Please make sure to update" - " your script to pass as many init images as text prompts to suppress this warning." - ) - deprecate("len(prompt) != len(init_image)", "1.0.0", deprecation_message, standard_warn=False) - additional_image_per_prompt = len(prompt) // init_latents.shape[0] - init_latents = torch.cat([init_latents] * additional_image_per_prompt * num_images_per_prompt, dim=0) - elif len(prompt) > init_latents.shape[0] and len(prompt) % init_latents.shape[0] != 0: - raise ValueError( - f"Cannot duplicate `init_image` of batch size {init_latents.shape[0]} to {len(prompt)} text prompts." - ) - else: - init_latents = torch.cat([init_latents] * num_images_per_prompt, dim=0) + # 5. Prepare timesteps + self.scheduler.set_timesteps(num_inference_steps, device=device) + timesteps = self.get_timesteps(num_inference_steps, strength, device) + latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) - # get the original timestep using init_timestep - offset = self.scheduler.config.get("steps_offset", 0) - init_timestep = int(num_inference_steps * strength) + offset - init_timestep = min(init_timestep, num_inference_steps) + # 6. Prepare latent variables + latents, clean_latents = self.prepare_latents( + init_image, latent_timestep, batch_size, num_images_per_prompt, text_embeddings.dtype, device, generator + ) + source_latents = latents - timesteps = self.scheduler.timesteps[-init_timestep] - timesteps = torch.tensor([timesteps] * batch_size * num_images_per_prompt, device=self.device) - - # add noise to latents using the timesteps - noise = torch.randn(init_latents.shape, generator=generator, device=self.device, dtype=latents_dtype) - clean_latents = init_latents - init_latents = self.scheduler.add_noise(init_latents, noise, timesteps) - - # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature - # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. - # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 - # and should be between [0, 1] - accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) - extra_step_kwargs = {} - - if not (accepts_eta and (0 < eta <= 1)): - raise ValueError( - "Currently, only the DDIM scheduler is supported. 
Please make sure that `pipeline.scheduler` is of" - f" type {DDIMScheduler.__class__} and not {self.scheduler.__class__}." - ) - - extra_step_kwargs["eta"] = eta - - latents = init_latents - source_latents = init_latents - - t_start = max(num_inference_steps - init_timestep + offset, 0) - - # Some schedulers like PNDM have timesteps as arrays - # It's more optimized to move all timesteps to correct device beforehand - timesteps = self.scheduler.timesteps[t_start:].to(self.device) + # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline + extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) + generator = extra_step_kwargs.pop("generator", None) + # 8. Denoising loop for i, t in enumerate(self.progress_bar(timesteps)): # expand the latents if we are doing classifier free guidance latent_model_input = torch.cat([latents] * 2) @@ -551,22 +623,13 @@ class CycleDiffusionPipeline(DiffusionPipeline): if callback is not None and i % callback_steps == 0: callback(i, t, latents) - latents = 1 / 0.18215 * latents - image = self.vae.decode(latents).sample + # 9. Post-processing + image = self.decode_latents(latents) - image = (image / 2 + 0.5).clamp(0, 1) - image = image.cpu().permute(0, 2, 3, 1).numpy() - - if self.safety_checker is not None: - safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to( - self.device - ) - image, has_nsfw_concept = self.safety_checker( - images=image, clip_input=safety_checker_input.pixel_values.to(text_embeddings.dtype) - ) - else: - has_nsfw_concept = None + # 10. Run safety checker + image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype) + # 11. Convert to PIL if output_type == "pil": image = self.numpy_to_pil(image) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 450fbbfb17..e635347293 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -298,6 +298,73 @@ class StableDiffusionPipeline(DiffusionPipeline): return text_embeddings + def run_safety_checker(self, image, device, dtype): + if self.safety_checker is not None: + safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(device) + image, has_nsfw_concept = self.safety_checker( + images=image, clip_input=safety_checker_input.pixel_values.to(dtype) + ) + else: + has_nsfw_concept = None + return image, has_nsfw_concept + + def decode_latents(self, latents): + latents = 1 / 0.18215 * latents + image = self.vae.decode(latents).sample + image = (image / 2 + 0.5).clamp(0, 1) + # we always cast to float32 as this does not cause significant overhead and is compatible with bfloa16 + image = image.cpu().permute(0, 2, 3, 1).float().numpy() + return image + + def prepare_extra_step_kwargs(self, generator, eta): + # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature + # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. 
+ # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 + # and should be between [0, 1] + + accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) + extra_step_kwargs = {} + if accepts_eta: + extra_step_kwargs["eta"] = eta + + # check if the scheduler accepts generator + accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys()) + if accepts_generator: + extra_step_kwargs["generator"] = generator + return extra_step_kwargs + + def check_inputs(self, prompt, height, width, callback_steps): + if not isinstance(prompt, str) and not isinstance(prompt, list): + raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") + + if height % 8 != 0 or width % 8 != 0: + raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.") + + if (callback_steps is None) or ( + callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) + ): + raise ValueError( + f"`callback_steps` has to be a positive integer but is {callback_steps} of type" + f" {type(callback_steps)}." + ) + + def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None): + shape = (batch_size, num_channels_latents, height // 8, width // 8) + if latents is None: + if device.type == "mps": + # randn does not work reproducibly on mps + latents = torch.randn(shape, generator=generator, device="cpu", dtype=dtype).to(device) + else: + latents = torch.randn(shape, generator=generator, device=device, dtype=dtype) + else: + if latents.shape != shape: + raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}") + latents = latents.to(device) + + # scale the initial noise by the standard deviation required by the scheduler + latents = latents * self.scheduler.init_noise_sigma + return latents + @torch.no_grad() def __call__( self, @@ -371,75 +438,45 @@ class StableDiffusionPipeline(DiffusionPipeline): list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" (nsfw) content, according to the `safety_checker`. """ - if isinstance(prompt, str): - batch_size = 1 - elif isinstance(prompt, list): - batch_size = len(prompt) - else: - raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") - if height % 8 != 0 or width % 8 != 0: - raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.") - - if (callback_steps is None) or ( - callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) - ): - raise ValueError( - f"`callback_steps` has to be a positive integer but is {callback_steps} of type" - f" {type(callback_steps)}." - ) + # 1. Check inputs. Raise error if not correct + self.check_inputs(prompt, height, width, callback_steps) + # 2. Define call parameters + batch_size = 1 if isinstance(prompt, str) else len(prompt) device = self._execution_device - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` # corresponds to doing no classifier free guidance. do_classifier_free_guidance = guidance_scale > 1.0 + # 3. 
Encode input prompt text_embeddings = self._encode_prompt( prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt ) - # Unlike in other pipelines, latents need to be generated in the target device - # for 1-to-1 results reproducibility with the CompVis implementation. - # However this currently doesn't work in `mps`. - - # get the initial random noise unless the user supplied it - latents_shape = (batch_size * num_images_per_prompt, self.unet.in_channels, height // 8, width // 8) - latents_dtype = text_embeddings.dtype - if latents is None: - if device.type == "mps": - # randn does not work reproducibly on mps - latents = torch.randn(latents_shape, generator=generator, device="cpu", dtype=latents_dtype).to(device) - else: - latents = torch.randn(latents_shape, generator=generator, device=device, dtype=latents_dtype) - else: - if latents.shape != latents_shape: - raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}") - latents = latents.to(device) - - # set timesteps and move to the correct device + # 4. Prepare timesteps self.scheduler.set_timesteps(num_inference_steps, device=device) - timesteps_tensor = self.scheduler.timesteps + timesteps = self.scheduler.timesteps - # scale the initial noise by the standard deviation required by the scheduler - latents = latents * self.scheduler.init_noise_sigma + # 5. Prepare latent variables + num_channels_latents = self.unet.in_channels + latents = self.prepare_latents( + batch_size * num_images_per_prompt, + num_channels_latents, + height, + width, + text_embeddings.dtype, + device, + generator, + latents, + ) - # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature - # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. - # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 - # and should be between [0, 1] - accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) - extra_step_kwargs = {} - if accepts_eta: - extra_step_kwargs["eta"] = eta + # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline + extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) - # check if the scheduler accepts generator - accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys()) - if accepts_generator: - extra_step_kwargs["generator"] = generator - - for i, t in enumerate(self.progress_bar(timesteps_tensor)): + # 7. Denoising loop + for i, t in enumerate(self.progress_bar(timesteps)): # expand the latents if we are doing classifier free guidance latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) @@ -459,22 +496,13 @@ class StableDiffusionPipeline(DiffusionPipeline): if callback is not None and i % callback_steps == 0: callback(i, t, latents) - latents = 1 / 0.18215 * latents - image = self.vae.decode(latents).sample + # 8. 
Post-processing + image = self.decode_latents(latents) - image = (image / 2 + 0.5).clamp(0, 1) - - # we always cast to float32 as this does not cause significant overhead and is compatible with bfloa16 - image = image.cpu().permute(0, 2, 3, 1).float().numpy() - - if self.safety_checker is not None: - safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(device) - image, has_nsfw_concept = self.safety_checker( - images=image, clip_input=safety_checker_input.pixel_values.to(text_embeddings.dtype) - ) - else: - has_nsfw_concept = None + # 9. Run safety checker + image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype) + # 10. Convert to PIL if output_type == "pil": image = self.numpy_to_pil(image) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 98c813eed1..9df800dc2d 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -27,6 +27,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...pipeline_utils import DiffusionPipeline from ...schedulers import ( DDIMScheduler, + DPMSolverMultistepScheduler, EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, LMSDiscreteScheduler, @@ -78,6 +79,7 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): Model that extracts features from generated images to be used as inputs for the `safety_checker`. """ + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.__init__ def __init__( self, vae: AutoencoderKL, @@ -85,7 +87,12 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): tokenizer: CLIPTokenizer, unet: UNet2DConditionModel, scheduler: Union[ - DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler, EulerDiscreteScheduler, EulerAncestralDiscreteScheduler + DDIMScheduler, + PNDMScheduler, + LMSDiscreteScheduler, + EulerDiscreteScheduler, + EulerAncestralDiscreteScheduler, + DPMSolverMultistepScheduler, ], safety_checker: StableDiffusionSafetyChecker, feature_extractor: CLIPFeatureExtractor, @@ -139,6 +146,7 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): feature_extractor=feature_extractor, ) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_attention_slicing def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): r""" Enable sliced attention computation. @@ -158,14 +166,16 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): slice_size = self.unet.config.attention_head_dim // 2 self.unet.set_attention_slice(slice_size) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_attention_slicing def disable_attention_slicing(self): r""" Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go back to computing attention in one step. """ - # set slice_size = `None` to disable `set_attention_slice` + # set slice_size = `None` to disable `attention slicing` self.enable_attention_slicing(None) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload def enable_sequential_cpu_offload(self): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, unet, @@ -202,6 +212,7 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): return torch.device(module._hf_hook.execution_device) return self.device + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention def enable_xformers_memory_efficient_attention(self): r""" Enable memory efficient attention as implemented in xformers. @@ -214,6 +225,7 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): """ self.unet.set_use_memory_efficient_attention_xformers(True) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_xformers_memory_efficient_attention def disable_xformers_memory_efficient_attention(self): r""" Disable memory efficient attention as implemented in xformers. @@ -306,6 +318,103 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): return text_embeddings + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker + def run_safety_checker(self, image, device, dtype): + if self.safety_checker is not None: + safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(device) + image, has_nsfw_concept = self.safety_checker( + images=image, clip_input=safety_checker_input.pixel_values.to(dtype) + ) + else: + has_nsfw_concept = None + return image, has_nsfw_concept + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.decode_latents + def decode_latents(self, latents): + latents = 1 / 0.18215 * latents + image = self.vae.decode(latents).sample + image = (image / 2 + 0.5).clamp(0, 1) + # we always cast to float32 as this does not cause significant overhead and is compatible with bfloa16 + image = image.cpu().permute(0, 2, 3, 1).float().numpy() + return image + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs + def prepare_extra_step_kwargs(self, generator, eta): + # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature + # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. + # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 + # and should be between [0, 1] + + accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) + extra_step_kwargs = {} + if accepts_eta: + extra_step_kwargs["eta"] = eta + + # check if the scheduler accepts generator + accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys()) + if accepts_generator: + extra_step_kwargs["generator"] = generator + return extra_step_kwargs + + def check_inputs(self, prompt, strength, callback_steps): + if not isinstance(prompt, str) and not isinstance(prompt, list): + raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") + + if strength < 0 or strength > 1: + raise ValueError(f"The value of strength should in [1.0, 1.0] but is {strength}") + + if (callback_steps is None) or ( + callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) + ): + raise ValueError( + f"`callback_steps` has to be a positive integer but is {callback_steps} of type" + f" {type(callback_steps)}." 
+ ) + + def get_timesteps(self, num_inference_steps, strength, device): + # get the original timestep using init_timestep + offset = self.scheduler.config.get("steps_offset", 0) + init_timestep = int(num_inference_steps * strength) + offset + init_timestep = min(init_timestep, num_inference_steps) + + t_start = max(num_inference_steps - init_timestep + offset, 0) + timesteps = self.scheduler.timesteps[t_start:] + + return timesteps + + def prepare_latents(self, init_image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None): + init_image = init_image.to(device=device, dtype=dtype) + init_latent_dist = self.vae.encode(init_image).latent_dist + init_latents = init_latent_dist.sample(generator=generator) + init_latents = 0.18215 * init_latents + + if batch_size > init_latents.shape[0] and batch_size % init_latents.shape[0] == 0: + # expand init_latents for batch_size + deprecation_message = ( + f"You have passed {batch_size} text prompts (`prompt`), but only {init_latents.shape[0]} initial" + " images (`init_image`). Initial images are now duplicating to match the number of text prompts. Note" + " that this behavior is deprecated and will be removed in a version 1.0.0. Please make sure to update" + " your script to pass as many init images as text prompts to suppress this warning." + ) + deprecate("len(prompt) != len(init_image)", "1.0.0", deprecation_message, standard_warn=False) + additional_image_per_prompt = batch_size // init_latents.shape[0] + init_latents = torch.cat([init_latents] * additional_image_per_prompt * num_images_per_prompt, dim=0) + elif batch_size > init_latents.shape[0] and batch_size % init_latents.shape[0] != 0: + raise ValueError( + f"Cannot duplicate `init_image` of batch size {init_latents.shape[0]} to {batch_size} text prompts." + ) + else: + init_latents = torch.cat([init_latents] * num_images_per_prompt, dim=0) + + # add noise to latents using the timesteps + noise = torch.randn(init_latents.shape, generator=generator, device=device, dtype=dtype) + + # get latents + init_latents = self.scheduler.add_noise(init_latents, noise, timestep) + latents = init_latents + + return latents + @torch.no_grad() def __call__( self, @@ -379,102 +488,40 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" (nsfw) content, according to the `safety_checker`. """ - if isinstance(prompt, str): - batch_size = 1 - elif isinstance(prompt, list): - batch_size = len(prompt) - else: - raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") - - if strength < 0 or strength > 1: - raise ValueError(f"The value of strength should in [0.0, 1.0] but is {strength}") - - if (callback_steps is None) or ( - callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) - ): - raise ValueError( - f"`callback_steps` has to be a positive integer but is {callback_steps} of type" - f" {type(callback_steps)}." - ) + # 1. Check inputs + self.check_inputs(prompt, strength, callback_steps) + # 2. Define call parameters + batch_size = 1 if isinstance(prompt, str) else len(prompt) device = self._execution_device - - # set timesteps - self.scheduler.set_timesteps(num_inference_steps) - - if isinstance(init_image, PIL.Image.Image): - init_image = preprocess(init_image) - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . 
`guidance_scale = 1` # corresponds to doing no classifier free guidance. do_classifier_free_guidance = guidance_scale > 1.0 + # 3. Encode input prompt text_embeddings = self._encode_prompt( prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt ) - # encode the init image into latents and scale the latents - latents_dtype = text_embeddings.dtype - init_image = init_image.to(device=device, dtype=latents_dtype) - init_latent_dist = self.vae.encode(init_image).latent_dist - init_latents = init_latent_dist.sample(generator=generator) - init_latents = 0.18215 * init_latents + # 4. Preprocess image + if isinstance(init_image, PIL.Image.Image): + init_image = preprocess(init_image) - if isinstance(prompt, str): - prompt = [prompt] - if len(prompt) > init_latents.shape[0] and len(prompt) % init_latents.shape[0] == 0: - # expand init_latents for batch_size - deprecation_message = ( - f"You have passed {len(prompt)} text prompts (`prompt`), but only {init_latents.shape[0]} initial" - " images (`init_image`). Initial images are now duplicating to match the number of text prompts. Note" - " that this behavior is deprecated and will be removed in a version 1.0.0. Please make sure to update" - " your script to pass as many init images as text prompts to suppress this warning." - ) - deprecate("len(prompt) != len(init_image)", "1.0.0", deprecation_message, standard_warn=False) - additional_image_per_prompt = len(prompt) // init_latents.shape[0] - init_latents = torch.cat([init_latents] * additional_image_per_prompt * num_images_per_prompt, dim=0) - elif len(prompt) > init_latents.shape[0] and len(prompt) % init_latents.shape[0] != 0: - raise ValueError( - f"Cannot duplicate `init_image` of batch size {init_latents.shape[0]} to {len(prompt)} text prompts." - ) - else: - init_latents = torch.cat([init_latents] * num_images_per_prompt, dim=0) + # 5. set timesteps + self.scheduler.set_timesteps(num_inference_steps, device=device) + timesteps = self.get_timesteps(num_inference_steps, strength, device) + latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) - # get the original timestep using init_timestep - offset = self.scheduler.config.get("steps_offset", 0) - init_timestep = int(num_inference_steps * strength) + offset - init_timestep = min(init_timestep, num_inference_steps) + # 6. Prepare latent variables + latents = self.prepare_latents( + init_image, latent_timestep, batch_size, num_images_per_prompt, text_embeddings.dtype, device, generator + ) - timesteps = self.scheduler.timesteps[-init_timestep] - timesteps = torch.tensor([timesteps] * batch_size * num_images_per_prompt, device=device) - - # add noise to latents using the timesteps - noise = torch.randn(init_latents.shape, generator=generator, device=device, dtype=latents_dtype) - init_latents = self.scheduler.add_noise(init_latents, noise, timesteps) - - # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature - # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. 
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 - # and should be between [0, 1] - accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) - extra_step_kwargs = {} - if accepts_eta: - extra_step_kwargs["eta"] = eta - - # check if the scheduler accepts generator - accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys()) - if accepts_generator: - extra_step_kwargs["generator"] = generator - - latents = init_latents - - t_start = max(num_inference_steps - init_timestep + offset, 0) - - # Some schedulers like PNDM have timesteps as arrays - # It's more optimized to move all timesteps to correct device beforehand - timesteps = self.scheduler.timesteps[t_start:].to(device) + # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline + extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) + # 8. Denoising loop for i, t in enumerate(self.progress_bar(timesteps)): # expand the latents if we are doing classifier free guidance latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents @@ -495,20 +542,13 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline): if callback is not None and i % callback_steps == 0: callback(i, t, latents) - latents = 1 / 0.18215 * latents - image = self.vae.decode(latents).sample + # 9. Post-processing + image = self.decode_latents(latents) - image = (image / 2 + 0.5).clamp(0, 1) - image = image.cpu().permute(0, 2, 3, 1).numpy() - - if self.safety_checker is not None: - safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(device) - image, has_nsfw_concept = self.safety_checker( - images=image, clip_input=safety_checker_input.pixel_values.to(text_embeddings.dtype) - ) - else: - has_nsfw_concept = None + # 10. Run safety checker + image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype) + # 11. Convert to PIL if output_type == "pil": image = self.numpy_to_pil(image) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 3f08f6edae..332eb2ca77 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -139,6 +139,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): feature_extractor=feature_extractor, ) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_attention_slicing def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): r""" Enable sliced attention computation. @@ -158,6 +159,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): slice_size = self.unet.config.attention_head_dim // 2 self.unet.set_attention_slice(slice_size) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_attention_slicing def disable_attention_slicing(self): r""" Disable sliced attention computation. 
If `enable_attention_slicing` was previously invoked, this method will go @@ -166,6 +168,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): # set slice_size = `None` to disable `attention slicing` self.enable_attention_slicing(None) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload def enable_sequential_cpu_offload(self): r""" Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet, @@ -183,6 +186,26 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): if cpu_offloaded_model is not None: cpu_offload(cpu_offloaded_model, device) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention + def enable_xformers_memory_efficient_attention(self): + r""" + Enable memory efficient attention as implemented in xformers. + + When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference + time. Speed up at training time is not guaranteed. + + Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention + is used. + """ + self.unet.set_use_memory_efficient_attention_xformers(True) + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_xformers_memory_efficient_attention + def disable_xformers_memory_efficient_attention(self): + r""" + Disable memory efficient attention as implemented in xformers. + """ + self.unet.set_use_memory_efficient_attention_xformers(False) + @property # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device def _execution_device(self): @@ -202,24 +225,6 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): return torch.device(module._hf_hook.execution_device) return self.device - def enable_xformers_memory_efficient_attention(self): - r""" - Enable memory efficient attention as implemented in xformers. - - When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference - time. Speed up at training time is not guaranteed. - - Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention - is used. - """ - self.unet.set_use_memory_efficient_attention_xformers(True) - - def disable_xformers_memory_efficient_attention(self): - r""" - Disable memory efficient attention as implemented in xformers. 
- """ - self.unet.set_use_memory_efficient_attention_xformers(False) - # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt): r""" @@ -306,6 +311,106 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): return text_embeddings + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker + def run_safety_checker(self, image, device, dtype): + if self.safety_checker is not None: + safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(device) + image, has_nsfw_concept = self.safety_checker( + images=image, clip_input=safety_checker_input.pixel_values.to(dtype) + ) + else: + has_nsfw_concept = None + return image, has_nsfw_concept + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs + def prepare_extra_step_kwargs(self, generator, eta): + # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature + # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. + # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 + # and should be between [0, 1] + + accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) + extra_step_kwargs = {} + if accepts_eta: + extra_step_kwargs["eta"] = eta + + # check if the scheduler accepts generator + accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys()) + if accepts_generator: + extra_step_kwargs["generator"] = generator + return extra_step_kwargs + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.decode_latents + def decode_latents(self, latents): + latents = 1 / 0.18215 * latents + image = self.vae.decode(latents).sample + image = (image / 2 + 0.5).clamp(0, 1) + # we always cast to float32 as this does not cause significant overhead and is compatible with bfloa16 + image = image.cpu().permute(0, 2, 3, 1).float().numpy() + return image + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.check_inputs + def check_inputs(self, prompt, height, width, callback_steps): + if not isinstance(prompt, str) and not isinstance(prompt, list): + raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") + + if height % 8 != 0 or width % 8 != 0: + raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.") + + if (callback_steps is None) or ( + callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) + ): + raise ValueError( + f"`callback_steps` has to be a positive integer but is {callback_steps} of type" + f" {type(callback_steps)}." 
+ ) + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents + def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None): + shape = (batch_size, num_channels_latents, height // 8, width // 8) + if latents is None: + if device.type == "mps": + # randn does not work reproducibly on mps + latents = torch.randn(shape, generator=generator, device="cpu", dtype=dtype).to(device) + else: + latents = torch.randn(shape, generator=generator, device=device, dtype=dtype) + else: + if latents.shape != shape: + raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}") + latents = latents.to(device) + + # scale the initial noise by the standard deviation required by the scheduler + latents = latents * self.scheduler.init_noise_sigma + return latents + + def prepare_mask_latents( + self, mask, masked_image, batch_size, height, width, dtype, device, generator, do_classifier_free_guidance + ): + # resize the mask to latents shape as we concatenate the mask to the latents + # we do that before converting to dtype to avoid breaking in case we're using cpu_offload + # and half precision + mask = torch.nn.functional.interpolate(mask, size=(height // 8, width // 8)) + mask = mask.to(device=device, dtype=dtype) + + masked_image = masked_image.to(device=device, dtype=dtype) + + # encode the mask image into latents space so we can concatenate it to the latents + masked_image_latents = self.vae.encode(masked_image).latent_dist.sample(generator=generator) + masked_image_latents = 0.18215 * masked_image_latents + + # duplicate mask and masked_image_latents for each generation per prompt, using mps friendly method + mask = mask.repeat(batch_size, 1, 1, 1) + masked_image_latents = masked_image_latents.repeat(batch_size, 1, 1, 1) + + mask = torch.cat([mask] * 2) if do_classifier_free_guidance else mask + masked_image_latents = ( + torch.cat([masked_image_latents] * 2) if do_classifier_free_guidance else masked_image_latents + ) + + # aligning device to prevent device errors when concating it with the latent model input + masked_image_latents = masked_image_latents.to(device=device, dtype=dtype) + return mask, masked_image_latents + @torch.no_grad() def __call__( self, @@ -390,83 +495,59 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): (nsfw) content, according to the `safety_checker`. """ - if isinstance(prompt, str): - batch_size = 1 - elif isinstance(prompt, list): - batch_size = len(prompt) - else: - raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") - - if height % 8 != 0 or width % 8 != 0: - raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.") - - if (callback_steps is None) or ( - callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) - ): - raise ValueError( - f"`callback_steps` has to be a positive integer but is {callback_steps} of type" - f" {type(callback_steps)}." - ) + # 1. Check inputs + self.check_inputs(prompt, height, width, callback_steps) + # 2. Define call parameters + batch_size = 1 if isinstance(prompt, str) else len(prompt) device = self._execution_device - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` # corresponds to doing no classifier free guidance. 
do_classifier_free_guidance = guidance_scale > 1.0 + # 3. Encode input prompt text_embeddings = self._encode_prompt( prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt ) - # get the initial random noise unless the user supplied it - # Unlike in other pipelines, latents need to be generated in the target device - # for 1-to-1 results reproducibility with the CompVis implementation. - # However this currently doesn't work in `mps`. + # 4. Preprocess mask and image + if isinstance(image, PIL.Image.Image) and isinstance(mask_image, PIL.Image.Image): + mask, masked_image = prepare_mask_and_masked_image(image, mask_image) + + # 5. set timesteps + self.scheduler.set_timesteps(num_inference_steps, device=device) + timesteps_tensor = self.scheduler.timesteps + + # 6. Prepare latent variables num_channels_latents = self.vae.config.latent_channels - latents_shape = (batch_size * num_images_per_prompt, num_channels_latents, height // 8, width // 8) - latents_dtype = text_embeddings.dtype - if latents is None: - if device.type == "mps": - # randn does not exist on mps - latents = torch.randn(latents_shape, generator=generator, device="cpu", dtype=latents_dtype).to(device) - else: - latents = torch.randn(latents_shape, generator=generator, device=device, dtype=latents_dtype) - else: - if latents.shape != latents_shape: - raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}") - latents = latents.to(device) - - # prepare mask and masked_image - mask, masked_image = prepare_mask_and_masked_image(image, mask_image) - - # resize the mask to latents shape as we concatenate the mask to the latents - # we do that before converting to dtype to avoid breaking in case we're using cpu_offload - # and half precision - mask = torch.nn.functional.interpolate(mask, size=(height // 8, width // 8)) - mask = mask.to(device=device, dtype=text_embeddings.dtype) - - masked_image = masked_image.to(device=device, dtype=text_embeddings.dtype) - - # encode the mask image into latents space so we can concatenate it to the latents - masked_image_latents = self.vae.encode(masked_image).latent_dist.sample(generator=generator) - masked_image_latents = 0.18215 * masked_image_latents - - # duplicate mask and masked_image_latents for each generation per prompt, using mps friendly method - mask = mask.repeat(batch_size * num_images_per_prompt, 1, 1, 1) - masked_image_latents = masked_image_latents.repeat(batch_size * num_images_per_prompt, 1, 1, 1) - - mask = torch.cat([mask] * 2) if do_classifier_free_guidance else mask - masked_image_latents = ( - torch.cat([masked_image_latents] * 2) if do_classifier_free_guidance else masked_image_latents + latents = self.prepare_latents( + batch_size * num_images_per_prompt, + num_channels_latents, + height, + width, + text_embeddings.dtype, + device, + generator, + latents, ) - # aligning device to prevent device errors when concating it with the latent model input - masked_image_latents = masked_image_latents.to(device=device, dtype=text_embeddings.dtype) + # 7. Prepare mask latent variables + mask, masked_image_latents = self.prepare_mask_latents( + mask, + masked_image, + batch_size, + height, + width, + text_embeddings.dtype, + device, + generator, + do_classifier_free_guidance, + ) + # 8. 
Check that sizes of mask, masked image and latents match num_channels_mask = mask.shape[1] num_channels_masked_image = masked_image_latents.shape[1] - if num_channels_latents + num_channels_mask + num_channels_masked_image != self.unet.config.in_channels: raise ValueError( f"Incorrect configuration settings! The config of `pipeline.unet`: {self.unet.config} expects" @@ -476,27 +557,10 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): " `pipeline.unet` or your `mask_image` or `image` input." ) - # set timesteps and move to the correct device - self.scheduler.set_timesteps(num_inference_steps, device=device) - timesteps_tensor = self.scheduler.timesteps - - # scale the initial noise by the standard deviation required by the scheduler - latents = latents * self.scheduler.init_noise_sigma - - # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature - # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. - # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 - # and should be between [0, 1] - accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) - extra_step_kwargs = {} - if accepts_eta: - extra_step_kwargs["eta"] = eta - - # check if the scheduler accepts generator - accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys()) - if accepts_generator: - extra_step_kwargs["generator"] = generator + # 9. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline + extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) + # 10. Denoising loop for i, t in enumerate(self.progress_bar(timesteps_tensor)): # expand the latents if we are doing classifier free guidance latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents @@ -521,22 +585,13 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): if callback is not None and i % callback_steps == 0: callback(i, t, latents) - latents = 1 / 0.18215 * latents - image = self.vae.decode(latents).sample + # 11. Post-processing + image = self.decode_latents(latents) - image = (image / 2 + 0.5).clamp(0, 1) - - # we always cast to float32 as this does not cause significant overhead and is compatible with bfloa16 - image = image.cpu().permute(0, 2, 3, 1).float().numpy() - - if self.safety_checker is not None: - safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(device) - image, has_nsfw_concept = self.safety_checker( - images=image, clip_input=safety_checker_input.pixel_values.to(text_embeddings.dtype) - ) - else: - has_nsfw_concept = None + # 12. Run safety checker + image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype) + # 13. 
Convert to PIL if output_type == "pil": image = self.numpy_to_pil(image) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py index 612aa3c126..86d879eaa8 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py @@ -19,13 +19,20 @@ import numpy as np import torch import PIL -from tqdm.auto import tqdm +from diffusers.utils import is_accelerate_available from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer from ...configuration_utils import FrozenDict from ...models import AutoencoderKL, UNet2DConditionModel from ...pipeline_utils import DiffusionPipeline -from ...schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler +from ...schedulers import ( + DDIMScheduler, + DPMSolverMultistepScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + LMSDiscreteScheduler, + PNDMScheduler, +) from ...utils import deprecate, logging from . import StableDiffusionPipelineOutput from .safety_checker import StableDiffusionSafetyChecker @@ -85,17 +92,26 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline): Model that extracts features from generated images to be used as inputs for the `safety_checker`. """ + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.__init__ def __init__( self, vae: AutoencoderKL, text_encoder: CLIPTextModel, tokenizer: CLIPTokenizer, unet: UNet2DConditionModel, - scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler], + scheduler: Union[ + DDIMScheduler, + PNDMScheduler, + LMSDiscreteScheduler, + EulerDiscreteScheduler, + EulerAncestralDiscreteScheduler, + DPMSolverMultistepScheduler, + ], safety_checker: StableDiffusionSafetyChecker, feature_extractor: CLIPFeatureExtractor, ): super().__init__() + if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: deprecation_message = ( f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" @@ -143,6 +159,7 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline): feature_extractor=feature_extractor, ) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_attention_slicing def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): r""" Enable sliced attention computation. @@ -162,14 +179,53 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline): slice_size = self.unet.config.attention_head_dim // 2 self.unet.set_attention_slice(slice_size) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_attention_slicing def disable_attention_slicing(self): r""" Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go back to computing attention in one step. """ - # set slice_size = `None` to disable `set_attention_slice` + # set slice_size = `None` to disable `attention slicing` self.enable_attention_slicing(None) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_sequential_cpu_offload + def enable_sequential_cpu_offload(self): + r""" + Offloads all models to CPU using accelerate, significantly reducing memory usage. 
When called, unet, + text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a + `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called. + """ + if is_accelerate_available(): + from accelerate import cpu_offload + else: + raise ImportError("Please install accelerate via `pip install accelerate`") + + device = torch.device("cuda") + + for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae, self.safety_checker]: + if cpu_offloaded_model is not None: + cpu_offload(cpu_offloaded_model, device) + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_xformers_memory_efficient_attention + def enable_xformers_memory_efficient_attention(self): + r""" + Enable memory efficient attention as implemented in xformers. + + When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference + time. Speed up at training time is not guaranteed. + + Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention + is used. + """ + self.unet.set_use_memory_efficient_attention_xformers(True) + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_xformers_memory_efficient_attention + def disable_xformers_memory_efficient_attention(self): + r""" + Disable memory efficient attention as implemented in xformers. + """ + self.unet.set_use_memory_efficient_attention_xformers(False) + @property # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device def _execution_device(self): @@ -275,6 +331,88 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline): return text_embeddings + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker + def run_safety_checker(self, image, device, dtype): + if self.safety_checker is not None: + safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(device) + image, has_nsfw_concept = self.safety_checker( + images=image, clip_input=safety_checker_input.pixel_values.to(dtype) + ) + else: + has_nsfw_concept = None + return image, has_nsfw_concept + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.decode_latents + def decode_latents(self, latents): + latents = 1 / 0.18215 * latents + image = self.vae.decode(latents).sample + image = (image / 2 + 0.5).clamp(0, 1) + # we always cast to float32 as this does not cause significant overhead and is compatible with bfloa16 + image = image.cpu().permute(0, 2, 3, 1).float().numpy() + return image + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs + def prepare_extra_step_kwargs(self, generator, eta): + # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature + # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. 
+ # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 + # and should be between [0, 1] + + accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) + extra_step_kwargs = {} + if accepts_eta: + extra_step_kwargs["eta"] = eta + + # check if the scheduler accepts generator + accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys()) + if accepts_generator: + extra_step_kwargs["generator"] = generator + return extra_step_kwargs + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.check_inputs + def check_inputs(self, prompt, strength, callback_steps): + if not isinstance(prompt, str) and not isinstance(prompt, list): + raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") + + if strength < 0 or strength > 1: + raise ValueError(f"The value of strength should in [1.0, 1.0] but is {strength}") + + if (callback_steps is None) or ( + callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) + ): + raise ValueError( + f"`callback_steps` has to be a positive integer but is {callback_steps} of type" + f" {type(callback_steps)}." + ) + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline.get_timesteps + def get_timesteps(self, num_inference_steps, strength, device): + # get the original timestep using init_timestep + offset = self.scheduler.config.get("steps_offset", 0) + init_timestep = int(num_inference_steps * strength) + offset + init_timestep = min(init_timestep, num_inference_steps) + + t_start = max(num_inference_steps - init_timestep + offset, 0) + timesteps = self.scheduler.timesteps[t_start:] + + return timesteps + + def prepare_latents(self, init_image, timestep, batch_size, num_images_per_prompt, dtype, device, generator): + init_image = init_image.to(device=self.device, dtype=dtype) + init_latent_dist = self.vae.encode(init_image).latent_dist + init_latents = init_latent_dist.sample(generator=generator) + init_latents = 0.18215 * init_latents + + # Expand init_latents for batch_size and num_images_per_prompt + init_latents = torch.cat([init_latents] * batch_size * num_images_per_prompt, dim=0) + init_latents_orig = init_latents + + # add noise to latents using the timesteps + noise = torch.randn(init_latents.shape, generator=generator, device=self.device, dtype=dtype) + init_latents = self.scheduler.add_noise(init_latents, noise, timestep) + latents = init_latents + return latents, init_latents_orig, noise + @torch.no_grad() def __call__( self, @@ -353,98 +491,49 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline): list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" (nsfw) content, according to the `safety_checker`. """ - if isinstance(prompt, str): - batch_size = 1 - elif isinstance(prompt, list): - batch_size = len(prompt) - else: - raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") - - if strength < 0 or strength > 1: - raise ValueError(f"The value of strength should in [0.0, 1.0] but is {strength}") - - if (callback_steps is None) or ( - callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) - ): - raise ValueError( - f"`callback_steps` has to be a positive integer but is {callback_steps} of type" - f" {type(callback_steps)}." - ) + # 1. 
Check inputs + self.check_inputs(prompt, strength, callback_steps) + # 2. Define call parameters + batch_size = 1 if isinstance(prompt, str) else len(prompt) device = self._execution_device - - # set timesteps - self.scheduler.set_timesteps(num_inference_steps) - - # preprocess image - if not isinstance(init_image, torch.FloatTensor): - init_image = preprocess_image(init_image) - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` # corresponds to doing no classifier free guidance. do_classifier_free_guidance = guidance_scale > 1.0 + # 3. Encode input prompt text_embeddings = self._encode_prompt( prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt ) - # encode the init image into latents and scale the latents - latents_dtype = text_embeddings.dtype - init_image = init_image.to(device=self.device, dtype=latents_dtype) - init_latent_dist = self.vae.encode(init_image).latent_dist - init_latents = init_latent_dist.sample(generator=generator) - init_latents = 0.18215 * init_latents + # 4. Preprocess image and mask + if not isinstance(init_image, torch.FloatTensor): + init_image = preprocess_image(init_image) - # Expand init_latents for batch_size and num_images_per_prompt - init_latents = torch.cat([init_latents] * batch_size * num_images_per_prompt, dim=0) - init_latents_orig = init_latents - - # preprocess mask if not isinstance(mask_image, torch.FloatTensor): mask_image = preprocess_mask(mask_image) - mask_image = mask_image.to(device=self.device, dtype=latents_dtype) - mask = torch.cat([mask_image] * batch_size * num_images_per_prompt) - # check sizes - if not mask.shape == init_latents.shape: - raise ValueError("The mask and init_image should be the same size!") + # 5. set timesteps + self.scheduler.set_timesteps(num_inference_steps, device=device) + timesteps = self.get_timesteps(num_inference_steps, strength, device) + latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) - # get the original timestep using init_timestep - offset = self.scheduler.config.get("steps_offset", 0) - init_timestep = int(num_inference_steps * strength) + offset - init_timestep = min(init_timestep, num_inference_steps) + # 6. Prepare latent variables + # encode the init image into latents and scale the latents + latents, init_latents_orig, noise = self.prepare_latents( + init_image, latent_timestep, batch_size, num_images_per_prompt, text_embeddings.dtype, device, generator + ) - timesteps = self.scheduler.timesteps[-init_timestep] - timesteps = torch.tensor([timesteps] * batch_size * num_images_per_prompt, device=self.device) + # 7. Prepare mask latent + mask = mask_image.to(device=self.device, dtype=latents.dtype) + mask = torch.cat([mask] * batch_size * num_images_per_prompt) - # add noise to latents using the timesteps - noise = torch.randn(init_latents.shape, generator=generator, device=self.device, dtype=latents_dtype) - init_latents = self.scheduler.add_noise(init_latents, noise, timesteps) + # 8. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline + extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) - # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature - # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. 
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 - # and should be between [0, 1] - accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) - extra_step_kwargs = {} - if accepts_eta: - extra_step_kwargs["eta"] = eta - - # check if the scheduler accepts generator - accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys()) - if accepts_generator: - extra_step_kwargs["generator"] = generator - - latents = init_latents - - t_start = max(num_inference_steps - init_timestep + offset, 0) - - # Some schedulers like PNDM have timesteps as arrays - # It's more optimized to move all timesteps to correct device beforehand - timesteps = self.scheduler.timesteps[t_start:].to(self.device) - - for i, t in tqdm(enumerate(timesteps)): + # 9. Denoising loop + for i, t in enumerate(self.progress_bar(timesteps)): # expand the latents if we are doing classifier free guidance latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) @@ -468,22 +557,13 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline): if callback is not None and i % callback_steps == 0: callback(i, t, latents) - latents = 1 / 0.18215 * latents - image = self.vae.decode(latents).sample + # 10. Post-processing + image = self.decode_latents(latents) - image = (image / 2 + 0.5).clamp(0, 1) - image = image.cpu().permute(0, 2, 3, 1).numpy() - - if self.safety_checker is not None: - safety_checker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to( - self.device - ) - image, has_nsfw_concept = self.safety_checker( - images=image, clip_input=safety_checker_input.pixel_values.to(text_embeddings.dtype) - ) - else: - has_nsfw_concept = None + # 11. Run safety checker + image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype) + # 12. 
Convert to PIL if output_type == "pil": image = self.numpy_to_pil(image) diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py index 6d5c6feab5..3c0fa8aa81 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py @@ -22,6 +22,7 @@ import torch from diffusers import ( AutoencoderKL, + DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler, StableDiffusionImg2ImgPipeline, @@ -479,7 +480,7 @@ class StableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase): ) init_image = init_image.resize((768, 512)) expected_image = load_numpy( - "https://huggingface.co/datasets/lewington/expected-images/resolve/main/fantasy_landscape.npy" + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/img2img/fantasy_landscape.npy" ) model_id = "CompVis/stable-diffusion-v1-4" @@ -506,7 +507,7 @@ class StableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase): assert image.shape == (512, 768, 3) # img2img is flaky across GPUs even in fp32, so using MAE here - assert np.abs(expected_image - image).mean() < 1e-3 + assert np.abs(expected_image - image).max() < 1e-3 def test_stable_diffusion_img2img_pipeline_k_lms(self): init_image = load_image( @@ -515,7 +516,7 @@ class StableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase): ) init_image = init_image.resize((768, 512)) expected_image = load_numpy( - "https://huggingface.co/datasets/lewington/expected-images/resolve/main/fantasy_landscape_k_lms.npy" + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/img2img/fantasy_landscape_k_lms.npy" ) model_id = "CompVis/stable-diffusion-v1-4" @@ -543,8 +544,44 @@ class StableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase): image = output.images[0] assert image.shape == (512, 768, 3) - # img2img is flaky across GPUs even in fp32, so using MAE here - assert np.abs(expected_image - image).mean() < 1e-3 + assert np.abs(expected_image - image).max() < 1e-3 + + def test_stable_diffusion_img2img_pipeline_ddim(self): + init_image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" + "/img2img/sketch-mountains-input.jpg" + ) + init_image = init_image.resize((768, 512)) + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/img2img/fantasy_landscape_ddim.npy" + ) + + model_id = "CompVis/stable-diffusion-v1-4" + ddim = DDIMScheduler.from_config(model_id, subfolder="scheduler") + pipe = StableDiffusionImg2ImgPipeline.from_pretrained( + model_id, + scheduler=ddim, + safety_checker=None, + ) + pipe.to(torch_device) + pipe.set_progress_bar_config(disable=None) + pipe.enable_attention_slicing() + + prompt = "A fantasy landscape, trending on artstation" + + generator = torch.Generator(device=torch_device).manual_seed(0) + output = pipe( + prompt=prompt, + init_image=init_image, + strength=0.75, + guidance_scale=7.5, + generator=generator, + output_type="np", + ) + image = output.images[0] + + assert image.shape == (512, 768, 3) + assert np.abs(expected_image - image).max() < 1e-3 def test_stable_diffusion_img2img_intermediate_state(self): number_of_steps = 0 diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint_legacy.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint_legacy.py index c5b2572fb7..4b535dc9df 100644 --- 
a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint_legacy.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint_legacy.py @@ -387,7 +387,6 @@ class StableDiffusionInpaintLegacyPipelineIntegrationTests(unittest.TestCase): assert np.abs(expected_image - image).max() < 1e-3 def test_stable_diffusion_inpaint_legacy_pipeline_k_lms(self): - # TODO(Anton, Patrick) - I think we can remove this test soon init_image = load_image( "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/in_paint/overture-creations-5sI6fQgYIuo.png" From 33d7e89c42e0fe7b4a277d7a5bae12ba14828dd8 Mon Sep 17 00:00:00 2001 From: Lime-Cakes <91322985+Lime-Cakes@users.noreply.github.com> Date: Mon, 14 Nov 2022 20:35:47 +0800 Subject: [PATCH 84/88] Edited attention.py for older xformers (#1270) Older versions of xformers require query, key, value to be contiguous, this calls .contiguous() on q/k/v before passing to xformers. --- src/diffusers/models/attention.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/diffusers/models/attention.py b/src/diffusers/models/attention.py index e8ea37970e..be9203b4d6 100644 --- a/src/diffusers/models/attention.py +++ b/src/diffusers/models/attention.py @@ -557,6 +557,9 @@ class CrossAttention(nn.Module): return hidden_states def _memory_efficient_attention_xformers(self, query, key, value): + query = query.contiguous() + key = key.contiguous() + value = value.contiguous() hidden_states = xformers.ops.memory_efficient_attention(query, key, value, attn_bias=None) hidden_states = self.reshape_batch_dim_to_heads(hidden_states) return hidden_states From c9b34637039b8b7374ae15c70aa93289bc5b6ccd Mon Sep 17 00:00:00 2001 From: Partho Date: Tue, 15 Nov 2022 01:12:14 +0530 Subject: [PATCH 85/88] Fix wrong link in text2img fine-tuning documentation (#1282) fix link typo --- docs/source/training/text2image.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/training/text2image.mdx b/docs/source/training/text2image.mdx index 1b04462f77..eb71457cb7 100644 --- a/docs/source/training/text2image.mdx +++ b/docs/source/training/text2image.mdx @@ -13,7 +13,7 @@ specific language governing permissions and limitations under the License. # Stable Diffusion text-to-image fine-tuning -The [`train_text_to_image.py`](https://github.com/huggingface/diffusers/tree/main/examples/textual_inversion) script shows how to fine-tune the stable diffusion model on your own dataset. +The [`train_text_to_image.py`](https://github.com/huggingface/diffusers/tree/main/examples/text_to_image) script shows how to fine-tune the stable diffusion model on your own dataset. 
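The `.contiguous()` change to `attention.py` above only takes effect once the xformers code path is actually switched on for a pipeline, via the `enable_xformers_memory_efficient_attention` helper the pipelines expose. A minimal sketch of turning it on, assuming `diffusers` and `xformers` are installed and a CUDA device is available (the model id is the one used in the tests above; prompt and filename are illustrative):

```python
import torch
from diffusers import StableDiffusionPipeline

# Load a pipeline and move it to the GPU; the xformers attention path is only relevant on CUDA.
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4").to("cuda")

# Route CrossAttention through xformers' memory-efficient kernel.
# On older xformers releases, the contiguity fix above is what keeps this path from erroring.
pipe.enable_xformers_memory_efficient_attention()

image = pipe("a photo of an astronaut riding a horse").images[0]
image.save("astronaut.png")

# The path can be switched off again if needed:
pipe.disable_xformers_memory_efficient_attention()
```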
From a8d0977769debad7c1071895f576f9413e2b967d Mon Sep 17 00:00:00 2001 From: Suraj Patil Date: Mon, 14 Nov 2022 22:03:10 +0100 Subject: [PATCH 86/88] [StableDiffusionInpaintPipeline] fix batch_size for mask and masked latents (#1279) fix bs for mask and masked latents --- .../pipeline_stable_diffusion_inpaint.py | 2 +- .../test_stable_diffusion_inpaint.py | 41 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 332eb2ca77..a122723eee 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -536,7 +536,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): mask, masked_image_latents = self.prepare_mask_latents( mask, masked_image, - batch_size, + batch_size * num_images_per_prompt, height, width, text_embeddings.dtype, diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py index 5fcdd71dd6..8d269c38f9 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py @@ -215,6 +215,47 @@ class StableDiffusionInpaintPipelineFastTests(PipelineTesterMixin, unittest.Test assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 + def test_stable_diffusion_inpaint_with_num_images_per_prompt(self): + device = "cpu" + unet = self.dummy_cond_unet_inpaint + scheduler = PNDMScheduler(skip_prk_steps=True) + vae = self.dummy_vae + bert = self.dummy_text_encoder + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + image = self.dummy_image.cpu().permute(0, 2, 3, 1)[0] + init_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((128, 128)) + mask_image = Image.fromarray(np.uint8(image + 4)).convert("RGB").resize((128, 128)) + + # make sure here that pndm scheduler skips prk + sd_pipe = StableDiffusionInpaintPipeline( + unet=unet, + scheduler=scheduler, + vae=vae, + text_encoder=bert, + tokenizer=tokenizer, + safety_checker=None, + feature_extractor=None, + ) + sd_pipe = sd_pipe.to(device) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "A painting of a squirrel eating a burger" + generator = torch.Generator(device=device).manual_seed(0) + images = sd_pipe( + [prompt], + generator=generator, + guidance_scale=6.0, + num_inference_steps=2, + output_type="np", + image=init_image, + mask_image=mask_image, + num_images_per_prompt=2, + ).images + + # check if the output is a list of 2 images + assert len(images) == 2 + @unittest.skipIf(torch_device != "cuda", "This test requires a GPU") def test_stable_diffusion_inpaint_fp16(self): """Test that stable diffusion inpaint_legacy works with fp16""" From 7c5fef81e0aecff65c041a9dfb23aff22bf64f4b Mon Sep 17 00:00:00 2001 From: Nathan Lambert Date: Mon, 14 Nov 2022 13:48:48 -0800 Subject: [PATCH 87/88] Add UNet 1d for RL model for planning + colab (#105) * re-add RL model code * match model forward api * add register_to_config, pass training tests * fix tests, update forward outputs * remove unused code, some comments * add to docs * remove extra embedding code * unify time embedding * remove conv1d output sequential * remove sequential 
from conv1dblock * style and deleting duplicated code * clean files * remove unused variables * clean variables * add 1d resnet block structure for downsample * rename as unet1d * fix renaming * rename files * add get_block(...) api * unify args for model1d like model2d * minor cleaning * fix docs * improve 1d resnet blocks * fix tests, remove permuts * fix style * add output activation * rename flax blocks file * Add Value Function and corresponding example script to Diffuser implementation (#884) * valuefunction code * start example scripts * missing imports * bug fixes and placeholder example script * add value function scheduler * load value function from hub and get best actions in example * very close to working example * larger batch size for planning * more tests * merge unet1d changes * wandb for debugging, use newer models * success! * turns out we just need more diffusion steps * run on modal * merge and code cleanup * use same api for rl model * fix variance type * wrong normalization function * add tests * style * style and quality * edits based on comments * style and quality * remove unused var * hack unet1d into a value function * add pipeline * fix arg order * add pipeline to core library * community pipeline * fix couple shape bugs * style * Apply suggestions from code review Co-authored-by: Nathan Lambert * update post merge of scripts * add mdiblock / outblock architecture * Pipeline cleanup (#947) * valuefunction code * start example scripts * missing imports * bug fixes and placeholder example script * add value function scheduler * load value function from hub and get best actions in example * very close to working example * larger batch size for planning * more tests * merge unet1d changes * wandb for debugging, use newer models * success! * turns out we just need more diffusion steps * run on modal * merge and code cleanup * use same api for rl model * fix variance type * wrong normalization function * add tests * style * style and quality * edits based on comments * style and quality * remove unused var * hack unet1d into a value function * add pipeline * fix arg order * add pipeline to core library * community pipeline * fix couple shape bugs * style * Apply suggestions from code review * clean up comments * convert older script to using pipeline and add readme * rename scripts * style, update tests * delete unet rl model file * remove imports in src Co-authored-by: Nathan Lambert * Update src/diffusers/models/unet_1d_blocks.py * Update tests/test_models_unet.py * RL Cleanup v2 (#965) * valuefunction code * start example scripts * missing imports * bug fixes and placeholder example script * add value function scheduler * load value function from hub and get best actions in example * very close to working example * larger batch size for planning * more tests * merge unet1d changes * wandb for debugging, use newer models * success! 
* turns out we just need more diffusion steps * run on modal * merge and code cleanup * use same api for rl model * fix variance type * wrong normalization function * add tests * style * style and quality * edits based on comments * style and quality * remove unused var * hack unet1d into a value function * add pipeline * fix arg order * add pipeline to core library * community pipeline * fix couple shape bugs * style * Apply suggestions from code review * clean up comments * convert older script to using pipeline and add readme * rename scripts * style, update tests * delete unet rl model file * remove imports in src * add specific vf block and update tests * style * Update tests/test_models_unet.py Co-authored-by: Nathan Lambert * fix quality in tests * fix quality style, split test file * fix checks / tests * make timesteps closer to main * unify block API * unify forward api * delete lines in examples * style * examples style * all tests pass * make style * make dance_diff test pass * Refactoring RL PR (#1200) * init file changes * add import utils * finish cleaning files, imports * remove import flags * clean examples * fix imports, tests for merge * update readmes * hotfix for tests * quality * fix some tests * change defaults * more mps test fixes * unet1d defaults * do not default import experimental * defaults for tests * fix tests * fix-copies * fix * changes per Patrik's comments (#1285) * changes per Patrik's comments * update conversion script * fix renaming * skip more mps tests * last test fix * Update examples/rl/README.md Co-authored-by: Ben Glickenhaus --- .gitignore | 4 +- docs/source/api/models.mdx | 9 +- examples/README.md | 2 +- examples/rl/README.md | 19 + examples/rl/run_diffuser_gen_trajectories.py | 57 +++ examples/rl/run_diffuser_locomotion.py | 57 +++ .../convert_models_diffuser_to_diffusers.py | 100 +++++ src/diffusers/experimental/README.md | 5 + src/diffusers/experimental/__init__.py | 1 + src/diffusers/experimental/rl/__init__.py | 1 + .../experimental/rl/value_guided_sampling.py | 129 +++++++ src/diffusers/models/embeddings.py | 13 +- src/diffusers/models/resnet.py | 138 ++++++- src/diffusers/models/unet_1d.py | 115 ++++-- src/diffusers/models/unet_1d_blocks.py | 346 ++++++++++++++++-- src/diffusers/schedulers/scheduling_ddpm.py | 6 +- tests/models/test_models_unet_1d.py | 235 +++++++++++- .../dance_diffusion/test_dance_diffusion.py | 4 + 18 files changed, 1176 insertions(+), 65 deletions(-) create mode 100644 examples/rl/README.md create mode 100644 examples/rl/run_diffuser_gen_trajectories.py create mode 100644 examples/rl/run_diffuser_locomotion.py create mode 100644 scripts/convert_models_diffuser_to_diffusers.py create mode 100644 src/diffusers/experimental/README.md create mode 100644 src/diffusers/experimental/__init__.py create mode 100644 src/diffusers/experimental/rl/__init__.py create mode 100644 src/diffusers/experimental/rl/value_guided_sampling.py diff --git a/.gitignore b/.gitignore index cf81834636..f018a111ea 100644 --- a/.gitignore +++ b/.gitignore @@ -163,4 +163,6 @@ tags *.lock # DS_Store (MacOS) -.DS_Store \ No newline at end of file +.DS_Store +# RL pipelines may produce mp4 outputs +*.mp4 \ No newline at end of file diff --git a/docs/source/api/models.mdx b/docs/source/api/models.mdx index 2e1e8798a7..7c1faa8474 100644 --- a/docs/source/api/models.mdx +++ b/docs/source/api/models.mdx @@ -22,12 +22,15 @@ The models are built on the base class ['ModelMixin'] that is a `torch.nn.module ## UNet2DOutput [[autodoc]] 
models.unet_2d.UNet2DOutput -## UNet1DModel -[[autodoc]] UNet1DModel - ## UNet2DModel [[autodoc]] UNet2DModel +## UNet1DOutput +[[autodoc]] models.unet_1d.UNet1DOutput + +## UNet1DModel +[[autodoc]] UNet1DModel + ## UNet2DConditionOutput [[autodoc]] models.unet_2d_condition.UNet2DConditionOutput diff --git a/examples/README.md b/examples/README.md index 29872a7a16..06ce06b9e3 100644 --- a/examples/README.md +++ b/examples/README.md @@ -42,7 +42,7 @@ Training examples show how to pretrain or fine-tune diffusion models for a varie | [**Text-to-Image fine-tuning**](./text_to_image) | ✅ | ✅ | | [**Textual Inversion**](./textual_inversion) | ✅ | - | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_textual_inversion_training.ipynb) | [**Dreambooth**](./dreambooth) | ✅ | - | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_dreambooth_training.ipynb) - +| [**Reinforcement Learning for Control**](https://github.com/huggingface/diffusers/blob/main/examples/rl/run_diffuser_locomotion.py) | - | - | coming soon. ## Community diff --git a/examples/rl/README.md b/examples/rl/README.md new file mode 100644 index 0000000000..d68f2bf780 --- /dev/null +++ b/examples/rl/README.md @@ -0,0 +1,19 @@ +# Overview + +These examples show how to run [Diffuser](https://arxiv.org/abs/2205.09991) in Diffusers. +There are two scripts: +1. `run_diffuser_locomotion.py` to sample actions and run them in the environment, +2. `run_diffuser_gen_trajectories.py` to just sample actions from the pre-trained diffusion model. + +You will need some RL specific requirements to run the examples: + +``` +pip install -f https://download.pytorch.org/whl/torch_stable.html \ + free-mujoco-py \ + einops \ + gym==0.24.1 \ + protobuf==3.20.1 \ + git+https://github.com/rail-berkeley/d4rl.git \ + mediapy \ + Pillow==9.0.0 +``` diff --git a/examples/rl/run_diffuser_gen_trajectories.py b/examples/rl/run_diffuser_gen_trajectories.py new file mode 100644 index 0000000000..5bb068cc9f --- /dev/null +++ b/examples/rl/run_diffuser_gen_trajectories.py @@ -0,0 +1,57 @@ +import d4rl # noqa +import gym +import tqdm +from diffusers.experimental import ValueGuidedRLPipeline + + +config = dict( + n_samples=64, + horizon=32, + num_inference_steps=20, + n_guide_steps=0, + scale_grad_by_std=True, + scale=0.1, + eta=0.0, + t_grad_cutoff=2, + device="cpu", +) + + +if __name__ == "__main__": + env_name = "hopper-medium-v2" + env = gym.make(env_name) + + pipeline = ValueGuidedRLPipeline.from_pretrained( + "bglick13/hopper-medium-v2-value-function-hor32", + env=env, + ) + + env.seed(0) + obs = env.reset() + total_reward = 0 + total_score = 0 + T = 1000 + rollout = [obs.copy()] + try: + for t in tqdm.tqdm(range(T)): + # Call the policy + denorm_actions = pipeline(obs, planning_horizon=32) + + # execute action in environment + next_observation, reward, terminal, _ = env.step(denorm_actions) + score = env.get_normalized_score(total_reward) + # update return + total_reward += reward + total_score += score + print( + f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}, Score: {score}, Total Score:" + f" {total_score}" + ) + # save observations for rendering + rollout.append(next_observation.copy()) + + obs = next_observation + except KeyboardInterrupt: + pass + + print(f"Total reward: {total_reward}") diff --git
a/examples/rl/run_diffuser_locomotion.py b/examples/rl/run_diffuser_locomotion.py new file mode 100644 index 0000000000..e89181610b --- /dev/null +++ b/examples/rl/run_diffuser_locomotion.py @@ -0,0 +1,57 @@ +import d4rl # noqa +import gym +import tqdm +from diffusers.experimental import ValueGuidedRLPipeline + + +config = dict( + n_samples=64, + horizon=32, + num_inference_steps=20, + n_guide_steps=2, + scale_grad_by_std=True, + scale=0.1, + eta=0.0, + t_grad_cutoff=2, + device="cpu", +) + + +if __name__ == "__main__": + env_name = "hopper-medium-v2" + env = gym.make(env_name) + + pipeline = ValueGuidedRLPipeline.from_pretrained( + "bglick13/hopper-medium-v2-value-function-hor32", + env=env, + ) + + env.seed(0) + obs = env.reset() + total_reward = 0 + total_score = 0 + T = 1000 + rollout = [obs.copy()] + try: + for t in tqdm.tqdm(range(T)): + # call the policy + denorm_actions = pipeline(obs, planning_horizon=32) + + # execute action in environment + next_observation, reward, terminal, _ = env.step(denorm_actions) + score = env.get_normalized_score(total_reward) + # update return + total_reward += reward + total_score += score + print( + f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}, Score: {score}, Total Score:" + f" {total_score}" + ) + # save observations for rendering + rollout.append(next_observation.copy()) + + obs = next_observation + except KeyboardInterrupt: + pass + + print(f"Total reward: {total_reward}") diff --git a/scripts/convert_models_diffuser_to_diffusers.py b/scripts/convert_models_diffuser_to_diffusers.py new file mode 100644 index 0000000000..9475f7da93 --- /dev/null +++ b/scripts/convert_models_diffuser_to_diffusers.py @@ -0,0 +1,100 @@ +import json +import os + +import torch + +from diffusers import UNet1DModel + + +os.makedirs("hub/hopper-medium-v2/unet/hor32", exist_ok=True) +os.makedirs("hub/hopper-medium-v2/unet/hor128", exist_ok=True) + +os.makedirs("hub/hopper-medium-v2/value_function", exist_ok=True) + + +def unet(hor): + if hor == 128: + down_block_types = ("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D") + block_out_channels = (32, 128, 256) + up_block_types = ("UpResnetBlock1D", "UpResnetBlock1D") + + elif hor == 32: + down_block_types = ("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D") + block_out_channels = (32, 64, 128, 256) + up_block_types = ("UpResnetBlock1D", "UpResnetBlock1D", "UpResnetBlock1D") + model = torch.load(f"/Users/bglickenhaus/Documents/diffuser/temporal_unet-hopper-mediumv2-hor{hor}.torch") + state_dict = model.state_dict() + config = dict( + down_block_types=down_block_types, + block_out_channels=block_out_channels, + up_block_types=up_block_types, + layers_per_block=1, + use_timestep_embedding=True, + out_block_type="OutConv1DBlock", + norm_num_groups=8, + downsample_each_block=False, + in_channels=14, + out_channels=14, + extra_in_channels=0, + time_embedding_type="positional", + flip_sin_to_cos=False, + freq_shift=1, + sample_size=65536, + mid_block_type="MidResTemporalBlock1D", + act_fn="mish", + ) + hf_value_function = UNet1DModel(**config) + print(f"length of state dict: {len(state_dict.keys())}") + print(f"length of value function dict: {len(hf_value_function.state_dict().keys())}") + mapping = dict((k, hfk) for k, hfk in zip(model.state_dict().keys(), hf_value_function.state_dict().keys())) + for k, v in mapping.items(): + state_dict[v] = state_dict.pop(k) + hf_value_function.load_state_dict(state_dict) + + torch.save(hf_value_function.state_dict(), 
f"hub/hopper-medium-v2/unet/hor{hor}/diffusion_pytorch_model.bin") + with open(f"hub/hopper-medium-v2/unet/hor{hor}/config.json", "w") as f: + json.dump(config, f) + + +def value_function(): + config = dict( + in_channels=14, + down_block_types=("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D"), + up_block_types=(), + out_block_type="ValueFunction", + mid_block_type="ValueFunctionMidBlock1D", + block_out_channels=(32, 64, 128, 256), + layers_per_block=1, + downsample_each_block=True, + sample_size=65536, + out_channels=14, + extra_in_channels=0, + time_embedding_type="positional", + use_timestep_embedding=True, + flip_sin_to_cos=False, + freq_shift=1, + norm_num_groups=8, + act_fn="mish", + ) + + model = torch.load("/Users/bglickenhaus/Documents/diffuser/value_function-hopper-mediumv2-hor32.torch") + state_dict = model + hf_value_function = UNet1DModel(**config) + print(f"length of state dict: {len(state_dict.keys())}") + print(f"length of value function dict: {len(hf_value_function.state_dict().keys())}") + + mapping = dict((k, hfk) for k, hfk in zip(state_dict.keys(), hf_value_function.state_dict().keys())) + for k, v in mapping.items(): + state_dict[v] = state_dict.pop(k) + + hf_value_function.load_state_dict(state_dict) + + torch.save(hf_value_function.state_dict(), "hub/hopper-medium-v2/value_function/diffusion_pytorch_model.bin") + with open("hub/hopper-medium-v2/value_function/config.json", "w") as f: + json.dump(config, f) + + +if __name__ == "__main__": + unet(32) + # unet(128) + value_function() diff --git a/src/diffusers/experimental/README.md b/src/diffusers/experimental/README.md new file mode 100644 index 0000000000..81a9de81c7 --- /dev/null +++ b/src/diffusers/experimental/README.md @@ -0,0 +1,5 @@ +# 🧨 Diffusers Experimental + +We are adding experimental code to support novel applications and usages of the Diffusers library. +Currently, the following experiments are supported: +* Reinforcement learning via an implementation of the [Diffuser](https://arxiv.org/abs/2205.09991) model. \ No newline at end of file diff --git a/src/diffusers/experimental/__init__.py b/src/diffusers/experimental/__init__.py new file mode 100644 index 0000000000..ebc8155403 --- /dev/null +++ b/src/diffusers/experimental/__init__.py @@ -0,0 +1 @@ +from .rl import ValueGuidedRLPipeline diff --git a/src/diffusers/experimental/rl/__init__.py b/src/diffusers/experimental/rl/__init__.py new file mode 100644 index 0000000000..7b338d3173 --- /dev/null +++ b/src/diffusers/experimental/rl/__init__.py @@ -0,0 +1 @@ +from .value_guided_sampling import ValueGuidedRLPipeline diff --git a/src/diffusers/experimental/rl/value_guided_sampling.py b/src/diffusers/experimental/rl/value_guided_sampling.py new file mode 100644 index 0000000000..8d5062e3d4 --- /dev/null +++ b/src/diffusers/experimental/rl/value_guided_sampling.py @@ -0,0 +1,129 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +import torch + +import tqdm + +from ...models.unet_1d import UNet1DModel +from ...pipeline_utils import DiffusionPipeline +from ...utils.dummy_pt_objects import DDPMScheduler + + +class ValueGuidedRLPipeline(DiffusionPipeline): + def __init__( + self, + value_function: UNet1DModel, + unet: UNet1DModel, + scheduler: DDPMScheduler, + env, + ): + super().__init__() + self.value_function = value_function + self.unet = unet + self.scheduler = scheduler + self.env = env + self.data = env.get_dataset() + self.means = dict() + for key in self.data.keys(): + try: + self.means[key] = self.data[key].mean() + except: + pass + self.stds = dict() + for key in self.data.keys(): + try: + self.stds[key] = self.data[key].std() + except: + pass + self.state_dim = env.observation_space.shape[0] + self.action_dim = env.action_space.shape[0] + + def normalize(self, x_in, key): + return (x_in - self.means[key]) / self.stds[key] + + def de_normalize(self, x_in, key): + return x_in * self.stds[key] + self.means[key] + + def to_torch(self, x_in): + if type(x_in) is dict: + return {k: self.to_torch(v) for k, v in x_in.items()} + elif torch.is_tensor(x_in): + return x_in.to(self.unet.device) + return torch.tensor(x_in, device=self.unet.device) + + def reset_x0(self, x_in, cond, act_dim): + for key, val in cond.items(): + x_in[:, key, act_dim:] = val.clone() + return x_in + + def run_diffusion(self, x, conditions, n_guide_steps, scale): + batch_size = x.shape[0] + y = None + for i in tqdm.tqdm(self.scheduler.timesteps): + # create batch of timesteps to pass into model + timesteps = torch.full((batch_size,), i, device=self.unet.device, dtype=torch.long) + for _ in range(n_guide_steps): + with torch.enable_grad(): + x.requires_grad_() + y = self.value_function(x.permute(0, 2, 1), timesteps).sample + grad = torch.autograd.grad([y.sum()], [x])[0] + + posterior_variance = self.scheduler._get_variance(i) + model_std = torch.exp(0.5 * posterior_variance) + grad = model_std * grad + grad[timesteps < 2] = 0 + x = x.detach() + x = x + scale * grad + x = self.reset_x0(x, conditions, self.action_dim) + prev_x = self.unet(x.permute(0, 2, 1), timesteps).sample.permute(0, 2, 1) + x = self.scheduler.step(prev_x, i, x, predict_epsilon=False)["prev_sample"] + + # apply conditions to the trajectory + x = self.reset_x0(x, conditions, self.action_dim) + x = self.to_torch(x) + return x, y + + def __call__(self, obs, batch_size=64, planning_horizon=32, n_guide_steps=2, scale=0.1): + # normalize the observations and create batch dimension + obs = self.normalize(obs, "observations") + obs = obs[None].repeat(batch_size, axis=0) + + conditions = {0: self.to_torch(obs)} + shape = (batch_size, planning_horizon, self.state_dim + self.action_dim) + + # generate initial noise and apply our conditions (to make the trajectories start at current state) + x1 = torch.randn(shape, device=self.unet.device) + x = self.reset_x0(x1, conditions, self.action_dim) + x = self.to_torch(x) + + # run the diffusion process + x, y = self.run_diffusion(x, conditions, n_guide_steps, scale) + + # sort output trajectories by value + sorted_idx = y.argsort(0, descending=True).squeeze() + sorted_values = x[sorted_idx] + actions = sorted_values[:, :, : self.action_dim] + actions = actions.detach().cpu().numpy() + denorm_actions = self.de_normalize(actions, key="actions") + + # select the action with the highest value + if y is not None: + selected_index = 0 + else: + # if we didn't run value guiding, select a random action + selected_index = 
np.random.randint(0, batch_size) + denorm_actions = denorm_actions[selected_index, 0] + return denorm_actions diff --git a/src/diffusers/models/embeddings.py b/src/diffusers/models/embeddings.py index b09d43fc2e..0221d891f1 100644 --- a/src/diffusers/models/embeddings.py +++ b/src/diffusers/models/embeddings.py @@ -62,14 +62,21 @@ def get_timestep_embedding( class TimestepEmbedding(nn.Module): - def __init__(self, channel: int, time_embed_dim: int, act_fn: str = "silu"): + def __init__(self, in_channels: int, time_embed_dim: int, act_fn: str = "silu", out_dim: int = None): super().__init__() - self.linear_1 = nn.Linear(channel, time_embed_dim) + self.linear_1 = nn.Linear(in_channels, time_embed_dim) self.act = None if act_fn == "silu": self.act = nn.SiLU() - self.linear_2 = nn.Linear(time_embed_dim, time_embed_dim) + elif act_fn == "mish": + self.act = nn.Mish() + + if out_dim is not None: + time_embed_dim_out = out_dim + else: + time_embed_dim_out = time_embed_dim + self.linear_2 = nn.Linear(time_embed_dim, time_embed_dim_out) def forward(self, sample): sample = self.linear_1(sample) diff --git a/src/diffusers/models/resnet.py b/src/diffusers/models/resnet.py index 7bb5416adf..52d056ae96 100644 --- a/src/diffusers/models/resnet.py +++ b/src/diffusers/models/resnet.py @@ -5,6 +5,75 @@ import torch.nn as nn import torch.nn.functional as F +class Upsample1D(nn.Module): + """ + An upsampling layer with an optional convolution. + + Parameters: + channels: channels in the inputs and outputs. + use_conv: a bool determining if a convolution is applied. + use_conv_transpose: + out_channels: + """ + + def __init__(self, channels, use_conv=False, use_conv_transpose=False, out_channels=None, name="conv"): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_conv_transpose = use_conv_transpose + self.name = name + + self.conv = None + if use_conv_transpose: + self.conv = nn.ConvTranspose1d(channels, self.out_channels, 4, 2, 1) + elif use_conv: + self.conv = nn.Conv1d(self.channels, self.out_channels, 3, padding=1) + + def forward(self, x): + assert x.shape[1] == self.channels + if self.use_conv_transpose: + return self.conv(x) + + x = F.interpolate(x, scale_factor=2.0, mode="nearest") + + if self.use_conv: + x = self.conv(x) + + return x + + +class Downsample1D(nn.Module): + """ + A downsampling layer with an optional convolution. + + Parameters: + channels: channels in the inputs and outputs. + use_conv: a bool determining if a convolution is applied. + out_channels: + padding: + """ + + def __init__(self, channels, use_conv=False, out_channels=None, padding=1, name="conv"): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.padding = padding + stride = 2 + self.name = name + + if use_conv: + self.conv = nn.Conv1d(self.channels, self.out_channels, 3, stride=stride, padding=padding) + else: + assert self.channels == self.out_channels + self.conv = nn.AvgPool1d(kernel_size=stride, stride=stride) + + def forward(self, x): + assert x.shape[1] == self.channels + return self.conv(x) + + class Upsample2D(nn.Module): """ An upsampling layer with an optional convolution. @@ -12,7 +81,8 @@ class Upsample2D(nn.Module): Parameters: channels: channels in the inputs and outputs. use_conv: a bool determining if a convolution is applied. - dims: determines if the signal is 1D, 2D, or 3D. If 3D, then upsampling occurs in the inner-two dimensions. 
+ use_conv_transpose: + out_channels: """ def __init__(self, channels, use_conv=False, use_conv_transpose=False, out_channels=None, name="conv"): @@ -80,7 +150,8 @@ class Downsample2D(nn.Module): Parameters: channels: channels in the inputs and outputs. use_conv: a bool determining if a convolution is applied. - dims: determines if the signal is 1D, 2D, or 3D. If 3D, then downsampling occurs in the inner-two dimensions. + out_channels: + padding: """ def __init__(self, channels, use_conv=False, out_channels=None, padding=1, name="conv"): @@ -415,6 +486,69 @@ class Mish(torch.nn.Module): return hidden_states * torch.tanh(torch.nn.functional.softplus(hidden_states)) +# unet_rl.py +def rearrange_dims(tensor): + if len(tensor.shape) == 2: + return tensor[:, :, None] + if len(tensor.shape) == 3: + return tensor[:, :, None, :] + elif len(tensor.shape) == 4: + return tensor[:, :, 0, :] + else: + raise ValueError(f"`len(tensor)`: {len(tensor)} has to be 2, 3 or 4.") + + +class Conv1dBlock(nn.Module): + """ + Conv1d --> GroupNorm --> Mish + """ + + def __init__(self, inp_channels, out_channels, kernel_size, n_groups=8): + super().__init__() + + self.conv1d = nn.Conv1d(inp_channels, out_channels, kernel_size, padding=kernel_size // 2) + self.group_norm = nn.GroupNorm(n_groups, out_channels) + self.mish = nn.Mish() + + def forward(self, x): + x = self.conv1d(x) + x = rearrange_dims(x) + x = self.group_norm(x) + x = rearrange_dims(x) + x = self.mish(x) + return x + + +# unet_rl.py +class ResidualTemporalBlock1D(nn.Module): + def __init__(self, inp_channels, out_channels, embed_dim, kernel_size=5): + super().__init__() + self.conv_in = Conv1dBlock(inp_channels, out_channels, kernel_size) + self.conv_out = Conv1dBlock(out_channels, out_channels, kernel_size) + + self.time_emb_act = nn.Mish() + self.time_emb = nn.Linear(embed_dim, out_channels) + + self.residual_conv = ( + nn.Conv1d(inp_channels, out_channels, 1) if inp_channels != out_channels else nn.Identity() + ) + + def forward(self, x, t): + """ + Args: + x : [ batch_size x inp_channels x horizon ] + t : [ batch_size x embed_dim ] + + returns: + out : [ batch_size x out_channels x horizon ] + """ + t = self.time_emb_act(t) + t = self.time_emb(t) + out = self.conv_in(x) + rearrange_dims(t) + out = self.conv_out(out) + return out + self.residual_conv(x) + + def upsample_2d(hidden_states, kernel=None, factor=2, gain=1): r"""Upsample2D a batch of 2D images with the given filter. Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` and upsamples each image with the given diff --git a/src/diffusers/models/unet_1d.py b/src/diffusers/models/unet_1d.py index cc0685deb9..29d1d707f5 100644 --- a/src/diffusers/models/unet_1d.py +++ b/src/diffusers/models/unet_1d.py @@ -1,3 +1,17 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from dataclasses import dataclass from typing import Optional, Tuple, Union @@ -8,7 +22,7 @@ from ..configuration_utils import ConfigMixin, register_to_config from ..modeling_utils import ModelMixin from ..utils import BaseOutput from .embeddings import GaussianFourierProjection, TimestepEmbedding, Timesteps -from .unet_1d_blocks import get_down_block, get_mid_block, get_up_block +from .unet_1d_blocks import get_down_block, get_mid_block, get_out_block, get_up_block @dataclass @@ -30,11 +44,11 @@ class UNet1DModel(ModelMixin, ConfigMixin): implements for all the model (such as downloading or saving, etc.) Parameters: - sample_size (`int`, *optionl*): Default length of sample. Should be adaptable at runtime. + sample_size (`int`, *optional*): Default length of sample. Should be adaptable at runtime. in_channels (`int`, *optional*, defaults to 2): Number of channels in the input sample. out_channels (`int`, *optional*, defaults to 2): Number of channels in the output. time_embedding_type (`str`, *optional*, defaults to `"fourier"`): Type of time embedding to use. - freq_shift (`int`, *optional*, defaults to 0): Frequency shift for fourier time embedding. + freq_shift (`float`, *optional*, defaults to 0.0): Frequency shift for fourier time embedding. flip_sin_to_cos (`bool`, *optional*, defaults to : obj:`False`): Whether to flip sin to cos for fourier time embedding. down_block_types (`Tuple[str]`, *optional*, defaults to : @@ -43,6 +57,13 @@ class UNet1DModel(ModelMixin, ConfigMixin): obj:`("UpBlock1D", "UpBlock1DNoSkip", "AttnUpBlock1D")`): Tuple of upsample block types. block_out_channels (`Tuple[int]`, *optional*, defaults to : obj:`(32, 32, 64)`): Tuple of block output channels. + mid_block_type (`str`, *optional*, defaults to "UNetMidBlock1D"): block type for middle of UNet. + out_block_type (`str`, *optional*, defaults to `None`): optional output processing of UNet. + act_fn (`str`, *optional*, defaults to None): optional activitation function in UNet blocks. + norm_num_groups (`int`, *optional*, defaults to 8): group norm member count in UNet blocks. + layers_per_block (`int`, *optional*, defaults to 1): added number of layers in a UNet block. + downsample_each_block (`int`, *optional*, defaults to False: + experimental feature for using a UNet without upsampling. 
""" @register_to_config @@ -54,16 +75,20 @@ class UNet1DModel(ModelMixin, ConfigMixin): out_channels: int = 2, extra_in_channels: int = 0, time_embedding_type: str = "fourier", - freq_shift: int = 0, flip_sin_to_cos: bool = True, use_timestep_embedding: bool = False, + freq_shift: float = 0.0, down_block_types: Tuple[str] = ("DownBlock1DNoSkip", "DownBlock1D", "AttnDownBlock1D"), - mid_block_type: str = "UNetMidBlock1D", up_block_types: Tuple[str] = ("AttnUpBlock1D", "UpBlock1D", "UpBlock1DNoSkip"), + mid_block_type: Tuple[str] = "UNetMidBlock1D", + out_block_type: str = None, block_out_channels: Tuple[int] = (32, 32, 64), + act_fn: str = None, + norm_num_groups: int = 8, + layers_per_block: int = 1, + downsample_each_block: bool = False, ): super().__init__() - self.sample_size = sample_size # time @@ -73,12 +98,19 @@ class UNet1DModel(ModelMixin, ConfigMixin): ) timestep_input_dim = 2 * block_out_channels[0] elif time_embedding_type == "positional": - self.time_proj = Timesteps(block_out_channels[0], flip_sin_to_cos, freq_shift) + self.time_proj = Timesteps( + block_out_channels[0], flip_sin_to_cos=flip_sin_to_cos, downscale_freq_shift=freq_shift + ) timestep_input_dim = block_out_channels[0] if use_timestep_embedding: time_embed_dim = block_out_channels[0] * 4 - self.time_embedding = TimestepEmbedding(timestep_input_dim, time_embed_dim) + self.time_mlp = TimestepEmbedding( + in_channels=timestep_input_dim, + time_embed_dim=time_embed_dim, + act_fn=act_fn, + out_dim=block_out_channels[0], + ) self.down_blocks = nn.ModuleList([]) self.mid_block = None @@ -94,38 +126,66 @@ class UNet1DModel(ModelMixin, ConfigMixin): if i == 0: input_channel += extra_in_channels + is_final_block = i == len(block_out_channels) - 1 + down_block = get_down_block( down_block_type, + num_layers=layers_per_block, in_channels=input_channel, out_channels=output_channel, + temb_channels=block_out_channels[0], + add_downsample=not is_final_block or downsample_each_block, ) self.down_blocks.append(down_block) # mid self.mid_block = get_mid_block( - mid_block_type=mid_block_type, - mid_channels=block_out_channels[-1], + mid_block_type, in_channels=block_out_channels[-1], - out_channels=None, + mid_channels=block_out_channels[-1], + out_channels=block_out_channels[-1], + embed_dim=block_out_channels[0], + num_layers=layers_per_block, + add_downsample=downsample_each_block, ) # up reversed_block_out_channels = list(reversed(block_out_channels)) output_channel = reversed_block_out_channels[0] + if out_block_type is None: + final_upsample_channels = out_channels + else: + final_upsample_channels = block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): prev_output_channel = output_channel - output_channel = reversed_block_out_channels[i + 1] if i < len(up_block_types) - 1 else out_channels + output_channel = ( + reversed_block_out_channels[i + 1] if i < len(up_block_types) - 1 else final_upsample_channels + ) + + is_final_block = i == len(block_out_channels) - 1 up_block = get_up_block( up_block_type, + num_layers=layers_per_block, in_channels=prev_output_channel, out_channels=output_channel, + temb_channels=block_out_channels[0], + add_upsample=not is_final_block, ) self.up_blocks.append(up_block) prev_output_channel = output_channel - # TODO(PVP, Nathan) placeholder for RL application to be merged shortly - # Totally fine to add another layer with a if statement - no need for nn.Identity here + # out + num_groups_out = norm_num_groups if norm_num_groups is not None else min(block_out_channels[0] // 4, 
32) + self.out_block = get_out_block( + out_block_type=out_block_type, + num_groups_out=num_groups_out, + embed_dim=block_out_channels[0], + out_channels=out_channels, + act_fn=act_fn, + fc_dim=block_out_channels[-1] // 4, + ) def forward( self, @@ -144,12 +204,20 @@ class UNet1DModel(ModelMixin, ConfigMixin): [`~models.unet_1d.UNet1DOutput`] or `tuple`: [`~models.unet_1d.UNet1DOutput`] if `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is the sample tensor. """ - # 1. time - if len(timestep.shape) == 0: - timestep = timestep[None] - timestep_embed = self.time_proj(timestep)[..., None] - timestep_embed = timestep_embed.repeat([1, 1, sample.shape[2]]).to(sample.dtype) + # 1. time + timesteps = timestep + if not torch.is_tensor(timesteps): + timesteps = torch.tensor([timesteps], dtype=torch.long, device=sample.device) + elif torch.is_tensor(timesteps) and len(timesteps.shape) == 0: + timesteps = timesteps[None].to(sample.device) + + timestep_embed = self.time_proj(timesteps) + if self.config.use_timestep_embedding: + timestep_embed = self.time_mlp(timestep_embed) + else: + timestep_embed = timestep_embed[..., None] + timestep_embed = timestep_embed.repeat([1, 1, sample.shape[2]]).to(sample.dtype) # 2. down down_block_res_samples = () @@ -158,13 +226,18 @@ class UNet1DModel(ModelMixin, ConfigMixin): down_block_res_samples += res_samples # 3. mid - sample = self.mid_block(sample) + if self.mid_block: + sample = self.mid_block(sample, timestep_embed) # 4. up for i, upsample_block in enumerate(self.up_blocks): res_samples = down_block_res_samples[-1:] down_block_res_samples = down_block_res_samples[:-1] - sample = upsample_block(sample, res_samples) + sample = upsample_block(sample, res_hidden_states_tuple=res_samples, temb=timestep_embed) + + # 5. 
post-process + if self.out_block: + sample = self.out_block(sample, timestep_embed) if not return_dict: return (sample,) diff --git a/src/diffusers/models/unet_1d_blocks.py b/src/diffusers/models/unet_1d_blocks.py index 9009071d1e..fc758ebbb0 100644 --- a/src/diffusers/models/unet_1d_blocks.py +++ b/src/diffusers/models/unet_1d_blocks.py @@ -17,6 +17,256 @@ import torch import torch.nn.functional as F from torch import nn +from .resnet import Downsample1D, ResidualTemporalBlock1D, Upsample1D, rearrange_dims + + +class DownResnetBlock1D(nn.Module): + def __init__( + self, + in_channels, + out_channels=None, + num_layers=1, + conv_shortcut=False, + temb_channels=32, + groups=32, + groups_out=None, + non_linearity=None, + time_embedding_norm="default", + output_scale_factor=1.0, + add_downsample=True, + ): + super().__init__() + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + self.use_conv_shortcut = conv_shortcut + self.time_embedding_norm = time_embedding_norm + self.add_downsample = add_downsample + self.output_scale_factor = output_scale_factor + + if groups_out is None: + groups_out = groups + + # there will always be at least one resnet + resnets = [ResidualTemporalBlock1D(in_channels, out_channels, embed_dim=temb_channels)] + + for _ in range(num_layers): + resnets.append(ResidualTemporalBlock1D(out_channels, out_channels, embed_dim=temb_channels)) + + self.resnets = nn.ModuleList(resnets) + + if non_linearity == "swish": + self.nonlinearity = lambda x: F.silu(x) + elif non_linearity == "mish": + self.nonlinearity = nn.Mish() + elif non_linearity == "silu": + self.nonlinearity = nn.SiLU() + else: + self.nonlinearity = None + + self.downsample = None + if add_downsample: + self.downsample = Downsample1D(out_channels, use_conv=True, padding=1) + + def forward(self, hidden_states, temb=None): + output_states = () + + hidden_states = self.resnets[0](hidden_states, temb) + for resnet in self.resnets[1:]: + hidden_states = resnet(hidden_states, temb) + + output_states += (hidden_states,) + + if self.nonlinearity is not None: + hidden_states = self.nonlinearity(hidden_states) + + if self.downsample is not None: + hidden_states = self.downsample(hidden_states) + + return hidden_states, output_states + + +class UpResnetBlock1D(nn.Module): + def __init__( + self, + in_channels, + out_channels=None, + num_layers=1, + temb_channels=32, + groups=32, + groups_out=None, + non_linearity=None, + time_embedding_norm="default", + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + self.time_embedding_norm = time_embedding_norm + self.add_upsample = add_upsample + self.output_scale_factor = output_scale_factor + + if groups_out is None: + groups_out = groups + + # there will always be at least one resnet + resnets = [ResidualTemporalBlock1D(2 * in_channels, out_channels, embed_dim=temb_channels)] + + for _ in range(num_layers): + resnets.append(ResidualTemporalBlock1D(out_channels, out_channels, embed_dim=temb_channels)) + + self.resnets = nn.ModuleList(resnets) + + if non_linearity == "swish": + self.nonlinearity = lambda x: F.silu(x) + elif non_linearity == "mish": + self.nonlinearity = nn.Mish() + elif non_linearity == "silu": + self.nonlinearity = nn.SiLU() + else: + self.nonlinearity = None + + self.upsample = None + if add_upsample: + self.upsample = 
Upsample1D(out_channels, use_conv_transpose=True) + + def forward(self, hidden_states, res_hidden_states_tuple=None, temb=None): + if res_hidden_states_tuple is not None: + res_hidden_states = res_hidden_states_tuple[-1] + hidden_states = torch.cat((hidden_states, res_hidden_states), dim=1) + + hidden_states = self.resnets[0](hidden_states, temb) + for resnet in self.resnets[1:]: + hidden_states = resnet(hidden_states, temb) + + if self.nonlinearity is not None: + hidden_states = self.nonlinearity(hidden_states) + + if self.upsample is not None: + hidden_states = self.upsample(hidden_states) + + return hidden_states + + +class ValueFunctionMidBlock1D(nn.Module): + def __init__(self, in_channels, out_channels, embed_dim): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.embed_dim = embed_dim + + self.res1 = ResidualTemporalBlock1D(in_channels, in_channels // 2, embed_dim=embed_dim) + self.down1 = Downsample1D(out_channels // 2, use_conv=True) + self.res2 = ResidualTemporalBlock1D(in_channels // 2, in_channels // 4, embed_dim=embed_dim) + self.down2 = Downsample1D(out_channels // 4, use_conv=True) + + def forward(self, x, temb=None): + x = self.res1(x, temb) + x = self.down1(x) + x = self.res2(x, temb) + x = self.down2(x) + return x + + +class MidResTemporalBlock1D(nn.Module): + def __init__( + self, + in_channels, + out_channels, + embed_dim, + num_layers: int = 1, + add_downsample: bool = False, + add_upsample: bool = False, + non_linearity=None, + ): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.add_downsample = add_downsample + + # there will always be at least one resnet + resnets = [ResidualTemporalBlock1D(in_channels, out_channels, embed_dim=embed_dim)] + + for _ in range(num_layers): + resnets.append(ResidualTemporalBlock1D(out_channels, out_channels, embed_dim=embed_dim)) + + self.resnets = nn.ModuleList(resnets) + + if non_linearity == "swish": + self.nonlinearity = lambda x: F.silu(x) + elif non_linearity == "mish": + self.nonlinearity = nn.Mish() + elif non_linearity == "silu": + self.nonlinearity = nn.SiLU() + else: + self.nonlinearity = None + + self.upsample = None + if add_upsample: + self.upsample = Downsample1D(out_channels, use_conv=True) + + self.downsample = None + if add_downsample: + self.downsample = Downsample1D(out_channels, use_conv=True) + + if self.upsample and self.downsample: + raise ValueError("Block cannot downsample and upsample") + + def forward(self, hidden_states, temb): + hidden_states = self.resnets[0](hidden_states, temb) + for resnet in self.resnets[1:]: + hidden_states = resnet(hidden_states, temb) + + if self.upsample: + hidden_states = self.upsample(hidden_states) + if self.downsample: + self.downsample = self.downsample(hidden_states) + + return hidden_states + + +class OutConv1DBlock(nn.Module): + def __init__(self, num_groups_out, out_channels, embed_dim, act_fn): + super().__init__() + self.final_conv1d_1 = nn.Conv1d(embed_dim, embed_dim, 5, padding=2) + self.final_conv1d_gn = nn.GroupNorm(num_groups_out, embed_dim) + if act_fn == "silu": + self.final_conv1d_act = nn.SiLU() + if act_fn == "mish": + self.final_conv1d_act = nn.Mish() + self.final_conv1d_2 = nn.Conv1d(embed_dim, out_channels, 1) + + def forward(self, hidden_states, temb=None): + hidden_states = self.final_conv1d_1(hidden_states) + hidden_states = rearrange_dims(hidden_states) + hidden_states = self.final_conv1d_gn(hidden_states) + hidden_states = rearrange_dims(hidden_states) + 
hidden_states = self.final_conv1d_act(hidden_states) + hidden_states = self.final_conv1d_2(hidden_states) + return hidden_states + + +class OutValueFunctionBlock(nn.Module): + def __init__(self, fc_dim, embed_dim): + super().__init__() + self.final_block = nn.ModuleList( + [ + nn.Linear(fc_dim + embed_dim, fc_dim // 2), + nn.Mish(), + nn.Linear(fc_dim // 2, 1), + ] + ) + + def forward(self, hidden_states, temb): + hidden_states = hidden_states.view(hidden_states.shape[0], -1) + hidden_states = torch.cat((hidden_states, temb), dim=-1) + for layer in self.final_block: + hidden_states = layer(hidden_states) + + return hidden_states + _kernels = { "linear": [1 / 8, 3 / 8, 3 / 8, 1 / 8], @@ -62,7 +312,7 @@ class Upsample1d(nn.Module): self.pad = kernel_1d.shape[0] // 2 - 1 self.register_buffer("kernel", kernel_1d) - def forward(self, hidden_states): + def forward(self, hidden_states, temb=None): hidden_states = F.pad(hidden_states, ((self.pad + 1) // 2,) * 2, self.pad_mode) weight = hidden_states.new_zeros([hidden_states.shape[1], hidden_states.shape[1], self.kernel.shape[0]]) indices = torch.arange(hidden_states.shape[1], device=hidden_states.device) @@ -162,32 +412,6 @@ class ResConvBlock(nn.Module): return output -def get_down_block(down_block_type, out_channels, in_channels): - if down_block_type == "DownBlock1D": - return DownBlock1D(out_channels=out_channels, in_channels=in_channels) - elif down_block_type == "AttnDownBlock1D": - return AttnDownBlock1D(out_channels=out_channels, in_channels=in_channels) - elif down_block_type == "DownBlock1DNoSkip": - return DownBlock1DNoSkip(out_channels=out_channels, in_channels=in_channels) - raise ValueError(f"{down_block_type} does not exist.") - - -def get_up_block(up_block_type, in_channels, out_channels): - if up_block_type == "UpBlock1D": - return UpBlock1D(in_channels=in_channels, out_channels=out_channels) - elif up_block_type == "AttnUpBlock1D": - return AttnUpBlock1D(in_channels=in_channels, out_channels=out_channels) - elif up_block_type == "UpBlock1DNoSkip": - return UpBlock1DNoSkip(in_channels=in_channels, out_channels=out_channels) - raise ValueError(f"{up_block_type} does not exist.") - - -def get_mid_block(mid_block_type, in_channels, mid_channels, out_channels): - if mid_block_type == "UNetMidBlock1D": - return UNetMidBlock1D(in_channels=in_channels, mid_channels=mid_channels, out_channels=out_channels) - raise ValueError(f"{mid_block_type} does not exist.") - - class UNetMidBlock1D(nn.Module): def __init__(self, mid_channels, in_channels, out_channels=None): super().__init__() @@ -217,7 +441,7 @@ class UNetMidBlock1D(nn.Module): self.attentions = nn.ModuleList(attentions) self.resnets = nn.ModuleList(resnets) - def forward(self, hidden_states): + def forward(self, hidden_states, temb=None): hidden_states = self.down(hidden_states) for attn, resnet in zip(self.attentions, self.resnets): hidden_states = resnet(hidden_states) @@ -322,7 +546,7 @@ class AttnUpBlock1D(nn.Module): self.resnets = nn.ModuleList(resnets) self.up = Upsample1d(kernel="cubic") - def forward(self, hidden_states, res_hidden_states_tuple): + def forward(self, hidden_states, res_hidden_states_tuple, temb=None): res_hidden_states = res_hidden_states_tuple[-1] hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1) @@ -349,7 +573,7 @@ class UpBlock1D(nn.Module): self.resnets = nn.ModuleList(resnets) self.up = Upsample1d(kernel="cubic") - def forward(self, hidden_states, res_hidden_states_tuple): + def forward(self, hidden_states, res_hidden_states_tuple, 
temb=None): res_hidden_states = res_hidden_states_tuple[-1] hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1) @@ -374,7 +598,7 @@ class UpBlock1DNoSkip(nn.Module): self.resnets = nn.ModuleList(resnets) - def forward(self, hidden_states, res_hidden_states_tuple): + def forward(self, hidden_states, res_hidden_states_tuple, temb=None): res_hidden_states = res_hidden_states_tuple[-1] hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1) @@ -382,3 +606,63 @@ class UpBlock1DNoSkip(nn.Module): hidden_states = resnet(hidden_states) return hidden_states + + +def get_down_block(down_block_type, num_layers, in_channels, out_channels, temb_channels, add_downsample): + if down_block_type == "DownResnetBlock1D": + return DownResnetBlock1D( + in_channels=in_channels, + num_layers=num_layers, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + ) + elif down_block_type == "DownBlock1D": + return DownBlock1D(out_channels=out_channels, in_channels=in_channels) + elif down_block_type == "AttnDownBlock1D": + return AttnDownBlock1D(out_channels=out_channels, in_channels=in_channels) + elif down_block_type == "DownBlock1DNoSkip": + return DownBlock1DNoSkip(out_channels=out_channels, in_channels=in_channels) + raise ValueError(f"{down_block_type} does not exist.") + + +def get_up_block(up_block_type, num_layers, in_channels, out_channels, temb_channels, add_upsample): + if up_block_type == "UpResnetBlock1D": + return UpResnetBlock1D( + in_channels=in_channels, + num_layers=num_layers, + out_channels=out_channels, + temb_channels=temb_channels, + add_upsample=add_upsample, + ) + elif up_block_type == "UpBlock1D": + return UpBlock1D(in_channels=in_channels, out_channels=out_channels) + elif up_block_type == "AttnUpBlock1D": + return AttnUpBlock1D(in_channels=in_channels, out_channels=out_channels) + elif up_block_type == "UpBlock1DNoSkip": + return UpBlock1DNoSkip(in_channels=in_channels, out_channels=out_channels) + raise ValueError(f"{up_block_type} does not exist.") + + +def get_mid_block(mid_block_type, num_layers, in_channels, mid_channels, out_channels, embed_dim, add_downsample): + if mid_block_type == "MidResTemporalBlock1D": + return MidResTemporalBlock1D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + embed_dim=embed_dim, + add_downsample=add_downsample, + ) + elif mid_block_type == "ValueFunctionMidBlock1D": + return ValueFunctionMidBlock1D(in_channels=in_channels, out_channels=out_channels, embed_dim=embed_dim) + elif mid_block_type == "UNetMidBlock1D": + return UNetMidBlock1D(in_channels=in_channels, mid_channels=mid_channels, out_channels=out_channels) + raise ValueError(f"{mid_block_type} does not exist.") + + +def get_out_block(*, out_block_type, num_groups_out, embed_dim, out_channels, act_fn, fc_dim): + if out_block_type == "OutConv1DBlock": + return OutConv1DBlock(num_groups_out, out_channels, embed_dim, act_fn) + elif out_block_type == "ValueFunction": + return OutValueFunctionBlock(fc_dim, embed_dim) + return None diff --git a/src/diffusers/schedulers/scheduling_ddpm.py b/src/diffusers/schedulers/scheduling_ddpm.py index a19d91879c..c3e373d2bd 100644 --- a/src/diffusers/schedulers/scheduling_ddpm.py +++ b/src/diffusers/schedulers/scheduling_ddpm.py @@ -204,6 +204,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): # for rl-diffuser https://arxiv.org/abs/2205.09991 elif variance_type == "fixed_small_log": variance = torch.log(torch.clamp(variance, min=1e-20)) + variance = 
torch.exp(0.5 * variance) elif variance_type == "fixed_large": variance = self.betas[t] elif variance_type == "fixed_large_log": @@ -301,7 +302,10 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): variance_noise = torch.randn( model_output.shape, generator=generator, device=device, dtype=model_output.dtype ) - variance = (self._get_variance(t, predicted_variance=predicted_variance) ** 0.5) * variance_noise + if self.variance_type == "fixed_small_log": + variance = self._get_variance(t, predicted_variance=predicted_variance) * variance_noise + else: + variance = (self._get_variance(t, predicted_variance=predicted_variance) ** 0.5) * variance_noise pred_prev_sample = pred_prev_sample + variance diff --git a/tests/models/test_models_unet_1d.py b/tests/models/test_models_unet_1d.py index c274ce4192..41c4fdecfa 100644 --- a/tests/models/test_models_unet_1d.py +++ b/tests/models/test_models_unet_1d.py @@ -18,13 +18,120 @@ import unittest import torch from diffusers import UNet1DModel -from diffusers.utils import slow, torch_device +from diffusers.utils import floats_tensor, slow, torch_device + +from ..test_modeling_common import ModelTesterMixin torch.backends.cuda.matmul.allow_tf32 = False -class UnetModel1DTests(unittest.TestCase): +class UNet1DModelTests(ModelTesterMixin, unittest.TestCase): + model_class = UNet1DModel + + @property + def dummy_input(self): + batch_size = 4 + num_features = 14 + seq_len = 16 + + noise = floats_tensor((batch_size, num_features, seq_len)).to(torch_device) + time_step = torch.tensor([10] * batch_size).to(torch_device) + + return {"sample": noise, "timestep": time_step} + + @property + def input_shape(self): + return (4, 14, 16) + + @property + def output_shape(self): + return (4, 14, 16) + + def test_ema_training(self): + pass + + def test_training(self): + pass + + @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS") + def test_determinism(self): + super().test_determinism() + + @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS") + def test_outputs_equivalence(self): + super().test_outputs_equivalence() + + @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS") + def test_from_pretrained_save_pretrained(self): + super().test_from_pretrained_save_pretrained() + + @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS") + def test_model_from_config(self): + super().test_model_from_config() + + @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS") + def test_output(self): + super().test_output() + + def prepare_init_args_and_inputs_for_common(self): + init_dict = { + "block_out_channels": (32, 64, 128, 256), + "in_channels": 14, + "out_channels": 14, + "time_embedding_type": "positional", + "use_timestep_embedding": True, + "flip_sin_to_cos": False, + "freq_shift": 1.0, + "out_block_type": "OutConv1DBlock", + "mid_block_type": "MidResTemporalBlock1D", + "down_block_types": ("DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D"), + "up_block_types": ("UpResnetBlock1D", "UpResnetBlock1D", "UpResnetBlock1D"), + "act_fn": "mish", + } + inputs_dict = self.dummy_input + return init_dict, inputs_dict + + @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS") + def test_from_pretrained_hub(self): + model, loading_info = UNet1DModel.from_pretrained( + "bglick13/hopper-medium-v2-value-function-hor32", output_loading_info=True, subfolder="unet" + ) + self.assertIsNotNone(model) + self.assertEqual(len(loading_info["missing_keys"]), 0) + + 
model.to(torch_device) + image = model(**self.dummy_input) + + assert image is not None, "Make sure output is not None" + + @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS") + def test_output_pretrained(self): + model = UNet1DModel.from_pretrained("bglick13/hopper-medium-v2-value-function-hor32", subfolder="unet") + torch.manual_seed(0) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(0) + + num_features = model.in_channels + seq_len = 16 + noise = torch.randn((1, seq_len, num_features)).permute( + 0, 2, 1 + ) # match original, we can update values and remove + time_step = torch.full((num_features,), 0) + + with torch.no_grad(): + output = model(noise, time_step).sample.permute(0, 2, 1) + + output_slice = output[0, -3:, -3:].flatten() + # fmt: off + expected_output_slice = torch.tensor([-2.137172, 1.1426016, 0.3688687, -0.766922, 0.7303146, 0.11038864, -0.4760633, 0.13270172, 0.02591348]) + # fmt: on + self.assertTrue(torch.allclose(output_slice, expected_output_slice, rtol=1e-3)) + + def test_forward_with_norm_groups(self): + # Not implemented yet for this UNet + pass + @slow def test_unet_1d_maestro(self): model_id = "harmonai/maestro-150k" @@ -43,3 +150,127 @@ class UnetModel1DTests(unittest.TestCase): assert (output_sum - 224.0896).abs() < 4e-2 assert (output_max - 0.0607).abs() < 4e-4 + + +class UNetRLModelTests(ModelTesterMixin, unittest.TestCase): + model_class = UNet1DModel + + @property + def dummy_input(self): + batch_size = 4 + num_features = 14 + seq_len = 16 + + noise = floats_tensor((batch_size, num_features, seq_len)).to(torch_device) + time_step = torch.tensor([10] * batch_size).to(torch_device) + + return {"sample": noise, "timestep": time_step} + + @property + def input_shape(self): + return (4, 14, 16) + + @property + def output_shape(self): + return (4, 14, 1) + + @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS") + def test_determinism(self): + super().test_determinism() + + @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS") + def test_outputs_equivalence(self): + super().test_outputs_equivalence() + + @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS") + def test_from_pretrained_save_pretrained(self): + super().test_from_pretrained_save_pretrained() + + @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS") + def test_model_from_config(self): + super().test_model_from_config() + + @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS") + def test_output(self): + # UNetRL is a value-function is different output shape + init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() + model = self.model_class(**init_dict) + model.to(torch_device) + model.eval() + + with torch.no_grad(): + output = model(**inputs_dict) + + if isinstance(output, dict): + output = output.sample + + self.assertIsNotNone(output) + expected_shape = torch.Size((inputs_dict["sample"].shape[0], 1)) + self.assertEqual(output.shape, expected_shape, "Input and output shapes do not match") + + def test_ema_training(self): + pass + + def test_training(self): + pass + + def prepare_init_args_and_inputs_for_common(self): + init_dict = { + "in_channels": 14, + "out_channels": 14, + "down_block_types": ["DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D", "DownResnetBlock1D"], + "up_block_types": [], + "out_block_type": "ValueFunction", + "mid_block_type": "ValueFunctionMidBlock1D", + "block_out_channels": [32, 64, 128, 256], + "layers_per_block": 1, + 
"downsample_each_block": True, + "use_timestep_embedding": True, + "freq_shift": 1.0, + "flip_sin_to_cos": False, + "time_embedding_type": "positional", + "act_fn": "mish", + } + inputs_dict = self.dummy_input + return init_dict, inputs_dict + + @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS") + def test_from_pretrained_hub(self): + value_function, vf_loading_info = UNet1DModel.from_pretrained( + "bglick13/hopper-medium-v2-value-function-hor32", output_loading_info=True, subfolder="value_function" + ) + self.assertIsNotNone(value_function) + self.assertEqual(len(vf_loading_info["missing_keys"]), 0) + + value_function.to(torch_device) + image = value_function(**self.dummy_input) + + assert image is not None, "Make sure output is not None" + + @unittest.skipIf(torch_device == "mps", "mish op not supported in MPS") + def test_output_pretrained(self): + value_function, vf_loading_info = UNet1DModel.from_pretrained( + "bglick13/hopper-medium-v2-value-function-hor32", output_loading_info=True, subfolder="value_function" + ) + torch.manual_seed(0) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(0) + + num_features = value_function.in_channels + seq_len = 14 + noise = torch.randn((1, seq_len, num_features)).permute( + 0, 2, 1 + ) # match original, we can update values and remove + time_step = torch.full((num_features,), 0) + + with torch.no_grad(): + output = value_function(noise, time_step).sample + + # fmt: off + expected_output_slice = torch.tensor([165.25] * seq_len) + # fmt: on + self.assertTrue(torch.allclose(output, expected_output_slice, rtol=1e-3)) + + def test_forward_with_norm_groups(self): + # Not implemented yet for this UNet + pass diff --git a/tests/pipelines/dance_diffusion/test_dance_diffusion.py b/tests/pipelines/dance_diffusion/test_dance_diffusion.py index 72e67e4479..a63ef84c63 100644 --- a/tests/pipelines/dance_diffusion/test_dance_diffusion.py +++ b/tests/pipelines/dance_diffusion/test_dance_diffusion.py @@ -44,6 +44,10 @@ class PipelineFastTests(unittest.TestCase): sample_rate=16_000, in_channels=2, out_channels=2, + flip_sin_to_cos=True, + use_timestep_embedding=False, + time_embedding_type="fourier", + mid_block_type="UNetMidBlock1D", down_block_types=["DownBlock1DNoSkip"] + ["DownBlock1D"] + ["AttnDownBlock1D"], up_block_types=["AttnUpBlock1D"] + ["UpBlock1D"] + ["UpBlock1DNoSkip"], ) From 57525bb41879d42fa8a0379b0c800011e5802277 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Mon, 14 Nov 2022 23:54:09 +0200 Subject: [PATCH 88/88] Fix documentation typo for `UNet2DModel` and `UNet2DConditionModel` (#1275) * Fix documentation typo * Fix other typo --- src/diffusers/models/unet_2d.py | 2 +- src/diffusers/models/unet_2d_condition.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/models/unet_2d.py b/src/diffusers/models/unet_2d.py index 641c253c86..0432405760 100644 --- a/src/diffusers/models/unet_2d.py +++ b/src/diffusers/models/unet_2d.py @@ -51,7 +51,7 @@ class UNet2DModel(ModelMixin, ConfigMixin): time_embedding_type (`str`, *optional*, defaults to `"positional"`): Type of time embedding to use. freq_shift (`int`, *optional*, defaults to 0): Frequency shift for fourier time embedding. flip_sin_to_cos (`bool`, *optional*, defaults to : - obj:`False`): Whether to flip sin to cos for fourier time embedding. + obj:`True`): Whether to flip sin to cos for fourier time embedding. 
down_block_types (`Tuple[str]`, *optional*, defaults to : obj:`("DownBlock2D", "AttnDownBlock2D", "AttnDownBlock2D", "AttnDownBlock2D")`): Tuple of downsample block types. diff --git a/src/diffusers/models/unet_2d_condition.py b/src/diffusers/models/unet_2d_condition.py index 7f7f3ecd44..becae75683 100644 --- a/src/diffusers/models/unet_2d_condition.py +++ b/src/diffusers/models/unet_2d_condition.py @@ -60,7 +60,7 @@ class UNet2DConditionModel(ModelMixin, ConfigMixin): in_channels (`int`, *optional*, defaults to 4): The number of channels in the input sample. out_channels (`int`, *optional*, defaults to 4): The number of channels in the output. center_input_sample (`bool`, *optional*, defaults to `False`): Whether to center the input sample. - flip_sin_to_cos (`bool`, *optional*, defaults to `False`): + flip_sin_to_cos (`bool`, *optional*, defaults to `True`): Whether to flip the sin to cos in the time embedding. freq_shift (`int`, *optional*, defaults to 0): The frequency shift to apply to the time embedding. down_block_types (`Tuple[str]`, *optional*, defaults to `("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D")`):
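Both docstring fixes above align the documented default with the actual signature default of `True`. As a quick sanity check (a minimal sketch, not part of the patch, assuming a diffusers install from around this point in the series), the registered default can be read back from a model's config:

```python
# Minimal sketch: confirm the corrected `flip_sin_to_cos` defaults from the config.
# Note: constructing these models with no arguments builds randomly initialized
# weights, so this only illustrates reading back the registered defaults.
from diffusers import UNet2DConditionModel, UNet2DModel

unet = UNet2DModel()
cond_unet = UNet2DConditionModel()

print(unet.config.flip_sin_to_cos)       # True
print(cond_unet.config.flip_sin_to_cos)  # True
```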