From 2fb9baf934629db718f00d4eaeeddd9dac80dfdd Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Fri, 19 Jan 2024 07:24:19 +0000 Subject: [PATCH] update --- src/diffusers/loaders/autoencoder.py | 222 ------------------ src/diffusers/loaders/controlnet.py | 167 ------------- src/diffusers/loaders/single_file.py | 3 +- src/diffusers/loaders/single_file_utils.py | 34 +-- .../models/autoencoders/autoencoder_kl.py | 4 +- 5 files changed, 16 insertions(+), 414 deletions(-) delete mode 100644 src/diffusers/loaders/autoencoder.py delete mode 100644 src/diffusers/loaders/controlnet.py diff --git a/src/diffusers/loaders/autoencoder.py b/src/diffusers/loaders/autoencoder.py deleted file mode 100644 index 94240c0f4b..0000000000 --- a/src/diffusers/loaders/autoencoder.py +++ /dev/null @@ -1,222 +0,0 @@ -from contextlib import nullcontext -from io import BytesIO -from pathlib import Path - -import requests -import torch -from huggingface_hub import hf_hub_download -from huggingface_hub.utils import validate_hf_hub_args - -from ..utils import ( - is_accelerate_available, - is_transformers_available, - logging, -) -from ..utils.import_utils import BACKENDS_MAPPING - - -if is_transformers_available(): - pass - -if is_accelerate_available(): - from accelerate import init_empty_weights - -logger = logging.get_logger(__name__) - - -class FromOriginalVAEMixin: - """ - Load pretrained ControlNet weights saved in the `.ckpt` or `.safetensors` format into an [`AutoencoderKL`]. - """ - - @classmethod - @validate_hf_hub_args - def from_single_file(cls, pretrained_model_link_or_path, **kwargs): - r""" - Instantiate a [`AutoencoderKL`] from pretrained ControlNet weights saved in the original `.ckpt` or - `.safetensors` format. The pipeline is set in evaluation mode (`model.eval()`) by default. - - Parameters: - pretrained_model_link_or_path (`str` or `os.PathLike`, *optional*): - Can be either: - - A link to the `.ckpt` file (for example - `"https://huggingface.co//blob/main/.ckpt"`) on the Hub. - - A path to a *file* containing all pipeline weights. - torch_dtype (`str` or `torch.dtype`, *optional*): - Override the default `torch.dtype` and load the model with another dtype. If `"auto"` is passed, the - dtype is automatically derived from the model's weights. - force_download (`bool`, *optional*, defaults to `False`): - Whether or not to force the (re-)download of the model weights and configuration files, overriding the - cached versions if they exist. - cache_dir (`Union[str, os.PathLike]`, *optional*): - Path to a directory where a downloaded pretrained model configuration is cached if the standard cache - is not used. - resume_download (`bool`, *optional*, defaults to `False`): - Whether or not to resume downloading the model weights and configuration files. If set to `False`, any - incompletely downloaded files are deleted. - proxies (`Dict[str, str]`, *optional*): - A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', - 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. - local_files_only (`bool`, *optional*, defaults to `False`): - Whether to only load local model weights and configuration files or not. If set to True, the model - won't be downloaded from the Hub. - token (`str` or *bool*, *optional*): - The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from - `diffusers-cli login` (stored in `~/.huggingface`) is used. - revision (`str`, *optional*, defaults to `"main"`): - The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier - allowed by Git. - image_size (`int`, *optional*, defaults to 512): - The image size the model was trained on. Use 512 for all Stable Diffusion v1 models and the Stable - Diffusion v2 base model. Use 768 for Stable Diffusion v2. - use_safetensors (`bool`, *optional*, defaults to `None`): - If set to `None`, the safetensors weights are downloaded if they're available **and** if the - safetensors library is installed. If set to `True`, the model is forcibly loaded from safetensors - weights. If set to `False`, safetensors weights are not loaded. - upcast_attention (`bool`, *optional*, defaults to `None`): - Whether the attention computation should always be upcasted. - scaling_factor (`float`, *optional*, defaults to 0.18215): - The component-wise standard deviation of the trained latent space computed using the first batch of the - training set. This is used to scale the latent space to have unit variance when training the diffusion - model. The latents are scaled with the formula `z = z * scaling_factor` before being passed to the - diffusion model. When decoding, the latents are scaled back to the original scale with the formula: `z - = 1 / scaling_factor * z`. For more details, refer to sections 4.3.2 and D.1 of the [High-Resolution - Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) paper. - kwargs (remaining dictionary of keyword arguments, *optional*): - Can be used to overwrite load and saveable variables (for example the pipeline components of the - specific pipeline class). The overwritten components are directly passed to the pipelines `__init__` - method. See example below for more information. - - - - Make sure to pass both `image_size` and `scaling_factor` to `from_single_file()` if you're loading - a VAE from SDXL or a Stable Diffusion v2 model or higher. - - - - Examples: - - ```py - from diffusers import AutoencoderKL - - url = "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/blob/main/vae-ft-mse-840000-ema-pruned.safetensors" # can also be local file - model = AutoencoderKL.from_single_file(url) - ``` - """ - if not is_omegaconf_available(): - raise ValueError(BACKENDS_MAPPING["omegaconf"][1]) - - from omegaconf import OmegaConf - - from ..models import AutoencoderKL - - # import here to avoid circular dependency - from ..pipelines.stable_diffusion.convert_from_ckpt import ( - convert_ldm_vae_checkpoint, - create_vae_diffusers_config, - ) - - config_file = kwargs.pop("config_file", None) - cache_dir = kwargs.pop("cache_dir", None) - resume_download = kwargs.pop("resume_download", False) - force_download = kwargs.pop("force_download", False) - proxies = kwargs.pop("proxies", None) - local_files_only = kwargs.pop("local_files_only", None) - token = kwargs.pop("token", None) - revision = kwargs.pop("revision", None) - image_size = kwargs.pop("image_size", None) - scaling_factor = kwargs.pop("scaling_factor", None) - kwargs.pop("upcast_attention", None) - - torch_dtype = kwargs.pop("torch_dtype", None) - - use_safetensors = kwargs.pop("use_safetensors", None) - - file_extension = pretrained_model_link_or_path.rsplit(".", 1)[-1] - from_safetensors = file_extension == "safetensors" - - if from_safetensors and use_safetensors is False: - raise ValueError("Make sure to install `safetensors` with `pip install safetensors`.") - - # remove huggingface url - for prefix in ["https://huggingface.co/", "huggingface.co/", "hf.co/", "https://hf.co/"]: - if pretrained_model_link_or_path.startswith(prefix): - pretrained_model_link_or_path = pretrained_model_link_or_path[len(prefix) :] - - # Code based on diffusers.pipelines.pipeline_utils.DiffusionPipeline.from_pretrained - ckpt_path = Path(pretrained_model_link_or_path) - if not ckpt_path.is_file(): - # get repo_id and (potentially nested) file path of ckpt in repo - repo_id = "/".join(ckpt_path.parts[:2]) - file_path = "/".join(ckpt_path.parts[2:]) - - if file_path.startswith("blob/"): - file_path = file_path[len("blob/") :] - - if file_path.startswith("main/"): - file_path = file_path[len("main/") :] - - pretrained_model_link_or_path = hf_hub_download( - repo_id, - filename=file_path, - cache_dir=cache_dir, - resume_download=resume_download, - proxies=proxies, - local_files_only=local_files_only, - token=token, - revision=revision, - force_download=force_download, - ) - - if from_safetensors: - from safetensors import safe_open - - checkpoint = {} - with safe_open(pretrained_model_link_or_path, framework="pt", device="cpu") as f: - for key in f.keys(): - checkpoint[key] = f.get_tensor(key) - else: - checkpoint = torch.load(pretrained_model_link_or_path, map_location="cpu") - - if "state_dict" in checkpoint: - checkpoint = checkpoint["state_dict"] - - if config_file is None: - config_url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml" - config_file = BytesIO(requests.get(config_url).content) - - original_config = OmegaConf.load(config_file) - - # default to sd-v1-5 - image_size = image_size or 512 - - vae_config = create_vae_diffusers_config(original_config, image_size=image_size) - converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config) - - if scaling_factor is None: - if ( - "model" in original_config - and "params" in original_config.model - and "scale_factor" in original_config.model.params - ): - vae_scaling_factor = original_config.model.params.scale_factor - else: - vae_scaling_factor = 0.18215 # default SD scaling factor - - vae_config["scaling_factor"] = vae_scaling_factor - - ctx = init_empty_weights if is_accelerate_available() else nullcontext - with ctx(): - vae = AutoencoderKL(**vae_config) - - if is_accelerate_available(): - from ..models.modeling_utils import load_model_dict_into_meta - - load_model_dict_into_meta(vae, converted_vae_checkpoint, device="cpu") - else: - vae.load_state_dict(converted_vae_checkpoint) - - if torch_dtype is not None: - vae.to(dtype=torch_dtype) - - return vae diff --git a/src/diffusers/loaders/controlnet.py b/src/diffusers/loaders/controlnet.py deleted file mode 100644 index 4f709d75be..0000000000 --- a/src/diffusers/loaders/controlnet.py +++ /dev/null @@ -1,167 +0,0 @@ -# Copyright 2023 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from io import BytesIO -from pathlib import Path - -import requests -from huggingface_hub import hf_hub_download -from huggingface_hub.utils import validate_hf_hub_args - - -class FromOriginalControlnetMixin: - """ - Load pretrained ControlNet weights saved in the `.ckpt` or `.safetensors` format into a [`ControlNetModel`]. - """ - - @classmethod - @validate_hf_hub_args - def from_single_file(cls, pretrained_model_link_or_path, **kwargs): - r""" - Instantiate a [`ControlNetModel`] from pretrained ControlNet weights saved in the original `.ckpt` or - `.safetensors` format. The pipeline is set in evaluation mode (`model.eval()`) by default. - - Parameters: - pretrained_model_link_or_path (`str` or `os.PathLike`, *optional*): - Can be either: - - A link to the `.ckpt` file (for example - `"https://huggingface.co//blob/main/.ckpt"`) on the Hub. - - A path to a *file* containing all pipeline weights. - torch_dtype (`str` or `torch.dtype`, *optional*): - Override the default `torch.dtype` and load the model with another dtype. If `"auto"` is passed, the - dtype is automatically derived from the model's weights. - force_download (`bool`, *optional*, defaults to `False`): - Whether or not to force the (re-)download of the model weights and configuration files, overriding the - cached versions if they exist. - cache_dir (`Union[str, os.PathLike]`, *optional*): - Path to a directory where a downloaded pretrained model configuration is cached if the standard cache - is not used. - resume_download (`bool`, *optional*, defaults to `False`): - Whether or not to resume downloading the model weights and configuration files. If set to `False`, any - incompletely downloaded files are deleted. - proxies (`Dict[str, str]`, *optional*): - A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128', - 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. - local_files_only (`bool`, *optional*, defaults to `False`): - Whether to only load local model weights and configuration files or not. If set to True, the model - won't be downloaded from the Hub. - token (`str` or *bool*, *optional*): - The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from - `diffusers-cli login` (stored in `~/.huggingface`) is used. - revision (`str`, *optional*, defaults to `"main"`): - The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier - allowed by Git. - use_safetensors (`bool`, *optional*, defaults to `None`): - If set to `None`, the safetensors weights are downloaded if they're available **and** if the - safetensors library is installed. If set to `True`, the model is forcibly loaded from safetensors - weights. If set to `False`, safetensors weights are not loaded. - image_size (`int`, *optional*, defaults to 512): - The image size the model was trained on. Use 512 for all Stable Diffusion v1 models and the Stable - Diffusion v2 base model. Use 768 for Stable Diffusion v2. - upcast_attention (`bool`, *optional*, defaults to `None`): - Whether the attention computation should always be upcasted. - kwargs (remaining dictionary of keyword arguments, *optional*): - Can be used to overwrite load and saveable variables (for example the pipeline components of the - specific pipeline class). The overwritten components are directly passed to the pipelines `__init__` - method. See example below for more information. - - Examples: - - ```py - from diffusers import StableDiffusionControlNetPipeline, ControlNetModel - - url = "https://huggingface.co/lllyasviel/ControlNet-v1-1/blob/main/control_v11p_sd15_canny.pth" # can also be a local path - model = ControlNetModel.from_single_file(url) - - url = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned.safetensors" # can also be a local path - pipe = StableDiffusionControlNetPipeline.from_single_file(url, controlnet=controlnet) - ``` - """ - # import here to avoid circular dependency - from ..pipelines.stable_diffusion.convert_from_ckpt import download_controlnet_from_original_ckpt - - config_file = kwargs.pop("config_file", None) - cache_dir = kwargs.pop("cache_dir", None) - resume_download = kwargs.pop("resume_download", False) - force_download = kwargs.pop("force_download", False) - proxies = kwargs.pop("proxies", None) - local_files_only = kwargs.pop("local_files_only", None) - token = kwargs.pop("token", None) - num_in_channels = kwargs.pop("num_in_channels", None) - use_linear_projection = kwargs.pop("use_linear_projection", None) - revision = kwargs.pop("revision", None) - extract_ema = kwargs.pop("extract_ema", False) - image_size = kwargs.pop("image_size", None) - upcast_attention = kwargs.pop("upcast_attention", None) - - torch_dtype = kwargs.pop("torch_dtype", None) - - use_safetensors = kwargs.pop("use_safetensors", None) - - file_extension = pretrained_model_link_or_path.rsplit(".", 1)[-1] - from_safetensors = file_extension == "safetensors" - - if from_safetensors and use_safetensors is False: - raise ValueError("Make sure to install `safetensors` with `pip install safetensors`.") - - # remove huggingface url - for prefix in ["https://huggingface.co/", "huggingface.co/", "hf.co/", "https://hf.co/"]: - if pretrained_model_link_or_path.startswith(prefix): - pretrained_model_link_or_path = pretrained_model_link_or_path[len(prefix) :] - - # Code based on diffusers.pipelines.pipeline_utils.DiffusionPipeline.from_pretrained - ckpt_path = Path(pretrained_model_link_or_path) - if not ckpt_path.is_file(): - # get repo_id and (potentially nested) file path of ckpt in repo - repo_id = "/".join(ckpt_path.parts[:2]) - file_path = "/".join(ckpt_path.parts[2:]) - - if file_path.startswith("blob/"): - file_path = file_path[len("blob/") :] - - if file_path.startswith("main/"): - file_path = file_path[len("main/") :] - - pretrained_model_link_or_path = hf_hub_download( - repo_id, - filename=file_path, - cache_dir=cache_dir, - resume_download=resume_download, - proxies=proxies, - local_files_only=local_files_only, - token=token, - revision=revision, - force_download=force_download, - ) - - if config_file is None: - config_url = "https://raw.githubusercontent.com/lllyasviel/ControlNet/main/models/cldm_v15.yaml" - config_file = BytesIO(requests.get(config_url).content) - - image_size = image_size or 512 - - controlnet = download_controlnet_from_original_ckpt( - pretrained_model_link_or_path, - original_config_file=config_file, - image_size=image_size, - extract_ema=extract_ema, - num_in_channels=num_in_channels, - upcast_attention=upcast_attention, - from_safetensors=from_safetensors, - use_linear_projection=use_linear_projection, - ) - - if torch_dtype is not None: - controlnet.to(dtype=torch_dtype) - - return controlnet diff --git a/src/diffusers/loaders/single_file.py b/src/diffusers/loaders/single_file.py index 1896440b6c..d07b58bac5 100644 --- a/src/diffusers/loaders/single_file.py +++ b/src/diffusers/loaders/single_file.py @@ -231,7 +231,6 @@ class FromSingleFileMixin: ``` """ original_config_file = kwargs.pop("original_config_file", None) - config_files = kwargs.pop("config_files", None) resume_download = kwargs.pop("resume_download", False) force_download = kwargs.pop("force_download", False) proxies = kwargs.pop("proxies", None) @@ -270,7 +269,7 @@ class FromSingleFileMixin: while "state_dict" in checkpoint: checkpoint = checkpoint["state_dict"] - original_config = fetch_original_config(class_name, checkpoint, original_config_file, config_files) + original_config = fetch_original_config(class_name, checkpoint, original_config_file) if class_name == "AutoencoderKL": image_size = kwargs.pop("image_size", None) diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index a35ec5b390..3ab0aa716f 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -14,6 +14,8 @@ # limitations under the License. """ Conversion script for the Stable Diffusion checkpoints.""" +import os +import re from contextlib import nullcontext from io import BytesIO @@ -188,7 +190,7 @@ SD_2_TEXT_ENCODER_KEYS_TO_IGNORE = [ ] -def fetch_original_config_file_from_url(class_name, checkpoint): +def infer_original_config_file(class_name, checkpoint): if CHECKPOINT_KEY_NAMES["v2"] in checkpoint and checkpoint[CHECKPOINT_KEY_NAMES["v2"]].shape[-1] == 1024: config_url = CONFIG_URLS["v2"] @@ -212,30 +214,20 @@ def fetch_original_config_file_from_url(class_name, checkpoint): return original_config_file -def fetch_original_config_file_from_file(config_files: list): - if "v2" in config_files: - return config_files["v2"] +def fetch_original_config(pipeline_class_name, checkpoint, original_config_file=None): - elif "xl" in config_files: - return config_files["xl"] + def is_valid_url(url): + pattern = r'^(http|https):\/\/([\w.-]+)(\.[\w.-]+)+([\/\w\.-]*)*\/?$' + return bool(re.match(pattern, url)) - elif "xl_refiner" in config_files: - return config_files["xl_refiner"] + if os.path.isfile(original_config_file): + with open(original_config_file, "r") as fp: + original_config_file = fp.read() + elif is_valid_url(original_config_file): + original_config_file = BytesIO(requests.get(original_config_file).content) else: - return config_files["v1"] - - -def fetch_original_config(pipeline_class_name, checkpoint, original_config_file=None, config_files=None): - if original_config_file: - original_config = yaml.safe_load(original_config_file) - return original_config - - elif config_files: - original_config_file = fetch_original_config_file_from_file(config_files) - - else: - original_config_file = fetch_original_config_file_from_url(pipeline_class_name, checkpoint) + original_config_file = infer_original_config_file(pipeline_class_name, checkpoint) original_config = yaml.safe_load(original_config_file) diff --git a/src/diffusers/models/autoencoders/autoencoder_kl.py b/src/diffusers/models/autoencoders/autoencoder_kl.py index 10a3ae58de..92d12a220f 100644 --- a/src/diffusers/models/autoencoders/autoencoder_kl.py +++ b/src/diffusers/models/autoencoders/autoencoder_kl.py @@ -17,7 +17,7 @@ import torch import torch.nn as nn from ...configuration_utils import ConfigMixin, register_to_config -from ...loaders import FromOriginalVAEMixin +from ...loaders import FromSingleFileMixin from ...utils.accelerate_utils import apply_forward_hook from ..attention_processor import ( ADDED_KV_ATTENTION_PROCESSORS, @@ -32,7 +32,7 @@ from ..modeling_utils import ModelMixin from .vae import Decoder, DecoderOutput, DiagonalGaussianDistribution, Encoder -class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalVAEMixin): +class AutoencoderKL(ModelMixin, ConfigMixin, FromSingleFileMixin): r""" A VAE model with KL loss for encoding images into latents and decoding latent representations into images.