From b69cddb0550cbab718ec5fe956ce47b6891ed087 Mon Sep 17 00:00:00 2001
From: Dhruv Nair
Date: Tue, 2 Jan 2024 11:44:25 +0000
Subject: [PATCH] update

---
 src/diffusers/loaders/single_file_utils.py | 118 +++------------------
 1 file changed, 15 insertions(+), 103 deletions(-)

diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py
index f6e94ebc25..4b7f4cafac 100644
--- a/src/diffusers/loaders/single_file_utils.py
+++ b/src/diffusers/loaders/single_file_utils.py
@@ -24,7 +24,6 @@ import torch
 from omegaconf import OmegaConf
 from safetensors.torch import load_file as safe_load
 from transformers import (
-    AutoFeatureExtractor,
     BertTokenizerFast,
     CLIPImageProcessor,
     CLIPTextConfig,
@@ -35,7 +34,7 @@ from transformers import (
     CLIPVisionModelWithProjection,
 )
 
-from ..models import AutoencoderKL, ControlNetModel, PriorTransformer, UNet2DConditionModel
+from ..models import AutoencoderKL, PriorTransformer, UNet2DConditionModel
 from ..pipelines.latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel
 from ..pipelines.paint_by_example import PaintByExampleImageEncoder
 from ..pipelines.stable_diffusion.stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
@@ -240,6 +239,7 @@ def determine_image_size(pipeline_class_name, original_config, checkpoint, **kwa
     return image_size
 
 
+# Copied from diffusers.pipelines.stable_diffusion.convert_from_ckpt.shave_segments
 def shave_segments(path, n_shave_prefix_segments=1):
     """
     Removes segments. Positive values shave the first segments, negative shave the last segments.
@@ -250,6 +250,7 @@
         return ".".join(path.split(".")[:n_shave_prefix_segments])
 
 
+# Copied from diffusers.pipelines.stable_diffusion.convert_from_ckpt.renew_resnet_paths
 def renew_resnet_paths(old_list, n_shave_prefix_segments=0):
     """
     Updates paths inside resnets to the new naming scheme (local renaming)
@@ -272,6 +273,7 @@ def renew_resnet_paths(old_list, n_shave_prefix_segments=0):
     return mapping
 
 
+# Copied from diffusers.pipelines.stable_diffusion.convert_from_ckpt.renew_vae_resnet_paths
 def renew_vae_resnet_paths(old_list, n_shave_prefix_segments=0):
     """
     Updates paths inside resnets to the new naming scheme (local renaming)
@@ -288,6 +290,7 @@ def renew_vae_resnet_paths(old_list, n_shave_prefix_segments=0):
     return mapping
 
 
+# Copied from diffusers.pipelines.stable_diffusion.convert_from_ckpt.renew_attention_paths
 def renew_attention_paths(old_list, n_shave_prefix_segments=0):
     """
     Updates paths inside attentions to the new naming scheme (local renaming)
@@ -309,6 +312,7 @@ def renew_attention_paths(old_list, n_shave_prefix_segments=0):
     return mapping
 
 
+# Copied from diffusers.pipelines.stable_diffusion.convert_from_ckpt.renew_vae_attention_paths
 def renew_vae_attention_paths(old_list, n_shave_prefix_segments=0):
     """
     Updates paths inside attentions to the new naming scheme (local renaming)
@@ -339,6 +343,7 @@ def renew_vae_attention_paths(old_list, n_shave_prefix_segments=0):
     return mapping
 
 
+# Copied from diffusers.pipelines.stable_diffusion.convert_from_ckpt.assign_to_checkpoint
 def assign_to_checkpoint(
     paths, checkpoint, old_checkpoint, attention_paths_to_split=None, additional_replacements=None, config=None
 ):
@@ -394,6 +399,7 @@ def assign_to_checkpoint(
             checkpoint[new_path] = old_checkpoint[path["old"]]
 
 
+# Copied from diffusers.pipelines.stable_diffusion.convert_from_ckpt.conv_attn_to_linear
 def conv_attn_to_linear(checkpoint):
     keys = list(checkpoint.keys())
     attn_keys = ["query.weight", "key.weight", "value.weight"]
@@ -406,6 +412,7 @@ def conv_attn_to_linear(checkpoint):
                 checkpoint[key] = checkpoint[key][:, :, 0]
 
 
+# Copied from diffusers.pipelines.stable_diffusion.convert_from_ckpt.create_unet_diffusers_config
 def create_unet_diffusers_config(original_config, image_size: int, controlnet=False):
     """
     Creates a config for the diffusers based on the config of the LDM model.
@@ -510,6 +517,7 @@ def create_unet_diffusers_config(original_config, image_size: int, controlnet=Fa
     return config
 
 
+# Copied from diffusers.pipelines.stable_diffusion.convert_from_ckpt.create_vae_diffusers_config
 def create_vae_diffusers_config(original_config, image_size: int):
     """
     Creates a config for the diffusers based on the config of the LDM model.
@@ -534,6 +542,7 @@ def create_vae_diffusers_config(original_config, image_size: int):
     return config
 
 
+# Copied from diffusers.pipelines.stable_diffusion.convert_from_ckpt.convert_ldm_unet_checkpoint
 def convert_ldm_unet_checkpoint(
     checkpoint, config, path=None, extract_ema=False, controlnet=False, skip_extract_state_dict=False
 ):
@@ -782,6 +791,7 @@ def convert_ldm_unet_checkpoint(
     return new_checkpoint
 
 
+# Copied from diffusers.pipelines.stable_diffusion.convert_from_ckpt.convert_ldm_vae_checkpoint
 def convert_ldm_vae_checkpoint(checkpoint, config):
     # extract state dict for VAE
     vae_state_dict = {}
@@ -889,6 +899,7 @@ def convert_ldm_vae_checkpoint(checkpoint, config):
     return new_checkpoint
 
 
+# Copied from diffusers.pipelines.stable_diffusion.convert_from_ckpt.convert_ldm_bert_checkpoint
 def convert_ldm_bert_checkpoint(checkpoint, config):
     def _copy_attn_layer(hf_attn_layer, pt_attn_layer):
         hf_attn_layer.q_proj.weight.data = pt_attn_layer.to_q.weight
@@ -939,6 +950,7 @@ def convert_ldm_bert_checkpoint(checkpoint, config):
     return hf_model
 
 
+# Copied from diffusers.pipelines.stable_diffusion.convert_from_ckpt.convert_ldm_clip_checkpoint
 def convert_ldm_clip_checkpoint(checkpoint, local_files_only=False, text_encoder=None):
     if text_encoder is None:
         config_name = "openai/clip-vit-large-patch14"
@@ -978,56 +990,7 @@ def convert_ldm_clip_checkpoint(checkpoint, local_files_only=False, text_encoder
     return text_model
 
 
-def convert_controlnet_checkpoint(
-    checkpoint,
-    original_config,
-    checkpoint_path,
-    image_size,
-    upcast_attention,
-    extract_ema,
-    use_linear_projection=None,
-    cross_attention_dim=None,
-):
-    ctrlnet_config = create_unet_diffusers_config(original_config, image_size=image_size, controlnet=True)
-    ctrlnet_config["upcast_attention"] = upcast_attention
-
-    ctrlnet_config.pop("sample_size")
-
-    if use_linear_projection is not None:
-        ctrlnet_config["use_linear_projection"] = use_linear_projection
-
-    if cross_attention_dim is not None:
-        ctrlnet_config["cross_attention_dim"] = cross_attention_dim
-
-    ctx = init_empty_weights if is_accelerate_available() else nullcontext
-    with ctx():
-        controlnet = ControlNetModel(**ctrlnet_config)
-
-    # Some controlnet ckpt files are distributed independently from the rest of the
-    # model components i.e. https://huggingface.co/thibaud/controlnet-sd21/
-    if "time_embed.0.weight" in checkpoint:
-        skip_extract_state_dict = True
-    else:
-        skip_extract_state_dict = False
-
-    converted_ctrl_checkpoint = convert_ldm_unet_checkpoint(
-        checkpoint,
-        ctrlnet_config,
-        path=checkpoint_path,
-        extract_ema=extract_ema,
-        controlnet=True,
-        skip_extract_state_dict=skip_extract_state_dict,
-    )
-
-    if is_accelerate_available():
-        for param_name, param in converted_ctrl_checkpoint.items():
-            set_module_tensor_to_device(controlnet, param_name, "cpu", value=param)
-    else:
-        controlnet.load_state_dict(converted_ctrl_checkpoint)
-
-    return controlnet
-
-
+# Copied from diffusers.pipelines.stable_diffusion.convert_from_ckpt.convert_open_clip_checkpoint
 def convert_open_clip_checkpoint(
     checkpoint,
     config_name,
@@ -1312,28 +1275,6 @@ def create_unet_model(pipeline_class_name, original_config, checkpoint, checkpoi
     return {"unet": unet}
 
 
-def create_controlnet_model(
-    pipeline_class_name, original_config, checkpoint, checkpoint_path_or_dict, image_size, **kwargs
-):
-    if "control_stage_config" not in original_config.model.params:
-        raise ValueError("Config does not have controlnet information")
-
-    path = checkpoint_path_or_dict if isinstance(checkpoint_path_or_dict, str) else ""
-    extract_ema = kwargs.get("extract_ema", False)
-    upcast_attention = kwargs.get("upcast_attention", False)
-
-    controlnet = convert_controlnet_checkpoint(
-        checkpoint,
-        original_config,
-        path,
-        image_size,
-        upcast_attention,
-        extract_ema,
-    )
-
-    return {"controlnet": controlnet}
-
-
 def create_vae_model(pipeline_class_name, original_config, checkpoint, checkpoint_path_or_dict, **kwargs):
     image_size = determine_image_size(pipeline_class_name, original_config, checkpoint, **kwargs)
 
@@ -1601,32 +1542,3 @@ def create_stable_unclip_components(
     }
 
     return
-
-
-def create_paint_by_example_components(
-    pipeline_class_name, original_config, checkpoint, checkpoint_path_or_dict, **kwargs
-):
-    local_files_only = kwargs.get("local_files_only", False)
-    image_encoder = convert_paint_by_example_checkpoint(checkpoint)
-
-    try:
-        config_name = "openai/clip-vit-large-patch14"
-        tokenizer = CLIPTokenizer.from_pretrained(config_name, local_files_only=local_files_only)
-    except Exception:
-        raise ValueError(
-            f"With local_files_only set to {local_files_only}, you must first locally save the tokenizer in the following path: 'openai/clip-vit-large-patch14'."
-        )
-
-    try:
-        config_name = "CompVis/stable-diffusion-safety-checker"
-        feature_extractor = AutoFeatureExtractor.from_pretrained(config_name, local_files_only=local_files_only)
-    except Exception:
-        raise ValueError(
-            f"With local_files_only set to {local_files_only}, you must first locally save the feature_extractor in the following path: 'CompVis/stable-diffusion-safety-checker'."
-        )
-
-    return {
-        "image_encoder": image_encoder,
-        "tokenizer": tokenizer,
-        "feature_extractor": feature_extractor,
-    }
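
Note (not part of the patch): the `# Copied from diffusers.pipelines.stable_diffusion.convert_from_ckpt.<name>` markers added above follow the diffusers copy-consistency convention, where `utils/check_copies.py` (run via `make fix-copies`) keeps each annotated body in sync with the named source function, so the helpers duplicated into single_file_utils.py cannot silently drift. For orientation, below is a minimal sketch of the user-facing path these helpers back; the checkpoint filename is a hypothetical placeholder, not something this patch references:

    # Minimal usage sketch (assumes a local SD 1.x single-file checkpoint).
    # StableDiffusionPipeline.from_single_file is the public entry point that
    # drives the conversion helpers in single_file_utils.py.
    import torch
    from diffusers import StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_single_file(
        "sd-v1-5.safetensors",  # hypothetical local path
        torch_dtype=torch.float16,
    )
    pipe.to("cuda")
    image = pipe("an astronaut riding a horse").images[0]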