From 9e910c463394aaa5ae31be5f7529d1db79e26749 Mon Sep 17 00:00:00 2001 From: Teriks Date: Mon, 3 Mar 2025 07:30:39 -0600 Subject: [PATCH] Fix SD2.X clip single file load projection_dim (#10770) * Fix SD2.X clip single file load projection_dim Infer projection_dim from the checkpoint before loading from pretrained, overriding any incorrect hub config. The hub configuration for SD2.X specifies projection_dim=512, which is incorrect for SD2.X checkpoints loaded from civitai and similar sources. An exception was previously thrown when calling load_model_dict_into_meta for SD2.X single-file checkpoints. Such LDM models usually require projection_dim=1024. * convert_open_clip_checkpoint use hidden_size for text_proj_dim * convert_open_clip_checkpoint, revert checkpoint[text_proj_key].shape[1] -> [0]; values are identical --------- Co-authored-by: Teriks Co-authored-by: Dhruv Nair --- src/diffusers/loaders/single_file_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index 59060efade..cc421d0291 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -1448,8 +1448,8 @@ def convert_open_clip_checkpoint( if text_proj_key in checkpoint: text_proj_dim = int(checkpoint[text_proj_key].shape[0]) - elif hasattr(text_model.config, "projection_dim"): - text_proj_dim = text_model.config.projection_dim + elif hasattr(text_model.config, "hidden_size"): + text_proj_dim = text_model.config.hidden_size else: text_proj_dim = LDM_OPEN_CLIP_TEXT_PROJECTION_DIM