1
0
mirror of https://github.com/huggingface/diffusers.git synced 2026-01-29 07:22:12 +03:00

Fix small bugs in pipeline_unidiffuser.py and convert_from_ckpt.py.

This commit is contained in:
Daniel Gu
2023-04-24 21:14:06 -07:00
parent 48f2c25f36
commit 240940a1ea
2 changed files with 4 additions and 4 deletions

View File

@@ -613,13 +613,13 @@ def convert_caption_decoder_to_diffusers(ckpt, diffusers_model):
def main(args):
# Create corresponding models, hardcoded for now.
vae_config = create_vae_diffusers_config(args)
-    AutoencoderKL(**vae_config)
+    vae = AutoencoderKL(**vae_config)
unet_config = create_unidiffuser_unet_config(args)
unet = UniDiffuserModel(**unet_config)
text_decoder_config = create_text_decoder_config(args)
-    UniDiffuserTextDecoder(**text_decoder_config)
+    text_decoder = UniDiffuserTextDecoder(**text_decoder_config)
print("Converting VAE checkpoint...")
vae = convert_vae_to_diffusers(args.vae_ckpt, vae)

View File

@@ -151,7 +151,7 @@ class UniDiffuserPipeline(DiffusionPipeline):
self.text_encoder_hidden_size = text_encoder.config.hidden_size
self.image_encoder_hidden_size = image_encoder.config.hidden_size
-        self.text_intermediate_dim = 0
+        self.text_intermediate_dim = self.text_encoder_hidden_size
if self.text_decoder.prefix_hidden_dim is not None:
self.text_intermediate_dim = self.text_decoder.prefix_hidden_dim
@@ -934,7 +934,7 @@ class UniDiffuserPipeline(DiffusionPipeline):
mode = self._infer_mode(prompt, prompt_embeds, image, prompt_latents, vae_latents, clip_latents)
batch_size = self._infer_batch_size(mode, prompt, prompt_embeds, image, num_samples)
device = self._execution_device
-        reduce_text_emb_dim = self.text_intermediate_dim < self.text_encoder_hidden_size or self.mode != "t2i"
+        reduce_text_emb_dim = self.text_intermediate_dim < self.text_encoder_hidden_size or self.mode != "text2img"
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`