From 95ea538c7969b74f1da8971dfd3bfe3e794c96cc Mon Sep 17 00:00:00 2001
From: YiYi Xu
Date: Wed, 21 Jun 2023 07:23:18 -1000
Subject: [PATCH] Add ddpm kandinsky (#3783)

* update doc

---------

Co-authored-by: yiyixuxu
---
 docs/source/en/api/pipelines/kandinsky.mdx        | 14 ++++++++++++++
 .../pipelines/kandinsky/pipeline_kandinsky.py     |  9 +++------
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/docs/source/en/api/pipelines/kandinsky.mdx b/docs/source/en/api/pipelines/kandinsky.mdx
index 1cac981098..bf551249ef 100644
--- a/docs/source/en/api/pipelines/kandinsky.mdx
+++ b/docs/source/en/api/pipelines/kandinsky.mdx
@@ -55,6 +55,20 @@ t2i_pipe = DiffusionPipeline.from_pretrained("kandinsky-community/kandinsky-2-1"
 t2i_pipe.to("cuda")
 ```
+
+
+By default, the text-to-image pipeline uses [`DDIMScheduler`], but you can change it to [`DDPMScheduler`]:
+
+```py
+scheduler = DDPMScheduler.from_pretrained("kandinsky-community/kandinsky-2-1", subfolder="ddpm_scheduler")
+t2i_pipe = DiffusionPipeline.from_pretrained(
+    "kandinsky-community/kandinsky-2-1", scheduler=scheduler, torch_dtype=torch.float16
+)
+t2i_pipe.to("cuda")
+```
+
+
+
 Now we pass the prompt through the prior to generate image embeddings. The prior returns both the image embeddings corresponding to the prompt and negative/unconditional image embeddings corresponding to an empty string.
 
diff --git a/src/diffusers/pipelines/kandinsky/pipeline_kandinsky.py b/src/diffusers/pipelines/kandinsky/pipeline_kandinsky.py
index 6de9cf4451..7b3537ea68 100644
--- a/src/diffusers/pipelines/kandinsky/pipeline_kandinsky.py
+++ b/src/diffusers/pipelines/kandinsky/pipeline_kandinsky.py
@@ -22,7 +22,7 @@ from transformers import (
 from ...models import UNet2DConditionModel, VQModel
 from ...pipelines import DiffusionPipeline
 from ...pipelines.pipeline_utils import ImagePipelineOutput
-from ...schedulers import DDIMScheduler
+from ...schedulers import DDIMScheduler, DDPMScheduler
 from ...utils import (
     is_accelerate_available,
     is_accelerate_version,
@@ -88,7 +88,7 @@ class KandinskyPipeline(DiffusionPipeline):
             Frozen text-encoder.
         tokenizer ([`XLMRobertaTokenizer`]):
             Tokenizer of class
-        scheduler ([`DDIMScheduler`]):
+        scheduler (Union[`DDIMScheduler`,`DDPMScheduler`]):
             A scheduler to be used in combination with `unet` to generate image latents.
         unet ([`UNet2DConditionModel`]):
             Conditional U-Net architecture to denoise the image embedding.
@@ -101,7 +101,7 @@ class KandinskyPipeline(DiffusionPipeline):
         text_encoder: MultilingualCLIP,
         tokenizer: XLMRobertaTokenizer,
         unet: UNet2DConditionModel,
-        scheduler: DDIMScheduler,
+        scheduler: Union[DDIMScheduler, DDPMScheduler],
         movq: VQModel,
     ):
         super().__init__()
@@ -439,9 +439,6 @@ class KandinskyPipeline(DiffusionPipeline):
                 noise_pred,
                 t,
                 latents,
-                # YiYi notes: only reason this pipeline can't work with unclip scheduler is that can't pass down this argument
-                # need to use DDPM scheduler instead
-                # prev_timestep=prev_timestep,
                 generator=generator,
             ).prev_sample
             # post-processing
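For reference, the sketch below shows how the new DDPM option slots into the full text-to-image workflow the docs describe (prior pipeline → image embeddings → Kandinsky text-to-image pipeline). It is an illustrative sketch rather than part of the patch: the prompt, negative prompt, resolution, and step count are arbitrary choices, and it assumes the `kandinsky-community/kandinsky-2-1-prior` checkpoint alongside the checkpoints referenced above.

```py
import torch
from diffusers import DDPMScheduler, DiffusionPipeline

# Prior pipeline: maps the text prompt to image embeddings plus
# negative/unconditional image embeddings.
pipe_prior = DiffusionPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16
)
pipe_prior.to("cuda")

# Text-to-image pipeline, with the default DDIM scheduler swapped for the
# DDPM scheduler loaded from the `ddpm_scheduler` subfolder added in this PR.
scheduler = DDPMScheduler.from_pretrained("kandinsky-community/kandinsky-2-1", subfolder="ddpm_scheduler")
t2i_pipe = DiffusionPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-1", scheduler=scheduler, torch_dtype=torch.float16
)
t2i_pipe.to("cuda")

prompt = "red cat, 4k photo"  # example prompt, not from the patch
negative_prompt = "low quality, bad quality"

# The prior output carries both the prompt embeddings and the negative embeddings.
prior_output = pipe_prior(prompt, negative_prompt=negative_prompt, guidance_scale=1.0)

image = t2i_pipe(
    prompt,
    image_embeds=prior_output.image_embeds,
    negative_image_embeds=prior_output.negative_image_embeds,
    height=768,
    width=768,
    num_inference_steps=100,
).images[0]
image.save("cat_ddpm.png")
```

Because DDPM sampling is stochastic, pass a `torch.Generator` to the pipelines if you need reproducible outputs.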