From 44091d8b2aa9fc3e958f9340c16285dcc1bee090 Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Wed, 7 Sep 2022 18:34:34 +0200 Subject: [PATCH] Score sde ve doc (#400) * initial score_sde_ve docs * fixed typo * fix VE term --- docs/source/api/pipelines/score_sde_ve.mdx | 26 ++++++++++++++++++- docs/source/training/text_inversion.mdx | 2 +- examples/textual_inversion/README.md | 2 +- .../score_sde_ve/pipeline_score_sde_ve.py | 22 +++++++++++++++- 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/docs/source/api/pipelines/score_sde_ve.mdx b/docs/source/api/pipelines/score_sde_ve.mdx index 330b11f6a8..b4bda54ff0 100644 --- a/docs/source/api/pipelines/score_sde_ve.mdx +++ b/docs/source/api/pipelines/score_sde_ve.mdx @@ -1 +1,25 @@ -# GLIDE MODEL \ No newline at end of file +# Score SDE VE + +## Overview + +[Score-Based Generative Modeling through Stochastic Differential Equations](https://arxiv.org/abs/2011.13456) (Score SDE) by Yang Song, Jascha Sohl-Dickstein, Diederik P. Kingma, Abhishek Kumar, Stefano Ermon and Ben Poole. + +The abstract of the paper is the following: + +Creating noise from data is easy; creating data from noise is generative modeling. We present a stochastic differential equation (SDE) that smoothly transforms a complex data distribution to a known prior distribution by slowly injecting noise, and a corresponding reverse-time SDE that transforms the prior distribution back into the data distribution by slowly removing the noise. Crucially, the reverse-time SDE depends only on the time-dependent gradient field (\aka, score) of the perturbed data distribution. By leveraging advances in score-based generative modeling, we can accurately estimate these scores with neural networks, and use numerical SDE solvers to generate samples. We show that this framework encapsulates previous approaches in score-based generative modeling and diffusion probabilistic modeling, allowing for new sampling procedures and new modeling capabilities. 
In particular, we introduce a predictor-corrector framework to correct errors in the evolution of the discretized reverse-time SDE. We also derive an equivalent neural ODE that samples from the same distribution as the SDE, but additionally enables exact likelihood computation, and improved sampling efficiency. In addition, we provide a new way to solve inverse problems with score-based models, as demonstrated with experiments on class-conditional generation, image inpainting, and colorization. Combined with multiple architectural improvements, we achieve record-breaking performance for unconditional image generation on CIFAR-10 with an Inception score of 9.89 and FID of 2.20, a competitive likelihood of 2.99 bits/dim, and demonstrate high fidelity generation of 1024 x 1024 images for the first time from a score-based generative model.
+
+The original codebase can be found [here](https://github.com/yang-song/score_sde_pytorch).
+
+This pipeline implements the Variance Exploding (VE) variant of the method. 
+ +## Available Pipelines: + +| Pipeline | Tasks | Colab +|---|---|:---:| +| [pipeline_score_sde_ve.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py) | *Unconditional Image Generation* | - | + +## API + +[[autodoc]] pipelines.score_sde_ve.pipeline_score_sde_ve.ScoreSdeVePipeline + - __call__ + diff --git a/docs/source/training/text_inversion.mdx b/docs/source/training/text_inversion.mdx index f9b9a98792..107cd706f4 100644 --- a/docs/source/training/text_inversion.mdx +++ b/docs/source/training/text_inversion.mdx @@ -113,7 +113,7 @@ from torch import autocast from diffusers import StableDiffusionPipeline model_id = "path-to-your-trained-model" -pipe = pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda") +pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda") prompt = "A backpack" diff --git a/examples/textual_inversion/README.md b/examples/textual_inversion/README.md index b731797ecb..ad3b405d0f 100644 --- a/examples/textual_inversion/README.md +++ b/examples/textual_inversion/README.md @@ -79,7 +79,7 @@ from torch import autocast from diffusers import StableDiffusionPipeline model_id = "path-to-your-trained-model" -pipe = pipe = StableDiffusionPipeline.from_pretrained(model_id,torch_dtype=torch.float16).to("cuda") +pipe = StableDiffusionPipeline.from_pretrained(model_id,torch_dtype=torch.float16).to("cuda") prompt = "A backpack" diff --git a/src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py b/src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py index 088f314114..1f5cfe763e 100644 --- a/src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py +++ b/src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py @@ -10,7 +10,13 @@ from ...schedulers import ScoreSdeVeScheduler class ScoreSdeVePipeline(DiffusionPipeline): - + r""" + Parameters: + This model inherits from [`DiffusionPipeline`]. 
Check the superclass documentation for the generic methods the + library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) + unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image. scheduler ([`SchedulerMixin`]): + The [`ScoreSdeVeScheduler`] scheduler to be used in combination with `unet` to denoise the encoded image. + """ unet: UNet2DModel scheduler: ScoreSdeVeScheduler @@ -28,6 +34,20 @@ class ScoreSdeVePipeline(DiffusionPipeline): return_dict: bool = True, **kwargs, ) -> Union[ImagePipelineOutput, Tuple]: + r""" + Args: + batch_size (:obj:`int`, *optional*, defaults to 1): + The number of images to generate. + generator (:obj:`torch.Generator`, *optional*): + A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation + deterministic. + output_type (:obj:`str`, *optional*, defaults to :obj:`"pil"`): + The output format of the generate image. Choose between + [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `nd.array`. + return_dict (:obj:`bool`, *optional*, defaults to :obj:`True`): + Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. + """ + if "torch_device" in kwargs: device = kwargs.pop("torch_device") warnings.warn(