
[docs] add doc for PixArtSigmaPipeline (#7857)

* 1. add doc for PixArtSigmaPipeline;

---------

Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
Co-authored-by: Guillaume LEGENDRE <glegendre01@gmail.com>
Co-authored-by: Álvaro Somoza <asomoza@users.noreply.github.com>
Co-authored-by: Bagheera <59658056+bghira@users.noreply.github.com>
Co-authored-by: bghira <bghira@users.github.com>
Co-authored-by: Hyoungwon Cho <jhw9811@korea.ac.kr>
Co-authored-by: yiyixuxu <yixu310@gmail.com>
Co-authored-by: Tolga Cangöz <46008593+standardAI@users.noreply.github.com>
Co-authored-by: Philip Pham <phillypham@google.com>
Junsong Chen authored on 2024-05-21 00:40:57 +08:00, committed by GitHub
parent 19df9f3ec0
commit 0f0defdb65
4 changed files with 160 additions and 7 deletions

src/diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py

@@ -23,7 +23,7 @@ from transformers import T5EncoderModel, T5Tokenizer
 from ...image_processor import PixArtImageProcessor
 from ...models import AutoencoderKL, Transformer2DModel
-from ...schedulers import DPMSolverMultistepScheduler
+from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     BACKENDS_MAPPING,
     deprecate,
@@ -203,7 +203,7 @@ class PixArtSigmaPipeline(DiffusionPipeline):
         text_encoder: T5EncoderModel,
         vae: AutoencoderKL,
         transformer: Transformer2DModel,
-        scheduler: DPMSolverMultistepScheduler,
+        scheduler: KarrasDiffusionSchedulers,
     ):
         super().__init__()
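The two hunks above widen the scheduler annotation from `DPMSolverMultistepScheduler` to `KarrasDiffusionSchedulers`, so the pipeline is no longer tied to a single scheduler class. A minimal sketch of swapping in another scheduler after loading (the checkpoint id is assumed here for illustration, not taken from this commit):

import torch

from diffusers import EulerDiscreteScheduler, PixArtSigmaPipeline

# Checkpoint id assumed for illustration.
pipe = PixArtSigmaPipeline.from_pretrained(
    "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS", torch_dtype=torch.float16
)

# Any scheduler covered by the KarrasDiffusionSchedulers annotation can be assigned;
# how well a given scheduler works still depends on the model.
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)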
@@ -214,7 +214,7 @@ class PixArtSigmaPipeline(DiffusionPipeline):
         self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
         self.image_processor = PixArtImageProcessor(vae_scale_factor=self.vae_scale_factor)
 
-    # Copied from diffusers.pipelines.pixart_alpha.pipeline_pixart_alpha.PixArtAlphaPipeline.encode_prompt
+    # Copied from diffusers.pipelines.pixart_alpha.pipeline_pixart_alpha.PixArtAlphaPipeline.encode_prompt with 120->300
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
@@ -227,7 +227,7 @@ class PixArtSigmaPipeline(DiffusionPipeline):
         prompt_attention_mask: Optional[torch.Tensor] = None,
         negative_prompt_attention_mask: Optional[torch.Tensor] = None,
         clean_caption: bool = False,
-        max_sequence_length: int = 120,
+        max_sequence_length: int = 300,
         **kwargs,
     ):
         r"""
@@ -254,7 +254,7 @@ class PixArtSigmaPipeline(DiffusionPipeline):
                 string.
             clean_caption (`bool`, defaults to `False`):
                 If `True`, the function will preprocess and clean the provided caption before encoding.
-            max_sequence_length (`int`, defaults to 120): Maximum sequence length to use for the prompt.
+            max_sequence_length (`int`, defaults to 300): Maximum sequence length to use for the prompt.
         """
 
         if "mask_feature" in kwargs:
@@ -707,7 +707,7 @@ class PixArtSigmaPipeline(DiffusionPipeline):
                 If set to `True`, the requested height and width are first mapped to the closest resolutions using
                 `ASPECT_RATIO_1024_BIN`. After the produced latents are decoded into images, they are resized back to
                 the requested resolution. Useful for generating non-square images.
-            max_sequence_length (`int` defaults to 120): Maximum sequence length to use with the `prompt`.
+            max_sequence_length (`int` defaults to 300): Maximum sequence length to use with the `prompt`.
 
         Examples:
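For reference on the 120 -> 300 change, a minimal generation sketch passing the new default sequence length explicitly (checkpoint id and prompt are illustrative assumptions, not part of this commit):

import torch

from diffusers import PixArtSigmaPipeline

# Checkpoint id and prompt assumed for illustration.
pipe = PixArtSigmaPipeline.from_pretrained(
    "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS", torch_dtype=torch.float16
).to("cuda")

image = pipe(
    prompt="A small cactus with a happy face in the Sahara desert",
    max_sequence_length=300,  # matches the default documented in this commit
).images[0]
image.save("pixart_sigma.png")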