# Mirror of https://github.com/huggingface/diffusers.git
# Synced 2026-01-27 17:22:53 +03:00
# These are canonical sets of parameters for different types of pipelines.
# They are set on subclasses of `PipelineTesterMixin` as `params` and
# `batch_params`.
#
# If a pipeline's set of arguments has minor changes from one of the common sets
# of arguments, do not make modifications to the existing common sets of arguments.
# I.e. a text to image pipeline with non-configurable height and width arguments
# should set its attribute as `params = TEXT_TO_IMAGE_PARAMS - {'height', 'width'}`.
# Canonical call arguments for text-to-image pipelines.
TEXT_TO_IMAGE_PARAMS = frozenset(
    {
        "prompt",
        "height",
        "width",
        "guidance_scale",
        "negative_prompt",
        "prompt_embeds",
        "negative_prompt_embeds",
        "cross_attention_kwargs",
    }
)

# Arguments that vary per batch element in text-to-image tests.
TEXT_TO_IMAGE_BATCH_PARAMS = frozenset({"prompt", "negative_prompt"})

# Text-to-image pipelines accept no image inputs.
TEXT_TO_IMAGE_IMAGE_PARAMS = frozenset()

# Image-to-image pipelines take a single `image` input.
IMAGE_TO_IMAGE_IMAGE_PARAMS = frozenset({"image"})
|
|
|
|
# Canonical call arguments for (unguided) image-variation pipelines.
IMAGE_VARIATION_PARAMS = frozenset(
    {
        "image",
        "height",
        "width",
        "guidance_scale",
    }
)

# Arguments that vary per batch element in image-variation tests.
IMAGE_VARIATION_BATCH_PARAMS = frozenset({"image"})

# Canonical call arguments for text-guided image-variation pipelines.
TEXT_GUIDED_IMAGE_VARIATION_PARAMS = frozenset(
    {
        "prompt",
        "image",
        "height",
        "width",
        "guidance_scale",
        "negative_prompt",
        "prompt_embeds",
        "negative_prompt_embeds",
    }
)

# Arguments that vary per batch element in text-guided image-variation tests.
TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS = frozenset({"prompt", "image", "negative_prompt"})
|
|
|
|
# Text-guided image variation with an image mask (inpainting).
TEXT_GUIDED_IMAGE_INPAINTING_PARAMS = frozenset(
    {
        "prompt",
        "image",
        "mask_image",
        "height",
        "width",
        "guidance_scale",
        "negative_prompt",
        "prompt_embeds",
        "negative_prompt_embeds",
    }
)

# Arguments that vary per batch element in text-guided inpainting tests.
TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS = frozenset({"prompt", "image", "mask_image", "negative_prompt"})

# Image variation with an image mask (inpainting without text guidance).
IMAGE_INPAINTING_PARAMS = frozenset(
    {
        "image",
        "mask_image",
        "height",
        "width",
        "guidance_scale",
    }
)

# Arguments that vary per batch element in unguided inpainting tests.
IMAGE_INPAINTING_BATCH_PARAMS = frozenset({"image", "mask_image"})
|
|
|
|
# Inpainting guided by an example image instead of a text prompt.
IMAGE_GUIDED_IMAGE_INPAINTING_PARAMS = frozenset(
    {
        "example_image",
        "image",
        "mask_image",
        "height",
        "width",
        "guidance_scale",
    }
)

# Arguments that vary per batch element in example-image inpainting tests.
IMAGE_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS = frozenset({"example_image", "image", "mask_image"})

# Class-conditional image generation is driven solely by `class_labels`.
CLASS_CONDITIONED_IMAGE_GENERATION_PARAMS = frozenset({"class_labels"})

CLASS_CONDITIONED_IMAGE_GENERATION_BATCH_PARAMS = frozenset({"class_labels"})

# Unconditional generation exposes only a `batch_size` knob and has no
# per-element batchable arguments.
UNCONDITIONAL_IMAGE_GENERATION_PARAMS = frozenset({"batch_size"})

UNCONDITIONAL_IMAGE_GENERATION_BATCH_PARAMS = frozenset()

UNCONDITIONAL_AUDIO_GENERATION_PARAMS = frozenset({"batch_size"})

UNCONDITIONAL_AUDIO_GENERATION_BATCH_PARAMS = frozenset()
|
|
|
|
# Canonical call arguments for text-to-audio pipelines.
TEXT_TO_AUDIO_PARAMS = frozenset(
    {
        "prompt",
        "audio_length_in_s",
        "guidance_scale",
        "negative_prompt",
        "prompt_embeds",
        "negative_prompt_embeds",
        "cross_attention_kwargs",
    }
)

# Arguments that vary per batch element in text-to-audio tests.
TEXT_TO_AUDIO_BATCH_PARAMS = frozenset({"prompt", "negative_prompt"})

# Audio generation conditioned on `input_tokens`.
TOKENS_TO_AUDIO_GENERATION_PARAMS = frozenset({"input_tokens"})

TOKENS_TO_AUDIO_GENERATION_BATCH_PARAMS = frozenset({"input_tokens"})

# Tensor inputs exposed to step-end callbacks — the "CFG" in the name
# presumably refers to classifier-free guidance; confirm against the
# pipelines that consume this set.
TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS = frozenset({"prompt_embeds"})

# Arguments that vary per batch element in video-to-video tests.
VIDEO_TO_VIDEO_BATCH_PARAMS = frozenset({"prompt", "negative_prompt", "video"})