Mirror of https://github.com/huggingface/diffusers.git (synced 2026-01-29 07:22:12 +03:00)
update
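The hunks below all make the same one-line change: the dotted path inside a `# Copied from ...` marker is rewritten to point at a module that now lives under `diffusers.pipelines.deprecated`. These markers are plain comments read by the repository's copy-consistency tooling (e.g. `make fix-copies`), so a stale path breaks that check rather than any import. As a minimal, hypothetical sketch of what "does this dotted path still resolve" means (the helper `resolve_copied_from_target` is illustrative and is not a diffusers function):

```python
import importlib


def resolve_copied_from_target(dotted_path: str):
    """Resolve a `pkg.module.Class.method` path: import the longest importable
    module prefix, then walk the remaining attributes with getattr."""
    parts = dotted_path.split(".")
    for split in range(len(parts), 0, -1):
        try:
            obj = importlib.import_module(".".join(parts[:split]))
        except ModuleNotFoundError:
            continue  # prefix is not a module; try a shorter one
        for attr in parts[split:]:
            obj = getattr(obj, attr)
        return obj
    raise ModuleNotFoundError(dotted_path)


# Example (commented out; requires diffusers and the deprecated module path used in this diff):
# resolve_copied_from_target(
#     "diffusers.pipelines.deprecated.text_to_video_synthesis"
#     ".pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents"
# )
```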
@@ -436,7 +436,7 @@ class AnimateDiffControlNetPipeline(
image_embeds = ip_adapter_image_embeds
return image_embeds

-# Copied from diffusers.pipelines.text_to_video_synthesis/pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
def decode_latents(self, latents):
latents = 1 / self.vae.config.scaling_factor * latents

@@ -663,7 +663,7 @@ class AnimateDiffControlNetPipeline(
f"If image batch size is not 1, image batch size must be same as prompt batch size. image batch size: {image_batch_size}, prompt batch size: {prompt_batch_size}"
)

-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
def prepare_latents(
self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
):
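For context on the surrounding method (not part of the change itself): `decode_latents` in these video pipelines rescales latents of shape (batch, channels, frames, height, width) by `1 / vae.config.scaling_factor` and decodes them frame by frame through the image VAE. A rough standalone sketch of that idea, with `vae` as a duck-typed stand-in for the pipeline's actual AutoencoderKL:

```python
import torch


def decode_video_latents(vae, latents: torch.Tensor) -> torch.Tensor:
    # Sketch only: assumes `vae` exposes `config.scaling_factor` and `decode()`.
    batch, channels, num_frames, height, width = latents.shape
    # Undo the scaling applied when the latents were produced.
    latents = latents / vae.config.scaling_factor
    # Fold frames into the batch dimension so the image VAE can decode them.
    latents = latents.permute(0, 2, 1, 3, 4).reshape(batch * num_frames, channels, height, width)
    frames = vae.decode(latents).sample
    # Restore the (batch, channels, frames, H, W) video layout.
    return frames.reshape(batch, num_frames, *frames.shape[1:]).permute(0, 2, 1, 3, 4)
```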
@@ -553,7 +553,7 @@ class AnimateDiffImgToVideoPipeline(
image_embeds = ip_adapter_image_embeds
return image_embeds

-# Copied from diffusers.pipelines.text_to_video_synthesis/pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
def decode_latents(self, latents):
latents = 1 / self.vae.config.scaling_factor * latents

@@ -425,7 +425,7 @@ class AnimateDiffPipelineIpex(
return image_embeds

-# Copied from diffusers.pipelines.text_to_video_synthesis/pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
def decode_latents(self, latents):
latents = 1 / self.vae.config.scaling_factor * latents

@@ -520,7 +520,7 @@ class AnimateDiffPipelineIpex(
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
)

-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
def prepare_latents(
self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
):
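The `prepare_latents` signature touched above builds (or validates) the initial noise tensor for a video: shape (batch, channels, frames, height // vae_scale_factor, width // vae_scale_factor), drawn with `randn_tensor` when the caller supplies no latents. A simplified sketch, assuming a vae_scale_factor of 8 and omitting the scheduler-sigma scaling and extra checks the real pipeline methods carry:

```python
import torch
from diffusers.utils.torch_utils import randn_tensor


def prepare_video_latents(batch_size, num_channels_latents, num_frames, height, width,
                          dtype, device, generator, latents=None, vae_scale_factor=8):
    # Latent grid is the pixel grid divided by the VAE downscaling factor (assumed 8 here).
    shape = (batch_size, num_channels_latents, num_frames,
             height // vae_scale_factor, width // vae_scale_factor)
    if latents is None:
        latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
    else:
        latents = latents.to(device)
    return latents
```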
@@ -427,7 +427,7 @@ class CogVideoXSTGPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
extra_step_kwargs["generator"] = generator
return extra_step_kwargs

-# Copied from diffusers.pipelines.latte.pipeline_latte.LattePipeline.check_inputs
+# Copied from diffusers.pipelines.deprecated.latte.pipeline_latte.LattePipeline.check_inputs
def check_inputs(
self,
prompt,
@@ -18,7 +18,7 @@ from diffusers import (
UNet2DConditionModel,
UNet2DModel,
)
-from diffusers.pipelines.unclip import UnCLIPTextProjModel
+from diffusers.pipelines.deprecated.unclip import UnCLIPTextProjModel
from diffusers.utils import logging
from diffusers.utils.torch_utils import randn_tensor

@@ -84,7 +84,7 @@ class UnCLIPImageInterpolationPipeline(DiffusionPipeline):
decoder_scheduler: UnCLIPScheduler
super_res_scheduler: UnCLIPScheduler

-# Copied from diffusers.pipelines.unclip.pipeline_unclip_image_variation.UnCLIPImageVariationPipeline.__init__
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip_image_variation.UnCLIPImageVariationPipeline.__init__
def __init__(
self,
decoder: UNet2DConditionModel,

@@ -113,7 +113,7 @@ class UnCLIPImageInterpolationPipeline(DiffusionPipeline):
super_res_scheduler=super_res_scheduler,
)

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
if latents is None:
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)

@@ -125,7 +125,7 @@ class UnCLIPImageInterpolationPipeline(DiffusionPipeline):
latents = latents * scheduler.init_noise_sigma
return latents

-# Copied from diffusers.pipelines.unclip.pipeline_unclip_image_variation.UnCLIPImageVariationPipeline._encode_prompt
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip_image_variation.UnCLIPImageVariationPipeline._encode_prompt
def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance):
batch_size = len(prompt) if isinstance(prompt, list) else 1

@@ -189,7 +189,7 @@ class UnCLIPImageInterpolationPipeline(DiffusionPipeline):
return prompt_embeds, text_encoder_hidden_states, text_mask

-# Copied from diffusers.pipelines.unclip.pipeline_unclip_image_variation.UnCLIPImageVariationPipeline._encode_image
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip_image_variation.UnCLIPImageVariationPipeline._encode_image
def _encode_image(self, image, device, num_images_per_prompt, image_embeddings: Optional[torch.Tensor] = None):
dtype = next(self.image_encoder.parameters()).dtype
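The UnCLIP-style `prepare_latents` referenced here and in many later hunks follows the pattern visible in the context lines: draw noise with `randn_tensor` when no latents are given, otherwise validate and move them, then scale by the scheduler's `init_noise_sigma`. Written out as a free function purely for illustration (the error message wording is a guess at the spirit of the original, not a verbatim copy):

```python
from diffusers.utils.torch_utils import randn_tensor


def prepare_latents(shape, dtype, device, generator, latents, scheduler):
    if latents is None:
        latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
    elif latents.shape != shape:
        raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}")
    else:
        latents = latents.to(device)
    # Schedulers expect the initial noise to be scaled by their initial sigma.
    return latents * scheduler.init_noise_sigma
```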
@@ -14,7 +14,7 @@ from diffusers import (
UNet2DConditionModel,
UNet2DModel,
)
-from diffusers.pipelines.unclip import UnCLIPTextProjModel
+from diffusers.pipelines.deprecated.unclip import UnCLIPTextProjModel
from diffusers.utils import logging
from diffusers.utils.torch_utils import randn_tensor

@@ -78,7 +78,7 @@ class UnCLIPTextInterpolationPipeline(DiffusionPipeline):
decoder_scheduler: UnCLIPScheduler
super_res_scheduler: UnCLIPScheduler

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.__init__
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.__init__
def __init__(
self,
prior: PriorTransformer,

@@ -107,7 +107,7 @@ class UnCLIPTextInterpolationPipeline(DiffusionPipeline):
super_res_scheduler=super_res_scheduler,
)

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
if latents is None:
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)

@@ -119,7 +119,7 @@ class UnCLIPTextInterpolationPipeline(DiffusionPipeline):
latents = latents * scheduler.init_noise_sigma
return latents

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline._encode_prompt
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline._encode_prompt
def _encode_prompt(
self,
prompt,

@@ -7,7 +7,7 @@ from transformers import CLIPTextModelWithProjection, CLIPTokenizer

from diffusers import UnCLIPPipeline, UNet2DConditionModel, UNet2DModel
from diffusers.models.transformers.prior_transformer import PriorTransformer
-from diffusers.pipelines.unclip.text_proj import UnCLIPTextProjModel
+from diffusers.pipelines.deprecated.unclip.text_proj import UnCLIPTextProjModel
from diffusers.schedulers.scheduling_unclip import UnCLIPScheduler
@@ -27,7 +27,7 @@ from ..attention_processor import Attention
from ..modeling_utils import ModelMixin


-# Copied from diffusers.pipelines.wuerstchen.modeling_wuerstchen_common.WuerstchenLayerNorm with WuerstchenLayerNorm -> SDCascadeLayerNorm
+# Copied from diffusers.pipelines.deprecated.wuerstchen.modeling_wuerstchen_common.WuerstchenLayerNorm with WuerstchenLayerNorm -> SDCascadeLayerNorm
class SDCascadeLayerNorm(nn.LayerNorm):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
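This marker also carries a rename clause, `with WuerstchenLayerNorm -> SDCascadeLayerNorm`: the class body is copied verbatim and only the name is substituted. Only the `__init__` lines are visible in the hunk; the `forward` below is sketched from memory of the Wuerstchen block (a LayerNorm applied channels-last) and is an assumption, not a quotation of the file:

```python
import torch
import torch.nn as nn


class SDCascadeLayerNorm(nn.LayerNorm):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x.permute(0, 2, 3, 1)      # NCHW -> NHWC
        x = super().forward(x)         # normalize over the channel dimension
        return x.permute(0, 3, 1, 2)   # back to NCHW
```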
@@ -635,7 +635,7 @@ class AnimateDiffSDXLPipeline(
return ip_adapter_image_embeds

-# Copied from diffusers.pipelines.text_to_video_synthesis/pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
def decode_latents(self, latents):
latents = 1 / self.vae.config.scaling_factor * latents

@@ -738,7 +738,7 @@ class AnimateDiffSDXLPipeline(
"If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
)

-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
def prepare_latents(
self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
):

@@ -458,7 +458,7 @@ class AnimateDiffSparseControlNetPipeline(
return ip_adapter_image_embeds

-# Copied from diffusers.pipelines.text_to_video_synthesis/pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
def decode_latents(self, latents):
latents = 1 / self.vae.config.scaling_factor * latents

@@ -621,7 +621,7 @@ class AnimateDiffSparseControlNetPipeline(
f"If image batch size is not 1, image batch size must be same as prompt batch size. image batch size: {image_batch_size}, prompt batch size: {prompt_batch_size}"
)

-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
def prepare_latents(
self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
):
@@ -373,7 +373,7 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
extra_step_kwargs["generator"] = generator
return extra_step_kwargs

-# Copied from diffusers.pipelines.latte.pipeline_latte.LattePipeline.check_inputs
+# Copied from diffusers.pipelines.deprecated.latte.pipeline_latte.LattePipeline.check_inputs
def check_inputs(
self,
prompt,

@@ -333,7 +333,7 @@ class CogView3PlusPipeline(DiffusionPipeline):
extra_step_kwargs["generator"] = generator
return extra_step_kwargs

-# Copied from diffusers.pipelines.latte.pipeline_latte.LattePipeline.check_inputs
+# Copied from diffusers.pipelines.deprecated.latte.pipeline_latte.LattePipeline.check_inputs
def check_inputs(
self,
prompt,

@@ -479,7 +479,7 @@ class I2VGenXLPipeline(
return image_latents

-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
def prepare_latents(
self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
):

@@ -561,7 +561,7 @@ class LattePipeline(DiffusionPipeline):
return caption.strip()

-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
def prepare_latents(
self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
):
@@ -422,7 +422,7 @@ class PIAPipeline(
return image_embeds, uncond_image_embeds

-# Copied from diffusers.pipelines.text_to_video_synthesis/pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
def decode_latents(self, latents):
latents = 1 / self.vae.config.scaling_factor * latents

@@ -562,7 +562,7 @@ class PIAPipeline(
return ip_adapter_image_embeds

-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents
def prepare_latents(
self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
):

@@ -143,7 +143,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
extra_step_kwargs["generator"] = generator
return extra_step_kwargs

-# Copied from diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
+# Copied from diffusers.pipelines.deprecated.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
def check_inputs(
self,
prompt,
@@ -127,7 +127,7 @@ class ShapEPipeline(DiffusionPipeline):
shap_e_renderer=shap_e_renderer,
)

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
if latents is None:
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)

@@ -128,7 +128,7 @@ class ShapEImg2ImgPipeline(DiffusionPipeline):
shap_e_renderer=shap_e_renderer,
)

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
if latents is None:
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
@@ -72,7 +72,7 @@ EXAMPLE_DOC_STRING = """
"""


-# Copied from diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.ModelWrapper
+# Copied from diffusers.pipelines.deprecated.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.ModelWrapper
class ModelWrapper:
def __init__(self, model, alphas_cumprod):
self.model = model

@@ -187,7 +187,7 @@ class StableDiffusionXLKDiffusionPipeline(
else:
self.k_diffusion_model = CompVisDenoiser(model)

-# Copied from diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.set_scheduler
+# Copied from diffusers.pipelines.deprecated.stable_diffusion_k_diffusion..pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.set_scheduler
def set_scheduler(self, scheduler_type: str):
library = importlib.import_module("k_diffusion")
sampling = getattr(library, "sampling")

@@ -372,7 +372,7 @@ class StableDiffusionPipelineSafe(DiffusionPipeline, StableDiffusionMixin, IPAda
extra_step_kwargs["generator"] = generator
return extra_step_kwargs

-# Copied from diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
+# Copied from diffusers.pipelines.deprecated.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
def check_inputs(
self,
prompt,
@@ -490,7 +490,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
extra_step_kwargs["generator"] = generator
return extra_step_kwargs

-# Copied from diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
+# Copied from diffusers.pipelines.deprecated.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
def check_inputs(
self,
prompt,

@@ -363,7 +363,7 @@ class TextToVideoSDPipeline(
extra_step_kwargs["generator"] = generator
return extra_step_kwargs

-# Copied from diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
+# Copied from diffusers.pipelines.deprecated.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
def check_inputs(
self,
prompt,

@@ -368,7 +368,7 @@ class VideoToVideoSDPipeline(
return prompt_embeds, negative_prompt_embeds

-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.decode_latents
def decode_latents(self, latents):
latents = 1 / self.vae.config.scaling_factor * latents

@@ -464,7 +464,7 @@ class TextToVideoZeroPipeline(
return latents.clone().detach()

-# Copied from diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
+# Copied from diffusers.pipelines.deprecated.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
def check_inputs(
self,
prompt,
@@ -55,32 +55,32 @@ else:
logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.rearrange_0
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.rearrange_0
def rearrange_0(tensor, f):
F, C, H, W = tensor.size()
tensor = torch.permute(torch.reshape(tensor, (F // f, f, C, H, W)), (0, 2, 1, 3, 4))
return tensor


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.rearrange_1
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.rearrange_1
def rearrange_1(tensor):
B, C, F, H, W = tensor.size()
return torch.reshape(torch.permute(tensor, (0, 2, 1, 3, 4)), (B * F, C, H, W))


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.rearrange_3
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.rearrange_3
def rearrange_3(tensor, f):
F, D, C = tensor.size()
return torch.reshape(tensor, (F // f, f, D, C))


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.rearrange_4
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.rearrange_4
def rearrange_4(tensor):
B, F, D, C = tensor.size()
return torch.reshape(tensor, (B * F, D, C))


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.CrossFrameAttnProcessor
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.CrossFrameAttnProcessor
class CrossFrameAttnProcessor:
"""
Cross frame attention processor. Each frame attends the first frame.
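A standalone round-trip check of the two frame-reshaping helpers touched above (bodies reproduced from the context lines): `rearrange_0` folds a (batch * frames, C, H, W) frame batch into (batch, C, frames, H, W), and `rearrange_1` undoes it.

```python
import torch


def rearrange_0(tensor, f):
    F, C, H, W = tensor.size()
    return torch.permute(torch.reshape(tensor, (F // f, f, C, H, W)), (0, 2, 1, 3, 4))


def rearrange_1(tensor):
    B, C, F, H, W = tensor.size()
    return torch.reshape(torch.permute(tensor, (0, 2, 1, 3, 4)), (B * F, C, H, W))


frames = torch.randn(2 * 8, 4, 32, 32)          # 2 videos, 8 frames each
video = rearrange_0(frames, f=8)                # -> (2, 4, 8, 32, 32)
assert torch.equal(rearrange_1(video), frames)  # lossless round trip
```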
@@ -140,7 +140,7 @@ class CrossFrameAttnProcessor:
return hidden_states


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.CrossFrameAttnProcessor2_0
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.CrossFrameAttnProcessor2_0
class CrossFrameAttnProcessor2_0:
"""
Cross frame attention processor with scaled_dot_product attention of Pytorch 2.0.
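The two `CrossFrameAttnProcessor` classes whose markers change above implement the idea named in their docstrings: within a clip, every frame's keys and values are taken from the first frame, so each frame attends to frame 0. A toy sketch of that idea outside the diffusers `Attention` API (this is not the processors' actual `__call__`):

```python
import torch


def cross_frame_attention(q, k, v, video_length):
    # q, k, v: (batch * frames, tokens, dim)
    b = k.shape[0] // video_length
    # Broadcast frame 0's keys/values to every frame in the clip.
    k = k.reshape(b, video_length, *k.shape[1:])[:, :1].expand(-1, video_length, -1, -1)
    v = v.reshape(b, video_length, *v.shape[1:])[:, :1].expand(-1, video_length, -1, -1)
    k = k.reshape(-1, *k.shape[2:])
    v = v.reshape(-1, *v.shape[2:])
    return torch.nn.functional.scaled_dot_product_attention(q, k, v)
```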
@@ -230,7 +230,7 @@ class TextToVideoSDXLPipelineOutput(BaseOutput):
images: Union[List[PIL.Image.Image], np.ndarray]


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.coords_grid
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.coords_grid
def coords_grid(batch, ht, wd, device):
# Adapted from https://github.com/princeton-vl/RAFT/blob/master/core/utils/utils.py
coords = torch.meshgrid(torch.arange(ht, device=device), torch.arange(wd, device=device))

@@ -238,7 +238,7 @@ def coords_grid(batch, ht, wd, device):
return coords[None].repeat(batch, 1, 1, 1)


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.warp_single_latent
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.warp_single_latent
def warp_single_latent(latent, reference_flow):
"""
Warp latent of a single frame with given flow

@@ -266,7 +266,7 @@ def warp_single_latent(latent, reference_flow):
return warped


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.create_motion_field
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.create_motion_field
def create_motion_field(motion_field_strength_x, motion_field_strength_y, frame_ids, device, dtype):
"""
Create translation motion field

@@ -290,7 +290,7 @@ def create_motion_field(motion_field_strength_x, motion_field_strength_y, frame_
return reference_flow


-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.create_motion_field_and_warp_latents
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.create_motion_field_and_warp_latents
def create_motion_field_and_warp_latents(motion_field_strength_x, motion_field_strength_y, frame_ids, latents):
"""
Creates translation motion and warps the latents accordingly
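`create_motion_field` and `warp_single_latent`, whose markers change above, build a per-frame 2D translation flow that grows linearly with the frame id and then warp the latents with it via grid sampling. A hedged sketch of just the field construction (the field size and grid conventions here are assumptions, and the actual helper delegates the warping to `warp_single_latent`):

```python
import torch


def make_translation_field(strength_x, strength_y, frame_ids, height, width, device, dtype):
    # One (2, H, W) flow per frame: channel 0 is the x-shift, channel 1 the y-shift.
    flow = torch.zeros(len(frame_ids), 2, height, width, device=device, dtype=dtype)
    for i, frame_id in enumerate(frame_ids):
        flow[i, 0] = strength_x * frame_id  # horizontal shift grows with time
        flow[i, 1] = strength_y * frame_id  # vertical shift grows with time
    return flow
```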
@@ -832,7 +832,7 @@ class TextToVideoZeroSDXLPipeline(
return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds

-# Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoZeroPipeline.forward_loop
+# Copied from diffusers.pipelines.deprecated.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoZeroPipeline.forward_loop
def forward_loop(self, x_t0, t0, t1, generator):
"""
Perform DDPM forward process from time t0 to t1. This is the same as adding noise with corresponding variance.

@@ -114,7 +114,7 @@ class UnCLIPImageVariationPipeline(DiffusionPipeline):
super_res_scheduler=super_res_scheduler,
)

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
if latents is None:
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)

@@ -107,7 +107,7 @@ class WuerstchenDecoderPipeline(DiffusionPipeline):
)
self.register_to_config(latent_dim_scale=latent_dim_scale)

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
if latents is None:
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)

@@ -126,7 +126,7 @@ class WuerstchenPriorPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin)
latent_mean=latent_mean, latent_std=latent_std, resolution_multiple=resolution_multiple
)

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
if latents is None:
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
@@ -123,7 +123,7 @@ class KandinskyPipeline(DiffusionPipeline):
)
self.movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1)

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
if latents is None:
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)

@@ -288,7 +288,7 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
self.movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1)
self._warn_has_been_called = False

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
if latents is None:
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)

@@ -285,7 +285,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
return KandinskyPriorPipelineOutput(image_embeds=image_emb, negative_image_embeds=zero_image_emb)

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
if latents is None:
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)

@@ -103,7 +103,7 @@ class KandinskyV22Pipeline(DiffusionPipeline):
)
self.movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1)

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
if latents is None:
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)

@@ -145,7 +145,7 @@ class KandinskyV22ControlnetPipeline(DiffusionPipeline):
)
self.movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1)

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
if latents is None:
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)

@@ -275,7 +275,7 @@ class KandinskyV22InpaintPipeline(DiffusionPipeline):
self.movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1)
self._warn_has_been_called = False

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
if latents is None:
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)

@@ -244,7 +244,7 @@ class KandinskyV22PriorPipeline(DiffusionPipeline):
return KandinskyPriorPipelineOutput(image_embeds=image_emb, negative_image_embeds=zero_image_emb)

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
if latents is None:
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
@@ -452,7 +452,7 @@ class AnimateDiffPAGPipeline(
extra_step_kwargs["generator"] = generator
return extra_step_kwargs

-# Copied from diffusers.pipelines.pia.pipeline_pia.PIAPipeline.check_inputs
+# Copied from diffusers.pipelines.deprecated.pia.pipeline_pia.PIAPipeline.check_inputs
def check_inputs(
self,
prompt,

@@ -166,7 +166,7 @@ class StableUnCLIPPipeline(
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline._encode_prompt with _encode_prompt->_encode_prior_prompt, tokenizer->prior_tokenizer, text_encoder->prior_text_encoder
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline._encode_prompt with _encode_prompt->_encode_prior_prompt, tokenizer->prior_tokenizer, text_encoder->prior_text_encoder
def _encode_prior_prompt(
self,
prompt,

@@ -584,7 +584,7 @@ class StableUnCLIPPipeline(
f"`noise_level` must be between 0 and {self.image_noising_scheduler.config.num_train_timesteps - 1}, inclusive."
)

-# Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
+# Copied from diffusers.pipelines.deprecated.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
if latents is None:
latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
@@ -21,7 +21,7 @@ import torch
from transformers import CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer

from diffusers import PriorTransformer, UnCLIPPipeline, UnCLIPScheduler, UNet2DConditionModel, UNet2DModel
-from diffusers.pipelines.unclip.text_proj import UnCLIPTextProjModel
+from diffusers.pipelines.deprecated.unclip.text_proj import UnCLIPTextProjModel
from diffusers.utils.testing_utils import (
enable_full_determinism,
load_numpy,

@@ -35,7 +35,7 @@ from diffusers import (
UNet2DConditionModel,
UNet2DModel,
)
-from diffusers.pipelines.unclip.text_proj import UnCLIPTextProjModel
+from diffusers.pipelines.deprecated.unclip.text_proj import UnCLIPTextProjModel
from diffusers.utils.testing_utils import (
enable_full_determinism,
floats_tensor,