diff --git a/src/diffusers/models/transformers/transformer_longcat_image.py b/src/diffusers/models/transformers/transformer_longcat_image.py index 2696f5e787..74685607a8 100644 --- a/src/diffusers/models/transformers/transformer_longcat_image.py +++ b/src/diffusers/models/transformers/transformer_longcat_image.py @@ -406,6 +406,7 @@ class LongCatImageTransformer2DModel( """ _supports_gradient_checkpointing = True + _repeated_blocks = ["LongCatImageTransformerBlock", "LongCatImageSingleTransformerBlock"] @register_to_config def __init__( diff --git a/src/diffusers/schedulers/scheduling_consistency_decoder.py b/src/diffusers/schedulers/scheduling_consistency_decoder.py index f4bd0cc2d7..23c0e138c4 100644 --- a/src/diffusers/schedulers/scheduling_consistency_decoder.py +++ b/src/diffusers/schedulers/scheduling_consistency_decoder.py @@ -14,7 +14,7 @@ from .scheduling_utils import SchedulerMixin def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -28,8 +28,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. 
Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_ddim.py b/src/diffusers/schedulers/scheduling_ddim.py index 74ade1d8bb..92c3e20013 100644 --- a/src/diffusers/schedulers/scheduling_ddim.py +++ b/src/diffusers/schedulers/scheduling_ddim.py @@ -51,7 +51,7 @@ class DDIMSchedulerOutput(BaseOutput): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -65,8 +65,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_ddim_cogvideox.py b/src/diffusers/schedulers/scheduling_ddim_cogvideox.py index 92f7a5ab3a..1a77a65278 100644 --- a/src/diffusers/schedulers/scheduling_ddim_cogvideox.py +++ b/src/diffusers/schedulers/scheduling_ddim_cogvideox.py @@ -51,7 +51,7 @@ class DDIMSchedulerOutput(BaseOutput): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -65,8 +65,8 @@ def betas_for_alpha_bar( The number of betas to produce. 
max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. Returns: `torch.Tensor`: @@ -100,14 +100,13 @@ def betas_for_alpha_bar( return torch.tensor(betas, dtype=torch.float32) -def rescale_zero_terminal_snr(alphas_cumprod): +def rescale_zero_terminal_snr(alphas_cumprod: torch.Tensor) -> torch.Tensor: """ - Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1) - + Rescales betas to have zero terminal SNR Based on [Algorithm 1](https://huggingface.co/papers/2305.08891) Args: - betas (`torch.Tensor`): - the betas that the scheduler is being initialized with. + alphas_cumprod (`torch.Tensor`): + The alphas cumulative products that the scheduler is being initialized with. Returns: `torch.Tensor`: rescaled betas with zero terminal SNR @@ -142,11 +141,11 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin): Args: num_train_timesteps (`int`, defaults to 1000): The number of diffusion steps to train the model. - beta_start (`float`, defaults to 0.0001): + beta_start (`float`, defaults to 0.00085): The starting `beta` value of inference. - beta_end (`float`, defaults to 0.02): + beta_end (`float`, defaults to 0.0120): The final `beta` value. - beta_schedule (`str`, defaults to `"linear"`): + beta_schedule (`str`, defaults to `"scaled_linear"`): The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from `linear`, `scaled_linear`, or `squaredcos_cap_v2`. 
trained_betas (`np.ndarray`, *optional*): @@ -179,6 +178,8 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin): Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and dark samples instead of limiting it to samples with medium brightness. Loosely related to [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506). + snr_shift_scale (`float`, defaults to 3.0): + Shift scale for SNR. """ _compatibles = [e.name for e in KarrasDiffusionSchedulers] @@ -190,15 +191,15 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin): num_train_timesteps: int = 1000, beta_start: float = 0.00085, beta_end: float = 0.0120, - beta_schedule: str = "scaled_linear", + beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2"] = "scaled_linear", trained_betas: Optional[Union[np.ndarray, List[float]]] = None, clip_sample: bool = True, set_alpha_to_one: bool = True, steps_offset: int = 0, - prediction_type: str = "epsilon", + prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon", clip_sample_range: float = 1.0, sample_max_value: float = 1.0, - timestep_spacing: str = "leading", + timestep_spacing: Literal["linspace", "leading", "trailing"] = "leading", rescale_betas_zero_snr: bool = False, snr_shift_scale: float = 3.0, ): @@ -208,7 +209,15 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin): self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) elif beta_schedule == "scaled_linear": # this schedule is very specific to the latent diffusion model. 
- self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float64) ** 2 + self.betas = ( + torch.linspace( + beta_start**0.5, + beta_end**0.5, + num_train_timesteps, + dtype=torch.float64, + ) + ** 2 + ) elif beta_schedule == "squaredcos_cap_v2": # Glide cosine schedule self.betas = betas_for_alpha_bar(num_train_timesteps) @@ -238,7 +247,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin): self.num_inference_steps = None self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)) - def _get_variance(self, timestep, prev_timestep): + def _get_variance(self, timestep: int, prev_timestep: int) -> torch.Tensor: alpha_prod_t = self.alphas_cumprod[timestep] alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod beta_prod_t = 1 - alpha_prod_t @@ -265,7 +274,11 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin): """ return sample - def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None): + def set_timesteps( + self, + num_inference_steps: int, + device: Optional[Union[str, torch.device]] = None, + ) -> None: """ Sets the discrete timesteps used for the diffusion chain (to be run before inference). @@ -317,7 +330,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin): sample: torch.Tensor, eta: float = 0.0, use_clipped_model_output: bool = False, - generator=None, + generator: Optional[torch.Generator] = None, variance_noise: Optional[torch.Tensor] = None, return_dict: bool = True, ) -> Union[DDIMSchedulerOutput, Tuple]: @@ -328,7 +341,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin): Args: model_output (`torch.Tensor`): The direct output from learned diffusion model. - timestep (`float`): + timestep (`int`): The current discrete timestep in the diffusion chain. sample (`torch.Tensor`): A current instance of a sample created by the diffusion process. 
@@ -487,5 +500,5 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin): velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample return velocity - def __len__(self): + def __len__(self) -> int: return self.config.num_train_timesteps diff --git a/src/diffusers/schedulers/scheduling_ddim_flax.py b/src/diffusers/schedulers/scheduling_ddim_flax.py index 802d8f7977..476f741bcd 100644 --- a/src/diffusers/schedulers/scheduling_ddim_flax.py +++ b/src/diffusers/schedulers/scheduling_ddim_flax.py @@ -22,6 +22,7 @@ import flax import jax.numpy as jnp from ..configuration_utils import ConfigMixin, register_to_config +from ..utils import logging from .scheduling_utils_flax import ( CommonSchedulerState, FlaxKarrasDiffusionSchedulers, @@ -32,6 +33,9 @@ from .scheduling_utils_flax import ( ) +logger = logging.get_logger(__name__) + + @flax.struct.dataclass class DDIMSchedulerState: common: CommonSchedulerState @@ -125,6 +129,10 @@ class FlaxDDIMScheduler(FlaxSchedulerMixin, ConfigMixin): prediction_type: str = "epsilon", dtype: jnp.dtype = jnp.float32, ): + logger.warning( + "Flax classes are deprecated and will be removed in Diffusers v1.0.0. We " + "recommend migrating to PyTorch classes or pinning your version of Diffusers." 
+ ) self.dtype = dtype def create_state(self, common: Optional[CommonSchedulerState] = None) -> DDIMSchedulerState: @@ -152,7 +160,10 @@ class FlaxDDIMScheduler(FlaxSchedulerMixin, ConfigMixin): ) def scale_model_input( - self, state: DDIMSchedulerState, sample: jnp.ndarray, timestep: Optional[int] = None + self, + state: DDIMSchedulerState, + sample: jnp.ndarray, + timestep: Optional[int] = None, ) -> jnp.ndarray: """ Args: @@ -190,7 +201,9 @@ class FlaxDDIMScheduler(FlaxSchedulerMixin, ConfigMixin): def _get_variance(self, state: DDIMSchedulerState, timestep, prev_timestep): alpha_prod_t = state.common.alphas_cumprod[timestep] alpha_prod_t_prev = jnp.where( - prev_timestep >= 0, state.common.alphas_cumprod[prev_timestep], state.final_alpha_cumprod + prev_timestep >= 0, + state.common.alphas_cumprod[prev_timestep], + state.final_alpha_cumprod, ) beta_prod_t = 1 - alpha_prod_t beta_prod_t_prev = 1 - alpha_prod_t_prev diff --git a/src/diffusers/schedulers/scheduling_ddim_inverse.py b/src/diffusers/schedulers/scheduling_ddim_inverse.py index e76ad9aa6c..a3c9ed1f62 100644 --- a/src/diffusers/schedulers/scheduling_ddim_inverse.py +++ b/src/diffusers/schedulers/scheduling_ddim_inverse.py @@ -49,7 +49,7 @@ class DDIMSchedulerOutput(BaseOutput): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -63,8 +63,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. 
+ alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. Returns: `torch.Tensor`: @@ -99,7 +99,7 @@ def betas_for_alpha_bar( # Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr -def rescale_zero_terminal_snr(betas): +def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor: """ Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1) @@ -187,14 +187,14 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin): num_train_timesteps: int = 1000, beta_start: float = 0.0001, beta_end: float = 0.02, - beta_schedule: str = "linear", + beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2"] = "linear", trained_betas: Optional[Union[np.ndarray, List[float]]] = None, clip_sample: bool = True, set_alpha_to_one: bool = True, steps_offset: int = 0, - prediction_type: str = "epsilon", + prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon", clip_sample_range: float = 1.0, - timestep_spacing: str = "leading", + timestep_spacing: Literal["leading", "trailing"] = "leading", rescale_betas_zero_snr: bool = False, **kwargs, ): @@ -210,7 +210,15 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin): self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) elif beta_schedule == "scaled_linear": # this schedule is very specific to the latent diffusion model. 
- self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + self.betas = ( + torch.linspace( + beta_start**0.5, + beta_end**0.5, + num_train_timesteps, + dtype=torch.float32, + ) + ** 2 + ) elif beta_schedule == "squaredcos_cap_v2": # Glide cosine schedule self.betas = betas_for_alpha_bar(num_train_timesteps) @@ -256,7 +264,11 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin): """ return sample - def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None): + def set_timesteps( + self, + num_inference_steps: int, + device: Optional[Union[str, torch.device]] = None, + ) -> None: """ Sets the discrete timesteps used for the diffusion chain (to be run before inference). @@ -308,20 +320,10 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin): Args: model_output (`torch.Tensor`): The direct output from learned diffusion model. - timestep (`float`): + timestep (`int`): The current discrete timestep in the diffusion chain. sample (`torch.Tensor`): A current instance of a sample created by the diffusion process. - eta (`float`): - The weight of noise for added noise in diffusion step. - use_clipped_model_output (`bool`, defaults to `False`): - If `True`, computes "corrected" `model_output` from the clipped predicted original sample. Necessary - because predicted original sample is clipped to [-1, 1] when `self.config.clip_sample` is `True`. If no - clipping has happened, "corrected" `model_output` would coincide with the one provided as input and - `use_clipped_model_output` has no effect. - variance_noise (`torch.Tensor`): - Alternative to generating noise with `generator` by directly providing the noise for the variance - itself. Useful for methods such as [`CycleDiffusion`]. return_dict (`bool`, *optional*, defaults to `True`): Whether or not to return a [`~schedulers.scheduling_ddim_inverse.DDIMInverseSchedulerOutput`] or `tuple`. 
@@ -335,7 +337,8 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin): # 1. get previous step value (=t+1) prev_timestep = timestep timestep = min( - timestep - self.config.num_train_timesteps // self.num_inference_steps, self.config.num_train_timesteps - 1 + timestep - self.config.num_train_timesteps // self.num_inference_steps, + self.config.num_train_timesteps - 1, ) # 2. compute alphas, betas @@ -378,5 +381,5 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin): return (prev_sample, pred_original_sample) return DDIMSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample) - def __len__(self): + def __len__(self) -> int: return self.config.num_train_timesteps diff --git a/src/diffusers/schedulers/scheduling_ddim_parallel.py b/src/diffusers/schedulers/scheduling_ddim_parallel.py index 09f55ee4c2..d5660471b9 100644 --- a/src/diffusers/schedulers/scheduling_ddim_parallel.py +++ b/src/diffusers/schedulers/scheduling_ddim_parallel.py @@ -51,7 +51,7 @@ class DDIMParallelSchedulerOutput(BaseOutput): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -65,8 +65,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. 
Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_ddpm.py b/src/diffusers/schedulers/scheduling_ddpm.py index d0596bb918..e871e7afd4 100644 --- a/src/diffusers/schedulers/scheduling_ddpm.py +++ b/src/diffusers/schedulers/scheduling_ddpm.py @@ -48,7 +48,7 @@ class DDPMSchedulerOutput(BaseOutput): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -62,8 +62,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. 
Returns: `torch.Tensor`: @@ -192,7 +192,12 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2", "sigmoid"] = "linear", trained_betas: Optional[Union[np.ndarray, List[float]]] = None, variance_type: Literal[ - "fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range" + "fixed_small", + "fixed_small_log", + "fixed_large", + "fixed_large_log", + "learned", + "learned_range", ] = "fixed_small", clip_sample: bool = True, prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon", @@ -210,7 +215,15 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) elif beta_schedule == "scaled_linear": # this schedule is very specific to the latent diffusion model. - self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + self.betas = ( + torch.linspace( + beta_start**0.5, + beta_end**0.5, + num_train_timesteps, + dtype=torch.float32, + ) + ** 2 + ) elif beta_schedule == "squaredcos_cap_v2": # Glide cosine schedule self.betas = betas_for_alpha_bar(num_train_timesteps) @@ -337,7 +350,14 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): t: int, predicted_variance: Optional[torch.Tensor] = None, variance_type: Optional[ - Literal["fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"] + Literal[ + "fixed_small", + "fixed_small_log", + "fixed_large", + "fixed_large_log", + "learned", + "learned_range", + ] ] = None, ) -> torch.Tensor: """ @@ -472,7 +492,10 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): prev_t = self.previous_timestep(t) - if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in ["learned", "learned_range"]: + if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in [ + "learned", + "learned_range", + ]: model_output, 
predicted_variance = torch.split(model_output, sample.shape[1], dim=1) else: predicted_variance = None @@ -521,7 +544,10 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): if t > 0: device = model_output.device variance_noise = randn_tensor( - model_output.shape, generator=generator, device=device, dtype=model_output.dtype + model_output.shape, + generator=generator, + device=device, + dtype=model_output.dtype, ) if self.variance_type == "fixed_small_log": variance = self._get_variance(t, predicted_variance=predicted_variance) * variance_noise diff --git a/src/diffusers/schedulers/scheduling_ddpm_parallel.py b/src/diffusers/schedulers/scheduling_ddpm_parallel.py index ee7ab66be4..dd3d05ad9b 100644 --- a/src/diffusers/schedulers/scheduling_ddpm_parallel.py +++ b/src/diffusers/schedulers/scheduling_ddpm_parallel.py @@ -50,7 +50,7 @@ class DDPMParallelSchedulerOutput(BaseOutput): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -64,8 +64,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. 
Returns: `torch.Tensor`: @@ -202,7 +202,12 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin): beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2", "sigmoid"] = "linear", trained_betas: Optional[Union[np.ndarray, List[float]]] = None, variance_type: Literal[ - "fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range" + "fixed_small", + "fixed_small_log", + "fixed_large", + "fixed_large_log", + "learned", + "learned_range", ] = "fixed_small", clip_sample: bool = True, prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon", @@ -220,7 +225,15 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin): self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) elif beta_schedule == "scaled_linear": # this schedule is very specific to the latent diffusion model. - self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + self.betas = ( + torch.linspace( + beta_start**0.5, + beta_end**0.5, + num_train_timesteps, + dtype=torch.float32, + ) + ** 2 + ) elif beta_schedule == "squaredcos_cap_v2": # Glide cosine schedule self.betas = betas_for_alpha_bar(num_train_timesteps) @@ -350,7 +363,14 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin): t: int, predicted_variance: Optional[torch.Tensor] = None, variance_type: Optional[ - Literal["fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"] + Literal[ + "fixed_small", + "fixed_small_log", + "fixed_large", + "fixed_large_log", + "learned", + "learned_range", + ] ] = None, ) -> torch.Tensor: """ diff --git a/src/diffusers/schedulers/scheduling_deis_multistep.py b/src/diffusers/schedulers/scheduling_deis_multistep.py index ebc3a33b27..7c2dfd8e50 100644 --- a/src/diffusers/schedulers/scheduling_deis_multistep.py +++ b/src/diffusers/schedulers/scheduling_deis_multistep.py @@ -34,7 +34,7 @@ if is_scipy_available(): def 
betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -48,8 +48,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_dpm_cogvideox.py b/src/diffusers/schedulers/scheduling_dpm_cogvideox.py index 66fb39c0bc..3e50ebbfe0 100644 --- a/src/diffusers/schedulers/scheduling_dpm_cogvideox.py +++ b/src/diffusers/schedulers/scheduling_dpm_cogvideox.py @@ -52,7 +52,7 @@ class DDIMSchedulerOutput(BaseOutput): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -66,8 +66,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. 
Choose from `cosine`, `exp`, or `laplace`. Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py b/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py index 990129f584..07cb64f32b 100644 --- a/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py +++ b/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py @@ -34,7 +34,7 @@ if is_scipy_available(): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -48,8 +48,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. 
Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py b/src/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py index a9c4fe57b6..2da90d287c 100644 --- a/src/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +++ b/src/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py @@ -34,7 +34,7 @@ if is_scipy_available(): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -48,8 +48,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. 
Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_sde.py b/src/diffusers/schedulers/scheduling_dpmsolver_sde.py index 5f9ce1393d..6f905a623d 100644 --- a/src/diffusers/schedulers/scheduling_dpmsolver_sde.py +++ b/src/diffusers/schedulers/scheduling_dpmsolver_sde.py @@ -117,7 +117,7 @@ class BrownianTreeNoiseSampler: def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -131,8 +131,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. 
Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py b/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py index e92f880e5b..e9bf815aba 100644 --- a/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py +++ b/src/diffusers/schedulers/scheduling_dpmsolver_singlestep.py @@ -36,7 +36,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -50,8 +50,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. 
Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py index 0258ea7777..11fec60c9c 100644 --- a/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_ancestral_discrete.py @@ -51,7 +51,7 @@ class EulerAncestralDiscreteSchedulerOutput(BaseOutput): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -65,8 +65,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. 
Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_euler_discrete.py b/src/diffusers/schedulers/scheduling_euler_discrete.py index 4238c976e4..8b141325fb 100644 --- a/src/diffusers/schedulers/scheduling_euler_discrete.py +++ b/src/diffusers/schedulers/scheduling_euler_discrete.py @@ -54,7 +54,7 @@ class EulerDiscreteSchedulerOutput(BaseOutput): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -68,8 +68,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_heun_discrete.py b/src/diffusers/schedulers/scheduling_heun_discrete.py index 011f97ba5c..0c5e28ad06 100644 --- a/src/diffusers/schedulers/scheduling_heun_discrete.py +++ b/src/diffusers/schedulers/scheduling_heun_discrete.py @@ -51,7 +51,7 @@ class HeunDiscreteSchedulerOutput(BaseOutput): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -65,8 +65,8 @@ def betas_for_alpha_bar( The number of betas to produce. 
max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py b/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py index 37849e28b2..ee49ae67b9 100644 --- a/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +++ b/src/diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py @@ -52,7 +52,7 @@ class KDPM2AncestralDiscreteSchedulerOutput(BaseOutput): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -66,8 +66,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. 
Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_k_dpm_2_discrete.py b/src/diffusers/schedulers/scheduling_k_dpm_2_discrete.py index 1c2791837c..6effb3699b 100644 --- a/src/diffusers/schedulers/scheduling_k_dpm_2_discrete.py +++ b/src/diffusers/schedulers/scheduling_k_dpm_2_discrete.py @@ -51,7 +51,7 @@ class KDPM2DiscreteSchedulerOutput(BaseOutput): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -65,8 +65,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_lcm.py b/src/diffusers/schedulers/scheduling_lcm.py index 66dedd5a6e..e32d6d0e74 100644 --- a/src/diffusers/schedulers/scheduling_lcm.py +++ b/src/diffusers/schedulers/scheduling_lcm.py @@ -53,7 +53,7 @@ class LCMSchedulerOutput(BaseOutput): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -67,8 +67,8 @@ def betas_for_alpha_bar( The number of betas to produce. 
max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_lms_discrete.py b/src/diffusers/schedulers/scheduling_lms_discrete.py index 9fc9b1e64b..a1f9d27fd9 100644 --- a/src/diffusers/schedulers/scheduling_lms_discrete.py +++ b/src/diffusers/schedulers/scheduling_lms_discrete.py @@ -49,7 +49,7 @@ class LMSDiscreteSchedulerOutput(BaseOutput): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -63,8 +63,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. 
Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_pndm.py b/src/diffusers/schedulers/scheduling_pndm.py index e95a374457..0820f5baa8 100644 --- a/src/diffusers/schedulers/scheduling_pndm.py +++ b/src/diffusers/schedulers/scheduling_pndm.py @@ -28,7 +28,7 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, Schedul def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -42,8 +42,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_repaint.py b/src/diffusers/schedulers/scheduling_repaint.py index fcebe7e21c..bec4a1bdf6 100644 --- a/src/diffusers/schedulers/scheduling_repaint.py +++ b/src/diffusers/schedulers/scheduling_repaint.py @@ -47,7 +47,7 @@ class RePaintSchedulerOutput(BaseOutput): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -61,8 +61,8 @@ def betas_for_alpha_bar( The number of betas to produce. 
max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_sasolver.py b/src/diffusers/schedulers/scheduling_sasolver.py index 7c679a255c..565fae1c0d 100644 --- a/src/diffusers/schedulers/scheduling_sasolver.py +++ b/src/diffusers/schedulers/scheduling_sasolver.py @@ -35,7 +35,7 @@ if is_scipy_available(): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -49,8 +49,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. 
Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_tcd.py b/src/diffusers/schedulers/scheduling_tcd.py index 7a385f6291..71079a88b6 100644 --- a/src/diffusers/schedulers/scheduling_tcd.py +++ b/src/diffusers/schedulers/scheduling_tcd.py @@ -52,7 +52,7 @@ class TCDSchedulerOutput(BaseOutput): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -66,8 +66,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_unclip.py b/src/diffusers/schedulers/scheduling_unclip.py index bdc4feb0b1..14b09277da 100644 --- a/src/diffusers/schedulers/scheduling_unclip.py +++ b/src/diffusers/schedulers/scheduling_unclip.py @@ -48,7 +48,7 @@ class UnCLIPSchedulerOutput(BaseOutput): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -62,8 +62,8 @@ def betas_for_alpha_bar( The number of betas to produce. 
max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. Returns: `torch.Tensor`: diff --git a/src/diffusers/schedulers/scheduling_unipc_multistep.py b/src/diffusers/schedulers/scheduling_unipc_multistep.py index 0536e8d1ed..d8e24d1964 100644 --- a/src/diffusers/schedulers/scheduling_unipc_multistep.py +++ b/src/diffusers/schedulers/scheduling_unipc_multistep.py @@ -34,7 +34,7 @@ if is_scipy_available(): def betas_for_alpha_bar( num_diffusion_timesteps: int, max_beta: float = 0.999, - alpha_transform_type: Literal["cosine", "exp"] = "cosine", + alpha_transform_type: Literal["cosine", "exp", "laplace"] = "cosine", ) -> torch.Tensor: """ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of @@ -48,8 +48,8 @@ def betas_for_alpha_bar( The number of betas to produce. max_beta (`float`, defaults to `0.999`): The maximum beta to use; use values lower than 1 to avoid numerical instability. - alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`): - The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`. + alpha_transform_type (`str`, defaults to `"cosine"`): + The type of noise schedule for `alpha_bar`. Choose from `cosine`, `exp`, or `laplace`. 
Returns: `torch.Tensor`: @@ -226,6 +226,7 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin): time_shift_type: Literal["exponential"] = "exponential", sigma_min: Optional[float] = None, sigma_max: Optional[float] = None, + shift_terminal: Optional[float] = None, ) -> None: if self.config.use_beta_sigmas and not is_scipy_available(): raise ImportError("Make sure to install scipy if you want to use beta sigmas.") @@ -245,6 +246,8 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin): self.betas = betas_for_alpha_bar(num_train_timesteps) else: raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + if shift_terminal is not None and not use_flow_sigmas: + raise ValueError("`shift_terminal` is only supported when `use_flow_sigmas=True`.") if rescale_betas_zero_snr: self.betas = rescale_zero_terminal_snr(self.betas) @@ -313,8 +316,12 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin): self._begin_index = begin_index def set_timesteps( - self, num_inference_steps: int, device: Optional[Union[str, torch.device]] = None, mu: Optional[float] = None - ) -> None: + self, + num_inference_steps: Optional[int] = None, + device: Union[str, torch.device] = None, + sigmas: Optional[List[float]] = None, + mu: Optional[float] = None, + ): """ Sets the discrete timesteps used for the diffusion chain (to be run before inference). @@ -323,13 +330,24 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin): The number of diffusion steps used when generating samples with a pre-trained model. device (`str` or `torch.device`, *optional*): The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + sigmas (`List[float]`, *optional*): + Custom values for sigmas to be used for each diffusion step. If `None`, the sigmas are computed + automatically. mu (`float`, *optional*): Optional mu parameter for dynamic shifting when using exponential time shift type. 
""" + if self.config.use_dynamic_shifting and mu is None: + raise ValueError("`mu` must be passed when `use_dynamic_shifting` is set to be `True`") + + if sigmas is not None: + if not self.config.use_flow_sigmas: + raise ValueError( + "Passing `sigmas` is only supported when `use_flow_sigmas=True`. " + "Please set `use_flow_sigmas=True` during scheduler initialization." + ) + num_inference_steps = len(sigmas) + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891 - if mu is not None: - assert self.config.use_dynamic_shifting and self.config.time_shift_type == "exponential" - self.config.flow_shift = np.exp(mu) if self.config.timestep_spacing == "linspace": timesteps = ( np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps + 1) @@ -354,8 +372,9 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin): f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." 
) - sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) if self.config.use_karras_sigmas: + if sigmas is None: + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) log_sigmas = np.log(sigmas) sigmas = np.flip(sigmas).copy() sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps) @@ -375,6 +394,8 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin): ) sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32) elif self.config.use_exponential_sigmas: + if sigmas is None: + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) log_sigmas = np.log(sigmas) sigmas = np.flip(sigmas).copy() sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps) @@ -389,6 +410,8 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin): ) sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32) elif self.config.use_beta_sigmas: + if sigmas is None: + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) log_sigmas = np.log(sigmas) sigmas = np.flip(sigmas).copy() sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps) @@ -403,9 +426,18 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin): ) sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32) elif self.config.use_flow_sigmas: - alphas = np.linspace(1, 1 / self.config.num_train_timesteps, num_inference_steps + 1) - sigmas = 1.0 - alphas - sigmas = np.flip(self.config.flow_shift * sigmas / (1 + (self.config.flow_shift - 1) * sigmas))[:-1].copy() + if sigmas is None: + sigmas = np.linspace(1, 1 / self.config.num_train_timesteps, num_inference_steps + 1)[:-1] + if self.config.use_dynamic_shifting: + sigmas = self.time_shift(mu, 1.0, sigmas) + else: + sigmas = self.config.flow_shift * sigmas / (1 + (self.config.flow_shift - 1) * sigmas) + if self.config.shift_terminal: + sigmas = 
self.stretch_shift_to_terminal(sigmas) + eps = 1e-6 + if np.fabs(sigmas[0] - 1) < eps: + # to avoid inf torch.log(alpha_si) in multistep_uni_p_bh_update during first/second update + sigmas[0] -= eps timesteps = (sigmas * self.config.num_train_timesteps).copy() if self.config.final_sigmas_type == "sigma_min": sigma_last = sigmas[-1] @@ -417,6 +449,8 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin): ) sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32) else: + if sigmas is None: + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) if self.config.final_sigmas_type == "sigma_min": sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5 @@ -446,6 +480,43 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin): self._begin_index = None self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication + # Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler.time_shift + def time_shift(self, mu: float, sigma: float, t: torch.Tensor): + if self.config.time_shift_type == "exponential": + return self._time_shift_exponential(mu, sigma, t) + elif self.config.time_shift_type == "linear": + return self._time_shift_linear(mu, sigma, t) + + # Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler.stretch_shift_to_terminal + def stretch_shift_to_terminal(self, t: torch.Tensor) -> torch.Tensor: + r""" + Stretches and shifts the timestep schedule to ensure it terminates at the configured `shift_terminal` config + value. + + Reference: + https://github.com/Lightricks/LTX-Video/blob/a01a171f8fe3d99dce2728d60a73fecf4d4238ae/ltx_video/schedulers/rf.py#L51 + + Args: + t (`torch.Tensor`): + A tensor of timesteps to be stretched and shifted. 
+ + Returns: + `torch.Tensor`: + A tensor of adjusted timesteps such that the final value equals `self.config.shift_terminal`. + """ + one_minus_z = 1 - t + scale_factor = one_minus_z[-1] / (1 - self.config.shift_terminal) + stretched_t = 1 - (one_minus_z / scale_factor) + return stretched_t + + # Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler._time_shift_exponential + def _time_shift_exponential(self, mu, sigma, t): + return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma) + + # Copied from diffusers.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler._time_shift_linear + def _time_shift_linear(self, mu, sigma, t): + return mu / (mu + (1 / t - 1) ** sigma) + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: """