mirror of https://github.com/huggingface/diffusers.git synced 2026-01-29 07:22:12 +03:00

Add support for sample clipping for numerical stability and deprecate old kwarg

Clarence Chen
2023-03-10 15:26:11 -08:00
parent 5228f55a14
commit e5f5b4af30
2 changed files with 25 additions and 11 deletions


@@ -1156,8 +1156,8 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline):
         # 7. Denoising loop where we obtain the cross-attention maps.
         num_warmup_steps = len(timesteps) - num_inference_steps * self.inverse_scheduler.order
-        with self.progress_bar(total=num_inference_steps) as progress_bar:
-            for i, t in enumerate(timesteps):
+        with self.progress_bar(total=num_inference_steps - 1) as progress_bar:
+            for i, t in enumerate(timesteps[:-1]):
                 # expand the latents if we are doing classifier free guidance
                 latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                 latent_model_input = self.inverse_scheduler.scale_model_input(latent_model_input, t)
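The inversion loop now runs over all but the last timestep, so the progress bar total drops by one; presumably the final inverted step is skipped because, with the scheduler change below, it would only return the predicted noise. A minimal sketch of the new loop shape, assuming a diffusers version that includes this commit (the loop body, which expands the latents, runs the UNet, and calls the inverse scheduler, is elided):

    from diffusers import DDIMInverseScheduler

    inverse_scheduler = DDIMInverseScheduler(num_train_timesteps=1000)
    inverse_scheduler.set_timesteps(num_inference_steps=50)
    timesteps = inverse_scheduler.timesteps

    # one fewer iteration than num_inference_steps, matching progress_bar(total=num_inference_steps - 1)
    for i, t in enumerate(timesteps[:-1]):
        pass  # expand latents, predict noise, and call inverse_scheduler.step(...) here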


@@ -96,15 +96,17 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
         trained_betas (`np.ndarray`, optional):
             option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc.
         clip_sample (`bool`, default `True`):
-            option to clip predicted sample between -1 and 1 for numerical stability.
-        set_alpha_to_one (`bool`, default `True`):
+            option to clip predicted sample for numerical stability.
+        clip_sample_range (`float`, default `1.0`):
+            the maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
+        set_alpha_to_zero (`bool`, default `True`):
             each diffusion step uses the value of alphas product at that step and at the previous one. For the final
-            step there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`,
-            otherwise it uses the value of alpha at step 0.
+            step there is no previous alpha. When this option is `True` the previous alpha product is fixed to `0`,
+            otherwise it uses the value of alpha at step `num_train_timesteps - 1`.
         steps_offset (`int`, default `0`):
             an offset added to the inference steps. You can use a combination of `offset=1` and
-            `set_alpha_to_one=False`, to make the last step use step 0 for the previous alpha product, as done in
-            stable diffusion.
+            `set_alpha_to_zero=False`, to make the last step use step `num_train_timesteps - 1`
+            for the previous alpha product.
         prediction_type (`str`, default `epsilon`, optional):
             prediction type of the scheduler function, one of `epsilon` (predicting the noise of the diffusion
             process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
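As a usage illustration of the options documented above, a minimal construction sketch (the values are arbitrary examples, and the exact signature depends on the installed diffusers version):

    from diffusers import DDIMInverseScheduler

    # clip_sample bounds the predicted x_0 to [-clip_sample_range, clip_sample_range]
    inverse_scheduler = DDIMInverseScheduler(
        num_train_timesteps=1000,
        beta_schedule="scaled_linear",
        clip_sample=True,
        clip_sample_range=1.0,
        set_alpha_to_zero=True,
    )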
@@ -125,7 +127,13 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
         set_alpha_to_zero: bool = True,
         steps_offset: int = 0,
         prediction_type: str = "epsilon",
+        clip_sample_range: float = 1.0,
+        **kwargs
     ):
+        if kwargs.get("set_alpha_to_one", None) is not None:
+            deprecation_message = "The `set_alpha_to_one` argument is deprecated. Please use `set_alpha_to_zero` instead."
+            deprecate("set_alpha_to_one", "1.0.0", deprecation_message, standard_warn=False)
+            set_alpha_to_zero = kwargs["set_alpha_to_one"]
         if trained_betas is not None:
             self.betas = torch.tensor(trained_betas, dtype=torch.float32)
         elif beta_schedule == "linear":
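The deprecated kwarg is still accepted through `**kwargs`: passing `set_alpha_to_one` emits a deprecation message via `diffusers.utils.deprecate` and its value is forwarded to `set_alpha_to_zero`. A small sketch of what a caller sees, assuming a diffusers version that includes this commit:

    from diffusers import DDIMInverseScheduler

    # warns that set_alpha_to_one is deprecated, then behaves like set_alpha_to_zero=False
    scheduler = DDIMInverseScheduler(set_alpha_to_one=False)
    print(scheduler.final_alpha_cumprod)  # last entry of alphas_cumprod rather than 0.0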
@@ -147,7 +155,7 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
         # At every step in inverted ddim, we are looking into the next alphas_cumprod
         # For the final step, there is no next alphas_cumprod, and the index is out of bounds
         # `set_alpha_to_zero` decides whether we set this parameter simply to zero
-        # in this case, self.step() just normalizes output by self.config.prediction_type
+        # in this case, self.step() just output the predicted noise
         # or whether we use the final alpha of the "non-previous" one.
         self.final_alpha_cumprod = torch.tensor(0.0) if set_alpha_to_zero else self.alphas_cumprod[-1]
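To see why the comment says the last inverted step just outputs the predicted noise, substitute alpha_prod_t_prev = 0 (the value of final_alpha_cumprod when `set_alpha_to_zero=True`) into the update used in step(). A self-contained sketch with stand-in tensors (the names mirror the scheduler code; the tensors themselves are hypothetical):

    import torch

    pred_original_sample = torch.randn(1, 4, 64, 64)  # stand-in for the predicted x_0
    pred_epsilon = torch.randn(1, 4, 64, 64)          # stand-in for the predicted noise

    alpha_prod_t_prev = 0.0  # final_alpha_cumprod when set_alpha_to_zero=True
    pred_sample_direction = (1 - alpha_prod_t_prev) ** 0.5 * pred_epsilon
    prev_sample = alpha_prod_t_prev ** 0.5 * pred_original_sample + pred_sample_direction
    assert torch.equal(prev_sample, pred_epsilon)  # the step reduces to the predicted noise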
@@ -237,10 +245,16 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
                 " `v_prediction`"
             )
-        # 4. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
+        # 4. Clip or threshold "predicted x_0"
+        if self.config.clip_sample:
+            pred_original_sample = pred_original_sample.clamp(
+                -self.config.clip_sample_range, self.config.clip_sample_range
+            )
+        # 5. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
         pred_sample_direction = (1 - alpha_prod_t_prev) ** (0.5) * pred_epsilon
-        # 5. compute x_t without "random noise" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
+        # 6. compute x_t without "random noise" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
         prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
         if not return_dict:
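The new clipping branch is a plain clamp on the predicted x_0. An isolated sketch with made-up config values and a dummy tensor, not the scheduler's actual step() method:

    import torch

    clip_sample = True       # stands in for self.config.clip_sample
    clip_sample_range = 1.0  # stands in for self.config.clip_sample_range

    pred_original_sample = 3.0 * torch.randn(2, 4, 64, 64)  # hypothetical predicted x_0 with outliers
    if clip_sample:
        pred_original_sample = pred_original_sample.clamp(-clip_sample_range, clip_sample_range)
    assert pred_original_sample.abs().max() <= clip_sample_range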