From b96c6ce1939a2c50ddc5c8a83e92f35635377785 Mon Sep 17 00:00:00 2001 From: patil-suraj Date: Mon, 13 Jun 2022 15:06:28 +0200 Subject: [PATCH 1/4] remove trained_betas from ddim and add in ddpm --- src/diffusers/schedulers/scheduling_ddim.py | 7 +------ src/diffusers/schedulers/scheduling_ddpm.py | 7 ++++++- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/diffusers/schedulers/scheduling_ddim.py b/src/diffusers/schedulers/scheduling_ddim.py index 2c1875e647..4311db0461 100644 --- a/src/diffusers/schedulers/scheduling_ddim.py +++ b/src/diffusers/schedulers/scheduling_ddim.py @@ -26,8 +26,6 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin): beta_start=0.0001, beta_end=0.02, beta_schedule="linear", - trained_betas=None, - timestep_values=None, clip_predicted_image=True, tensor_format="np", ): @@ -39,12 +37,9 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin): beta_schedule=beta_schedule, ) self.timesteps = int(timesteps) - self.timestep_values = timestep_values # save the fixed timestep values for BDDM self.clip_image = clip_predicted_image - if trained_betas is not None: - self.betas = np.asarray(trained_betas) - elif beta_schedule == "linear": + if beta_schedule == "linear": self.betas = linear_beta_schedule(timesteps, beta_start=beta_start, beta_end=beta_end) elif beta_schedule == "squaredcos_cap_v2": # GLIDE cosine schedule diff --git a/src/diffusers/schedulers/scheduling_ddpm.py b/src/diffusers/schedulers/scheduling_ddpm.py index 22c5da63cc..97efe37634 100644 --- a/src/diffusers/schedulers/scheduling_ddpm.py +++ b/src/diffusers/schedulers/scheduling_ddpm.py @@ -26,6 +26,8 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): beta_start=0.0001, beta_end=0.02, beta_schedule="linear", + trained_betas=None, + timestep_values=None, variance_type="fixed_small", clip_predicted_image=True, tensor_format="np", @@ -40,10 +42,13 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): clip_predicted_image=clip_predicted_image, ) self.timesteps = int(timesteps) + self.timestep_values = timestep_values # save the fixed timestep values for BDDM self.clip_image = clip_predicted_image self.variance_type = variance_type - if beta_schedule == "linear": + if trained_betas is not None: + self.betas = np.asarray(trained_betas) + elif beta_schedule == "linear": self.betas = linear_beta_schedule(timesteps, beta_start=beta_start, beta_end=beta_end) elif beta_schedule == "squaredcos_cap_v2": # GLIDE cosine schedule From cdf58a4ec6d00d5ac05de0293c9cc51a45b12218 Mon Sep 17 00:00:00 2001 From: patil-suraj Date: Mon, 13 Jun 2022 15:40:48 +0200 Subject: [PATCH 2/4] fix BDDMPipeline --- src/diffusers/pipelines/pipeline_bddm.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/diffusers/pipelines/pipeline_bddm.py b/src/diffusers/pipelines/pipeline_bddm.py index 9838fc6e52..4c42087b47 100644 --- a/src/diffusers/pipelines/pipeline_bddm.py +++ b/src/diffusers/pipelines/pipeline_bddm.py @@ -269,13 +269,14 @@ class BDDMPipeline(DiffusionPipeline): self.register_modules(diffwave=diffwave, noise_scheduler=noise_scheduler) @torch.no_grad() - def __call__(self, mel_spectrogram, generator): + def __call__(self, mel_spectrogram, generator, torch_device=None): if torch_device is None: torch_device = "cuda" if torch.cuda.is_available() else "cpu" self.diffwave.to(torch_device) - - audio_length = mel_spectrogram.size(-1) * self.config.hop_len + + mel_spectrogram = mel_spectrogram.to(torch_device) + audio_length = mel_spectrogram.size(-1) * 256 audio_size = (1, 1, audio_length) # Sample gaussian noise to begin loop @@ -285,9 +286,8 @@ class BDDMPipeline(DiffusionPipeline): num_prediction_steps = len(self.noise_scheduler) for t in tqdm.tqdm(reversed(range(num_prediction_steps)), total=num_prediction_steps): # 1. predict noise residual - with torch.no_grad(): - t = (torch.tensor(timestep_values[t]) * torch.ones((1, 1))).to(torch_device) - residual = self.diffwave(audio, mel_spectrogram, t) + ts = (torch.tensor(timestep_values[t]) * torch.ones((1, 1))).to(torch_device) + residual = self.diffwave((audio, mel_spectrogram, ts)) # 2. predict previous mean of audio x_t-1 pred_prev_audio = self.noise_scheduler.step(residual, audio, t) From 29d9f02f83e3b22057f82cb1d446d7d44b5f00d7 Mon Sep 17 00:00:00 2001 From: patil-suraj Date: Mon, 13 Jun 2022 15:52:31 +0200 Subject: [PATCH 3/4] BDDMPipeline -> BDDM --- src/diffusers/__init__.py | 2 +- src/diffusers/pipelines/__init__.py | 2 +- src/diffusers/pipelines/pipeline_bddm.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py index edf2f1c5ec..f93cd2943f 100644 --- a/src/diffusers/__init__.py +++ b/src/diffusers/__init__.py @@ -9,6 +9,6 @@ from .models.unet import UNetModel from .models.unet_glide import GLIDESuperResUNetModel, GLIDETextToImageUNetModel from .models.unet_ldm import UNetLDMModel from .pipeline_utils import DiffusionPipeline -from .pipelines import DDIM, DDPM, GLIDE, LatentDiffusion, BDDMPipeline +from .pipelines import DDIM, DDPM, GLIDE, LatentDiffusion, BDDM from .schedulers import DDIMScheduler, DDPMScheduler, SchedulerMixin from .schedulers.classifier_free_guidance import ClassifierFreeGuidanceScheduler diff --git a/src/diffusers/pipelines/__init__.py b/src/diffusers/pipelines/__init__.py index 288a6878f3..ad42aead20 100644 --- a/src/diffusers/pipelines/__init__.py +++ b/src/diffusers/pipelines/__init__.py @@ -2,4 +2,4 @@ from .pipeline_ddim import DDIM from .pipeline_ddpm import DDPM from .pipeline_glide import GLIDE from .pipeline_latent_diffusion import LatentDiffusion -from .pipeline_bddm import BDDMPipeline +from .pipeline_bddm import BDDM diff --git a/src/diffusers/pipelines/pipeline_bddm.py b/src/diffusers/pipelines/pipeline_bddm.py index 4c42087b47..224cd128c2 100644 --- a/src/diffusers/pipelines/pipeline_bddm.py +++ b/src/diffusers/pipelines/pipeline_bddm.py @@ -262,7 +262,7 @@ class DiffWave(ModelMixin, ConfigMixin): return self.final_conv(x) -class BDDMPipeline(DiffusionPipeline): +class BDDM(DiffusionPipeline): def __init__(self, diffwave, noise_scheduler): super().__init__() noise_scheduler = noise_scheduler.set_format("pt") From 61dc11c713c3f10dc2f742f6d4f7475f39c337df Mon Sep 17 00:00:00 2001 From: patil-suraj Date: Mon, 13 Jun 2022 16:39:50 +0200 Subject: [PATCH 4/4] register trained_betas and timestep_values --- src/diffusers/schedulers/scheduling_ddpm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/diffusers/schedulers/scheduling_ddpm.py b/src/diffusers/schedulers/scheduling_ddpm.py index 97efe37634..793f9ad1a1 100644 --- a/src/diffusers/schedulers/scheduling_ddpm.py +++ b/src/diffusers/schedulers/scheduling_ddpm.py @@ -38,6 +38,8 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin): beta_start=beta_start, beta_end=beta_end, beta_schedule=beta_schedule, + trained_betas=trained_betas, + timestep_values=timestep_values, variance_type=variance_type, clip_predicted_image=clip_predicted_image, )