From 80bc0c0ced1566549dec606f5069e909b86e86b0 Mon Sep 17 00:00:00 2001
From: Will Berman
Date: Tue, 11 Apr 2023 09:54:50 -0700
Subject: [PATCH] config fixes (#3060)

---
 examples/community/sd_text2img_k_diffusion.py     |  2 +-
 .../audio_diffusion/pipeline_audio_diffusion.py   |  6 +++---
 .../pipeline_stable_diffusion_k_diffusion.py      |  2 +-
 .../audio_diffusion/test_audio_diffusion.py       | 17 ++++++++++-------
 4 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/examples/community/sd_text2img_k_diffusion.py b/examples/community/sd_text2img_k_diffusion.py
index 246c3d8c19..b7fbc46b67 100755
--- a/examples/community/sd_text2img_k_diffusion.py
+++ b/examples/community/sd_text2img_k_diffusion.py
@@ -105,7 +105,7 @@ class StableDiffusionPipeline(DiffusionPipeline):
         )
 
         model = ModelWrapper(unet, scheduler.alphas_cumprod)
-        if scheduler.prediction_type == "v_prediction":
+        if scheduler.config.prediction_type == "v_prediction":
             self.k_diffusion_model = CompVisVDenoiser(model)
         else:
             self.k_diffusion_model = CompVisDenoiser(model)
diff --git a/src/diffusers/pipelines/audio_diffusion/pipeline_audio_diffusion.py b/src/diffusers/pipelines/audio_diffusion/pipeline_audio_diffusion.py
index 8d8229e661..1df76ed6c5 100644
--- a/src/diffusers/pipelines/audio_diffusion/pipeline_audio_diffusion.py
+++ b/src/diffusers/pipelines/audio_diffusion/pipeline_audio_diffusion.py
@@ -60,9 +60,9 @@ class AudioDiffusionPipeline(DiffusionPipeline):
         input_module = self.vqvae if self.vqvae is not None else self.unet
         # For backwards compatibility
         sample_size = (
-            (input_module.sample_size, input_module.sample_size)
-            if type(input_module.sample_size) == int
-            else input_module.sample_size
+            (input_module.config.sample_size, input_module.config.sample_size)
+            if type(input_module.config.sample_size) == int
+            else input_module.config.sample_size
         )
         return sample_size
 
diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py
index 277a4df056..99aca66db8 100755
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py
@@ -113,7 +113,7 @@ class StableDiffusionKDiffusionPipeline(DiffusionPipeline, TextualInversionLoade
         self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
 
         model = ModelWrapper(unet, scheduler.alphas_cumprod)
-        if scheduler.prediction_type == "v_prediction":
+        if scheduler.config.prediction_type == "v_prediction":
             self.k_diffusion_model = CompVisVDenoiser(model)
         else:
             self.k_diffusion_model = CompVisDenoiser(model)
diff --git a/tests/pipelines/audio_diffusion/test_audio_diffusion.py b/tests/pipelines/audio_diffusion/test_audio_diffusion.py
index ba389d9c93..0eb6252410 100644
--- a/tests/pipelines/audio_diffusion/test_audio_diffusion.py
+++ b/tests/pipelines/audio_diffusion/test_audio_diffusion.py
@@ -115,8 +115,11 @@ class PipelineFastTests(unittest.TestCase):
         output = pipe(generator=generator, steps=4, return_dict=False)
         image_from_tuple = output[0][0]
 
-        assert audio.shape == (1, (self.dummy_unet.sample_size[1] - 1) * mel.hop_length)
-        assert image.height == self.dummy_unet.sample_size[0] and image.width == self.dummy_unet.sample_size[1]
+        assert audio.shape == (1, (self.dummy_unet.config.sample_size[1] - 1) * mel.hop_length)
+        assert (
+            image.height == self.dummy_unet.config.sample_size[0]
+            and image.width == self.dummy_unet.config.sample_size[1]
+        )
         image_slice = np.frombuffer(image.tobytes(), dtype="uint8")[:10]
         image_from_tuple_slice = np.frombuffer(image_from_tuple.tobytes(), dtype="uint8")[:10]
         expected_slice = np.array([69, 255, 255, 255, 0, 0, 77, 181, 12, 127])
@@ -133,14 +136,14 @@ class PipelineFastTests(unittest.TestCase):
         pipe.set_progress_bar_config(disable=None)
 
         np.random.seed(0)
-        raw_audio = np.random.uniform(-1, 1, ((dummy_vqvae_and_unet[0].sample_size[1] - 1) * mel.hop_length,))
+        raw_audio = np.random.uniform(-1, 1, ((dummy_vqvae_and_unet[0].config.sample_size[1] - 1) * mel.hop_length,))
         generator = torch.Generator(device=device).manual_seed(42)
         output = pipe(raw_audio=raw_audio, generator=generator, start_step=5, steps=10)
         image = output.images[0]
 
         assert (
-            image.height == self.dummy_vqvae_and_unet[0].sample_size[0]
-            and image.width == self.dummy_vqvae_and_unet[0].sample_size[1]
+            image.height == self.dummy_vqvae_and_unet[0].config.sample_size[0]
+            and image.width == self.dummy_vqvae_and_unet[0].config.sample_size[1]
         )
         image_slice = np.frombuffer(image.tobytes(), dtype="uint8")[:10]
         expected_slice = np.array([120, 117, 110, 109, 138, 167, 138, 148, 132, 121])
@@ -183,8 +186,8 @@ class PipelineIntegrationTests(unittest.TestCase):
         audio = output.audios[0]
         image = output.images[0]
 
-        assert audio.shape == (1, (pipe.unet.sample_size[1] - 1) * pipe.mel.hop_length)
-        assert image.height == pipe.unet.sample_size[0] and image.width == pipe.unet.sample_size[1]
+        assert audio.shape == (1, (pipe.unet.config.sample_size[1] - 1) * pipe.mel.hop_length)
+        assert image.height == pipe.unet.config.sample_size[0] and image.width == pipe.unet.config.sample_size[1]
 
         image_slice = np.frombuffer(image.tobytes(), dtype="uint8")[:10]
         expected_slice = np.array([151, 167, 154, 144, 122, 134, 121, 105, 70, 26])
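
All four changes apply the same fix: values passed to a model's or scheduler's __init__ are recorded by ConfigMixin, so they must be read back through the .config namespace; plain attribute access such as scheduler.prediction_type is deprecated. The following is a minimal sketch of that pattern, not part of the patch, assuming a recent diffusers install; the class choices and sizes are illustrative only.

# Sketch only: demonstrates the .config access pattern this patch adopts.
from diffusers import DDPMScheduler, UNet2DModel

# Init kwargs are captured by ConfigMixin and exposed under .config.
scheduler = DDPMScheduler(prediction_type="v_prediction")
assert scheduler.config.prediction_type == "v_prediction"

# A deliberately tiny UNet. sample_size may be an int or a (height, width)
# tuple, which is why the audio diffusion pipeline above normalizes it.
unet = UNet2DModel(
    sample_size=(64, 32),
    block_out_channels=(32, 64),
    down_block_types=("DownBlock2D", "AttnDownBlock2D"),
    up_block_types=("AttnUpBlock2D", "UpBlock2D"),
)
sample_size = (
    (unet.config.sample_size, unet.config.sample_size)
    if isinstance(unet.config.sample_size, int)
    else unet.config.sample_size
)
print(sample_size)  # (64, 32)

The test updates are the same rule applied to the dummy models: reading dummy_unet.config.sample_size is the supported way to recover the size the model was constructed with.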