diff --git a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py index a4f3788fa6..2228649eb5 100644 --- a/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +++ b/src/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py @@ -1317,7 +1317,7 @@ class UniDiffuserPipeline(DiffusionPipeline): latents = self._combine(image_vae_latents, image_clip_latents) elif mode in ["img2text", "text"]: latents = prompt_embeds - + print(f"Initial latents: {latents}") # 7. Check that shapes of latents and image match the UNet channels. diff --git a/tests/pipelines/unidiffuser/test_unidiffuser.py b/tests/pipelines/unidiffuser/test_unidiffuser.py index b0a189fbf2..e7834c61a0 100644 --- a/tests/pipelines/unidiffuser/test_unidiffuser.py +++ b/tests/pipelines/unidiffuser/test_unidiffuser.py @@ -589,10 +589,9 @@ class UniDiffuserPipelineSlowTests(unittest.TestCase): expected_text_prefix = "Astronaut " assert text[0][:10] == expected_text_prefix - + def test_unidiffuser_default_joint_v1_fp16(self): pipe = UniDiffuserPipeline.from_pretrained("dg845/unidiffuser-diffusers", torch_dtype=torch.float16) - pipe.image_encoder = CLIPVisionModelWithProjection.from_pretrained("/home/tamamo/scratch/unidiffuser_big/image_encoder") pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) pipe.enable_attention_slicing() @@ -613,10 +612,9 @@ class UniDiffuserPipelineSlowTests(unittest.TestCase): expected_text_prefix = "Pink pink " assert text[0][:10] == expected_text_prefix - + def test_unidiffuser_default_text2img_v1_fp16(self): pipe = UniDiffuserPipeline.from_pretrained("dg845/unidiffuser-diffusers", torch_dtype=torch.float16) - pipe.image_encoder = CLIPVisionModelWithProjection.from_pretrained("/home/tamamo/scratch/unidiffuser_big/image_encoder") pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) pipe.enable_attention_slicing() @@ -630,10 +628,9 @@ class UniDiffuserPipelineSlowTests(unittest.TestCase): image_slice = image[0, -3:, -3:, -1] expected_slice = np.array([0.4702, 0.4666, 0.4446, 0.4829, 0.4468, 0.4565, 0.4663, 0.4956, 0.4277]) assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 - + def test_unidiffuser_default_text2img_v1_fp16_no_cfg(self): pipe = UniDiffuserPipeline.from_pretrained("dg845/unidiffuser-diffusers", torch_dtype=torch.float16) - pipe.image_encoder = CLIPVisionModelWithProjection.from_pretrained("/home/tamamo/scratch/unidiffuser_big/image_encoder") pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) pipe.enable_attention_slicing() @@ -651,7 +648,6 @@ class UniDiffuserPipelineSlowTests(unittest.TestCase): def test_unidiffuser_default_img2text_v1_fp16(self): pipe = UniDiffuserPipeline.from_pretrained("dg845/unidiffuser-diffusers", torch_dtype=torch.float16) - pipe.image_encoder = CLIPVisionModelWithProjection.from_pretrained("/home/tamamo/scratch/unidiffuser_big/image_encoder") pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) pipe.enable_attention_slicing() @@ -663,4 +659,3 @@ class UniDiffuserPipelineSlowTests(unittest.TestCase): expected_text_prefix = "Astronaut " assert text[0][:10] == expected_text_prefix -