From ac49f97a75e44547d9ea963ff0f3ac08af9bdbff Mon Sep 17 00:00:00 2001
From: Dhruv Nair
Date: Tue, 27 Feb 2024 15:47:23 +0530
Subject: [PATCH] Add tests to check configs when using single file loading (#7099)

* update

* update

* update

* update

---------

Co-authored-by: Sayak Paul
---
Note: every assertion message in this patch is worded
"<param> differs between single file loading and pretrained loading";
the inpaint and SDXL tests previously read "<param> is differs ...".

 tests/models/autoencoders/test_models_vae.py  | 37 ++++++++++++++++
 tests/pipelines/controlnet/test_controlnet.py | 38 ++++++++++++++++
 .../controlnet/test_controlnet_sdxl.py        | 43 +++++++++++++++++++
 .../stable_diffusion/test_stable_diffusion.py | 33 ++++++++++++++
 .../test_stable_diffusion_inpaint.py          | 33 ++++++++++++++
 .../test_stable_diffusion_upscale.py          | 37 ++++++++++++++++
 .../test_stable_diffusion_xl.py               | 36 ++++++++++++++++
 .../test_stable_diffusion_xl_img2img.py       | 32 ++++++++++++++
 8 files changed, 289 insertions(+)

diff --git a/tests/models/autoencoders/test_models_vae.py b/tests/models/autoencoders/test_models_vae.py
index 0ea399c3ed..8ddf6ac464 100644
--- a/tests/models/autoencoders/test_models_vae.py
+++ b/tests/models/autoencoders/test_models_vae.py
@@ -810,6 +810,43 @@ class AutoencoderKLIntegrationTests(unittest.TestCase):
 
         assert torch_all_close(output_slice_1, output_slice_2, atol=3e-3)
 
+    def test_single_file_component_configs(self):
+        vae_single_file = AutoencoderKL.from_single_file(
+            "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/blob/main/vae-ft-mse-840000-ema-pruned.safetensors"
+        )
+        vae = AutoencoderKL.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="vae")
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values"]
+        for param_name, param_value in vae_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                vae.config[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
+
+    def test_single_file_arguments(self):
+        vae_default = AutoencoderKL.from_single_file(
+            "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/blob/main/vae-ft-mse-840000-ema-pruned.safetensors",
+        )
+
+        assert vae_default.config.scaling_factor == 0.18125
+        assert vae_default.config.sample_size == 512
+        assert vae_default.dtype == torch.float32
+
+        scaling_factor = 2.0
+        image_size = 256
+        torch_dtype = torch.float16
+
+        vae = AutoencoderKL.from_single_file(
+            "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/blob/main/vae-ft-mse-840000-ema-pruned.safetensors",
+            image_size=image_size,
+            scaling_factor=scaling_factor,
+            torch_dtype=torch_dtype,
+        )
+        assert vae.config.scaling_factor == scaling_factor
+        assert vae.config.sample_size == image_size
+        assert vae.dtype == torch_dtype
+
 
 @slow
 class AsymmetricAutoencoderKLIntegrationTests(unittest.TestCase):
diff --git a/tests/pipelines/controlnet/test_controlnet.py b/tests/pipelines/controlnet/test_controlnet.py
index b7839eb996..114a36b37f 100644
--- a/tests/pipelines/controlnet/test_controlnet.py
+++ b/tests/pipelines/controlnet/test_controlnet.py
@@ -1072,6 +1072,44 @@ class ControlNetPipelineSlowTests(unittest.TestCase):
         max_diff = numpy_cosine_similarity_distance(output_sf.flatten(), output.flatten())
         assert max_diff < 1e-3
 
+    def test_single_file_component_configs(self):
+        controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", variant="fp16")
+        pipe = StableDiffusionControlNetPipeline.from_pretrained(
+            "runwayml/stable-diffusion-v1-5", variant="fp16", safety_checker=None, controlnet=controlnet
+        )
+
+        controlnet_single_file = ControlNetModel.from_single_file(
+            "https://huggingface.co/lllyasviel/ControlNet-v1-1/blob/main/control_v11p_sd15_canny.pth"
+        )
+        single_file_pipe = StableDiffusionControlNetPipeline.from_single_file(
+            "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.safetensors",
+            safety_checker=None,
+            controlnet=controlnet_single_file,
+            scheduler_type="pndm",
+        )
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "architectures", "_use_default_values"]
+        for param_name, param_value in single_file_pipe.controlnet.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.controlnet.config[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
+
+        for param_name, param_value in single_file_pipe.unet.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.unet.config[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
+
+        for param_name, param_value in single_file_pipe.vae.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.vae.config[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
+
 
 @slow
 @require_torch_gpu
diff --git a/tests/pipelines/controlnet/test_controlnet_sdxl.py b/tests/pipelines/controlnet/test_controlnet_sdxl.py
index dd56640315..b39147246a 100644
--- a/tests/pipelines/controlnet/test_controlnet_sdxl.py
+++ b/tests/pipelines/controlnet/test_controlnet_sdxl.py
@@ -863,6 +863,49 @@ class ControlNetSDXLPipelineSlowTests(unittest.TestCase):
         max_diff = numpy_cosine_similarity_distance(images[0].flatten(), single_file_images[0].flatten())
         assert max_diff < 5e-2
 
+    def test_single_file_component_configs(self):
+        controlnet = ControlNetModel.from_pretrained(
+            "diffusers/controlnet-depth-sdxl-1.0", torch_dtype=torch.float16, variant="fp16"
+        )
+        pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+            "stabilityai/stable-diffusion-xl-base-1.0",
+            variant="fp16",
+            controlnet=controlnet,
+            torch_dtype=torch.float16,
+        )
+
+        single_file_url = (
+            "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_base_1.0.safetensors"
+        )
+        single_file_pipe = StableDiffusionXLControlNetPipeline.from_single_file(
+            single_file_url, controlnet=controlnet, torch_dtype=torch.float16
+        )
+
+        for param_name, param_value in single_file_pipe.text_encoder.config.to_dict().items():
+            if param_name in ["torch_dtype", "architectures", "_name_or_path"]:
+                continue
+            assert pipe.text_encoder.config.to_dict()[param_name] == param_value
+
+        for param_name, param_value in single_file_pipe.text_encoder_2.config.to_dict().items():
+            if param_name in ["torch_dtype", "architectures", "_name_or_path"]:
+                continue
+            assert pipe.text_encoder_2.config.to_dict()[param_name] == param_value
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "architectures", "_use_default_values"]
+        for param_name, param_value in single_file_pipe.unet.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.unet.config[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
+
+        for param_name, param_value in single_file_pipe.vae.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.vae.config[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
+
 
 class StableDiffusionSSD1BControlNetPipelineFastTests(StableDiffusionXLControlNetPipelineFastTests):
     def test_controlnet_sdxl_guess(self):
diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
index 57671bbdcc..82afacaa2a 100644
--- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py
+++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py
@@ -1295,6 +1295,39 @@ class StableDiffusionPipelineCkptTests(unittest.TestCase):
 
         assert max_diff < 1e-3
 
+    def test_single_file_component_configs(self):
+        pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
+
+        ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt"
+        single_file_pipe = StableDiffusionPipeline.from_single_file(ckpt_path, load_safety_checker=True)
+
+        for param_name, param_value in single_file_pipe.text_encoder.config.to_dict().items():
+            if param_name in ["torch_dtype", "architectures", "_name_or_path"]:
+                continue
+            assert pipe.text_encoder.config.to_dict()[param_name] == param_value
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "architectures", "_use_default_values"]
+        for param_name, param_value in single_file_pipe.unet.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.unet.config[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
+
+        for param_name, param_value in single_file_pipe.vae.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.vae.config[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
+
+        for param_name, param_value in single_file_pipe.safety_checker.config.to_dict().items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.safety_checker.config.to_dict()[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
+
 
 @nightly
 @require_torch_gpu
diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py
index b956e2dc83..218ac3e76a 100644
--- a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py
+++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py
@@ -785,6 +785,39 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
 
         assert max_diff < 1e-4
 
+    def test_single_file_component_configs(self):
+        pipe = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting", variant="fp16")
+
+        ckpt_path = "https://huggingface.co/runwayml/stable-diffusion-inpainting/blob/main/sd-v1-5-inpainting.ckpt"
+        single_file_pipe = StableDiffusionInpaintPipeline.from_single_file(ckpt_path, load_safety_checker=True)
+
+        for param_name, param_value in single_file_pipe.text_encoder.config.to_dict().items():
+            if param_name in ["torch_dtype", "architectures", "_name_or_path"]:
+                continue
+            assert pipe.text_encoder.config.to_dict()[param_name] == param_value
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "architectures", "_use_default_values"]
+        for param_name, param_value in single_file_pipe.unet.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.unet.config[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
+
+        for param_name, param_value in single_file_pipe.vae.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.vae.config[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
+
+        for param_name, param_value in single_file_pipe.safety_checker.config.to_dict().items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.safety_checker.config.to_dict()[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
+
 
 @slow
 @require_torch_gpu
diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_upscale.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_upscale.py
index 6f5da320be..4dd6121021 100644
--- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_upscale.py
+++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_upscale.py
@@ -513,3 +513,40 @@ class StableDiffusionUpscalePipelineIntegrationTests(unittest.TestCase):
         assert (
             numpy_cosine_similarity_distance(image_from_pretrained.flatten(), image_from_single_file.flatten()) < 1e-3
         )
+
+    def test_single_file_component_configs(self):
+        pipe = StableDiffusionUpscalePipeline.from_pretrained(
+            "stabilityai/stable-diffusion-x4-upscaler", variant="fp16"
+        )
+
+        ckpt_path = (
+            "https://huggingface.co/stabilityai/stable-diffusion-x4-upscaler/blob/main/x4-upscaler-ema.safetensors"
+        )
+        single_file_pipe = StableDiffusionUpscalePipeline.from_single_file(ckpt_path, load_safety_checker=True)
+
+        for param_name, param_value in single_file_pipe.text_encoder.config.to_dict().items():
+            if param_name in ["torch_dtype", "architectures", "_name_or_path"]:
+                continue
+            assert pipe.text_encoder.config.to_dict()[param_name] == param_value
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "architectures", "_use_default_values"]
+        for param_name, param_value in single_file_pipe.unet.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.unet.config[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
+
+        for param_name, param_value in single_file_pipe.vae.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.vae.config[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
+
+        for param_name, param_value in single_file_pipe.safety_checker.config.to_dict().items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.safety_checker.config.to_dict()[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py
index a27614a2c7..3c81328dea 100644
--- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py
+++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py
@@ -1091,3 +1091,39 @@ class StableDiffusionXLPipelineIntegrationTests(unittest.TestCase):
         max_diff = numpy_cosine_similarity_distance(image.flatten(), image_ckpt.flatten())
 
         assert max_diff < 6e-3
+
+    def test_single_file_component_configs(self):
+        pipe = StableDiffusionXLPipeline.from_pretrained(
+            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
+        )
+        ckpt_path = (
+            "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_base_1.0.safetensors"
+        )
+        single_file_pipe = StableDiffusionXLPipeline.from_single_file(
+            ckpt_path, variant="fp16", torch_dtype=torch.float16
+        )
+
+        for param_name, param_value in single_file_pipe.text_encoder.config.to_dict().items():
+            if param_name in ["torch_dtype", "architectures", "_name_or_path"]:
+                continue
+            assert pipe.text_encoder.config.to_dict()[param_name] == param_value
+
+        for param_name, param_value in single_file_pipe.text_encoder_2.config.to_dict().items():
+            if param_name in ["torch_dtype", "architectures", "_name_or_path"]:
+                continue
+            assert pipe.text_encoder_2.config.to_dict()[param_name] == param_value
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "architectures", "_use_default_values"]
+        for param_name, param_value in single_file_pipe.unet.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.unet.config[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
+
+        for param_name, param_value in single_file_pipe.vae.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.vae.config[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_img2img.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_img2img.py
index 3a0229ac23..9718aede35 100644
--- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_img2img.py
+++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_img2img.py
@@ -816,3 +816,35 @@ class StableDiffusionXLImg2ImgIntegrationTests(unittest.TestCase):
         max_diff = numpy_cosine_similarity_distance(image.flatten(), image_single_file.flatten())
 
         assert max_diff < 5e-2
+
+    def test_single_file_component_configs(self):
+        pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
+            "stabilityai/stable-diffusion-xl-refiner-1.0",
+            torch_dtype=torch.float16,
+            variant="fp16",
+        )
+        ckpt_path = "https://huggingface.co/stabilityai/stable-diffusion-xl-refiner-1.0/blob/main/sd_xl_refiner_1.0.safetensors"
+        single_file_pipe = StableDiffusionXLImg2ImgPipeline.from_single_file(ckpt_path, torch_dtype=torch.float16)
+
+        assert pipe.text_encoder is None
+        assert single_file_pipe.text_encoder is None
+
+        for param_name, param_value in single_file_pipe.text_encoder_2.config.to_dict().items():
+            if param_name in ["torch_dtype", "architectures", "_name_or_path"]:
+                continue
+            assert pipe.text_encoder_2.config.to_dict()[param_name] == param_value
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "architectures", "_use_default_values"]
+        for param_name, param_value in single_file_pipe.unet.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.unet.config[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
+
+        for param_name, param_value in single_file_pipe.vae.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                pipe.vae.config[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"