1
0
mirror of https://github.com/huggingface/diffusers.git synced 2026-01-27 17:22:53 +03:00

Add an explicit --image_size to the conversion script (#1509)

* Add an explicit `--image_size` to the conversion script

* style
This commit is contained in:
Anton Lozhkov
2022-12-01 19:22:48 +01:00
committed by GitHub
parent a6a25ceb61
commit e65b71aba4

View File

@@ -207,12 +207,12 @@ def conv_attn_to_linear(checkpoint):
checkpoint[key] = checkpoint[key][:, :, 0]
def create_unet_diffusers_config(original_config):
def create_unet_diffusers_config(original_config, image_size: int):
"""
Creates a config for the diffusers based on the config of the LDM model.
"""
model_params = original_config.model.params
unet_params = original_config.model.params.unet_config.params
vae_params = original_config.model.params.first_stage_config.params.ddconfig
block_out_channels = [unet_params.model_channels * mult for mult in unet_params.channel_mult]
@@ -230,8 +230,10 @@ def create_unet_diffusers_config(original_config):
up_block_types.append(block_type)
resolution //= 2
vae_scale_factor = 2 ** (len(vae_params.ch_mult) - 1)
config = dict(
sample_size=model_params.image_size,
sample_size=image_size // vae_scale_factor,
in_channels=unet_params.in_channels,
out_channels=unet_params.out_channels,
down_block_types=tuple(down_block_types),
@@ -245,7 +247,7 @@ def create_unet_diffusers_config(original_config):
return config
def create_vae_diffusers_config(original_config):
def create_vae_diffusers_config(original_config, image_size: int):
"""
Creates a config for the diffusers based on the config of the LDM model.
"""
@@ -257,7 +259,7 @@ def create_vae_diffusers_config(original_config):
up_block_types = ["UpDecoderBlock2D"] * len(block_out_channels)
config = dict(
sample_size=vae_params.resolution,
sample_size=image_size,
in_channels=vae_params.in_channels,
out_channels=vae_params.out_ch,
down_block_types=tuple(down_block_types),
@@ -653,6 +655,15 @@ if __name__ == "__main__":
type=str,
help="Type of scheduler to use. Should be one of ['pndm', 'lms', 'ddim', 'euler', 'euler-ancest', 'dpm']",
)
parser.add_argument(
"--image_size",
default=512,
type=int,
help=(
"The image size that the model was trained on. Use 512 for Stable Diffusion v1.X and Stable Siffusion v2"
" Base. Use 768 for Stable Diffusion v2."
),
)
parser.add_argument(
"--extract_ema",
action="store_true",
@@ -712,7 +723,7 @@ if __name__ == "__main__":
raise ValueError(f"Scheduler of type {args.scheduler_type} doesn't exist!")
# Convert the UNet2DConditionModel model.
unet_config = create_unet_diffusers_config(original_config)
unet_config = create_unet_diffusers_config(original_config, image_size=args.image_size)
converted_unet_checkpoint = convert_ldm_unet_checkpoint(
checkpoint, unet_config, path=args.checkpoint_path, extract_ema=args.extract_ema
)
@@ -721,7 +732,7 @@ if __name__ == "__main__":
unet.load_state_dict(converted_unet_checkpoint)
# Convert the VAE model.
vae_config = create_vae_diffusers_config(original_config)
vae_config = create_vae_diffusers_config(original_config, image_size=args.image_size)
converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config)
vae = AutoencoderKL(**vae_config)