Mirror of https://github.com/huggingface/diffusers.git, synced 2026-01-27 17:22:53 +03:00
[Tests] reduce the model size in the amused fast test (#7804)
* chore: reducing model sizes
* chore: shrinks further
* chore: shrinks further
* chore: shrinking model for img2img pipeline
* chore: reducing size of model for inpaint pipeline

Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>

Committed by: GitHub
Parent: 21f023ec1a
Commit: b02e2113ff
Text-to-image pipeline (AmusedPipelineFastTests):

```diff
@@ -38,17 +38,17 @@ class AmusedPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def get_dummy_components(self):
         torch.manual_seed(0)
         transformer = UVit2DModel(
-            hidden_size=32,
+            hidden_size=8,
             use_bias=False,
             hidden_dropout=0.0,
-            cond_embed_dim=32,
+            cond_embed_dim=8,
             micro_cond_encode_dim=2,
             micro_cond_embed_dim=10,
-            encoder_hidden_size=32,
+            encoder_hidden_size=8,
             vocab_size=32,
-            codebook_size=32,
-            in_channels=32,
-            block_out_channels=32,
+            codebook_size=8,
+            in_channels=8,
+            block_out_channels=8,
             num_res_blocks=1,
             downsample=True,
             upsample=True,
@@ -56,7 +56,7 @@ class AmusedPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             num_hidden_layers=1,
             num_attention_heads=1,
             attention_dropout=0.0,
-            intermediate_size=32,
+            intermediate_size=8,
             layer_norm_eps=1e-06,
             ln_elementwise_affine=True,
         )
@@ -64,17 +64,17 @@ class AmusedPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         torch.manual_seed(0)
         vqvae = VQModel(
             act_fn="silu",
-            block_out_channels=[32],
+            block_out_channels=[8],
             down_block_types=[
                 "DownEncoderBlock2D",
             ],
             in_channels=3,
-            latent_channels=32,
-            layers_per_block=2,
-            norm_num_groups=32,
-            num_vq_embeddings=32,
+            latent_channels=8,
+            layers_per_block=1,
+            norm_num_groups=8,
+            num_vq_embeddings=8,
             out_channels=3,
-            sample_size=32,
+            sample_size=8,
             up_block_types=[
                 "UpDecoderBlock2D",
             ],
@@ -85,14 +85,14 @@ class AmusedPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         text_encoder_config = CLIPTextConfig(
             bos_token_id=0,
             eos_token_id=2,
-            hidden_size=32,
-            intermediate_size=64,
+            hidden_size=8,
+            intermediate_size=8,
             layer_norm_eps=1e-05,
-            num_attention_heads=8,
-            num_hidden_layers=3,
+            num_attention_heads=1,
+            num_hidden_layers=1,
             pad_token_id=1,
             vocab_size=1000,
-            projection_dim=32,
+            projection_dim=8,
         )
         text_encoder = CLIPTextModelWithProjection(text_encoder_config)
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
```
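For a sense of scale, here is a minimal sketch (assuming a diffusers install that exports UVit2DModel at the top level) that builds the dummy transformer with the configuration values visible in the hunks above and compares parameter counts before and after the shrink. Note that `block_num_heads=1` is an assumption: that line falls between the two hunks and is not shown in this diff.

```python
# Sketch: parameter count of the dummy UVit2DModel, old (32-dim) vs. new (8-dim).
# Uses only values visible in the diff above; block_num_heads=1 is assumed.
from diffusers import UVit2DModel


def n_params(dim: int) -> int:
    model = UVit2DModel(
        hidden_size=dim,
        use_bias=False,
        hidden_dropout=0.0,
        cond_embed_dim=dim,
        micro_cond_encode_dim=2,
        micro_cond_embed_dim=10,
        encoder_hidden_size=dim,
        vocab_size=32,
        codebook_size=dim,
        in_channels=dim,
        block_out_channels=dim,
        num_res_blocks=1,
        downsample=True,
        upsample=True,
        block_num_heads=1,  # assumption: this line is hidden between the hunks
        num_hidden_layers=1,
        num_attention_heads=1,
        attention_dropout=0.0,
        intermediate_size=dim,
        layer_norm_eps=1e-06,
        ln_elementwise_affine=True,
    )
    return sum(p.numel() for p in model.parameters())


print(n_params(32), "->", n_params(8))  # old config vs. shrunken config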
Image-to-image pipeline (AmusedImg2ImgPipelineFastTests):

```diff
@@ -42,17 +42,17 @@ class AmusedImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def get_dummy_components(self):
         torch.manual_seed(0)
         transformer = UVit2DModel(
-            hidden_size=32,
+            hidden_size=8,
             use_bias=False,
             hidden_dropout=0.0,
-            cond_embed_dim=32,
+            cond_embed_dim=8,
             micro_cond_encode_dim=2,
             micro_cond_embed_dim=10,
-            encoder_hidden_size=32,
+            encoder_hidden_size=8,
             vocab_size=32,
-            codebook_size=32,
-            in_channels=32,
-            block_out_channels=32,
+            codebook_size=8,
+            in_channels=8,
+            block_out_channels=8,
             num_res_blocks=1,
             downsample=True,
             upsample=True,
@@ -60,7 +60,7 @@ class AmusedImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             num_hidden_layers=1,
             num_attention_heads=1,
             attention_dropout=0.0,
-            intermediate_size=32,
+            intermediate_size=8,
             layer_norm_eps=1e-06,
             ln_elementwise_affine=True,
         )
@@ -68,17 +68,17 @@ class AmusedImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         torch.manual_seed(0)
         vqvae = VQModel(
             act_fn="silu",
-            block_out_channels=[32],
+            block_out_channels=[8],
             down_block_types=[
                 "DownEncoderBlock2D",
             ],
             in_channels=3,
-            latent_channels=32,
-            layers_per_block=2,
-            norm_num_groups=32,
-            num_vq_embeddings=32,
+            latent_channels=8,
+            layers_per_block=1,
+            norm_num_groups=8,
+            num_vq_embeddings=32,  # reducing this to 16 or 8 -> RuntimeError: "cdist_cuda" not implemented for 'Half'
             out_channels=3,
-            sample_size=32,
+            sample_size=8,
             up_block_types=[
                 "UpDecoderBlock2D",
             ],
@@ -89,14 +89,14 @@ class AmusedImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         text_encoder_config = CLIPTextConfig(
             bos_token_id=0,
             eos_token_id=2,
-            hidden_size=32,
-            intermediate_size=64,
+            hidden_size=8,
+            intermediate_size=8,
             layer_norm_eps=1e-05,
-            num_attention_heads=8,
-            num_hidden_layers=3,
+            num_attention_heads=1,
+            num_hidden_layers=1,
             pad_token_id=1,
             vocab_size=1000,
-            projection_dim=32,
+            projection_dim=8,
         )
         text_encoder = CLIPTextModelWithProjection(text_encoder_config)
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
```
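The num_vq_embeddings comment deserves a note. A plausible explanation, not stated in the commit: with p=2 and the default compute_mode, torch.cdist dispatches to a matmul-based Euclidean path (which works in fp16) only when one of the inputs has more than 25 rows; a 32-entry codebook takes that path, while 16 or 8 entries fall back to the direct cdist_cuda kernel, which has no Half implementation. A minimal repro sketch, assuming a CUDA device is available:

```python
# Sketch reproducing the error quoted in the diff comment above.
# Requires a CUDA GPU; the row-count threshold is the documented behaviour
# of torch.cdist's default compute_mode ("use_mm_for_euclid_dist_if_necessary").
import torch

assert torch.cuda.is_available(), "this repro needs a CUDA device"
queries = torch.randn(16, 8, device="cuda", dtype=torch.float16)

# 32 codebook rows > 25 -> matmul-based path, fp16 works:
torch.cdist(queries, torch.randn(32, 8, device="cuda", dtype=torch.float16))

# 16 rows <= 25 -> direct kernel, which lacks a Half implementation:
try:
    torch.cdist(queries, torch.randn(16, 8, device="cuda", dtype=torch.float16))
except RuntimeError as err:
    print(err)  # "cdist_cuda" not implemented for 'Half'
```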
Inpaint pipeline (AmusedInpaintPipelineFastTests):

```diff
@@ -42,17 +42,17 @@ class AmusedInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def get_dummy_components(self):
         torch.manual_seed(0)
         transformer = UVit2DModel(
-            hidden_size=32,
+            hidden_size=8,
             use_bias=False,
             hidden_dropout=0.0,
-            cond_embed_dim=32,
+            cond_embed_dim=8,
             micro_cond_encode_dim=2,
             micro_cond_embed_dim=10,
-            encoder_hidden_size=32,
+            encoder_hidden_size=8,
             vocab_size=32,
-            codebook_size=32,
-            in_channels=32,
-            block_out_channels=32,
+            codebook_size=32,  # codebook size needs to be consistent with num_vq_embeddings for inpaint tests
+            in_channels=8,
+            block_out_channels=8,
             num_res_blocks=1,
             downsample=True,
             upsample=True,
@@ -60,7 +60,7 @@ class AmusedInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             num_hidden_layers=1,
             num_attention_heads=1,
             attention_dropout=0.0,
-            intermediate_size=32,
+            intermediate_size=8,
             layer_norm_eps=1e-06,
             ln_elementwise_affine=True,
         )
@@ -68,17 +68,17 @@ class AmusedInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         torch.manual_seed(0)
         vqvae = VQModel(
             act_fn="silu",
-            block_out_channels=[32],
+            block_out_channels=[8],
             down_block_types=[
                 "DownEncoderBlock2D",
             ],
             in_channels=3,
-            latent_channels=32,
-            layers_per_block=2,
-            norm_num_groups=32,
-            num_vq_embeddings=32,
+            latent_channels=8,
+            layers_per_block=1,
+            norm_num_groups=8,
+            num_vq_embeddings=32,  # reducing this to 16 or 8 -> RuntimeError: "cdist_cuda" not implemented for 'Half'
             out_channels=3,
-            sample_size=32,
+            sample_size=8,
             up_block_types=[
                 "UpDecoderBlock2D",
             ],
@@ -89,14 +89,14 @@ class AmusedInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         text_encoder_config = CLIPTextConfig(
             bos_token_id=0,
             eos_token_id=2,
-            hidden_size=32,
-            intermediate_size=64,
+            hidden_size=8,
+            intermediate_size=8,
             layer_norm_eps=1e-05,
-            num_attention_heads=8,
-            num_hidden_layers=3,
+            num_attention_heads=1,
+            num_hidden_layers=1,
             pad_token_id=1,
             vocab_size=1000,
-            projection_dim=32,
+            projection_dim=8,
         )
         text_encoder = CLIPTextModelWithProjection(text_encoder_config)
         tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
```
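The codebook_size comment reflects a coupling between the two dummy models: the inpaint test round-trips the input image through the vqvae, and the resulting codebook indices are then embedded by the transformer, so the transformer's embedding table must cover all num_vq_embeddings entries. A hypothetical toy illustration of the failure mode if the two diverged (not the pipeline code):

```python
# Toy illustration: indices drawn from a 32-entry VQ codebook overflow a
# transformer embedding table that only has 8 rows.
import torch

num_vq_embeddings = 32  # vqvae codebook entries
codebook_size = 8       # what the transformer table would shrink to

embed = torch.nn.Embedding(codebook_size, 4)
indices = torch.tensor([0, 7, 8, 31])  # valid vqvae indices, some >= 8
try:
    embed(indices)  # any index >= codebook_size is out of range
except IndexError as err:
    print(err)  # index out of range in self
```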