mirror of
https://github.com/huggingface/diffusers.git
synced 2026-01-27 17:22:53 +03:00
Fix issue in maybe_convert_prompt (#3188)
When the token used for textual inversion does not have any special symbols (e.g. it is not surrounded by <>), the tokenizer does not properly split the replacement tokens. Adding a space between the replacement tokens fixes this.
This commit is contained in:
@@ -410,7 +410,7 @@ class TextualInversionLoaderMixin:
|
||||
replacement = token
|
||||
i = 1
|
||||
while f"{token}_{i}" in tokenizer.added_tokens_encoder:
|
||||
replacement += f"{token}_{i}"
|
||||
replacement += f" {token}_{i}"
|
||||
i += 1
|
||||
|
||||
prompt = prompt.replace(token, replacement)
|
||||
|
||||
@@ -541,7 +541,7 @@ class DownloadTests(unittest.TestCase):
|
||||
assert pipe.text_encoder.get_input_embeddings().weight[-3].sum().item() == 96
|
||||
assert pipe.text_encoder.get_input_embeddings().weight[-2].sum().item() == 128
|
||||
assert pipe.text_encoder.get_input_embeddings().weight[-1].sum().item() == 160
|
||||
assert pipe._maybe_convert_prompt("<***>", pipe.tokenizer) == "<***><***>_1<***>_2"
|
||||
assert pipe._maybe_convert_prompt("<***>", pipe.tokenizer) == "<***> <***>_1 <***>_2"
|
||||
|
||||
prompt = "hey <***>"
|
||||
out = pipe(prompt, num_inference_steps=1, output_type="numpy").images
|
||||
@@ -569,7 +569,7 @@ class DownloadTests(unittest.TestCase):
|
||||
assert pipe.text_encoder.get_input_embeddings().weight[-3].sum().item() == 96
|
||||
assert pipe.text_encoder.get_input_embeddings().weight[-2].sum().item() == 128
|
||||
assert pipe.text_encoder.get_input_embeddings().weight[-1].sum().item() == 160
|
||||
assert pipe._maybe_convert_prompt("<****>", pipe.tokenizer) == "<****><****>_1<****>_2"
|
||||
assert pipe._maybe_convert_prompt("<****>", pipe.tokenizer) == "<****> <****>_1 <****>_2"
|
||||
|
||||
prompt = "hey <****>"
|
||||
out = pipe(prompt, num_inference_steps=1, output_type="numpy").images
|
||||
|
||||
Reference in New Issue
Block a user