1
0
mirror of https://github.com/huggingface/diffusers.git synced 2026-01-27 17:22:53 +03:00

Performance Improve for Qwen Image Edit (#12190)

* fix(qwen-image-edit):
- update condition reshaping logic to improve editing performance

* fix(qwen-image-edit):
- remove _auto_resize
This commit is contained in:
naykun
2025-08-19 20:45:18 +08:00
committed by GitHub
parent dba4e007fe
commit cc48b9368f

View File

@@ -62,25 +62,6 @@ EXAMPLE_DOC_STRING = """
>>> image.save("qwenimage_edit.png")
```
"""
# Candidate (width, height) resolutions for the edit pipeline's auto-resize
# step. The `_auto_resize` branch of image preprocessing unpacks each entry as
# `(w, h)` and selects the one whose `w / h` is closest to the input image's
# aspect ratio. Entries are listed in order of increasing width, from the
# tallest portrait shape to the widest landscape shape, with the square
# (1024, 1024) in the middle.
PREFERRED_QWENIMAGE_RESOLUTIONS = [
(672, 1568),
(688, 1504),
(720, 1456),
(752, 1392),
(800, 1328),
(832, 1248),
(880, 1184),
(944, 1104),
(1024, 1024),
(1104, 944),
(1184, 880),
(1248, 832),
(1328, 800),
(1392, 752),
(1456, 720),
(1504, 688),
(1568, 672),
]
# Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.calculate_shift
@@ -565,7 +546,6 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
callback_on_step_end_tensor_inputs: List[str] = ["latents"],
max_sequence_length: int = 512,
_auto_resize: bool = True,
):
r"""
Function invoked when calling the pipeline for generation.
@@ -684,18 +664,9 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
device = self._execution_device
# 3. Preprocess image
if image is not None and not (isinstance(image, torch.Tensor) and image.size(1) == self.latent_channels):
img = image[0] if isinstance(image, list) else image
image_height, image_width = self.image_processor.get_default_height_width(img)
aspect_ratio = image_width / image_height
if _auto_resize:
_, image_width, image_height = min(
(abs(aspect_ratio - w / h), w, h) for w, h in PREFERRED_QWENIMAGE_RESOLUTIONS
)
image_width = image_width // multiple_of * multiple_of
image_height = image_height // multiple_of * multiple_of
image = self.image_processor.resize(image, image_height, image_width)
image = self.image_processor.resize(image, calculated_height, calculated_width)
prompt_image = image
image = self.image_processor.preprocess(image, image_height, image_width)
image = self.image_processor.preprocess(image, calculated_height, calculated_width)
image = image.unsqueeze(2)
has_neg_prompt = negative_prompt is not None or (
@@ -712,9 +683,6 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
max_sequence_length=max_sequence_length,
)
if do_true_cfg:
# negative image is the same size as the original image, but all pixels are white
# negative_image = Image.new("RGB", (image.width, image.height), (255, 255, 255))
negative_prompt_embeds, negative_prompt_embeds_mask = self.encode_prompt(
image=prompt_image,
prompt=negative_prompt,
@@ -741,7 +709,7 @@ class QwenImageEditPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
img_shapes = [
[
(1, height // self.vae_scale_factor // 2, width // self.vae_scale_factor // 2),
(1, image_height // self.vae_scale_factor // 2, image_width // self.vae_scale_factor // 2),
(1, calculated_height // self.vae_scale_factor // 2, calculated_width // self.vae_scale_factor // 2),
]
] * batch_size