Merge branch 'main' into enable-all-gpus
.github/workflows/ssh-runner.yml (vendored) — 2 changed lines (+1 −1)
@@ -31,7 +31,7 @@ jobs:
       group: "${{ github.event.inputs.runner_type }}"
     container:
       image: ${{ github.event.inputs.docker_image }}
-      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0 --privileged
+      options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus all --privileged

     steps:
       - name: Checkout diffusers
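The functional change here is the Docker `--gpus` flag: `--gpus 0` appears to request a GPU count of zero, so no CUDA device reached the CI container, while `--gpus all` exposes every host GPU. A minimal sketch (not part of the commit) that can be run inside the container to confirm what the runner actually sees; it assumes PyTorch is installed in the CI image:

import torch

# With `--gpus all` every host GPU should be visible to PyTorch; with the
# old `--gpus 0` option no CUDA device was exposed to the container.
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Visible GPUs:   {torch.cuda.device_count()}")
for i in range(torch.cuda.device_count()):
    print(f"  cuda:{i} -> {torch.cuda.get_device_name(i)}")
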
@@ -1,4 +1,4 @@
-# Copyright 2025 The Qwen-Image Team and The HuggingFace Team. All rights reserved.
+# Copyright 2025 The Qwen-Image Team, Wan Team and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,6 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+#
+# We gratefully acknowledge the Wan Team for their outstanding contributions.
+# QwenImageVAE is further fine-tuned from the Wan Video VAE to achieve improved performance.
+# For more information about the Wan VAE, please refer to:
+# - GitHub: https://github.com/Wan-Video/Wan2.1
+# - arXiv: https://arxiv.org/abs/2503.20314

 from typing import List, Optional, Tuple, Union

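For context on the model this header describes: the QwenImage VAE ships in diffusers as `AutoencoderKLQwenImage`. A hedged sketch of loading it; the checkpoint id `Qwen/Qwen-Image` and the `vae` subfolder are assumptions based on diffusers' usual pipeline layout, not something stated in this commit:

import torch
from diffusers import AutoencoderKLQwenImage

# Hypothetical checkpoint id; any Qwen-Image pipeline repo that stores the
# VAE weights in a `vae` subfolder should load the same way.
vae = AutoencoderKLQwenImage.from_pretrained(
    "Qwen/Qwen-Image", subfolder="vae", torch_dtype=torch.bfloat16
)
print(vae.config)
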
@@ -180,6 +180,7 @@ class WanAttention(torch.nn.Module, AttentionModuleMixin):
         added_kv_proj_dim: Optional[int] = None,
         cross_attention_dim_head: Optional[int] = None,
         processor=None,
+        is_cross_attention=None,
     ):
         super().__init__()

@@ -207,6 +208,8 @@ class WanAttention(torch.nn.Module, AttentionModuleMixin):
             self.add_v_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=True)
             self.norm_added_k = torch.nn.RMSNorm(dim_head * heads, eps=eps)

+        self.is_cross_attention = cross_attention_dim_head is not None
+
         self.set_processor(processor)

     def fuse_projections(self):
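Taken together, the two hunks let `WanAttention.__init__` accept an `is_cross_attention` keyword (apparently for signature compatibility, since the diff never reads it) while deriving the actual flag from whether `cross_attention_dim_head` was configured. A minimal, self-contained sketch of that logic; `TinyWanAttention` is a heavily simplified, hypothetical stand-in, and only the lines mirrored from the diff are authoritative:

from typing import Optional

import torch


class TinyWanAttention(torch.nn.Module):
    def __init__(
        self,
        dim: int = 64,
        heads: int = 8,
        dim_head: int = 8,
        eps: float = 1e-6,
        added_kv_proj_dim: Optional[int] = None,
        cross_attention_dim_head: Optional[int] = None,
        is_cross_attention=None,  # accepted but unused, mirroring the diff
    ):
        super().__init__()
        self.inner_dim = dim_head * heads
        if added_kv_proj_dim is not None:
            self.add_v_proj = torch.nn.Linear(added_kv_proj_dim, self.inner_dim, bias=True)
            self.norm_added_k = torch.nn.RMSNorm(dim_head * heads, eps=eps)
        # Derived, not taken from the argument: the module does cross-attention
        # iff a separate K/V head dimension was configured.
        self.is_cross_attention = cross_attention_dim_head is not None


self_attn = TinyWanAttention()
cross_attn = TinyWanAttention(cross_attention_dim_head=8)
print(self_attn.is_cross_attention, cross_attn.is_cross_attention)  # False True
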