From c6d38320fb6532e7e8bdb4e3bebaac3f39078933 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Mon, 22 Dec 2025 15:36:32 +0100 Subject: [PATCH] Use `T5Tokenizer` instead of `MT5Tokenizer` Given that the `MT5Tokenizer` in `transformers` is just a "re-export" of `T5Tokenizer` as per https://github.com/huggingface/transformers/blob/v4.57.3/src/transformers/models/mt5/tokenization_mt5.py (on the latest available stable Transformers release, i.e., v4.57.3), this commit updates the imports to point to `T5Tokenizer` instead, so that those still work with Transformers v5.0.0rc0 onwards. --- .../community/pipeline_hunyuandit_differential_img2img.py | 6 +++--- .../controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py | 6 +++--- src/diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py | 6 +++--- src/diffusers/pipelines/pag/pipeline_pag_hunyuandit.py | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/examples/community/pipeline_hunyuandit_differential_img2img.py b/examples/community/pipeline_hunyuandit_differential_img2img.py index fb7a4cb5e4..bc6841525b 100644 --- a/examples/community/pipeline_hunyuandit_differential_img2img.py +++ b/examples/community/pipeline_hunyuandit_differential_img2img.py @@ -21,8 +21,8 @@ from transformers import ( BertModel, BertTokenizer, CLIPImageProcessor, - MT5Tokenizer, T5EncoderModel, + T5Tokenizer, ) from diffusers.callbacks import MultiPipelineCallbacks, PipelineCallback @@ -260,7 +260,7 @@ class HunyuanDiTDifferentialImg2ImgPipeline(DiffusionPipeline): The HunyuanDiT model designed by Tencent Hunyuan. text_encoder_2 (`T5EncoderModel`): The mT5 embedder. Specifically, it is 't5-v1_1-xxl'. - tokenizer_2 (`MT5Tokenizer`): + tokenizer_2 (`T5Tokenizer`): The tokenizer for the mT5 embedder. scheduler ([`DDPMScheduler`]): A scheduler to be used in combination with HunyuanDiT to denoise the encoded image latents. 
@@ -295,7 +295,7 @@ class HunyuanDiTDifferentialImg2ImgPipeline(DiffusionPipeline): feature_extractor: CLIPImageProcessor, requires_safety_checker: bool = True, text_encoder_2=T5EncoderModel, - tokenizer_2=MT5Tokenizer, + tokenizer_2=T5Tokenizer, ): super().__init__() diff --git a/src/diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py b/src/diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py index 2b5684de95..29a7d61476 100644 --- a/src/diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +++ b/src/diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py @@ -17,7 +17,7 @@ from typing import Callable, Dict, List, Optional, Tuple, Union import numpy as np import torch -from transformers import BertModel, BertTokenizer, CLIPImageProcessor, MT5Tokenizer, T5EncoderModel +from transformers import BertModel, BertTokenizer, CLIPImageProcessor, T5EncoderModel, T5Tokenizer from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput @@ -185,7 +185,7 @@ class HunyuanDiTControlNetPipeline(DiffusionPipeline): The HunyuanDiT model designed by Tencent Hunyuan. text_encoder_2 (`T5EncoderModel`): The mT5 embedder. Specifically, it is 't5-v1_1-xxl'. - tokenizer_2 (`MT5Tokenizer`): + tokenizer_2 (`T5Tokenizer`): The tokenizer for the mT5 embedder. scheduler ([`DDPMScheduler`]): A scheduler to be used in combination with HunyuanDiT to denoise the encoded image latents. 
@@ -229,7 +229,7 @@ class HunyuanDiTControlNetPipeline(DiffusionPipeline): HunyuanDiT2DMultiControlNetModel, ], text_encoder_2: Optional[T5EncoderModel] = None, - tokenizer_2: Optional[MT5Tokenizer] = None, + tokenizer_2: Optional[T5Tokenizer] = None, requires_safety_checker: bool = True, ): super().__init__() diff --git a/src/diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py b/src/diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py index e2f935aaf4..052c7b4739 100644 --- a/src/diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +++ b/src/diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py @@ -17,7 +17,7 @@ from typing import Callable, Dict, List, Optional, Tuple, Union import numpy as np import torch -from transformers import BertModel, BertTokenizer, CLIPImageProcessor, MT5Tokenizer, T5EncoderModel +from transformers import BertModel, BertTokenizer, CLIPImageProcessor, T5EncoderModel, T5Tokenizer from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput @@ -169,7 +169,7 @@ class HunyuanDiTPipeline(DiffusionPipeline): The HunyuanDiT model designed by Tencent Hunyuan. text_encoder_2 (`T5EncoderModel`): The mT5 embedder. Specifically, it is 't5-v1_1-xxl'. - tokenizer_2 (`MT5Tokenizer`): + tokenizer_2 (`T5Tokenizer`): The tokenizer for the mT5 embedder. scheduler ([`DDPMScheduler`]): A scheduler to be used in combination with HunyuanDiT to denoise the encoded image latents. 
@@ -204,7 +204,7 @@ class HunyuanDiTPipeline(DiffusionPipeline): feature_extractor: CLIPImageProcessor, requires_safety_checker: bool = True, text_encoder_2: Optional[T5EncoderModel] = None, - tokenizer_2: Optional[MT5Tokenizer] = None, + tokenizer_2: Optional[T5Tokenizer] = None, ): super().__init__() diff --git a/src/diffusers/pipelines/pag/pipeline_pag_hunyuandit.py b/src/diffusers/pipelines/pag/pipeline_pag_hunyuandit.py index d156eac8f3..6704924b25 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_hunyuandit.py @@ -17,7 +17,7 @@ from typing import Callable, Dict, List, Optional, Tuple, Union import numpy as np import torch -from transformers import BertModel, BertTokenizer, CLIPImageProcessor, MT5Tokenizer, T5EncoderModel +from transformers import BertModel, BertTokenizer, CLIPImageProcessor, T5EncoderModel, T5Tokenizer from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput @@ -173,7 +173,7 @@ class HunyuanDiTPAGPipeline(DiffusionPipeline, PAGMixin): The HunyuanDiT model designed by Tencent Hunyuan. text_encoder_2 (`T5EncoderModel`): The mT5 embedder. Specifically, it is 't5-v1_1-xxl'. - tokenizer_2 (`MT5Tokenizer`): + tokenizer_2 (`T5Tokenizer`): The tokenizer for the mT5 embedder. scheduler ([`DDPMScheduler`]): A scheduler to be used in combination with HunyuanDiT to denoise the encoded image latents. @@ -208,7 +208,7 @@ class HunyuanDiTPAGPipeline(DiffusionPipeline, PAGMixin): feature_extractor: Optional[CLIPImageProcessor] = None, requires_safety_checker: bool = True, text_encoder_2: Optional[T5EncoderModel] = None, - tokenizer_2: Optional[MT5Tokenizer] = None, + tokenizer_2: Optional[T5Tokenizer] = None, pag_applied_layers: Union[str, List[str]] = "blocks.1", # "blocks.16.attn1", "blocks.16", "16", 16 ): super().__init__()