From 765eb50ff15a50a41b4cf930f32c462c520ff30d Mon Sep 17 00:00:00 2001
From: sayakpaul
Date: Thu, 15 Jan 2026 08:50:35 +0530
Subject: [PATCH] up

---
 .../transformers/transformer_glm_image.py     | 44 +++++----
 .../pipelines/glm_image/pipeline_glm_image.py | 89 ++++++++++---------
 .../pipelines/glm_image/pipeline_output.py    |  3 +-
 src/diffusers/pipelines/pipeline_utils.py     | 12 +--
 tests/fixtures/custom_pipeline/pipeline.py    |  4 +-
 tests/fixtures/custom_pipeline/what_ever.py   |  4 +-
 tests/models/test_modeling_common.py          |  6 +-
 .../test_modular_pipelines_common.py          |  6 +-
 tests/others/test_outputs.py                  |  3 +-
 tests/pipelines/cosmos/cosmos_guardrail.py    |  3 +-
 tests/pipelines/test_pipelines_common.py      |  4 +-
 tests/remote/test_remote_decode.py            |  7 +-
 12 files changed, 88 insertions(+), 97 deletions(-)

diff --git a/src/diffusers/models/transformers/transformer_glm_image.py b/src/diffusers/models/transformers/transformer_glm_image.py
index b7b3aa391c..04fdafb425 100644
--- a/src/diffusers/models/transformers/transformer_glm_image.py
+++ b/src/diffusers/models/transformers/transformer_glm_image.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any
 
 import torch
 import torch.nn as nn
@@ -104,7 +104,7 @@ class GlmImageAdaLayerNormZero(nn.Module):
 
     def forward(
         self, hidden_states: torch.Tensor, encoder_hidden_states: torch.Tensor, temb: torch.Tensor
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         dtype = hidden_states.dtype
         norm_hidden_states = self.norm(hidden_states).to(dtype=dtype)
         norm_encoder_hidden_states = self.norm_context(encoder_hidden_states).to(dtype=dtype)
@@ -148,7 +148,7 @@ class GlmImageLayerKVCache:
     def __init__(self):
         self.k_cache = None
         self.v_cache = None
-        self.mode: Optional[str] = None  # "write", "read", "skip"
+        self.mode: str | None = None  # "write", "read", "skip"
 
     def store(self, k: torch.Tensor, v: torch.Tensor):
         if self.k_cache is None:
@@ -186,7 +186,7 @@ class GlmImageKVCache:
     def __getitem__(self, layer_idx: int) -> GlmImageLayerKVCache:
         return self.caches[layer_idx]
 
-    def set_mode(self, mode: Optional[str]):
+    def set_mode(self, mode: str | None):
         if mode is not None and mode not in ["write", "read", "skip"]:
             raise ValueError(f"Invalid mode: {mode}, must be one of 'write', 'read', 'skip'")
         for cache in self.caches:
@@ -218,10 +218,10 @@ class GlmImageAttnProcessor:
         attn: Attention,
         hidden_states: torch.Tensor,
         encoder_hidden_states: torch.Tensor,
-        attention_mask: Optional[torch.Tensor] = None,
-        image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
-        kv_cache: Optional[GlmImageLayerKVCache] = None,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        attention_mask: torch.Tensor | None = None,
+        image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | None = None,
+        kv_cache: GlmImageLayerKVCache | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         dtype = encoder_hidden_states.dtype
 
         batch_size, text_seq_length, embed_dim = encoder_hidden_states.shape
@@ -330,14 +330,12 @@ class GlmImageTransformerBlock(nn.Module):
         self,
         hidden_states: torch.Tensor,
         encoder_hidden_states: torch.Tensor,
-        temb: Optional[torch.Tensor] = None,
-        image_rotary_emb: Optional[
-            Union[Tuple[torch.Tensor, torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]]
-        ] = None,
-        attention_mask: Optional[Dict[str, torch.Tensor]] = None,
-        attention_kwargs: Optional[Dict[str, Any]] = None,
-        kv_cache: 
Optional[GlmImageLayerKVCache] = None, - ) -> Tuple[torch.Tensor, torch.Tensor]: + temb: torch.Tensor | None = None, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | list[tuple[torch.Tensor, torch.Tensor]] | None = None, + attention_mask: dict[str, torch.Tensor] | None = None, + attention_kwargs: dict[str, Any] | None = None, + kv_cache: GlmImageLayerKVCache | None = None, + ) -> tuple[torch.Tensor, torch.Tensor]: # 1. Timestep conditioning ( norm_hidden_states, @@ -388,7 +386,7 @@ class GlmImageRotaryPosEmbed(nn.Module): self.patch_size = patch_size self.theta = theta - def forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + def forward(self, hidden_states: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: batch_size, num_channels, height, width = hidden_states.shape height, width = height // self.patch_size, width // self.patch_size @@ -553,14 +551,12 @@ class GlmImageTransformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, Cach timestep: torch.LongTensor, target_size: torch.Tensor, crop_coords: torch.Tensor, - attention_kwargs: Optional[Dict[str, Any]] = None, + attention_kwargs: dict[str, Any] | None = None, return_dict: bool = True, - attention_mask: Optional[torch.Tensor] = None, - kv_caches: Optional[GlmImageKVCache] = None, - image_rotary_emb: Optional[ - Union[Tuple[torch.Tensor, torch.Tensor], List[Tuple[torch.Tensor, torch.Tensor]]] - ] = None, - ) -> Union[Tuple[torch.Tensor], Transformer2DModelOutput]: + attention_mask: torch.Tensor | None = None, + kv_caches: GlmImageKVCache | None = None, + image_rotary_emb: tuple[torch.Tensor, torch.Tensor] | list[tuple[torch.Tensor, torch.Tensor]] | None = None, + ) -> tuple[torch.Tensor] | Transformer2DModelOutput: batch_size, num_channels, height, width = hidden_states.shape # 1. RoPE diff --git a/src/diffusers/pipelines/glm_image/pipeline_glm_image.py b/src/diffusers/pipelines/glm_image/pipeline_glm_image.py index 5499b8769f..ce200a0554 100644 --- a/src/diffusers/pipelines/glm_image/pipeline_glm_image.py +++ b/src/diffusers/pipelines/glm_image/pipeline_glm_image.py @@ -15,7 +15,7 @@ import inspect import re -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import Any, Callable import numpy as np import PIL @@ -79,10 +79,10 @@ def calculate_shift( # Copied from diffusers.pipelines.cogview4.pipeline_cogview4.retrieve_timesteps def retrieve_timesteps( scheduler, - num_inference_steps: Optional[int] = None, - device: Optional[Union[str, torch.device]] = None, - timesteps: Optional[List[int]] = None, - sigmas: Optional[List[float]] = None, + num_inference_steps: int | None = None, + device: str | torch.device | None = None, + timesteps: list[int] | None = None, + sigmas: list[float] | None = None, **kwargs, ): r""" @@ -97,10 +97,10 @@ def retrieve_timesteps( must be `None`. device (`str` or `torch.device`, *optional*): The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. - timesteps (`List[int]`, *optional*): + timesteps (`list[int]`, *optional*): Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed, `num_inference_steps` and `sigmas` must be `None`. - sigmas (`List[float]`, *optional*): + sigmas (`list[float]`, *optional*): Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed, `num_inference_steps` and `timesteps` must be `None`. 
@@ -146,7 +146,7 @@ def retrieve_timesteps( # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.retrieve_latents def retrieve_latents( - encoder_output: torch.Tensor, generator: Optional[torch.Generator] = None, sample_mode: str = "sample" + encoder_output: torch.Tensor, generator: torch.Generator | None = None, sample_mode: str = "sample" ): if hasattr(encoder_output, "latent_dist") and sample_mode == "sample": return encoder_output.latent_dist.sample(generator) @@ -265,8 +265,8 @@ class GlmImagePipeline(DiffusionPipeline): prompt: str, height: int, width: int, - image: Optional[List[PIL.Image.Image]] = None, - device: Optional[torch.device] = None, + image: list[PIL.Image.Image] | None = None, + device: torch.device | None = None, ): device = device or self._execution_device is_text_to_image = image is None or len(image) == 0 @@ -327,10 +327,10 @@ class GlmImagePipeline(DiffusionPipeline): def _get_glyph_embeds( self, - prompt: Union[str, List[str]] = None, + prompt: str | list[str] = None, max_sequence_length: int = 2048, - device: Optional[torch.device] = None, - dtype: Optional[torch.dtype] = None, + device: torch.device | None = None, + dtype: torch.dtype | None = None, ): device = device or self._execution_device dtype = dtype or self.text_encoder.dtype @@ -359,20 +359,20 @@ class GlmImagePipeline(DiffusionPipeline): def encode_prompt( self, - prompt: Union[str, List[str]], + prompt: str | list[str], do_classifier_free_guidance: bool = True, num_images_per_prompt: int = 1, - prompt_embeds: Optional[torch.Tensor] = None, - negative_prompt_embeds: Optional[torch.Tensor] = None, - device: Optional[torch.device] = None, - dtype: Optional[torch.dtype] = None, + prompt_embeds: torch.Tensor | None = None, + negative_prompt_embeds: torch.Tensor | None = None, + device: torch.device | None = None, + dtype: torch.dtype | None = None, max_sequence_length: int = 2048, ): r""" Encodes the prompt into text encoder hidden states. Args: - prompt (`str` or `List[str]`, *optional*): + prompt (`str` or `list[str]`, *optional*): prompt to be encoded do_classifier_free_guidance (`bool`, *optional*, defaults to `True`): Whether to use classifier free guidance or not. 
@@ -527,40 +527,43 @@ class GlmImagePipeline(DiffusionPipeline): @replace_example_docstring(EXAMPLE_DOC_STRING) def __call__( self, - prompt: Optional[Union[str, List[str]]] = None, - image: Optional[ - Union[ - torch.Tensor, PIL.Image.Image, np.ndarray, List[torch.Tensor], List[PIL.Image.Image], List[np.ndarray] - ] - ] = None, - height: Optional[int] = None, - width: Optional[int] = None, + prompt: str | list[str] | None = None, + image: torch.Tensor + | PIL.Image.Image + | np.ndarray + | list[torch.Tensor] + | list[PIL.Image.Image] + | list[np.ndarray] + | None = None, + height: int | None = None, + width: int | None = None, num_inference_steps: int = 50, - timesteps: Optional[List[int]] = None, - sigmas: Optional[List[float]] = None, + timesteps: list[int] | None = None, + sigmas: list[float] | None = None, guidance_scale: float = 1.5, num_images_per_prompt: int = 1, - generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, - latents: Optional[torch.FloatTensor] = None, - prompt_embeds: Optional[torch.Tensor] = None, - negative_prompt_embeds: Optional[torch.Tensor] = None, - prior_token_ids: Optional[torch.FloatTensor] = None, - prior_image_token_ids: Optional[torch.Tensor] = None, - crops_coords_top_left: Tuple[int, int] = (0, 0), + generator: torch.Generator | list[torch.Generator] | None = None, + latents: torch.FloatTensor | None = None, + prompt_embeds: torch.Tensor | None = None, + negative_prompt_embeds: torch.Tensor | None = None, + prior_token_ids: torch.FloatTensor | None = None, + prior_image_token_ids: torch.Tensor | None = None, + crops_coords_top_left: tuple[int, int] = (0, 0), output_type: str = "pil", return_dict: bool = True, - attention_kwargs: Optional[Dict[str, Any]] = None, - callback_on_step_end: Optional[ - Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks] - ] = None, - callback_on_step_end_tensor_inputs: List[str] = ["latents"], + attention_kwargs: dict[str, Any] | None = None, + callback_on_step_end: Callable[[int, int, dict], None] + | PipelineCallback + | MultiPipelineCallbacks + | None = None, + callback_on_step_end_tensor_inputs: list[str] = ["latents"], max_sequence_length: int = 2048, - ) -> Union[GlmImagePipelineOutput, Tuple]: + ) -> GlmImagePipelineOutput | tuple: """ Function invoked when calling the pipeline for generation. Args: - prompt (`str` or `List[str]`, *optional*): + prompt (`str` or `list[str]`, *optional*): The prompt or prompts to guide the image generation. Must contain shape info in the format 'H W' where H and W are token dimensions (d32). Example: "A beautiful sunset36 24" generates a 1152x768 image. diff --git a/src/diffusers/pipelines/glm_image/pipeline_output.py b/src/diffusers/pipelines/glm_image/pipeline_output.py index aec5a5454e..d4fd061335 100644 --- a/src/diffusers/pipelines/glm_image/pipeline_output.py +++ b/src/diffusers/pipelines/glm_image/pipeline_output.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from typing import List, Union import numpy as np import PIL.Image @@ -18,4 +17,4 @@ class GlmImagePipelineOutput(BaseOutput): num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline. 
""" - images: Union[List[PIL.Image.Image], np.ndarray] + images: list[PIL.Image.Image] | np.ndarray diff --git a/src/diffusers/pipelines/pipeline_utils.py b/src/diffusers/pipelines/pipeline_utils.py index 0f83d48adf..66cb4b3ce0 100644 --- a/src/diffusers/pipelines/pipeline_utils.py +++ b/src/diffusers/pipelines/pipeline_utils.py @@ -128,7 +128,7 @@ class ImagePipelineOutput(BaseOutput): num_channels)`. """ - images: Union[List[PIL.Image.Image], np.ndarray] + images: list[PIL.Image.Image] | np.ndarray @dataclass @@ -1171,7 +1171,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin): accelerate.hooks.remove_hook_from_module(model, recurse=True) self._all_hooks = [] - def enable_model_cpu_offload(self, gpu_id: int | None = None, device: Union[torch.device, str] = None): + def enable_model_cpu_offload(self, gpu_id: int | None = None, device: torch.device | str = None): r""" Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the accelerator when its @@ -1289,7 +1289,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin): # make sure the model is in the same state as before calling it self.enable_model_cpu_offload(device=getattr(self, "_offload_device", "cuda")) - def enable_sequential_cpu_offload(self, gpu_id: int | None = None, device: Union[torch.device, str] = None): + def enable_sequential_cpu_offload(self, gpu_id: int | None = None, device: torch.device | str = None): r""" Offloads all models to CPU using 🤗 Accelerate, significantly reducing memory usage. When called, the state dicts of all `torch.nn.Module` components (except those in `self._exclude_from_cpu_offload`) are saved to CPU @@ -1498,7 +1498,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin): @classmethod @validate_hf_hub_args - def download(cls, pretrained_model_name, **kwargs) -> Union[str, os.PathLike]: + def download(cls, pretrained_model_name, **kwargs) -> str | os.PathLike: r""" Download and cache a PyTorch diffusion pipeline from pretrained pipeline weights. @@ -1880,7 +1880,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin): return signature_types @property - def parameters(self) -> Dict[str, Any]: + def parameters(self) -> dict[str, Any]: r""" The `self.parameters` property can be useful to run different pipelines with the same weights and configurations without reallocating additional memory. @@ -1910,7 +1910,7 @@ class DiffusionPipeline(ConfigMixin, PushToHubMixin): return pipeline_parameters @property - def components(self) -> Dict[str, Any]: + def components(self) -> dict[str, Any]: r""" The `self.components` property can be useful to run different pipelines with the same weights and configurations without reallocating additional memory. diff --git a/tests/fixtures/custom_pipeline/pipeline.py b/tests/fixtures/custom_pipeline/pipeline.py index dbac72b385..2d569d0f8a 100644 --- a/tests/fixtures/custom_pipeline/pipeline.py +++ b/tests/fixtures/custom_pipeline/pipeline.py @@ -15,8 +15,6 @@ # limitations under the License. 
-from typing import Tuple, Union - import torch from diffusers import DiffusionPipeline, ImagePipelineOutput, SchedulerMixin, UNet2DModel @@ -47,7 +45,7 @@ class CustomLocalPipeline(DiffusionPipeline): output_type: str | None = "pil", return_dict: bool = True, **kwargs, - ) -> Union[ImagePipelineOutput, Tuple]: + ) -> ImagePipelineOutput | tuple: r""" Args: batch_size (`int`, *optional*, defaults to 1): diff --git a/tests/fixtures/custom_pipeline/what_ever.py b/tests/fixtures/custom_pipeline/what_ever.py index 8856d80b37..afb140f64d 100644 --- a/tests/fixtures/custom_pipeline/what_ever.py +++ b/tests/fixtures/custom_pipeline/what_ever.py @@ -15,8 +15,6 @@ # limitations under the License. -from typing import Tuple, Union - import torch from diffusers import SchedulerMixin, UNet2DModel @@ -48,7 +46,7 @@ class CustomLocalPipeline(DiffusionPipeline): output_type: str | None = "pil", return_dict: bool = True, **kwargs, - ) -> Union[ImagePipelineOutput, Tuple]: + ) -> ImagePipelineOutput | tuple: r""" Args: batch_size (`int`, *optional*, defaults to 1): diff --git a/tests/models/test_modeling_common.py b/tests/models/test_modeling_common.py index b9dfe93233..1b1a51d1e2 100644 --- a/tests/models/test_modeling_common.py +++ b/tests/models/test_modeling_common.py @@ -26,7 +26,7 @@ import unittest import unittest.mock as mock import uuid from collections import defaultdict -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Tuple import numpy as np import pytest @@ -168,8 +168,8 @@ def named_persistent_module_tensors( def compute_module_persistent_sizes( model: nn.Module, - dtype: Optional[Union[str, torch.device]] = None, - special_dtypes: Optional[Dict[str, Union[str, torch.device]]] = None, + dtype: str | torch.device | None = None, + special_dtypes: dict[str, str | torch.device] | None = None, ): """ Compute the size of each submodule of a given model (parameters + persistent buffers). diff --git a/tests/modular_pipelines/test_modular_pipelines_common.py b/tests/modular_pipelines/test_modular_pipelines_common.py index 661fcc2537..649bac591e 100644 --- a/tests/modular_pipelines/test_modular_pipelines_common.py +++ b/tests/modular_pipelines/test_modular_pipelines_common.py @@ -1,6 +1,6 @@ import gc import tempfile -from typing import Callable, Union +from typing import Callable import pytest import torch @@ -36,7 +36,7 @@ class ModularPipelineTesterMixin: return generator @property - def pipeline_class(self) -> Union[Callable, ModularPipeline]: + def pipeline_class(self) -> Callable | ModularPipeline: raise NotImplementedError( "You need to set the attribute `pipeline_class = ClassNameOfPipeline` in the child test class. " "See existing pipeline tests for reference." @@ -49,7 +49,7 @@ class ModularPipelineTesterMixin: ) @property - def pipeline_blocks_class(self) -> Union[Callable, ModularPipelineBlocks]: + def pipeline_blocks_class(self) -> Callable | ModularPipelineBlocks: raise NotImplementedError( "You need to set the attribute `pipeline_blocks_class = ClassNameOfPipelineBlocks` in the child test class. " "See existing pipeline tests for reference." 
diff --git a/tests/others/test_outputs.py b/tests/others/test_outputs.py index c8069e6916..90b8bfe946 100644 --- a/tests/others/test_outputs.py +++ b/tests/others/test_outputs.py @@ -1,7 +1,6 @@ import pickle as pkl import unittest from dataclasses import dataclass -from typing import List, Union import numpy as np import PIL.Image @@ -13,7 +12,7 @@ from ..testing_utils import require_torch @dataclass class CustomOutput(BaseOutput): - images: Union[List[PIL.Image.Image], np.ndarray] + images: list[PIL.Image.Image] | np.ndarray class ConfigTester(unittest.TestCase): diff --git a/tests/pipelines/cosmos/cosmos_guardrail.py b/tests/pipelines/cosmos/cosmos_guardrail.py index c9ef597fdb..e1d667608c 100644 --- a/tests/pipelines/cosmos/cosmos_guardrail.py +++ b/tests/pipelines/cosmos/cosmos_guardrail.py @@ -14,7 +14,6 @@ # ===== This file is an implementation of a dummy guardrail for the fast tests ===== -from typing import Union import numpy as np import torch @@ -35,7 +34,7 @@ class DummyCosmosSafetyChecker(ModelMixin, ConfigMixin): def check_video_safety(self, frames: np.ndarray) -> np.ndarray: return frames - def to(self, device: Union[str, torch.device] = None, dtype: torch.dtype = None): + def to(self, device: str | torch.device = None, dtype: torch.dtype = None): module = super().to(device=device, dtype=dtype) return module diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index 7db5f4da89..6bf4550887 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -5,7 +5,7 @@ import os import tempfile import unittest import uuid -from typing import Any, Callable, Dict, Union +from typing import Any, Callable, Dict import numpy as np import PIL.Image @@ -1071,7 +1071,7 @@ class PipelineTesterMixin: return generator @property - def pipeline_class(self) -> Union[Callable, DiffusionPipeline]: + def pipeline_class(self) -> Callable | DiffusionPipeline: raise NotImplementedError( "You need to set the attribute `pipeline_class = ClassNameOfPipeline` in the child test class. " "See existing pipeline tests for reference." diff --git a/tests/remote/test_remote_decode.py b/tests/remote/test_remote_decode.py index 27170cba08..e48ddcb174 100644 --- a/tests/remote/test_remote_decode.py +++ b/tests/remote/test_remote_decode.py @@ -14,7 +14,6 @@ # limitations under the License. import unittest -from typing import Tuple, Union import numpy as np import PIL.Image @@ -44,13 +43,13 @@ enable_full_determinism() class RemoteAutoencoderKLMixin: - shape: Tuple[int, ...] = None - out_hw: Tuple[int, int] = None + shape: tuple[int, ...] = None + out_hw: tuple[int, int] = None endpoint: str = None dtype: torch.dtype = None scaling_factor: float = None shift_factor: float = None - processor_cls: Union[VaeImageProcessor, VideoProcessor] = None + processor_cls: VaeImageProcessor | VideoProcessor = None output_pil_slice: torch.Tensor = None output_pt_slice: torch.Tensor = None partial_postprocess_return_pt_slice: torch.Tensor = None
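Every hunk in this patch applies the same mechanical rewrite: `typing.Optional`/`Union`/`List`/`Dict`/`Tuple` become PEP 604 unions (`X | Y`, `X | None`) and PEP 585 builtin generics (`list`, `dict`, `tuple`). A minimal sketch of the pattern, assuming the target interpreter is Python 3.10+ (or that the new syntax appears only in annotations, where `from __future__ import annotations` also makes it safe on 3.9); the `encode` helper below is hypothetical and exists only to illustrate the rewrite:

    from typing import Any

    # Before: def encode(prompt: Optional[Union[str, List[str]]] = None,
    #                    extra: Optional[Dict[str, Any]] = None) -> Tuple[int, int]: ...
    # After, in the style used throughout this patch:
    def encode(
        prompt: str | list[str] | None = None, extra: dict[str, Any] | None = None
    ) -> tuple[int, int]:
        # The body is irrelevant to the typing change; return dummy counts.
        prompts = [prompt] if isinstance(prompt, str) else (prompt or [])
        return len(prompts), len(extra or {})

One caveat the conversion has to respect: dropping `Optional` must not silently narrow a signature, so any parameter whose body still accepts `None` (for example `GlmImageKVCache.set_mode`) keeps an explicit `| None` member.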