diff --git a/src/diffusers/pipelines/hunyuan_video1_5/pipeline_hunyuan_video1_5_image2video.py b/src/diffusers/pipelines/hunyuan_video1_5/pipeline_hunyuan_video1_5_image2video.py index 1af717bc59..9e9f20c79e 100644 --- a/src/diffusers/pipelines/hunyuan_video1_5/pipeline_hunyuan_video1_5_image2video.py +++ b/src/diffusers/pipelines/hunyuan_video1_5/pipeline_hunyuan_video1_5_image2video.py @@ -92,7 +92,7 @@ def format_text_input(prompt: List[str], system_message: str) -> List[Dict[str, return template -# Copied from diffusers.pipelines.hunyuan_image.pipeline_hunyuanimage.extract_glyph_text +# Copied from diffusers.pipelines.hunyuan_video1_5.pipeline_hunyuan_video1_5.extract_glyph_texts def extract_glyph_texts(prompt: str) -> List[str]: """ Extract glyph texts from prompt using regex pattern. @@ -281,7 +281,7 @@ class HunyuanVideo15ImageToVideoPipeline(DiffusionPipeline): text_encoder: Qwen2_5_VLTextModel, tokenizer: Qwen2Tokenizer, prompt: Union[str, List[str]], - device: Optional[torch.device] = None, + device: torch.device, tokenizer_max_length: int = 1000, num_hidden_layers_to_skip: int = 2, # fmt: off @@ -494,10 +494,10 @@ class HunyuanVideo15ImageToVideoPipeline(DiffusionPipeline): prompt_embeds_mask_2 = prompt_embeds_mask_2.repeat(1, num_videos_per_prompt, 1) prompt_embeds_mask_2 = prompt_embeds_mask_2.view(batch_size * num_videos_per_prompt, seq_len_2) - prompt_embeds = prompt_embeds.to(device=device, dtype=dtype) - prompt_embeds_mask = prompt_embeds_mask.to(device=device, dtype=dtype) - prompt_embeds_2 = prompt_embeds_2.to(device=device, dtype=dtype) - prompt_embeds_mask_2 = prompt_embeds_mask_2.to(device=device, dtype=dtype) + prompt_embeds = prompt_embeds.to(dtype=dtype, device=device) + prompt_embeds_mask = prompt_embeds_mask.to(dtype=dtype, device=device) + prompt_embeds_2 = prompt_embeds_2.to(dtype=dtype, device=device) + prompt_embeds_mask_2 = prompt_embeds_mask_2.to(dtype=dtype, device=device) return prompt_embeds, prompt_embeds_mask, prompt_embeds_2, prompt_embeds_mask_2 diff --git a/src/diffusers/utils/dummy_pt_objects.py b/src/diffusers/utils/dummy_pt_objects.py index 73854b3819..fe9a4b30f0 100644 --- a/src/diffusers/utils/dummy_pt_objects.py +++ b/src/diffusers/utils/dummy_pt_objects.py @@ -468,6 +468,21 @@ class AutoencoderKLHunyuanVideo(metaclass=DummyObject): requires_backends(cls, ["torch"]) +class AutoencoderKLHunyuanVideo15(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + class AutoencoderKLLTXVideo(metaclass=DummyObject): _backends = ["torch"] @@ -993,6 +1008,21 @@ class HunyuanImageTransformer2DModel(metaclass=DummyObject): requires_backends(cls, ["torch"]) +class HunyuanVideo15Transformer3DModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch"]) + + class HunyuanVideoFramepackTransformer3DModel(metaclass=DummyObject): _backends = ["torch"] diff --git a/src/diffusers/utils/dummy_torch_and_transformers_objects.py b/src/diffusers/utils/dummy_torch_and_transformers_objects.py index e6cf26a125..65306b8390 100644 --- a/src/diffusers/utils/dummy_torch_and_transformers_objects.py +++ b/src/diffusers/utils/dummy_torch_and_transformers_objects.py @@ -1142,6 +1142,36 @@ class HunyuanSkyreelsImageToVideoPipeline(metaclass=DummyObject): requires_backends(cls, ["torch", "transformers"]) +class HunyuanVideo15ImageToVideoPipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + +class HunyuanVideo15Pipeline(metaclass=DummyObject): + _backends = ["torch", "transformers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch", "transformers"]) + + @classmethod + def from_config(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["torch", "transformers"]) + + class HunyuanVideoFramepackPipeline(metaclass=DummyObject): _backends = ["torch", "transformers"]