Mirror of https://github.com/huggingface/diffusers.git, synced 2026-01-27 17:22:53 +03:00
Author: sayakpaul
Date: 2026-01-12 13:57:55 +05:30
Parent: f9f6758533
Commit: db627652b1
23 changed files with 42 additions and 44 deletions

View File

@@ -144,7 +144,7 @@
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-implied, including, without limitation, any warranties or conditions
+implied, including, without limitation, Any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any

View File

@@ -121,7 +121,7 @@ from showone_unet_3d_condition import ShowOneUNet3DConditionModel
unet = ShowOneUNet3DConditionModel.from_pretrained(pipeline_id, subfolder="unet")
```
-3. Load the custom pipeline code (already implemented in [pipeline_t2v_base_pixel.py](https://huggingface.co/sayakpaul/show-1-base-with-code/blob/main/pipeline_t2v_base_pixel.py)). This script contains a custom `TextToVideoIFPipeline` class for generating videos from text. Like the custom UNet, any code required for `TextToVideIFPipeline` should be placed in `pipeline_t2v_base_pixel.py`.
+3. Load the custom pipeline code (already implemented in [pipeline_t2v_base_pixel.py](https://huggingface.co/sayakpaul/show-1-base-with-code/blob/main/pipeline_t2v_base_pixel.py)). This script contains a custom `TextToVideoIFPipeline` class for generating videos from text. Like the custom UNet, Any code required for `TextToVideIFPipeline` should be placed in `pipeline_t2v_base_pixel.py`.
Initialize `TextToVideoIFPipeline` with `ShowOneUNet3DConditionModel`.
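For context, a rough sketch of the initialization step this hunk's doc describes, assuming the repo layout named above and that `pipeline_t2v_base_pixel.py` is on the Python path (the full guide may load the pipeline slightly differently):
```python
import torch

from showone_unet_3d_condition import ShowOneUNet3DConditionModel
from pipeline_t2v_base_pixel import TextToVideoIFPipeline

pipeline_id = "sayakpaul/show-1-base-with-code"

# Custom UNet from step 2 of the diffed guide.
unet = ShowOneUNet3DConditionModel.from_pretrained(pipeline_id, subfolder="unet")

# Pass the custom UNet in as a component override; the remaining components
# (text encoder, tokenizer, scheduler) are loaded from the checkpoint as usual.
pipeline = TextToVideoIFPipeline.from_pretrained(
    pipeline_id, unet=unet, torch_dtype=torch.float16
)
```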

View File

@@ -103,7 +103,7 @@ model = UNet2DConditionModel.from_single_file(ckpt_path, upcast_attention=True)
### Local files
-The [`~loaders.FromSingleFileMixin.from_single_file`] method attempts to configure a pipeline or model by inferring the model type from the keys in the checkpoint file. For example, any single file checkpoint based on the Stable Diffusion XL base model is configured from [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0).
+The [`~loaders.FromSingleFileMixin.from_single_file`] method attempts to configure a pipeline or model by inferring the model type from the keys in the checkpoint file. For example, Any single file checkpoint based on the Stable Diffusion XL base model is configured from [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0).
If you're working with local files, download the config files with the [`~huggingface_hub.snapshot_download`] method and the model checkpoint with [`~huggingface_hub.hf_hub_download`]. These files are downloaded to your [cache directory](https://huggingface.co/docs/huggingface_hub/en/guides/manage-cache), but you can download them to a specific directory with the `local_dir` argument.
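A minimal sketch of the local-files workflow this hunk documents, assuming the SDXL base repo named above and its standard checkpoint filename (`config=` here is assumed to accept the directory `snapshot_download` returns):
```python
from huggingface_hub import hf_hub_download, snapshot_download
from diffusers import StableDiffusionXLPipeline

# Model checkpoint, downloaded to a specific directory instead of the cache.
ckpt_path = hf_hub_download(
    repo_id="stabilityai/stable-diffusion-xl-base-1.0",
    filename="sd_xl_base_1.0.safetensors",
    local_dir="./checkpoints",
)

# Config files the loader would otherwise infer and fetch from the Hub.
config_dir = snapshot_download(
    repo_id="stabilityai/stable-diffusion-xl-base-1.0",
    allow_patterns=["*.json", "**/*.json", "*.txt", "**/*.txt"],
    local_dir="./sdxl-config",
)

pipeline = StableDiffusionXLPipeline.from_single_file(ckpt_path, config=config_dir)
```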

View File

@@ -945,7 +945,7 @@ class TokenEmbeddingsHandler:
new_token_embeddings = embeds.weight.data[train_ids]
# New tokens for each text encoder are saved under "clip_l" (for text_encoder 0),
-# Note: When loading with diffusers, any name can work - simply specify in inference
+# Note: When loading with diffusers, Any name can work - simply specify in inference
tensors[idx_to_text_encoder_name[idx]] = new_token_embeddings
# tensors[f"text_encoders_{idx}"] = new_token_embeddings
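For context on what this training-script hunk (and the two similar ones below) touches, a self-contained sketch of the save step the comment describes; the embedding values here are placeholders, not real trained weights:
```python
import torch
from safetensors.torch import save_file

# Placeholder for embeds.weight.data[train_ids] in the real script.
new_token_embeddings = torch.randn(2, 768)

# The key name ("clip_l" for text_encoder 0) is a convention, not a
# requirement: as the diffed comment says, any name works as long as
# inference looks the tensor up under the same key.
save_file({"clip_l": new_token_embeddings}, "learned_embeds.safetensors")
```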

View File

@@ -801,7 +801,7 @@ class TokenEmbeddingsHandler:
# New tokens for each text encoder are saved under "clip_l" (for text_encoder 0), "clip_g" (for
# text_encoder 1) to keep compatible with the ecosystem.
-# Note: When loading with diffusers, any name can work - simply specify in inference
+# Note: When loading with diffusers, Any name can work - simply specify in inference
tensors[idx_to_text_encoder_name[idx]] = new_token_embeddings
# tensors[f"text_encoders_{idx}"] = new_token_embeddings

View File

@@ -966,7 +966,7 @@ class TokenEmbeddingsHandler:
# New tokens for each text encoder are saved under "clip_l" (for text_encoder 0), "clip_g" (for
# text_encoder 1) to keep compatible with the ecosystem.
-# Note: When loading with diffusers, any name can work - simply specify in inference
+# Note: When loading with diffusers, Any name can work - simply specify in inference
tensors[idx_to_text_encoder_name[idx]] = new_token_embeddings
# tensors[f"text_encoders_{idx}"] = new_token_embeddings

View File

@@ -231,7 +231,7 @@ images = generate(prompt, neg_prompt)
print(f"First inference in {time.time() - start}")
```
-From this point forward, any calls to generate should result in a faster inference
+From this point forward, Any calls to generate should result in a faster inference
time and it won't change.
```python
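The prose in this hunk describes JAX's compile-once behavior. A standalone sketch of the same effect, using a toy jitted function rather than the pipeline's actual `generate`:
```python
import time
import jax
import jax.numpy as jnp

@jax.jit
def generate(latents):
    # Stand-in for the pipeline call: any jitted function behaves the same way.
    return jnp.tanh(latents) * 2.0

latents = jnp.ones((4, 64, 64))

start = time.time()
generate(latents).block_until_ready()  # first call: traced and compiled by XLA
print(f"First inference in {time.time() - start}")

start = time.time()
generate(latents).block_until_ready()  # later calls reuse the cached executable
print(f"Inference in {time.time() - start}")
```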

View File

@@ -131,7 +131,7 @@ neg_prompt = "cartoon, illustration, animation. face. male, female"
images = generate(prompt, neg_prompt)
print(f"First inference in {time.time() - start}")
-# 9. From this point forward, any calls to generate should result in a faster inference
+# 9. From this point forward, Any calls to generate should result in a faster inference
# time and it won't change.
start = time.time()
prompt = "photo of a rhino dressed suit and tie sitting at a table in a bar with a bar stools, award winning photography, Elke vogelsang"

View File

@@ -1616,7 +1616,7 @@ class ModularPipeline(ConfigMixin, PushToHubMixin):
params[input_param.name] = input_param.default
return params
-def get_default_blocks_name(self, config_dict: Optional[dict[str, any]]) -> Optional[str]:
+def get_default_blocks_name(self, config_dict: Optional[dict[str, Any]]) -> Optional[str]:
return self.default_blocks_name
@classmethod
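The reason for this hunk and the similar ones below: lowercase `any` is the builtin function, not a type, so `dict[str, any]` happens not to raise at runtime but is rejected by static type checkers. A quick illustration:
```python
from typing import Any, Optional

# Lowercase `any` is the builtin any(iterable) -> bool, not a type.
# Annotations are ordinary expressions, so this does not raise at runtime,
# but type checkers reject `any` as invalid in a type position.
silently_wrong = dict[str, any]
print(any([0, 1]))  # True -- a bool-returning function, not a type

# The fixed form, matching the annotation in this hunk:
def get_default_blocks_name(config_dict: Optional[dict[str, Any]]) -> Optional[str]:
    return None
```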

View File

@@ -13,6 +13,7 @@
# limitations under the License.
from dataclasses import dataclass
+from typing import Any
import torch
import torch.nn as nn
@@ -27,10 +28,7 @@ from ...models.attention_processor import (
AttnAddedKVProcessor,
AttnProcessor,
)
-from ...models.embeddings import (
-    TimestepEmbedding,
-    Timesteps,
-)
+from ...models.embeddings import TimestepEmbedding, Timesteps
from ...models.modeling_utils import ModelMixin
from ...models.resnet import Downsample2D, ResnetBlock2D, Upsample2D
from ...models.transformers.transformer_2d import Transformer2DModel
@@ -619,7 +617,7 @@ class AudioLDM2UNet2DConditionModel(ModelMixin, AttentionMixin, ConfigMixin, UNe
class_labels: torch.Tensor | None = None,
timestep_cond: torch.Tensor | None = None,
attention_mask: torch.Tensor | None = None,
-cross_attention_kwargs: dict[str, any] | None = None,
+cross_attention_kwargs: dict[str, Any] | None = None,
encoder_attention_mask: torch.Tensor | None = None,
return_dict: bool = True,
encoder_hidden_states_1: torch.Tensor | None = None,
@@ -1029,7 +1027,7 @@ class CrossAttnDownBlock2D(nn.Module):
temb: torch.Tensor | None = None,
encoder_hidden_states: torch.Tensor | None = None,
attention_mask: torch.Tensor | None = None,
-cross_attention_kwargs: dict[str, any] | None = None,
+cross_attention_kwargs: dict[str, Any] | None = None,
encoder_attention_mask: torch.Tensor | None = None,
encoder_hidden_states_1: torch.Tensor | None = None,
encoder_attention_mask_1: torch.Tensor | None = None,
@@ -1191,7 +1189,7 @@ class UNetMidBlock2DCrossAttn(nn.Module):
temb: torch.Tensor | None = None,
encoder_hidden_states: torch.Tensor | None = None,
attention_mask: torch.Tensor | None = None,
-cross_attention_kwargs: dict[str, any] | None = None,
+cross_attention_kwargs: dict[str, Any] | None = None,
encoder_attention_mask: torch.Tensor | None = None,
encoder_hidden_states_1: torch.Tensor | None = None,
encoder_attention_mask_1: torch.Tensor | None = None,
@@ -1341,7 +1339,7 @@ class CrossAttnUpBlock2D(nn.Module):
res_hidden_states_tuple: tuple[torch.Tensor, ...],
temb: torch.Tensor | None = None,
encoder_hidden_states: torch.Tensor | None = None,
-cross_attention_kwargs: dict[str, any] | None = None,
+cross_attention_kwargs: dict[str, Any] | None = None,
upsample_size: int | None = None,
attention_mask: torch.Tensor | None = None,
encoder_attention_mask: torch.Tensor | None = None,
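All of the `cross_attention_kwargs` fixes in this file annotate the same thing: a free-form dict the pipeline forwards down to its attention processors, which is why `dict[str, Any]` is the honest type. A usage sketch of the same kwarg in a text-to-image pipeline (the LoRA repo id is a hypothetical placeholder):
```python
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")
pipe.load_lora_weights("user/some-lora")  # hypothetical placeholder repo

# Values range from floats (the LoRA scale here) to masks and tensors,
# depending on the attention processor -- hence dict[str, Any].
image = pipe(
    "an astronaut riding a horse",
    cross_attention_kwargs={"scale": 0.7},
).images[0]
```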

View File

@@ -1042,7 +1042,7 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
class_labels: torch.Tensor | None = None,
timestep_cond: torch.Tensor | None = None,
attention_mask: torch.Tensor | None = None,
-cross_attention_kwargs: dict[str, any] | None = None,
+cross_attention_kwargs: dict[str, Any] | None = None,
added_cond_kwargs: dict[str, torch.Tensor] | None = None,
down_block_additional_residuals: tuple[torch.Tensor] | None = None,
mid_block_additional_residual: torch.Tensor | None = None,
@@ -1698,7 +1698,7 @@ class CrossAttnDownBlockFlat(nn.Module):
temb: torch.Tensor | None = None,
encoder_hidden_states: torch.Tensor | None = None,
attention_mask: torch.Tensor | None = None,
-cross_attention_kwargs: dict[str, any] | None = None,
+cross_attention_kwargs: dict[str, Any] | None = None,
encoder_attention_mask: torch.Tensor | None = None,
additional_residuals: torch.Tensor | None = None,
) -> tuple[torch.Tensor, tuple[torch.Tensor, ...]]:

View File

@@ -436,7 +436,7 @@ def wrap_with_fsdp(
offload: bool = True,
use_orig_params: bool = True,
limit_all_gathers: bool = True,
-fsdp_kwargs: dict[str, any] | None = None,
+fsdp_kwargs: dict[str, Any] | None = None,
transformer_layer_cls: set[type[torch.nn.Module]] | None = None,
) -> FSDP:
"""
@@ -496,8 +496,8 @@ class EMAModel:
inv_gamma: float | int = 1.0,
power: float | int = 2 / 3,
foreach: bool = False,
-model_cls: any | None = None,
-model_config: dict[str, any] | None = None,
+model_cls: Any | None = None,
+model_config: dict[str, Any] | None = None,
**kwargs,
):
"""

View File

@@ -371,13 +371,13 @@ class HunyuanVideoImageToVideoPipelineFastTests(
# TODO(aryan): Create a dummy gemma model with smol vocab size
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_consistent(self):
pass
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_single_identical(self):
pass

View File

@@ -333,13 +333,13 @@ class HunyuanSkyreelsImageToVideoPipelineFastTests(
# TODO(aryan): Create a dummy gemma model with smol vocab size
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_consistent(self):
pass
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_single_identical(self):
pass

View File

@@ -346,13 +346,13 @@ class HunyuanVideoPipelineFastTests(
# TODO(aryan): Create a dummy gemma model with smol vocab size
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_consistent(self):
pass
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_single_identical(self):
pass

View File

@@ -392,13 +392,13 @@ class HunyuanVideoFramepackPipelineFastTests(
# TODO(aryan): Create a dummy gemma model with smol vocab size
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_consistent(self):
pass
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_single_identical(self):
pass

View File

@@ -325,13 +325,13 @@ class SanaPAGPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
# TODO(aryan): Create a dummy gemma model with smol vocab size
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_consistent(self):
pass
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_single_identical(self):
pass

View File

@@ -290,13 +290,13 @@ class SanaPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
# TODO(aryan): Create a dummy gemma model with smol vocab size
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_consistent(self):
pass
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_single_identical(self):
pass

View File

@@ -309,13 +309,13 @@ class SanaControlNetPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
# TODO(aryan): Create a dummy gemma model with smol vocab size
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_consistent(self):
pass
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_single_identical(self):
pass

View File

@@ -283,13 +283,13 @@ class SanaSprintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
# TODO(aryan): Create a dummy gemma model with smol vocab size
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_consistent(self):
pass
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_single_identical(self):
pass

View File

@@ -295,13 +295,13 @@ class SanaSprintImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
# TODO(aryan): Create a dummy gemma model with smol vocab size
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_consistent(self):
pass
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_single_identical(self):
pass

View File

@@ -185,13 +185,13 @@ class SanaVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
# TODO(aryan): Create a dummy gemma model with smol vocab size
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_consistent(self):
pass
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_single_identical(self):
pass

View File

@@ -196,13 +196,13 @@ class SanaImageToVideoPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
# TODO(aryan): Create a dummy gemma model with smol vocab size
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_consistent(self):
pass
@unittest.skip(
"A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
"A very small vocab size is used for fast tests. So, Any kind of prompt other than the empty default used in other tests will lead to a embedding lookup error. This test uses a long prompt that causes the error."
)
def test_inference_batch_single_identical(self):
pass