Mirror of https://github.com/huggingface/diffusers.git
chore: remove redundant words (#10609)
Signed-off-by: sunxunle <sunxunle@ampere.tech>
@@ -115,7 +115,7 @@ export_to_video(frames, "mochi.mp4", fps=30)
 
 ## Reproducing the results from the Genmo Mochi repo
 
-The [Genmo Mochi implementation](https://github.com/genmoai/mochi/tree/main) uses different precision values for each stage in the inference process. The text encoder and VAE use `torch.float32`, while the DiT uses `torch.bfloat16` with the [attention kernel](https://pytorch.org/docs/stable/generated/torch.nn.attention.sdpa_kernel.html#torch.nn.attention.sdpa_kernel) set to `EFFICIENT_ATTENTION`. Diffusers pipelines currently do not support setting different `dtypes` for different stages of the pipeline. In order to run inference in the same way as the the original implementation, please refer to the following example.
+The [Genmo Mochi implementation](https://github.com/genmoai/mochi/tree/main) uses different precision values for each stage in the inference process. The text encoder and VAE use `torch.float32`, while the DiT uses `torch.bfloat16` with the [attention kernel](https://pytorch.org/docs/stable/generated/torch.nn.attention.sdpa_kernel.html#torch.nn.attention.sdpa_kernel) set to `EFFICIENT_ATTENTION`. Diffusers pipelines currently do not support setting different `dtypes` for different stages of the pipeline. In order to run inference in the same way as the original implementation, please refer to the following example.
 
 <Tip>
 
 The original Mochi implementation zeros out empty prompts. However, enabling this option and placing the entire pipeline under autocast can lead to numerical overflows with the T5 text encoder.
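As a side note for this hunk, here is a minimal sketch of the kind of per-stage-precision recipe the paragraph refers to. It is an illustration rather than the exact documented example: the prompt and step count are made up, and it assumes `MochiPipeline.encode_prompt` returns the four prompt tensors that the pipeline call accepts.

```python
import torch
from torch.nn.attention import SDPBackend, sdpa_kernel

from diffusers import MochiPipeline
from diffusers.utils import export_to_video

# Load every component in float32 so the T5 text encoder and the VAE run at
# the precision used by the original Genmo repo.
pipe = MochiPipeline.from_pretrained("genmo/mochi-1-preview", torch_dtype=torch.float32)
pipe.enable_model_cpu_offload()
pipe.enable_vae_tiling()

prompt = "A slow pan across a misty forest at dawn"  # illustrative prompt

# Encode the prompt outside autocast, keeping the text encoder in float32.
with torch.no_grad():
    prompt_embeds, prompt_attention_mask, negative_prompt_embeds, negative_prompt_attention_mask = (
        pipe.encode_prompt(prompt=prompt)
    )

# Run denoising in bfloat16 with PyTorch's efficient SDPA kernel.
with torch.autocast("cuda", torch.bfloat16, cache_enabled=False):
    with sdpa_kernel(SDPBackend.EFFICIENT_ATTENTION):
        frames = pipe(
            prompt_embeds=prompt_embeds,
            prompt_attention_mask=prompt_attention_mask,
            negative_prompt_embeds=negative_prompt_embeds,
            negative_prompt_attention_mask=negative_prompt_attention_mask,
            num_inference_steps=64,
        ).frames[0]

export_to_video(frames, "mochi.mp4", fps=30)
```

Encoding the prompt before entering autocast keeps the T5 text encoder out of bfloat16, which is exactly the overflow scenario the tip warns about.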
@@ -73,7 +73,7 @@ def _download(url: str, root: str):
 
             loop.update(len(buffer))
 
     if insecure_hashlib.sha256(open(download_target, "rb").read()).hexdigest() != expected_sha256:
-        raise RuntimeError("Model has been downloaded but the SHA256 checksum does not not match")
+        raise RuntimeError("Model has been downloaded but the SHA256 checksum does not match")
 
     return download_target
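For readers outside the full file: `insecure_hashlib` is, as far as I can tell, a thin `hashlib` wrapper flagged with `usedforsecurity=False`, and the guard above compares the downloaded file's digest to an expected value. A self-contained sketch of the same check, using plain `hashlib` and a hypothetical `verify_sha256` helper that hashes in chunks rather than reading the whole file at once:

```python
import hashlib

# Hypothetical helper mirroring the checksum guard in _download.
def verify_sha256(path: str, expected_sha256: str) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # Update the digest in 8 KiB chunks to keep memory use flat.
        for chunk in iter(lambda: f.read(8192), b""):
            digest.update(chunk)
    if digest.hexdigest() != expected_sha256:
        raise RuntimeError("Model has been downloaded but the SHA256 checksum does not match")
    return path
```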
@@ -258,7 +258,7 @@ def get_polynomial_decay_schedule_with_warmup(
 
     lr_init = optimizer.defaults["lr"]
     if not (lr_init > lr_end):
-        raise ValueError(f"lr_end ({lr_end}) must be be smaller than initial lr ({lr_init})")
+        raise ValueError(f"lr_end ({lr_end}) must be smaller than initial lr ({lr_init})")
 
     def lr_lambda(current_step: int):
         if current_step < num_warmup_steps:
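`get_polynomial_decay_schedule_with_warmup` lives in `diffusers.optimization`, and the check above is what enforces `lr_end < lr_init`. A minimal usage sketch; the hyperparameter values are illustrative:

```python
import torch
from diffusers.optimization import get_polynomial_decay_schedule_with_warmup

model = torch.nn.Linear(8, 8)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

# Linear warmup for 100 steps, then polynomial decay from 1e-4 toward lr_end.
scheduler = get_polynomial_decay_schedule_with_warmup(
    optimizer,
    num_warmup_steps=100,
    num_training_steps=1_000,
    lr_end=1e-7,  # must stay below the optimizer's initial lr
    power=1.0,
)

for _ in range(1_000):
    optimizer.step()
    scheduler.step()
```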
@@ -158,7 +158,7 @@ class PAGMixin:
         ),
     ):
         r"""
-        Set the the self-attention layers to apply PAG. Raise ValueError if the input is invalid.
+        Set the self-attention layers to apply PAG. Raise ValueError if the input is invalid.
 
         Args:
             pag_applied_layers (`str` or `List[str]`):
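For context, `set_pag_applied_layers` is what runs when a pipeline is created with perturbed-attention guidance (PAG) enabled, and `pag_applied_layers` selects which self-attention layers are perturbed. A hedged usage sketch; the model id, layer identifier, and guidance scale are illustrative:

```python
import torch
from diffusers import AutoPipelineForText2Image

# Enable PAG and pick the self-attention layers it applies to.
pipe = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    enable_pag=True,
    pag_applied_layers=["mid"],  # valid identifiers are model-specific
    torch_dtype=torch.float16,
).to("cuda")

image = pipe("a photo of an astronaut riding a horse", pag_scale=3.0).images[0]
```

An unmatched layer identifier is what triggers the `ValueError` this docstring mentions.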
@@ -67,7 +67,7 @@ class VideoProcessor(VaeImageProcessor):
 
         # ensure the input is a list of videos:
         # - if it is a batch of videos (5d torch.Tensor or np.ndarray), it is converted to a list of videos (a list of 4d torch.Tensor or np.ndarray)
-        # - if it is is a single video, it is convereted to a list of one video.
+        # - if it is a single video, it is convereted to a list of one video.
         if isinstance(video, (np.ndarray, torch.Tensor)) and video.ndim == 5:
             video = list(video)
         elif isinstance(video, list) and is_valid_image(video[0]) or is_valid_image_imagelist(video):
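A small sketch of the normalization this comment describes, assuming a video is a 4d tensor shaped (frames, channels, height, width); the shapes are illustrative:

```python
import torch

# A batch of 2 videos: (batch, frames, channels, height, width).
batch = torch.rand(2, 8, 3, 16, 16)

# A 5d tensor is split along the batch dimension into a list of 4d videos.
videos = list(batch)
assert len(videos) == 2 and videos[0].ndim == 4

# A single 4d video is instead wrapped into a list of one video.
single = torch.rand(8, 3, 16, 16)
videos = [single]
assert len(videos) == 1 and videos[0].ndim == 4
```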