mirror of
https://github.com/huggingface/diffusers.git
synced 2026-01-29 07:22:12 +03:00
rename text2image pipeline
This commit is contained in:
@@ -10,9 +10,11 @@ from transformers import T5EncoderModel, T5TokenizerFast
|
||||
from diffusers import (
|
||||
AutoencoderKLCosmos,
|
||||
AutoencoderKLWan,
|
||||
CosmosTextToImagePipeline,
|
||||
Cosmos2TextToImagePipeline,
|
||||
Cosmos2VideoToWorldPipeline,
|
||||
CosmosTextToWorldPipeline,
|
||||
CosmosTransformer3DModel,
|
||||
CosmosVideoToWorldPipeline,
|
||||
EDMEulerScheduler,
|
||||
)
|
||||
|
||||
@@ -412,7 +414,8 @@ def save_pipeline_cosmos_1_0(args, transformer, vae):
|
||||
final_sigmas_type="sigma_min",
|
||||
)
|
||||
|
||||
pipe = CosmosTextToWorldPipeline(
|
||||
pipe_cls = CosmosTextToWorldPipeline if "Text2World" in args.transformer_type else CosmosVideoToWorldPipeline
|
||||
pipe = pipe_cls(
|
||||
text_encoder=text_encoder,
|
||||
tokenizer=tokenizer,
|
||||
transformer=transformer,
|
||||
@@ -438,7 +441,8 @@ def save_pipeline_cosmos_2_0(args, transformer, vae):
|
||||
use_flow_sigmas=True,
|
||||
)
|
||||
|
||||
pipe = CosmosTextToImagePipeline(
|
||||
pipe_cls = Cosmos2TextToImagePipeline if "Text2Image" in args.transformer_type else Cosmos2VideoToWorldPipeline
|
||||
pipe = pipe_cls(
|
||||
text_encoder=text_encoder,
|
||||
tokenizer=tokenizer,
|
||||
transformer=transformer,
|
||||
|
||||
@@ -361,8 +361,8 @@ else:
|
||||
"CogView4ControlPipeline",
|
||||
"CogView4Pipeline",
|
||||
"ConsisIDPipeline",
|
||||
"Cosmos2TextToImagePipeline",
|
||||
"Cosmos2VideoToWorldPipeline",
|
||||
"CosmosTextToImagePipeline",
|
||||
"CosmosTextToWorldPipeline",
|
||||
"CosmosVideoToWorldPipeline",
|
||||
"CycleDiffusionPipeline",
|
||||
@@ -951,8 +951,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
CogView4ControlPipeline,
|
||||
CogView4Pipeline,
|
||||
ConsisIDPipeline,
|
||||
Cosmos2TextToImagePipeline,
|
||||
Cosmos2VideoToWorldPipeline,
|
||||
CosmosTextToImagePipeline,
|
||||
CosmosTextToWorldPipeline,
|
||||
CosmosVideoToWorldPipeline,
|
||||
CycleDiffusionPipeline,
|
||||
|
||||
@@ -158,7 +158,7 @@ else:
|
||||
_import_structure["cogview4"] = ["CogView4Pipeline", "CogView4ControlPipeline"]
|
||||
_import_structure["consisid"] = ["ConsisIDPipeline"]
|
||||
_import_structure["cosmos"] = [
|
||||
"CosmosTextToImagePipeline",
|
||||
"Cosmos2TextToImagePipeline",
|
||||
"CosmosTextToWorldPipeline",
|
||||
"CosmosVideoToWorldPipeline",
|
||||
"Cosmos2VideoToWorldPipeline",
|
||||
@@ -565,8 +565,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
StableDiffusionXLControlNetXSPipeline,
|
||||
)
|
||||
from .cosmos import (
|
||||
Cosmos2TextToImagePipeline,
|
||||
Cosmos2VideoToWorldPipeline,
|
||||
CosmosTextToImagePipeline,
|
||||
CosmosTextToWorldPipeline,
|
||||
CosmosVideoToWorldPipeline,
|
||||
)
|
||||
|
||||
@@ -22,8 +22,8 @@ except OptionalDependencyNotAvailable:
|
||||
|
||||
_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
|
||||
else:
|
||||
_import_structure["pipeline_cosmos2_text2image"] = ["Cosmos2TextToImagePipeline"]
|
||||
_import_structure["pipeline_cosmos2_video2world"] = ["Cosmos2VideoToWorldPipeline"]
|
||||
_import_structure["pipeline_cosmos_text2image"] = ["CosmosTextToImagePipeline"]
|
||||
_import_structure["pipeline_cosmos_text2world"] = ["CosmosTextToWorldPipeline"]
|
||||
_import_structure["pipeline_cosmos_video2world"] = ["CosmosVideoToWorldPipeline"]
|
||||
|
||||
@@ -35,8 +35,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
||||
except OptionalDependencyNotAvailable:
|
||||
from ...utils.dummy_torch_and_transformers_objects import *
|
||||
else:
|
||||
from .pipeline_cosmos2_text2image import Cosmos2TextToImagePipeline
|
||||
from .pipeline_cosmos2_video2world import Cosmos2VideoToWorldPipeline
|
||||
from .pipeline_cosmos_text2image import CosmosTextToImagePipeline
|
||||
from .pipeline_cosmos_text2world import CosmosTextToWorldPipeline
|
||||
from .pipeline_cosmos_video2world import CosmosVideoToWorldPipeline
|
||||
|
||||
|
||||
@@ -54,11 +54,11 @@ EXAMPLE_DOC_STRING = """
|
||||
Examples:
|
||||
```python
|
||||
>>> import torch
|
||||
>>> from diffusers import CosmosTextToImagePipeline
|
||||
>>> from diffusers import Cosmos2TextToImagePipeline
|
||||
|
||||
>>> # Available checkpoints: nvidia/Cosmos-Predict2-2B-Text2Image, nvidia/Cosmos-Predict2-14B-Text2Image
|
||||
>>> model_id = "nvidia/Cosmos-Predict2-2B-Text2Image"
|
||||
>>> pipe = CosmosTextToImagePipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
|
||||
>>> pipe = Cosmos2TextToImagePipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
|
||||
>>> pipe.to("cuda")
|
||||
|
||||
>>> prompt = "A close-up shot captures a vibrant yellow scrubber vigorously working on a grimy plate, its bristles moving in circular motions to lift stubborn grease and food residue. The dish, once covered in remnants of a hearty meal, gradually reveals its original glossy surface. Suds form and bubble around the scrubber, creating a satisfying visual of cleanliness in progress. The sound of scrubbing fills the air, accompanied by the gentle clinking of the dish against the sink. As the scrubber continues its task, the dish transforms, gleaming under the bright kitchen lights, symbolizing the triumph of cleanliness over mess."
|
||||
@@ -132,7 +132,7 @@ def retrieve_timesteps(
|
||||
return timesteps, num_inference_steps
|
||||
|
||||
|
||||
class CosmosTextToImagePipeline(DiffusionPipeline):
|
||||
class Cosmos2TextToImagePipeline(DiffusionPipeline):
|
||||
r"""
|
||||
Pipeline for text-to-image generation using [Cosmos](https://github.com/NVIDIA/Cosmos).
|
||||
|
||||
@@ -637,6 +637,10 @@ class CosmosTextToImagePipeline(DiffusionPipeline):
|
||||
else:
|
||||
video = self.video_processor.postprocess_video(video, output_type=output_type)
|
||||
image = [batch[0] for batch in video]
|
||||
if isinstance(video, torch.Tensor):
|
||||
image = torch.stack(image)
|
||||
elif isinstance(video, np.ndarray):
|
||||
image = np.stack(image)
|
||||
else:
|
||||
image = latents[:, :, 0]
|
||||
|
||||
@@ -407,7 +407,7 @@ class ConsisIDPipeline(metaclass=DummyObject):
|
||||
requires_backends(cls, ["torch", "transformers"])
|
||||
|
||||
|
||||
class CosmosTextToImagePipeline(metaclass=DummyObject):
|
||||
class Cosmos2TextToImagePipeline(metaclass=DummyObject):
|
||||
_backends = ["torch", "transformers"]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
|
||||
Reference in New Issue
Block a user