1
0
mirror of https://github.com/huggingface/diffusers.git synced 2026-01-29 07:22:12 +03:00

rename text2image pipeline

This commit is contained in:
Aryan
2025-06-10 23:31:09 +02:00
parent 9059a52b51
commit 829545df04
6 changed files with 21 additions and 13 deletions

View File

@@ -10,9 +10,11 @@ from transformers import T5EncoderModel, T5TokenizerFast
from diffusers import (
AutoencoderKLCosmos,
AutoencoderKLWan,
CosmosTextToImagePipeline,
Cosmos2TextToImagePipeline,
Cosmos2VideoToWorldPipeline,
CosmosTextToWorldPipeline,
CosmosTransformer3DModel,
CosmosVideoToWorldPipeline,
EDMEulerScheduler,
)
@@ -412,7 +414,8 @@ def save_pipeline_cosmos_1_0(args, transformer, vae):
final_sigmas_type="sigma_min",
)
pipe = CosmosTextToWorldPipeline(
pipe_cls = CosmosTextToWorldPipeline if "Text2World" in args.transformer_type else CosmosVideoToWorldPipeline
pipe = pipe_cls(
text_encoder=text_encoder,
tokenizer=tokenizer,
transformer=transformer,
@@ -438,7 +441,8 @@ def save_pipeline_cosmos_2_0(args, transformer, vae):
use_flow_sigmas=True,
)
pipe = CosmosTextToImagePipeline(
pipe_cls = Cosmos2TextToImagePipeline if "Text2Image" in args.transformer_type else Cosmos2VideoToWorldPipeline
pipe = pipe_cls(
text_encoder=text_encoder,
tokenizer=tokenizer,
transformer=transformer,

View File

@@ -361,8 +361,8 @@ else:
"CogView4ControlPipeline",
"CogView4Pipeline",
"ConsisIDPipeline",
"Cosmos2TextToImagePipeline",
"Cosmos2VideoToWorldPipeline",
"CosmosTextToImagePipeline",
"CosmosTextToWorldPipeline",
"CosmosVideoToWorldPipeline",
"CycleDiffusionPipeline",
@@ -951,8 +951,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
CogView4ControlPipeline,
CogView4Pipeline,
ConsisIDPipeline,
Cosmos2TextToImagePipeline,
Cosmos2VideoToWorldPipeline,
CosmosTextToImagePipeline,
CosmosTextToWorldPipeline,
CosmosVideoToWorldPipeline,
CycleDiffusionPipeline,

View File

@@ -158,7 +158,7 @@ else:
_import_structure["cogview4"] = ["CogView4Pipeline", "CogView4ControlPipeline"]
_import_structure["consisid"] = ["ConsisIDPipeline"]
_import_structure["cosmos"] = [
"CosmosTextToImagePipeline",
"Cosmos2TextToImagePipeline",
"CosmosTextToWorldPipeline",
"CosmosVideoToWorldPipeline",
"Cosmos2VideoToWorldPipeline",
@@ -565,8 +565,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
StableDiffusionXLControlNetXSPipeline,
)
from .cosmos import (
Cosmos2TextToImagePipeline,
Cosmos2VideoToWorldPipeline,
CosmosTextToImagePipeline,
CosmosTextToWorldPipeline,
CosmosVideoToWorldPipeline,
)

View File

@@ -22,8 +22,8 @@ except OptionalDependencyNotAvailable:
_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
else:
_import_structure["pipeline_cosmos2_text2image"] = ["Cosmos2TextToImagePipeline"]
_import_structure["pipeline_cosmos2_video2world"] = ["Cosmos2VideoToWorldPipeline"]
_import_structure["pipeline_cosmos_text2image"] = ["CosmosTextToImagePipeline"]
_import_structure["pipeline_cosmos_text2world"] = ["CosmosTextToWorldPipeline"]
_import_structure["pipeline_cosmos_video2world"] = ["CosmosVideoToWorldPipeline"]
@@ -35,8 +35,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
except OptionalDependencyNotAvailable:
from ...utils.dummy_torch_and_transformers_objects import *
else:
from .pipeline_cosmos2_text2image import Cosmos2TextToImagePipeline
from .pipeline_cosmos2_video2world import Cosmos2VideoToWorldPipeline
from .pipeline_cosmos_text2image import CosmosTextToImagePipeline
from .pipeline_cosmos_text2world import CosmosTextToWorldPipeline
from .pipeline_cosmos_video2world import CosmosVideoToWorldPipeline

View File

@@ -54,11 +54,11 @@ EXAMPLE_DOC_STRING = """
Examples:
```python
>>> import torch
>>> from diffusers import CosmosTextToImagePipeline
>>> from diffusers import Cosmos2TextToImagePipeline
>>> # Available checkpoints: nvidia/Cosmos-Predict2-2B-Text2Image, nvidia/Cosmos-Predict2-14B-Text2Image
>>> model_id = "nvidia/Cosmos-Predict2-2B-Text2Image"
>>> pipe = CosmosTextToImagePipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
>>> pipe = Cosmos2TextToImagePipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
>>> pipe.to("cuda")
>>> prompt = "A close-up shot captures a vibrant yellow scrubber vigorously working on a grimy plate, its bristles moving in circular motions to lift stubborn grease and food residue. The dish, once covered in remnants of a hearty meal, gradually reveals its original glossy surface. Suds form and bubble around the scrubber, creating a satisfying visual of cleanliness in progress. The sound of scrubbing fills the air, accompanied by the gentle clinking of the dish against the sink. As the scrubber continues its task, the dish transforms, gleaming under the bright kitchen lights, symbolizing the triumph of cleanliness over mess."
@@ -132,7 +132,7 @@ def retrieve_timesteps(
return timesteps, num_inference_steps
class CosmosTextToImagePipeline(DiffusionPipeline):
class Cosmos2TextToImagePipeline(DiffusionPipeline):
r"""
Pipeline for text-to-image generation using [Cosmos](https://github.com/NVIDIA/Cosmos).
@@ -637,6 +637,10 @@ class CosmosTextToImagePipeline(DiffusionPipeline):
else:
video = self.video_processor.postprocess_video(video, output_type=output_type)
image = [batch[0] for batch in video]
if isinstance(video, torch.Tensor):
image = torch.stack(image)
elif isinstance(video, np.ndarray):
image = np.stack(image)
else:
image = latents[:, :, 0]

View File

@@ -407,7 +407,7 @@ class ConsisIDPipeline(metaclass=DummyObject):
requires_backends(cls, ["torch", "transformers"])
class CosmosTextToImagePipeline(metaclass=DummyObject):
class Cosmos2TextToImagePipeline(metaclass=DummyObject):
_backends = ["torch", "transformers"]
def __init__(self, *args, **kwargs):