diff --git a/src/diffusers/modular_pipelines/components_manager.py b/src/diffusers/modular_pipelines/components_manager.py
index 9dd8035c44..cb7e8fb736 100644
--- a/src/diffusers/modular_pipelines/components_manager.py
+++ b/src/diffusers/modular_pipelines/components_manager.py
@@ -164,7 +164,11 @@ class AutoOffloadStrategy:
 
         device_type = execution_device.type
         device_module = getattr(torch, device_type, torch.cuda)
-        mem_on_device = device_module.mem_get_info(execution_device.index)[0]
+        try:
+            mem_on_device = device_module.mem_get_info(execution_device.index)[0]
+        except AttributeError:
+            raise AttributeError(f"Do not know how to obtain memory info for {str(device_module)}.")
+
         mem_on_device = mem_on_device - self.memory_reserve_margin
         if current_module_size < mem_on_device:
             return []
@@ -699,6 +703,8 @@ class ComponentsManager:
         if not is_accelerate_available():
             raise ImportError("Make sure to install accelerate to use auto_cpu_offload")
 
+        # TODO: add a warning if mem_get_info isn't available on `device`.
+
         for name, component in self.components.items():
             if isinstance(component, torch.nn.Module) and hasattr(component, "_hf_hook"):
                 remove_hook_from_module(component, recurse=True)
diff --git a/src/diffusers/modular_pipelines/flux/before_denoise.py b/src/diffusers/modular_pipelines/flux/before_denoise.py
index c098b7d4f1..daffec9865 100644
--- a/src/diffusers/modular_pipelines/flux/before_denoise.py
+++ b/src/diffusers/modular_pipelines/flux/before_denoise.py
@@ -598,7 +598,7 @@ class FluxKontextRoPEInputsStep(ModularPipelineBlocks):
             and getattr(block_state, "image_width", None) is not None
         ):
             image_latent_height = 2 * (int(block_state.image_height) // (components.vae_scale_factor * 2))
-            image_latent_width = 2 * (int(block_state.width) // (components.vae_scale_factor * 2))
+            image_latent_width = 2 * (int(block_state.image_width) // (components.vae_scale_factor * 2))
             img_ids = FluxPipeline._prepare_latent_image_ids(
                 None, image_latent_height // 2, image_latent_width // 2, device, dtype
             )
diff --git a/src/diffusers/modular_pipelines/flux/denoise.py b/src/diffusers/modular_pipelines/flux/denoise.py
index b1796bb63c..5a769df103 100644
--- a/src/diffusers/modular_pipelines/flux/denoise.py
+++ b/src/diffusers/modular_pipelines/flux/denoise.py
@@ -59,7 +59,7 @@ class FluxLoopDenoiser(ModularPipelineBlocks):
             ),
             InputParam(
                 "guidance",
-                required=True,
+                required=False,
                 type_hint=torch.Tensor,
                 description="Guidance scale as a tensor",
             ),
@@ -141,7 +141,7 @@ class FluxKontextLoopDenoiser(ModularPipelineBlocks):
             ),
             InputParam(
                 "guidance",
-                required=True,
+                required=False,
                 type_hint=torch.Tensor,
                 description="Guidance scale as a tensor",
             ),
diff --git a/src/diffusers/modular_pipelines/flux/encoders.py b/src/diffusers/modular_pipelines/flux/encoders.py
index b71962bd93..f0314d4771 100644
--- a/src/diffusers/modular_pipelines/flux/encoders.py
+++ b/src/diffusers/modular_pipelines/flux/encoders.py
@@ -95,7 +95,7 @@ class FluxProcessImagesInputStep(ModularPipelineBlocks):
             ComponentSpec(
                 "image_processor",
                 VaeImageProcessor,
-                config=FrozenDict({"vae_scale_factor": 16}),
+                config=FrozenDict({"vae_scale_factor": 16, "vae_latent_channels": 16}),
                 default_creation_method="from_config",
             ),
         ]
@@ -143,10 +143,6 @@ class FluxProcessImagesInputStep(ModularPipelineBlocks):
 class FluxKontextProcessImagesInputStep(ModularPipelineBlocks):
     model_name = "flux-kontext"
 
-    def __init__(self, _auto_resize=True):
-        self._auto_resize = _auto_resize
-        super().__init__()
-
     @property
     def description(self) -> str:
         return (
@@ -167,7 +163,7 @@ class FluxKontextProcessImagesInputStep(ModularPipelineBlocks):
 
     @property
     def inputs(self) -> List[InputParam]:
-        return [InputParam("image")]
+        return [InputParam("image"), InputParam("_auto_resize", type_hint=bool, default=True)]
 
     @property
     def intermediate_outputs(self) -> List[OutputParam]:
@@ -195,7 +191,8 @@ class FluxKontextProcessImagesInputStep(ModularPipelineBlocks):
         img = images[0]
         image_height, image_width = components.image_processor.get_default_height_width(img)
         aspect_ratio = image_width / image_height
-        if self._auto_resize:
+        _auto_resize = block_state._auto_resize
+        if _auto_resize:
             # Kontext is trained on specific resolutions, using one of them is recommended
             _, image_width, image_height = min(
                 (abs(aspect_ratio - w / h), w, h) for w, h in PREFERRED_KONTEXT_RESOLUTIONS
diff --git a/src/diffusers/modular_pipelines/flux/inputs.py b/src/diffusers/modular_pipelines/flux/inputs.py
index e1bc17f5ff..8309eebfeb 100644
--- a/src/diffusers/modular_pipelines/flux/inputs.py
+++ b/src/diffusers/modular_pipelines/flux/inputs.py
@@ -112,6 +112,10 @@ class FluxTextInputStep(ModularPipelineBlocks):
         block_state.prompt_embeds = block_state.prompt_embeds.view(
             block_state.batch_size * block_state.num_images_per_prompt, seq_len, -1
         )
+        pooled_prompt_embeds = block_state.pooled_prompt_embeds.repeat(1, block_state.num_images_per_prompt)
+        block_state.pooled_prompt_embeds = pooled_prompt_embeds.view(
+            block_state.batch_size * block_state.num_images_per_prompt, -1
+        )
 
         self.set_block_state(state, block_state)
         return components, state
diff --git a/tests/modular_pipelines/flux/__init__.py b/tests/modular_pipelines/flux/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/modular_pipelines/flux/test_modular_pipeline_flux.py b/tests/modular_pipelines/flux/test_modular_pipeline_flux.py
new file mode 100644
index 0000000000..9d70c21aa8
--- /dev/null
+++ b/tests/modular_pipelines/flux/test_modular_pipeline_flux.py
@@ -0,0 +1,130 @@
+# coding=utf-8
+# Copyright 2025 HuggingFace Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import random
+import tempfile
+import unittest
+
+import numpy as np
+import PIL
+import torch
+
+from diffusers.image_processor import VaeImageProcessor
+from diffusers.modular_pipelines import (
+    FluxAutoBlocks,
+    FluxKontextAutoBlocks,
+    FluxKontextModularPipeline,
+    FluxModularPipeline,
+    ModularPipeline,
+)
+
+from ...testing_utils import floats_tensor, torch_device
+from ..test_modular_pipelines_common import ModularPipelineTesterMixin
+
+
+class FluxModularTests:
+    pipeline_class = FluxModularPipeline
+    pipeline_blocks_class = FluxAutoBlocks
+    repo = "hf-internal-testing/tiny-flux-modular"
+
+    def get_pipeline(self, components_manager=None, torch_dtype=torch.float32):
+        pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager)
+        pipeline.load_components(torch_dtype=torch_dtype)
+        return pipeline
+
+    def get_dummy_inputs(self, device, seed=0):
+        if str(device).startswith("mps"):
+            generator = torch.manual_seed(seed)
+        else:
+            generator = torch.Generator(device=device).manual_seed(seed)
+        inputs = {
+            "prompt": "A painting of a squirrel eating a burger",
+            "generator": generator,
+            "num_inference_steps": 2,
+            "guidance_scale": 5.0,
+            "height": 8,
+            "width": 8,
+            "max_sequence_length": 48,
+            "output_type": "np",
+        }
+        return inputs
+
+
+class FluxModularPipelineFastTests(FluxModularTests, ModularPipelineTesterMixin, unittest.TestCase):
+    params = frozenset(["prompt", "height", "width", "guidance_scale"])
+    batch_params = frozenset(["prompt"])
+
+
+class FluxImg2ImgModularPipelineFastTests(FluxModularTests, ModularPipelineTesterMixin, unittest.TestCase):
+    params = frozenset(["prompt", "height", "width", "guidance_scale", "image"])
+    batch_params = frozenset(["prompt", "image"])
+
+    def get_pipeline(self, components_manager=None, torch_dtype=torch.float32):
+        pipeline = super().get_pipeline(components_manager, torch_dtype)
+        # Override `vae_scale_factor` here as currently, `image_processor` is initialized with
+        # fixed constants instead of
+        # https://github.com/huggingface/diffusers/blob/d54622c2679d700b425ad61abce9b80fc36212c0/src/diffusers/pipelines/flux/pipeline_flux_img2img.py#L230C9-L232C10
+        pipeline.image_processor = VaeImageProcessor(vae_scale_factor=2)
+        return pipeline
+
+    def get_dummy_inputs(self, device, seed=0):
+        inputs = super().get_dummy_inputs(device, seed)
+        image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device)
+        image = image / 2 + 0.5
+        inputs["image"] = image
+        inputs["strength"] = 0.8
+        inputs["height"] = 8
+        inputs["width"] = 8
+        return inputs
+
+    def test_save_from_pretrained(self):
+        pipes = []
+        base_pipe = self.get_pipeline().to(torch_device)
+        pipes.append(base_pipe)
+
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            base_pipe.save_pretrained(tmpdirname)
+            pipe = ModularPipeline.from_pretrained(tmpdirname).to(torch_device)
+            pipe.load_components(torch_dtype=torch.float32)
+            pipe.to(torch_device)
+            pipe.image_processor = VaeImageProcessor(vae_scale_factor=2)
+
+        pipes.append(pipe)
+
+        image_slices = []
+        for pipe in pipes:
+            inputs = self.get_dummy_inputs(torch_device)
+            image = pipe(**inputs, output="images")
+
+            image_slices.append(image[0, -3:, -3:, -1].flatten())
+
+        assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3
+
+
+class FluxKontextModularPipelineFastTests(FluxImg2ImgModularPipelineFastTests):
+    pipeline_class = FluxKontextModularPipeline
+    pipeline_blocks_class = FluxKontextAutoBlocks
+    repo = "hf-internal-testing/tiny-flux-kontext-pipe"
+
+    def get_dummy_inputs(self, device, seed=0):
+        inputs = super().get_dummy_inputs(device, seed)
+        image = PIL.Image.new("RGB", (32, 32), 0)
+        _ = inputs.pop("strength")
+        inputs["image"] = image
+        inputs["height"] = 8
+        inputs["width"] = 8
+        inputs["max_area"] = 8 * 8
+        inputs["_auto_resize"] = False
+        return inputs
diff --git a/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py b/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py
index d05f818135..22347aa558 100644
--- a/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py
+++ b/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py
@@ -21,24 +21,12 @@ import numpy as np
 import torch
 from PIL import Image
 
-from diffusers import (
-    ClassifierFreeGuidance,
-    StableDiffusionXLAutoBlocks,
-    StableDiffusionXLModularPipeline,
-)
+from diffusers import ClassifierFreeGuidance, StableDiffusionXLAutoBlocks, StableDiffusionXLModularPipeline
 from diffusers.loaders import ModularIPAdapterMixin
 
-from ...models.unets.test_models_unet_2d_condition import (
-    create_ip_adapter_state_dict,
-)
-from ...testing_utils import (
-    enable_full_determinism,
-    floats_tensor,
-    torch_device,
-)
-from ..test_modular_pipelines_common import (
-    ModularPipelineTesterMixin,
-)
+from ...models.unets.test_models_unet_2d_condition import create_ip_adapter_state_dict
+from ...testing_utils import enable_full_determinism, floats_tensor, torch_device
+from ..test_modular_pipelines_common import ModularPipelineTesterMixin
 
 
 enable_full_determinism()
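
Usage note (not part of the patch): a minimal sketch of how the new `_auto_resize` runtime input could be exercised, assuming the `hf-internal-testing/tiny-flux-kontext-pipe` test repo and the `FluxKontextAutoBlocks().init_pipeline(...)` / `load_components(...)` / `pipe(**inputs, output="images")` pattern used in the tests above; input names mirror the Kontext fast test's dummy inputs.

```python
import PIL.Image
import torch

from diffusers.modular_pipelines import FluxKontextAutoBlocks

# Build the modular pipeline from its auto blocks, as in FluxModularTests.get_pipeline.
pipeline = FluxKontextAutoBlocks().init_pipeline("hf-internal-testing/tiny-flux-kontext-pipe")
pipeline.load_components(torch_dtype=torch.float32)
pipeline.to("cpu")

# `_auto_resize` is now a block input (default True) rather than a constructor argument.
# Passing False skips snapping the reference image to one of PREFERRED_KONTEXT_RESOLUTIONS,
# matching FluxKontextModularPipelineFastTests.get_dummy_inputs above.
image = PIL.Image.new("RGB", (32, 32), 0)
images = pipeline(
    prompt="A painting of a squirrel eating a burger",
    image=image,
    num_inference_steps=2,
    height=8,
    width=8,
    max_area=8 * 8,
    _auto_resize=False,
    output_type="np",
    output="images",
)
```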