Fix code style with make style.
@@ -327,7 +327,7 @@ class UniDiffuserPipeline(DiffusionPipeline):

     def set_joint_mode(self):
         self.mode = "joint"

     def reset_mode(self):
         self.mode = None

@@ -349,10 +349,10 @@ class UniDiffuserPipeline(DiffusionPipeline):
             num_images_per_prompt = 1
         if num_prompts_per_image is None:
             num_prompts_per_image = 1

         assert num_images_per_prompt > 0, "num_images_per_prompt must be a positive integer"
         assert num_prompts_per_image > 0, "num_prompts_per_image must be a positive integer"

         if mode in ["text2img"]:
             if prompt is not None and isinstance(prompt, str):
                 batch_size = 1
@@ -666,7 +666,9 @@ class UniDiffuserPipeline(DiffusionPipeline):
         image = image.cpu().permute(0, 2, 3, 1).float().numpy()
         return image

-    def prepare_text_latents(self, batch_size, num_images_per_prompt, seq_len, hidden_size, dtype, device, generator, latents=None):
+    def prepare_text_latents(
+        self, batch_size, num_images_per_prompt, seq_len, hidden_size, dtype, device, generator, latents=None
+    ):
         # Prepare latents for the CLIP embedded prompt.
         shape = (batch_size * num_images_per_prompt, seq_len, hidden_size)
         if isinstance(generator, list) and len(generator) != batch_size:
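For context on the `generator` argument threaded through these helpers: `randn_tensor` accepts either a single `torch.Generator` or one generator per batch element, which is what the length check above guards. A minimal sketch (import path per recent diffusers releases; shapes are illustrative):

    import torch
    from diffusers.utils.torch_utils import randn_tensor  # older releases: from diffusers.utils import randn_tensor

    # One generator per batch element makes each sample independently reproducible;
    # the list length must equal the batch size, as the check above enforces.
    shape = (2, 77, 768)  # (batch_size * num_images_per_prompt, seq_len, hidden_size)
    generators = [torch.Generator().manual_seed(i) for i in range(shape[0])]
    latents = randn_tensor(shape, generator=generators, device=torch.device("cpu"), dtype=torch.float32)
    print(latents.shape)  # torch.Size([2, 77, 768])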
@@ -678,7 +680,7 @@ class UniDiffuserPipeline(DiffusionPipeline):
         if latents is None:
             latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
         else:
-            latents = einops.repeat(latents, 'B L D -> (repeat B) L D', repeat=num_images_per_prompt)
+            latents = einops.repeat(latents, "B L D -> (repeat B) L D", repeat=num_images_per_prompt)
             latents = latents.to(device)

         # scale the initial noise by the standard deviation required by the scheduler
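The only change above is quote style, but the `einops.repeat` pattern is worth unpacking: "B L D -> (repeat B) L D" tiles the whole batch along a new outermost factor, so user-supplied latents are duplicated once per requested image. A small sketch with illustrative sizes:

    import torch
    from einops import repeat

    latents = torch.randn(2, 77, 768)  # (B, L, D)
    tiled = repeat(latents, "B L D -> (repeat B) L D", repeat=3)
    print(tiled.shape)  # torch.Size([6, 77, 768])
    # "(repeat B)" puts the repeat axis outermost, so the batch is tiled whole, not interleaved:
    assert torch.equal(tiled[0], latents[0]) and torch.equal(tiled[2], latents[0])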
@@ -688,9 +690,23 @@ class UniDiffuserPipeline(DiffusionPipeline):
     # Modified from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
     # Rename: prepare_latents -> prepare_image_vae_latents
     def prepare_image_vae_latents(
-        self, batch_size, num_prompts_per_image, num_channels_latents, height, width, dtype, device, generator, latents=None
+        self,
+        batch_size,
+        num_prompts_per_image,
+        num_channels_latents,
+        height,
+        width,
+        dtype,
+        device,
+        generator,
+        latents=None,
     ):
-        shape = (batch_size * num_prompts_per_image, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+        shape = (
+            batch_size * num_prompts_per_image,
+            num_channels_latents,
+            height // self.vae_scale_factor,
+            width // self.vae_scale_factor,
+        )
         if isinstance(generator, list) and len(generator) != batch_size:
             raise ValueError(
                 f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
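The re-wrapped `shape` tuple is the standard VAE downsampling arithmetic. A worked example, assuming the typical `vae_scale_factor = 8` and 4 latent channels (illustrative values, not read from this diff):

    batch_size = 1
    num_prompts_per_image = 2
    num_channels_latents = 4   # typical for SD-style VAEs (assumed)
    height, width = 512, 512
    vae_scale_factor = 8       # 2 ** (number of VAE downsampling blocks), assumed

    shape = (
        batch_size * num_prompts_per_image,
        num_channels_latents,
        height // vae_scale_factor,
        width // vae_scale_factor,
    )
    print(shape)  # (2, 4, 64, 64)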
@@ -700,14 +716,16 @@ class UniDiffuserPipeline(DiffusionPipeline):
         if latents is None:
             latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
         else:
-            latents = einops.repeat(latents, 'B C H W -> (repeat B) C H W', repeat=num_prompts_per_image)
+            latents = einops.repeat(latents, "B C H W -> (repeat B) C H W", repeat=num_prompts_per_image)
             latents = latents.to(device)

         # scale the initial noise by the standard deviation required by the scheduler
         latents = latents * self.scheduler.init_noise_sigma
         return latents

-    def prepare_image_clip_latents(self, batch_size, num_prompts_per_image, clip_img_dim, dtype, device, generator, latents=None):
+    def prepare_image_clip_latents(
+        self, batch_size, num_prompts_per_image, clip_img_dim, dtype, device, generator, latents=None
+    ):
         # Prepare latents for the CLIP embedded image.
         shape = (batch_size * num_prompts_per_image, 1, clip_img_dim)
         if isinstance(generator, list) and len(generator) != batch_size:
@@ -719,7 +737,7 @@ class UniDiffuserPipeline(DiffusionPipeline):
         if latents is None:
             latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
         else:
-            latents = einops.repeat(latents, 'B L D -> (repeat B) L D', repeat=num_prompts_per_image)
+            latents = einops.repeat(latents, "B L D -> (repeat B) L D", repeat=num_prompts_per_image)
             latents = latents.to(device)

         # scale the initial noise by the standard deviation required by the scheduler
@@ -887,16 +905,16 @@ class UniDiffuserPipeline(DiffusionPipeline):

         img_out = self._combine(img_vae_out, img_clip_out)
         return img_out

     def check_latents_shape(self, latents_name, latents, expected_shape):
         latents_shape = latents.shape
         expected_num_dims = len(expected_shape) + 1  # expected dimensions plus the batch dimension
         expected_shape_str = ", ".join(str(dim) for dim in expected_shape)
         if len(latents_shape) != expected_num_dims:
             raise ValueError(
-            f"`{latents_name}` should have shape (batch_size, {expected_shape_str}), but the current shape"
-            f" {latents_shape} has {len(latents_shape)} dimensions."
-            )
+                f"`{latents_name}` should have shape (batch_size, {expected_shape_str}), but the current shape"
+                f" {latents_shape} has {len(latents_shape)} dimensions."
+            )
         for i in range(1, expected_num_dims):
             if latents_shape[i] != expected_shape[i - 1]:
                 raise ValueError(
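Note that `expected_shape` deliberately omits the batch dimension, hence the `+ 1` in `expected_num_dims`. A standalone sketch of the same contract (a re-implementation for illustration, not the pipeline method itself; the second error message is abbreviated):

    import torch

    def check_latents_shape(latents_name, latents, expected_shape):
        # expected_shape omits the batch dimension, so dim 0 is never checked.
        latents_shape = latents.shape
        expected_num_dims = len(expected_shape) + 1
        expected_shape_str = ", ".join(str(dim) for dim in expected_shape)
        if len(latents_shape) != expected_num_dims:
            raise ValueError(
                f"`{latents_name}` should have shape (batch_size, {expected_shape_str}), but the current shape"
                f" {latents_shape} has {len(latents_shape)} dimensions."
            )
        for i in range(1, expected_num_dims):
            if latents_shape[i] != expected_shape[i - 1]:
                raise ValueError(f"`{latents_name}` dim {i} is {latents_shape[i]}, expected {expected_shape[i - 1]}.")

    check_latents_shape("prompt_latents", torch.zeros(2, 77, 768), (77, 768))   # passes
    # check_latents_shape("prompt_latents", torch.zeros(2, 77), (77, 768))      # raises ValueError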
@@ -960,13 +978,11 @@ class UniDiffuserPipeline(DiffusionPipeline):
                     f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
                     f" {negative_prompt_embeds.shape}."
                 )

         if mode == "img2text":
             if image is None:
-                raise ValueError(
-                    "`img2text` mode requires an image to be provided."
-                )
+                raise ValueError("`img2text` mode requires an image to be provided.")

         # Check provided latents
         latent_height = height // self.vae_scale_factor
         latent_width = width // self.vae_scale_factor
@@ -990,27 +1006,27 @@ class UniDiffuserPipeline(DiffusionPipeline):
             latents_dim = img_vae_dim + self.image_encoder_hidden_size + text_dim
             latents_expected_shape = (latents_dim,)
             self.check_latents_shape("latents", latents, latents_expected_shape)

         # Check individual latent shapes, if present
         if prompt_latents_available:
             prompt_latents_expected_shape = (self.text_encoder_seq_len, self.text_encoder_hidden_size)
             self.check_latents_shape("prompt_latents", prompt_latents, prompt_latents_expected_shape)

         if vae_latents_available:
             vae_latents_expected_shape = (self.num_channels_latents, latent_height, latent_width)
             self.check_latents_shape("vae_latents", vae_latents, vae_latents_expected_shape)

         if clip_latents_available:
             clip_latents_expected_shape = (1, self.image_encoder_hidden_size)
             self.check_latents_shape("clip_latents", clip_latents, clip_latents_expected_shape)

         if mode in ["text2img", "img"] and vae_latents_available and clip_latents_available:
             if vae_latents.shape[0] != clip_latents.shape[0]:
                 raise ValueError(
                     f"Both `vae_latents` and `clip_latents` are supplied, but their batch dimensions are not equal:"
                     f" {vae_latents.shape[0]} != {clip_latents.shape[0]}."
                 )

         if mode == "joint" and prompt_latents_available and vae_latents_available and clip_latents_available:
             if prompt_latents.shape[0] != vae_latents.shape[0] or prompt_latents.shape[0] != clip_latents.shape[0]:
                 raise ValueError(
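The full `latents` argument is a flat concatenation of the three per-modality latents, so `latents_dim` is a plain sum. Illustrative arithmetic with assumed dimensions (and assuming `text_dim` is the flattened `seq_len * hidden_size`, as the shape checks above suggest):

    num_channels_latents, latent_height, latent_width = 4, 64, 64  # assumed
    image_encoder_hidden_size = 512                                # CLIP image embedding width (assumed)
    text_encoder_seq_len, text_encoder_hidden_size = 77, 64        # assumed

    img_vae_dim = num_channels_latents * latent_height * latent_width  # 16384
    text_dim = text_encoder_seq_len * text_encoder_hidden_size         # 4928
    latents_dim = img_vae_dim + image_encoder_hidden_size + text_dim
    print(latents_dim)  # 21824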
@@ -1076,12 +1092,12 @@ class UniDiffuserPipeline(DiffusionPipeline):
                `negative_prompt_embeds`. instead. If not defined, one has to pass `negative_prompt_embeds`. instead.
                Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
            num_images_per_prompt (`int`, *optional*, defaults to 1):
-                The number of images to generate per prompt. Used in `text2img` (text-conditioned image generation)
-                and `img` mode. If the mode is joint and both `num_images_per_prompt` and `num_prompts_per_image` are
+                The number of images to generate per prompt. Used in `text2img` (text-conditioned image generation) and
+                `img` mode. If the mode is joint and both `num_images_per_prompt` and `num_prompts_per_image` are
                supplied, `min(num_images_per_prompt, num_prompts_per_image)` samples will be generated.
            num_prompts_per_image (`int`, *optional*, defaults to 1):
-                The number of prompts to generate per image. Used in `img2text` (image-conditioned text generation)
-                and `text` mode. If the mode is joint and both `num_images_per_prompt` and `num_prompts_per_image` are
+                The number of prompts to generate per image. Used in `img2text` (image-conditioned text generation) and
+                `text` mode. If the mode is joint and both `num_images_per_prompt` and `num_prompts_per_image` are
                supplied, `min(num_images_per_prompt, num_prompts_per_image)` samples will be generated.
            eta (`float`, *optional*, defaults to 0.0):
                Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
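The joint-mode rule in both docstrings reduces to a `min()` over the two multiplicity arguments:

    num_images_per_prompt, num_prompts_per_image = 4, 2
    num_samples = min(num_images_per_prompt, num_prompts_per_image)
    print(num_samples)  # 2 joint (image, text) pairs are generated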
@@ -321,7 +321,7 @@ class UniDiffuserPipelineFastTests(PipelineTesterMixin, unittest.TestCase):

        expected_text_prefix = " no no no "
        assert text[0][:10] == expected_text_prefix

    def test_unidiffuser_text2img_multiple_images(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
@@ -340,7 +340,7 @@ class UniDiffuserPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        inputs["num_prompts_per_image"] = 3
        image = unidiffuser_pipe(**inputs).images
        assert image.shape == (2, 32, 32, 3)

    def test_unidiffuser_img2text_multiple_prompts(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
@@ -360,7 +360,7 @@ class UniDiffuserPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        text = unidiffuser_pipe(**inputs).text

        assert len(text) == 3

    def test_unidiffuser_text2img_multiple_images_with_latents(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
@@ -379,7 +379,7 @@ class UniDiffuserPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
        inputs["num_prompts_per_image"] = 3
        image = unidiffuser_pipe(**inputs).images
        assert image.shape == (2, 32, 32, 3)

    def test_unidiffuser_img2text_multiple_prompts_with_latents(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        components = self.get_dummy_components()
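For readers who want to reproduce these multiplicity tests against the released checkpoint rather than the dummy components, a hedged usage sketch (checkpoint name from the UniDiffuser model card; `set_text_to_image_mode` is the public counterpart of the mode setters shown above):

    import torch
    from diffusers import UniDiffuserPipeline

    pipe = UniDiffuserPipeline.from_pretrained("thu-ml/unidiffuser-v1", torch_dtype=torch.float16)
    pipe = pipe.to("cuda")

    # Mode is normally inferred from the inputs; forcing it makes the intent explicit.
    pipe.set_text_to_image_mode()
    out = pipe(prompt="an astronaut riding a horse", num_images_per_prompt=3)
    print(len(out.images))  # 3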