RePaint fast tests and API conforming (#1701)

* add fast tests * better tests and fp16 * batch fixes * Reuse preprocessing * quickfix
2026-01-27 17:22:53 +03:00 · 2022-12-15 18:35:31 +01:00
parent ea90bf2ba1
commit 13994b2d3f
3 changed files with 126 additions and 28 deletions
--- a/src/diffusers/pipelines/repaint/pipeline_repaint.py
+++ b/src/diffusers/pipelines/repaint/pipeline_repaint.py
@@ -13,33 +13,61 @@
 # limitations under the License.


-from typing import Optional, Tuple, Union
+from typing import List, Optional, Tuple, Union

 import numpy as np
 import torch

 import PIL
-from tqdm.auto import tqdm

 from ...models import UNet2DModel
 from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from ...schedulers import RePaintScheduler
+from ...utils import PIL_INTERPOLATION, deprecate, logging


-def _preprocess_image(image: PIL.Image.Image):
-    image = np.array(image.convert("RGB"))
-    image = image[None].transpose(0, 3, 1, 2)
-    image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0
+logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
+
+
+# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess
+def _preprocess_image(image: Union[List, PIL.Image.Image, torch.Tensor]):
+    if isinstance(image, torch.Tensor):
+        return image
+    elif isinstance(image, PIL.Image.Image):
+        image = [image]
+
+    if isinstance(image[0], PIL.Image.Image):
+        w, h = image[0].size
+        w, h = map(lambda x: x - x % 32, (w, h))  # resize to integer multiple of 32
+
+        image = [np.array(i.resize((w, h), resample=PIL_INTERPOLATION["lanczos"]))[None, :] for i in image]
+        image = np.concatenate(image, axis=0)
+        image = np.array(image).astype(np.float32) / 255.0
+        image = image.transpose(0, 3, 1, 2)
+        image = 2.0 * image - 1.0
+        image = torch.from_numpy(image)
+    elif isinstance(image[0], torch.Tensor):
+        image = torch.cat(image, dim=0)
    return image


-def _preprocess_mask(mask: PIL.Image.Image):
-    mask = np.array(mask.convert("L"))
-    mask = mask.astype(np.float32) / 255.0
-    mask = mask[None, None]
-    mask[mask < 0.5] = 0
-    mask[mask >= 0.5] = 1
-    mask = torch.from_numpy(mask)
+def _preprocess_mask(mask: Union[List, PIL.Image.Image, torch.Tensor]):
+    if isinstance(mask, torch.Tensor):
+        return mask
+    elif isinstance(mask, PIL.Image.Image):
+        mask = [mask]
+
+    if isinstance(mask[0], PIL.Image.Image):
+        w, h = mask[0].size
+        w, h = map(lambda x: x - x % 32, (w, h))  # resize to integer multiple of 32
+        mask = [np.array(m.convert("L").resize((w, h), resample=PIL_INTERPOLATION["nearest"]))[None, :] for m in mask]
+        mask = np.concatenate(mask, axis=0)
+        mask = mask.astype(np.float32) / 255.0
+        mask[mask < 0.5] = 0
+        mask[mask >= 0.5] = 1
+        mask = torch.from_numpy(mask)
+    elif isinstance(mask[0], torch.Tensor):
+        mask = torch.cat(mask, dim=0)
    return mask


@@ -54,8 +82,8 @@ class RePaintPipeline(DiffusionPipeline):
    @torch.no_grad()
    def __call__(
        self,
-        original_image: Union[torch.FloatTensor, PIL.Image.Image],
-        mask_image: Union[torch.FloatTensor, PIL.Image.Image],
+        image: Union[torch.Tensor, PIL.Image.Image],
+        mask_image: Union[torch.Tensor, PIL.Image.Image],
        num_inference_steps: int = 250,
        eta: float = 0.0,
        jump_length: int = 10,
@@ -63,10 +91,11 @@ class RePaintPipeline(DiffusionPipeline):
        generator: Optional[torch.Generator] = None,
        output_type: Optional[str] = "pil",
        return_dict: bool = True,
+        **kwargs,
    ) -> Union[ImagePipelineOutput, Tuple]:
        r"""
        Args:
-            original_image (`torch.FloatTensor` or `PIL.Image.Image`):
+            image (`torch.FloatTensor` or `PIL.Image.Image`):
                The original image to inpaint on.
            mask_image (`torch.FloatTensor` or `PIL.Image.Image`):
                The mask_image where 0.0 values define which part of the original image to inpaint (change).
@@ -97,12 +126,14 @@ class RePaintPipeline(DiffusionPipeline):
            generated images.
        """

-        if not isinstance(original_image, torch.FloatTensor):
-            original_image = _preprocess_image(original_image)
-        original_image = original_image.to(self.device)
-        if not isinstance(mask_image, torch.FloatTensor):
-            mask_image = _preprocess_mask(mask_image)
-        mask_image = mask_image.to(self.device)
+        message = "Please use `image` instead of `original_image`."
+        original_image = deprecate("original_image", "0.15.0", message, take_from=kwargs)
+        original_image = original_image or image
+
+        original_image = _preprocess_image(original_image)
+        original_image = original_image.to(device=self.device, dtype=self.unet.dtype)
+        mask_image = _preprocess_mask(mask_image)
+        mask_image = mask_image.to(device=self.device, dtype=self.unet.dtype)

        # sample gaussian noise to begin the loop
        image = torch.randn(
@@ -110,14 +141,14 @@ class RePaintPipeline(DiffusionPipeline):
            generator=generator,
            device=self.device,
        )
-        image = image.to(self.device)
+        image = image.to(device=self.device, dtype=self.unet.dtype)

        # set step values
        self.scheduler.set_timesteps(num_inference_steps, jump_length, jump_n_sample, self.device)
        self.scheduler.eta = eta

        t_last = self.scheduler.timesteps[0] + 1
-        for i, t in enumerate(tqdm(self.scheduler.timesteps)):
+        for i, t in enumerate(self.progress_bar(self.scheduler.timesteps)):
            if t < t_last:
                # predict the noise residual
                model_output = self.unet(image, t).sample
--- a/src/diffusers/schedulers/scheduling_repaint.py
+++ b/src/diffusers/schedulers/scheduling_repaint.py
@@ -270,9 +270,13 @@ class RePaintScheduler(SchedulerMixin, ConfigMixin):
        # been observed.

        # 5. Add noise
-        noise = torch.randn(
-            model_output.shape, dtype=model_output.dtype, generator=generator, device=model_output.device
-        )
+        device = model_output.device
+        if device.type == "mps":
+            # randn does not work reproducibly on mps
+            noise = torch.randn(model_output.shape, dtype=model_output.dtype, generator=generator)
+            noise = noise.to(device)
+        else:
+            noise = torch.randn(model_output.shape, generator=generator, device=device, dtype=model_output.dtype)
        std_dev_t = self.eta * self._get_variance(timestep) ** 0.5

        variance = 0
@@ -305,7 +309,12 @@ class RePaintScheduler(SchedulerMixin, ConfigMixin):

        for i in range(n):
            beta = self.betas[timestep + i]
-            noise = torch.randn(sample.shape, generator=generator, device=sample.device)
+            if sample.device.type == "mps":
+                # randn does not work reproducibly on mps
+                noise = torch.randn(sample.shape, dtype=sample.dtype, generator=generator)
+                noise = noise.to(sample.device)
+            else:
+                noise = torch.randn(sample.shape, generator=generator, device=sample.device, dtype=sample.dtype)

            # 10. Algorithm 1 Line 10 https://arxiv.org/pdf/2201.09865.pdf
            sample = (1 - beta) ** 0.5 * sample + beta**0.5 * noise
--- a/tests/pipelines/repaint/test_repaint.py
+++ b/tests/pipelines/repaint/test_repaint.py
@@ -21,10 +21,68 @@ import torch
 from diffusers import RePaintPipeline, RePaintScheduler, UNet2DModel
 from diffusers.utils.testing_utils import load_image, load_numpy, require_torch_gpu, slow, torch_device

+from ...test_pipelines_common import PipelineTesterMixin
+

 torch.backends.cuda.matmul.allow_tf32 = False


+class RepaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
+    pipeline_class = RePaintPipeline
+    test_cpu_offload = False
+
+    def get_dummy_components(self):
+        torch.manual_seed(0)
+        torch.manual_seed(0)
+        unet = UNet2DModel(
+            block_out_channels=(32, 64),
+            layers_per_block=2,
+            sample_size=32,
+            in_channels=3,
+            out_channels=3,
+            down_block_types=("DownBlock2D", "AttnDownBlock2D"),
+            up_block_types=("AttnUpBlock2D", "UpBlock2D"),
+        )
+        scheduler = RePaintScheduler()
+        components = {"unet": unet, "scheduler": scheduler}
+        return components
+
+    def get_dummy_inputs(self, device, seed=0):
+        if str(device).startswith("mps"):
+            generator = torch.manual_seed(seed)
+        else:
+            generator = torch.Generator(device=device).manual_seed(seed)
+        image = np.random.RandomState(seed).standard_normal((1, 3, 32, 32))
+        image = torch.from_numpy(image).to(device=device, dtype=torch.float32)
+        mask = (image > 0).to(device=device, dtype=torch.float32)
+        inputs = {
+            "image": image,
+            "mask_image": mask,
+            "generator": generator,
+            "num_inference_steps": 5,
+            "eta": 0.0,
+            "jump_length": 2,
+            "jump_n_sample": 2,
+            "output_type": "numpy",
+        }
+        return inputs
+
+    def test_repaint(self):
+        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
+        components = self.get_dummy_components()
+        sd_pipe = RePaintPipeline(**components)
+        sd_pipe = sd_pipe.to(device)
+        sd_pipe.set_progress_bar_config(disable=None)
+
+        inputs = self.get_dummy_inputs(device)
+        image = sd_pipe(**inputs).images
+        image_slice = image[0, -3:, -3:, -1]
+
+        assert image.shape == (1, 32, 32, 3)
+        expected_slice = np.array([1.0000, 0.5426, 0.5497, 0.2200, 1.0000, 1.0000, 0.5623, 1.0000, 0.6274])
+        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
+
+
@slow
@require_torch_gpu
 class RepaintPipelineIntegrationTests(unittest.TestCase):