Mirror of https://github.com/huggingface/diffusers.git (synced 2026-01-27 17:22:53 +03:00)
Merge remote-tracking branch 'origin/main'
@@ -1,156 +0,0 @@
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import torch

import tqdm

from ..pipeline_utils import DiffusionPipeline


class PNDM(DiffusionPipeline):
    def __init__(self, unet, noise_scheduler):
        super().__init__()
        noise_scheduler = noise_scheduler.set_format("pt")
        self.register_modules(unet=unet, noise_scheduler=noise_scheduler)

    def __call__(self, batch_size=1, generator=None, torch_device=None, num_inference_steps=50):
        # eta corresponds to η in the paper and should be between [0, 1]
        if torch_device is None:
            torch_device = "cuda" if torch.cuda.is_available() else "cpu"

        num_trained_timesteps = self.noise_scheduler.timesteps
        inference_step_times = range(0, num_trained_timesteps, num_trained_timesteps // num_inference_steps)

        self.unet.to(torch_device)

        # Sample gaussian noise to begin loop
        image = torch.randn(
            (batch_size, self.unet.in_channels, self.unet.resolution, self.unet.resolution),
            generator=generator,
        )
        image = image.to(torch_device)

        seq = list(inference_step_times)
        seq_next = [-1] + list(seq[:-1])
        model = self.unet

        # 12 warmup steps: 3 Runge-Kutta groups of 4 model evaluations each,
        # starting from the last inference timestep
        warmup_steps = [len(seq) - (i // 4 + 1) for i in range(3 * 4)]

        ets = []
        prev_image = image
        for step_count, step_idx in enumerate(warmup_steps):
            i = seq[step_idx]
            j = seq_next[step_idx]

            t = torch.ones(image.shape[0]) * i
            t_next = torch.ones(image.shape[0]) * j
            t_list = [t, (t + t_next) / 2, t_next]

            residual = model(image.to(torch_device), t.to(torch_device))
            residual = residual.to("cpu")

            image = image.to("cpu")
            image = self.noise_scheduler.transfer(prev_image.to("cpu"), t_list[0], t_list[1], residual)

            if step_count % 4 == 0:
                ets.append(residual)
                prev_image = image

        ets = []
        step_idx = len(seq) - 1
        while step_idx >= 0:
            i = seq[step_idx]
            j = seq_next[step_idx]

            t = torch.ones(image.shape[0]) * i
            t_next = torch.ones(image.shape[0]) * j

            residual = model(image.to(torch_device), t.to(torch_device))
            residual = residual.to("cpu")

            t_list = [t, (t + t_next) / 2, t_next]

            ets.append(residual)
            if len(ets) <= 3:
                # not enough residual history yet: take a 4th-order Runge-Kutta step
                image = image.to("cpu")
                x_2 = self.noise_scheduler.transfer(image.to("cpu"), t_list[0], t_list[1], residual)
                e_2 = model(x_2.to(torch_device), t_list[1].to(torch_device)).to("cpu")
                x_3 = self.noise_scheduler.transfer(image, t_list[0], t_list[1], e_2)
                e_3 = model(x_3.to(torch_device), t_list[1].to(torch_device)).to("cpu")
                x_4 = self.noise_scheduler.transfer(image, t_list[0], t_list[2], e_3)
                e_4 = model(x_4.to(torch_device), t_list[2].to(torch_device)).to("cpu")
                residual = (1 / 6) * (residual + 2 * e_2 + 2 * e_3 + e_4)
            else:
                # enough history: 4th-order linear multistep combination of stored residuals
                residual = (1 / 24) * (55 * ets[-1] - 59 * ets[-2] + 37 * ets[-3] - 9 * ets[-4])

            img_next = self.noise_scheduler.transfer(image.to("cpu"), t, t_next, residual)
            image = img_next

            step_idx = step_idx - 1

            # if len(prev_noises) in [1, 2]:
            #     t = (t + t_next) / 2
            # elif len(prev_noises) == 3:
            #     t = t_next / 2

            # if len(prev_noises) == 0:
            #     ets.append(residual)
            #
            # if len(ets) > 3:
            #     residual = (1 / 24) * (55 * ets[-1] - 59 * ets[-2] + 37 * ets[-3] - 9 * ets[-4])
            #     step_idx = step_idx - 1
            # elif len(ets) <= 3 and len(prev_noises) == 3:
            #     residual = (1 / 6) * (prev_noises[-3] + 2 * prev_noises[-2] + 2 * prev_noises[-1] + residual)
            #     prev_noises = []
            #     step_idx = step_idx - 1
            # elif len(ets) <= 3 and len(prev_noises) < 3:
            #     prev_noises.append(residual)
            #     if len(prev_noises) < 2:
            #         t_next = (t + t_next) / 2
            #
            # image = self.noise_scheduler.transfer(image.to("cpu"), t, t_next, residual)

        return image

    # See formulas (12) and (16) of the DDIM paper https://arxiv.org/pdf/2010.02502.pdf
    # Ideally, read the DDIM paper in detail to understand the sampler fully
    #
    # Notation (<variable name> -> <name in paper>)
    # - pred_noise_t -> e_theta(x_t, t)
    # - pred_original_image -> f_theta(x_t, t) or x_0
    # - std_dev_t -> sigma_t
    # - eta -> η
    # - pred_image_direction -> "direction pointing to x_t"
    # - pred_prev_image -> "x_t-1"
    # for t in tqdm.tqdm(reversed(range(num_inference_steps)), total=num_inference_steps):
    #     # 1. predict noise residual
    #     with torch.no_grad():
    #         residual = self.unet(image, inference_step_times[t])
    #
    #     # 2. predict previous mean of image x_t-1
    #     pred_prev_image = self.noise_scheduler.step(residual, image, t, num_inference_steps, eta)
    #
    #     # 3. optionally sample variance
    #     variance = 0
    #     if eta > 0:
    #         noise = torch.randn(image.shape, generator=generator).to(image.device)
    #         variance = self.noise_scheduler.get_variance(t, num_inference_steps).sqrt() * eta * noise
    #
    #     # 4. set current image to prev_image: x_t -> x_t-1
    #     image = pred_prev_image + variance
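For reference, the two residual combinations hard-coded in the loop above are the classical fourth-order Runge-Kutta average and the fourth-order linear multistep (Adams-Bashforth) formula from the PNDM paper (https://arxiv.org/pdf/2202.09778.pdf); a sketch in LaTeX, with e_i denoting the model residuals:

    e' = \tfrac{1}{6}\,(e_1 + 2 e_2 + 2 e_3 + e_4)

    e'_t = \tfrac{1}{24}\,(55\, e_t - 59\, e_{t-1} + 37\, e_{t-2} - 9\, e_{t-3})

The Runge-Kutta form is used while fewer than four residuals are stored in ets; after that the multistep form reuses the stored history, so each denoising step costs only one new model evaluation.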
run_pndm.py
@@ -1,27 +0,0 @@
#!/usr/bin/env python3
from diffusers import PNDM, UNetModel, PNDMScheduler
import PIL.Image
import numpy as np
import torch

model_id = "fusing/ddim-celeba-hq"

model = UNetModel.from_pretrained(model_id)
scheduler = PNDMScheduler()

# load model and scheduler into the pipeline
pndm = PNDM(unet=model, noise_scheduler=scheduler)

# run pipeline in inference (sample random noise and denoise)
image = pndm()

# process image to PIL
image_processed = image.cpu().permute(0, 2, 3, 1)
image_processed = (image_processed + 1.0) / 2
image_processed = torch.clamp(image_processed, 0.0, 1.0)
image_processed = image_processed * 255
image_processed = image_processed.numpy().astype(np.uint8)
image_pil = PIL.Image.fromarray(image_processed[0])

# save image
image_pil.save("/home/patrick/images/test.png")
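The post-processing block above maps the model output from [-1, 1] to a uint8 image in [0, 255]. A minimal sketch of the same steps as a reusable helper (the to_pil name is illustrative, not a library function):

import numpy as np
import PIL.Image
import torch


def to_pil(image: torch.Tensor) -> PIL.Image.Image:
    # NCHW tensor in [-1, 1] -> first sample as an HWC uint8 PIL image
    image = image.cpu().permute(0, 2, 3, 1)
    image = torch.clamp((image + 1.0) / 2, 0.0, 1.0)
    image = (image * 255).numpy().astype(np.uint8)
    return PIL.Image.fromarray(image[0])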
@@ -55,11 +55,20 @@ class DiffusionPipeline(ConfigMixin):
     config_name = "model_index.json"

     def register_modules(self, **kwargs):
+        # import it here to avoid circular import
+        from diffusers import pipelines
+
         for name, module in kwargs.items():
+            # check if the module is a pipeline module
+            is_pipeline_module = hasattr(pipelines, module.__module__.split(".")[-1])
+
             # retrieve library
             library = module.__module__.split(".")[0]
-            # if library is not in LOADABLE_CLASSES, then it is a custom module
-            if library not in LOADABLE_CLASSES:
+
+            # if library is not in LOADABLE_CLASSES, then it is a custom module.
+            # Or if it's a pipeline module, then the module is inside the pipeline
+            # so we set the library to the module name.
+            if library not in LOADABLE_CLASSES or is_pipeline_module:
                 library = module.__module__.split(".")[-1]

             # retrieve class_name
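To make the new branch concrete: for a module defined inside a pipeline file, the two lookups disagree and the pipeline module name wins. A sketch using the DiffWave/BDDM names from the tests below (values are illustrative):

# DiffWave lives in diffusers/pipelines/pipeline_bddm.py, so:
# module.__module__                == "diffusers.pipelines.pipeline_bddm"
# module.__module__.split(".")[0]  == "diffusers"       # is in LOADABLE_CLASSES
# module.__module__.split(".")[-1] == "pipeline_bddm"   # found on `pipelines`
# -> is_pipeline_module is True, so the library is registered as "pipeline_bddm"
#    rather than "diffusers"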
@@ -151,12 +160,22 @@ class DiffusionPipeline(ConfigMixin):
         init_dict, _ = pipeline_class.extract_init_dict(config_dict, **kwargs)

         init_kwargs = {}

+        # import it here to avoid circular import
+        from diffusers import pipelines
+
         # 4. Load each module in the pipeline
         for name, (library_name, class_name) in init_dict.items():
-            # if the model is not in diffusers or transformers, we need to load it from the hub
-            # assumes that it's a subclass of ModelMixin
-            if library_name == module_candidate_name:
+            is_pipeline_module = hasattr(pipelines, library_name)
+            # if the model is in a pipeline module, then we load it from the pipeline
+            if is_pipeline_module:
+                pipeline_module = getattr(pipelines, library_name)
+                class_obj = getattr(pipeline_module, class_name)
+                importable_classes = ALL_IMPORTABLE_CLASSES
+                class_candidates = {c: class_obj for c in ALL_IMPORTABLE_CLASSES.keys()}
+            elif library_name == module_candidate_name:
+                # if the model is not in diffusers or transformers, we need to load it from the hub
+                # assumes that it's a subclass of ModelMixin
                 class_obj = get_class_from_dynamic_module(cached_folder, module_candidate, class_name, cached_folder)
                 # since it's not from a library, we need to check class candidates for all importable classes
                 importable_classes = ALL_IMPORTABLE_CLASSES
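The init_dict driving this loop comes from the pipeline's model_index.json, which maps each module name to a (library_name, class_name) pair. For the BDDM pipeline tested below, the entry for the custom module would look roughly like this (a sketch inferred from the test assertion, not a verbatim file):

{
  "diffwave": ["pipeline_bddm", "DiffWave"],
  ...
}

Since "pipeline_bddm" is an attribute of diffusers.pipelines, is_pipeline_module is True and DiffWave is resolved from the pipeline module instead of being fetched from the hub.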
@@ -28,7 +28,8 @@ class PNDM(DiffusionPipeline):
         self.register_modules(unet=unet, noise_scheduler=noise_scheduler)

     def __call__(self, batch_size=1, generator=None, torch_device=None, num_inference_steps=50):
-        # eta corresponds to η in paper and should be between [0, 1]
+        # For more information on the sampling method you can take a look at Algorithm 2 of
+        # the official paper: https://arxiv.org/pdf/2202.09778.pdf
         if torch_device is None:
             torch_device = "cuda" if torch.cuda.is_available() else "cpu"

@@ -42,21 +43,17 @@ class PNDM(DiffusionPipeline):
         image = image.to(torch_device)

         warmup_time_steps = self.noise_scheduler.get_warmup_time_steps(num_inference_steps)
-        prev_image = image
         for t in tqdm.tqdm(range(len(warmup_time_steps))):
             t_orig = warmup_time_steps[t]
             residual = self.unet(image, t_orig)

-            if t % 4 == 0:
-                prev_image = image
-
-            image = self.noise_scheduler.step_warm_up(residual, prev_image, t, num_inference_steps)
+            image = self.noise_scheduler.step_prk(residual, image, t, num_inference_steps)

         timesteps = self.noise_scheduler.get_time_steps(num_inference_steps)
         for t in tqdm.tqdm(range(len(timesteps))):
             t_orig = timesteps[t]
             residual = self.unet(image, t_orig)

-            image = self.noise_scheduler.step(residual, image, t, num_inference_steps)
+            image = self.noise_scheduler.step_plms(residual, image, t, num_inference_steps)

         return image

@@ -55,11 +55,14 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):

         self.set_format(tensor_format=tensor_format)

-        # for now we only support F-PNDM, i.e. the runge-kutta method
+        # For now we only support F-PNDM, i.e. the runge-kutta method
+        # For more information on the algorithm please take a look at the paper: https://arxiv.org/pdf/2202.09778.pdf
+        # mainly at equations (12) and (13) and the Algorithm 2.
         self.pndm_order = 4

         # running values
         self.cur_residual = 0
+        self.cur_image = None
         self.ets = []
         self.warmup_time_steps = {}
         self.time_steps = {}
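With pndm_order = 4 the scheduler never needs more than the last four entries of self.ets, which is presumably what step_plms consumes, mirroring the 55/-59/37/-9 multistep formula in the deleted pipeline code above; cur_image records the image the current Runge-Kutta group started from, so step_prk can transfer from it rather than from the intermediate half-step images.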
@@ -95,7 +98,8 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):

         return self.time_steps[num_inference_steps]

-    def step_warm_up(self, residual, image, t, num_inference_steps):
+    def step_prk(self, residual, image, t, num_inference_steps):
+        # TODO(Patrick) - need to rethink whether the "warmup" way is the correct API design here
         warmup_time_steps = self.get_warmup_time_steps(num_inference_steps)

         t_prev = warmup_time_steps[t // 4 * 4]
@@ -104,6 +108,7 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
         if t % 4 == 0:
             self.cur_residual += 1 / 6 * residual
             self.ets.append(residual)
+            self.cur_image = image
         elif (t - 1) % 4 == 0:
             self.cur_residual += 1 / 3 * residual
         elif (t - 2) % 4 == 0:
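The accumulated 1/6 and 1/3 weights, together with the final 1/6 applied just below, are the classical RK4 coefficients: over one group of four evaluations (t % 4 == 0, 1, 2, 3) the scheduler incrementally builds

    e' = \tfrac{1}{6} e_1 + \tfrac{1}{3} e_2 + \tfrac{1}{3} e_3 + \tfrac{1}{6} e_4

which is the same (1/6)(e_1 + 2 e_2 + 2 e_3 + e_4) combination used in the deleted pipeline code, just computed across separate step_prk calls.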
@@ -112,9 +117,9 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
             residual = self.cur_residual + 1 / 6 * residual
             self.cur_residual = 0

-        return self.transfer(image, t_prev, t_next, residual)
+        return self.transfer(self.cur_image, t_prev, t_next, residual)

-    def step(self, residual, image, t, num_inference_steps):
+    def step_plms(self, residual, image, t, num_inference_steps):
         timesteps = self.get_time_steps(num_inference_steps)

         t_prev = timesteps[t]
@@ -19,9 +19,10 @@ import unittest

 import torch

-from diffusers import DDIM, DDPM, PNDM, GLIDE, DDIMScheduler, DDPMScheduler, LatentDiffusion, PNDMScheduler, UNetModel
+from diffusers import DDIM, DDPM, PNDM, GLIDE, BDDM, DDIMScheduler, DDPMScheduler, LatentDiffusion, PNDMScheduler, UNetModel
 from diffusers.configuration_utils import ConfigMixin
 from diffusers.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.pipeline_bddm import DiffWave
 from diffusers.testing_utils import floats_tensor, slow, torch_device

@@ -213,6 +214,21 @@ class PipelineTesterMixin(unittest.TestCase):
         expected_slice = torch.tensor([0.7295, 0.7358, 0.7256, 0.7435, 0.7095, 0.6884, 0.7325, 0.6921, 0.6458])
         assert (image_slice.flatten() - expected_slice).abs().max() < 1e-2

+    def test_module_from_pipeline(self):
+        model = DiffWave(num_res_layers=4)
+        noise_scheduler = DDPMScheduler(timesteps=12)
+
+        bddm = BDDM(model, noise_scheduler)
+
+        # check if the library name for the diffwave module is set to pipeline module
+        self.assertTrue(bddm.config["diffwave"][0] == "pipeline_bddm")
+
+        # check if we can save and load the pipeline
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            bddm.save_pretrained(tmpdirname)
+            _ = BDDM.from_pretrained(tmpdirname)
+            # check if the same works using the DiffusionPipeline class
+            _ = DiffusionPipeline.from_pretrained(tmpdirname)
+
     @slow
     def test_glide_text2img(self):
         model_id = "fusing/glide-base"