diff --git a/README.md b/README.md index 5061bd1547..a6ba944a9e 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ import numpy as np generator = torch.Generator() generator = generator.manual_seed(6694729458485568) +torch_device = "cuda" if torch.cuda.is_available() else "cpu" # 1. Load models scheduler = GaussianDDPMScheduler.from_config("fusing/ddpm-lsun-church") diff --git a/examples/sample_loop.py b/examples/sample_loop.py deleted file mode 100755 index d8134a6bf3..0000000000 --- a/examples/sample_loop.py +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env python3 -from diffusers import UNetModel, GaussianDDPMScheduler -import torch -import torch.nn.functional as F -import numpy as np -import PIL.Image -import tqdm - -#torch_device = "cuda" -# -#unet = UNetModel.from_pretrained("/home/patrick/ddpm-lsun-church") -#unet.to(torch_device) -# -#TIME_STEPS = 10 -# -#scheduler = GaussianDDPMScheduler.from_config("/home/patrick/ddpm-lsun-church", timesteps=TIME_STEPS) -# -#diffusion_config = { -# "beta_start": 0.0001, -# "beta_end": 0.02, -# "num_diffusion_timesteps": TIME_STEPS, -#} -# -# 2. Do one denoising step with model -#batch_size, num_channels, height, width = 1, 3, 256, 256 -# -#torch.manual_seed(0) -#noise_image = torch.randn(batch_size, num_channels, height, width, device="cuda") -# -# -# Helper -#def noise_like(shape, device, repeat=False): -# def repeat_noise(): -# return torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1))) -# -# def noise(): -# return torch.randn(shape, device=device) -# -# return repeat_noise() if repeat else noise() -# -# -#betas = np.linspace(diffusion_config["beta_start"], diffusion_config["beta_end"], diffusion_config["num_diffusion_timesteps"], dtype=np.float64) -#betas = torch.tensor(betas, device=torch_device) -#alphas = 1.0 - betas -# -#alphas_cumprod = torch.cumprod(alphas, axis=0) -#alphas_cumprod_prev = F.pad(alphas_cumprod[:-1], (1, 0), value=1.0) -# -#posterior_mean_coef1 = betas * torch.sqrt(alphas_cumprod_prev) / (1.0 - alphas_cumprod) -#posterior_mean_coef2 = (1.0 - alphas_cumprod_prev) * torch.sqrt(alphas) / (1.0 - alphas_cumprod) -# -#posterior_variance = betas * (1.0 - alphas_cumprod_prev) / (1.0 - alphas_cumprod) -#posterior_log_variance_clipped = torch.log(posterior_variance.clamp(min=1e-20)) -# -# -#sqrt_recip_alphas_cumprod = torch.sqrt(1.0 / alphas_cumprod) -#sqrt_recipm1_alphas_cumprod = torch.sqrt(1.0 / alphas_cumprod - 1) -# -# -#noise_coeff = (1 - alphas) / torch.sqrt(1 - alphas_cumprod) -#coeff = 1 / torch.sqrt(alphas) - - -def real_fn(): - # Compare the following to Algorithm 2 Sampling of paper: https://arxiv.org/pdf/2006.11239.pdf - # 1: x_t ~ N(0,1) - x_t = noise_image - # 2: for t = T, ...., 1 do - for i in reversed(range(TIME_STEPS)): - t = torch.tensor([i]).to(torch_device) - # 3: z ~ N(0, 1) - noise = noise_like(x_t.shape, torch_device) - - # 4: √1αtxt − √1−αt1−α¯tθ(xt, t) + σtz - # ------------------------- MODEL ------------------------------------# - with torch.no_grad(): - pred_noise = unet(x_t, t) # pred epsilon_theta - - # pred_x = sqrt_recip_alphas_cumprod[t] * x_t - sqrt_recipm1_alphas_cumprod[t] * pred_noise - # pred_x.clamp_(-1.0, 1.0) - # pred mean - # posterior_mean = posterior_mean_coef1[t] * pred_x + posterior_mean_coef2[t] * x_t - # --------------------------------------------------------------------# - - posterior_mean = coeff[t] * (x_t - noise_coeff[t] * pred_noise) - - # ------------------------- Variance Scheduler -----------------------# - # pred variance - posterior_log_variance = posterior_log_variance_clipped[t] - - b, *_, device = *x_t.shape, x_t.device - nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x_t.shape) - 1))) - posterior_variance = nonzero_mask * (0.5 * posterior_log_variance).exp() - # --------------------------------------------------------------------# - - x_t_1 = (posterior_mean + posterior_variance * noise).to(torch.float32) - x_t = x_t_1 - - print(x_t.abs().sum()) - - -def post_process_to_image(x_t): - image = x_t.cpu().permute(0, 2, 3, 1) - image = (image + 1.0) * 127.5 - image = image.numpy().astype(np.uint8) - - return PIL.Image.fromarray(image[0]) - - -from pytorch_diffusion import Diffusion - -#diffusion = Diffusion.from_pretrained("lsun_church") -#samples = diffusion.denoise(1) -# -#image = post_process_to_image(samples) -#image.save("check.png") -#import ipdb; ipdb.set_trace() - - -device = "cuda" -scheduler = GaussianDDPMScheduler.from_config("/home/patrick/ddpm-lsun-church", timesteps=10) - -import ipdb; ipdb.set_trace() - -model = UNetModel.from_pretrained("/home/patrick/ddpm-lsun-church").to(device) - - -torch.manual_seed(0) -next_image = scheduler.sample_noise((1, model.in_channels, model.resolution, model.resolution), device=device) - -for t in tqdm.tqdm(reversed(range(len(scheduler))), total=len(scheduler)): - # define coefficients for time step t - clip_image_coeff = 1 / torch.sqrt(scheduler.get_alpha_prod(t)) - clip_noise_coeff = torch.sqrt(1 / scheduler.get_alpha_prod(t) - 1) - image_coeff = (1 - scheduler.get_alpha_prod(t - 1)) * torch.sqrt(scheduler.get_alpha(t)) / (1 - scheduler.get_alpha_prod(t)) - clip_coeff = torch.sqrt(scheduler.get_alpha_prod(t - 1)) * scheduler.get_beta(t) / (1 - scheduler.get_alpha_prod(t)) - - # predict noise residual - with torch.no_grad(): - noise_residual = model(next_image, t) - - # compute prev image from noise - pred_mean = clip_image_coeff * next_image - clip_noise_coeff * noise_residual - pred_mean = torch.clamp(pred_mean, -1, 1) - image = clip_coeff * pred_mean + image_coeff * next_image - - # sample variance - variance = scheduler.sample_variance(t, image.shape, device=device) - - # sample previous image - sampled_image = image + variance - - next_image = sampled_image - - -image = post_process_to_image(next_image) -image.save("example_new.png") diff --git a/models/vision/ddpm/example.py b/models/vision/ddpm/example.py index 2ba753c385..c086407490 100755 --- a/models/vision/ddpm/example.py +++ b/models/vision/ddpm/example.py @@ -1,20 +1,23 @@ #!/usr/bin/env python3 -import tempfile -import sys - +import os +import pathlib from modeling_ddpm import DDPM - -model_id = sys.argv[1] - -ddpm = DDPM.from_pretrained(model_id) -image = ddpm() - import PIL.Image import numpy as np -image_processed = image.cpu().permute(0, 2, 3, 1) -image_processed = (image_processed + 1.0) * 127.5 -image_processed = image_processed.numpy().astype(np.uint8) -image_pil = PIL.Image.fromarray(image_processed[0]) -image_pil.save("test.png") -import ipdb; ipdb.set_trace() +model_ids = ["ddpm-lsun-cat", "ddpm-lsun-cat-ema", "ddpm-lsun-church-ema", "ddpm-lsun-church", "ddpm-lsun-bedroom", "ddpm-lsun-bedroom-ema", "ddpm-cifar10-ema", "ddpm-cifar10", "ddpm-celeba-hq", "ddpm-celeba-hq-ema"] + +for model_id in model_ids: + path = os.path.join("/home/patrick/images/hf", model_id) + pathlib.Path(path).mkdir(parents=True, exist_ok=True) + + ddpm = DDPM.from_pretrained("fusing/" + model_id) + image = ddpm(batch_size=4) + + image_processed = image.cpu().permute(0, 2, 3, 1) + image_processed = (image_processed + 1.0) * 127.5 + image_processed = image_processed.numpy().astype(np.uint8) + + for i in range(image_processed.shape[0]): + image_pil = PIL.Image.fromarray(image_processed[i]) + image_pil.save(os.path.join(path, f"image_{i}.png")) diff --git a/models/vision/ddpm/modeling_ddpm.py b/models/vision/ddpm/modeling_ddpm.py index ae049a8c0a..a10feaba40 100644 --- a/models/vision/ddpm/modeling_ddpm.py +++ b/models/vision/ddpm/modeling_ddpm.py @@ -27,12 +27,13 @@ class DDPM(DiffusionPipeline): super().__init__() self.register_modules(unet=unet, noise_scheduler=noise_scheduler) - def __call__(self, generator=None, torch_device=None): - torch_device = "cuda" if torch.cuda.is_available() else "cpu" + def __call__(self, batch_size=1, generator=None, torch_device=None): + if torch_device is None: + torch_device = "cuda" if torch.cuda.is_available() else "cpu" self.unet.to(torch_device) # 1. Sample gaussian noise - image = self.noise_scheduler.sample_noise((1, self.unet.in_channels, self.unet.resolution, self.unet.resolution), device=torch_device, generator=generator) + image = self.noise_scheduler.sample_noise((batch_size, self.unet.in_channels, self.unet.resolution, self.unet.resolution), device=torch_device, generator=generator) for t in tqdm.tqdm(reversed(range(len(self.noise_scheduler))), total=len(self.noise_scheduler)): # i) define coefficients for time step t clip_image_coeff = 1 / torch.sqrt(self.noise_scheduler.get_alpha_prod(t)) diff --git a/src/diffusers/schedulers/gaussian_ddpm.py b/src/diffusers/schedulers/gaussian_ddpm.py index 4fcdfdf2bd..2a25cbbfc9 100644 --- a/src/diffusers/schedulers/gaussian_ddpm.py +++ b/src/diffusers/schedulers/gaussian_ddpm.py @@ -108,7 +108,7 @@ class GaussianDDPMScheduler(nn.Module, ConfigMixin): def sample_variance(self, time_step, shape, device, generator=None): variance = self.log_variance[time_step] - nonzero_mask = torch.tensor([1 - (time_step == 0)], device=device).float()[None, :].repeat(shape[0], 1) + nonzero_mask = torch.tensor([1 - (time_step == 0)], device=device).float()[None, :] noise = self.sample_noise(shape, device=device, generator=generator) diff --git a/tests/test_modeling_utils.py b/tests/test_modeling_utils.py index 4655c96749..6dce91ae4b 100755 --- a/tests/test_modeling_utils.py +++ b/tests/test_modeling_utils.py @@ -76,7 +76,7 @@ def floats_tensor(shape, scale=1.0, rng=None, name=None): class ModelTesterMixin(unittest.TestCase): @property def dummy_input(self): - batch_size = 1 + batch_size = 4 num_channels = 3 sizes = (32, 32)