From 05be622b1c152bf11026f97f083bb1b989231aec Mon Sep 17 00:00:00 2001
From: Jonah <97376233+jonahclarsen@users.noreply.github.com>
Date: Thu, 30 May 2024 20:59:49 -0300
Subject: [PATCH] Fix depth pipeline "input/weight type should be the same"
 error at fp16 (#8321)

Fix "input/weight type should be the same"

Co-authored-by: YiYi Xu
---
 .../stable_diffusion/pipeline_stable_diffusion_depth2img.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py
index 17d4ee3a7f..8e1e3ab319 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py
@@ -546,7 +546,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader

         if depth_map is None:
             pixel_values = self.feature_extractor(images=image, return_tensors="pt").pixel_values
-            pixel_values = pixel_values.to(device=device)
+            pixel_values = pixel_values.to(device=device, dtype=dtype)
             # The DPT-Hybrid model uses batch-norm layers which are not compatible with fp16.
             # So we use `torch.autocast` here for half precision inference.
             if torch.backends.mps.is_available():