mirror of
https://github.com/vladmandic/sdnext.git
synced 2026-01-27 15:02:48 +03:00
71 lines
2.7 KiB
Python
71 lines
2.7 KiB
Python
import cv2
|
|
import torch
|
|
import torch.nn.functional as F
|
|
import numpy as np
|
|
from PIL import Image
|
|
from modules import devices, masking
|
|
from modules.shared import opts
|
|
|
|
|
|
class DepthAnythingDetector:
    """Depth-map control processor built on Depth-Anything.

    https://github.com/LiheYoung/Depth-Anything
    """

    def __init__(self, model):
        """Keep the network and build the input preprocessing pipeline.

        Args:
            model: callable network; `__call__` moves it between devices with
                `.to(...)` and invokes it on the prepared image batch.
        """
        # Imported lazily so these modules are only needed once a detector is built.
        from torchvision.transforms import Compose
        from modules.control.proc.depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
        self.model = model
        self.transform = Compose([
            Resize(
                width=518,
                height=518,
                resize_target=False,
                keep_aspect_ratio=True,
                ensure_multiple_of=14,
                resize_method="lower_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet()])

    @classmethod
    def from_pretrained(cls, pretrained_model_or_path: str, cache_dir: str, local_files_only=False) -> "DepthAnythingDetector":
        """Build the ViT-L DPT network, fetch its weights from the Hugging Face hub and wrap it.

        Args:
            pretrained_model_or_path: passed to `hf_hub_download` as `repo_id`.
            cache_dir: download cache directory passed to `hf_hub_download`.
            local_files_only: forwarded to `hf_hub_download`.

        Returns:
            A detector instance holding the loaded model in eval mode on `devices.device`.
        """
        from modules.control.proc.depth_anything.dpt import DPT_DINOv2
        import huggingface_hub as hf
        model = (
            DPT_DINOv2(
                encoder="vitl",
                features=256,
                out_channels=[256, 512, 1024, 1024],
                localhub=False,
            )
            .to(devices.device)
            .eval()
        )
        model_path = hf.hf_hub_download(repo_id=pretrained_model_or_path, filename="pytorch_model.bin", cache_dir=cache_dir, local_files_only=local_files_only)
        # Deserialize onto CPU so a checkpoint saved from an accelerator still loads on
        # hosts without that device; load_state_dict copies the values into the
        # parameters that already live on devices.device.
        # NOTE(review): the .bin checkpoint is pickle-based - only load from trusted repos.
        model_dict = torch.load(model_path, map_location="cpu")
        model.load_state_dict(model_dict)
        return cls(model)

    @staticmethod
    def _normalize_depth(depth):
        """Linearly rescale a depth tensor to 0..255 and return it as a uint8 numpy array.

        A constant (or NaN-ranged) input has no usable range; it maps to all zeros
        instead of dividing by zero.
        """
        low = depth.min()
        span = depth.max() - low
        if span > 0:
            scaled = (depth - low) / span * 255.0
        else:
            scaled = torch.zeros_like(depth)
        return scaled.cpu().numpy().astype(np.uint8)

    def __call__(self, image, color_map: str = "none", output_type: str = 'pil'):
        """Estimate a depth map for `image` at the image's own resolution.

        Args:
            image: a PIL image (converted with `np.array`) or an array exposing
                `.shape` whose first two entries are height and width.
            color_map: `'none'` keeps the single-channel map; any other value is
                looked up in `masking.COLORMAP` and its index is used as the
                OpenCV colormap id (a name missing from the list raises `ValueError`).
            output_type: `"pil"` wraps the result with `Image.fromarray`; any other
                value returns the numpy array.

        Returns:
            The uint8 depth map as a PIL image or numpy array.
        """
        self.model.to(devices.device)
        if isinstance(image, Image.Image):
            image = np.array(image)
        h, w = image.shape[:2]
        # NOTE(review): arrays derived from PIL are presumably already RGB, so this
        # swap hands the network BGR data; kept unchanged to preserve existing
        # outputs - confirm the channel order the checkpoint expects.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
        image = self.transform({"image": image})["image"]
        image = torch.from_numpy(image).unsqueeze(0).to(devices.device)
        with devices.inference_context():
            depth = self.model(image)
        if opts.control_move_processor:
            # Offload the network again once the forward pass is done.
            self.model.to('cpu')
        # Resize the prediction back to the input height/width.
        depth = F.interpolate(depth[None], (h, w), mode="bilinear", align_corners=False)[0, 0]
        depth = self._normalize_depth(depth)
        if color_map != 'none':
            # Reverse the channel axis of OpenCV's colormap output.
            depth = cv2.applyColorMap(depth, masking.COLORMAP.index(color_map))[:, :, ::-1]
        if output_type == "pil":
            depth = Image.fromarray(depth)
        return depth
|
|
|
|
# def unload_model(self):
|
|
# self.model.to("cpu")
|