#!/usr/bin/env python
"""
|
|
process people images
|
|
- check image resolution
|
|
- runs detection of face and body
|
|
- extracts crop and performs checks:
|
|
- visible: is face or body detected
|
|
- in frame: for face based on box, for body based on number of visible keypoints
|
|
- resolution: is cropped image still of sufficient resolution
|
|
- optionaly upsample and restore face quality
|
|
- blur: is image sharp enough
|
|
- dynamic range: is image bright enough
|
|
- similarity: compares image to all previously processed images to see if its unique enough
|
|
- images are resized and optionally squared
|
|
- face additionally runs through semantic segmentation to remove background
|
|
- if image passes checks
|
|
image padded and saved as extracted image
|
|
- body requires that face is detected and in-frame,
|
|
but does not have to pass all other checks as body performs its own checks
|
|
- runs clip interrogation on extracted images to generate filewords
|
|
"""
|
|
|
|
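
# example cli invocation (script name and paths are illustrative):
#   python process.py --output ~/dataset/processed ~/dataset/raw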

import os
import sys
import io
import math
import base64
import pathlib
import argparse
import logging
import filetype
import numpy as np
import mediapipe as mp
from PIL import Image, ImageOps
from skimage.metrics import structural_similarity as ssim
from scipy.stats import beta

sys.path.append(os.path.dirname(__file__))

from util import log, Map
from sdapi import postsync


params = Map({
    # general settings, do not modify
    'src': '', # source folder
    'dst': '', # destination folder
    'clear_dst': True, # remove all files from destination at the start
    'format': '.jpg', # image format
    'target_size': 512, # target resolution
    'square_images': True, # should output images be squared
    'segmentation_model': 0, # segmentation model 0/general 1/landscape
    'segmentation_background': (192, 192, 192), # segmentation background color
    'blur_samplesize': 60, # sample size to use for blur detection
    'similarity_size': 64, # base similarity detection on reduced images
    # original image processing settings
    'keep_original': False, # keep original image
    # face processing settings
    'extract_face': False, # extract face from image
    'face_score': 0.7, # min face detection score
    'face_pad': 0.1, # pad face image percentage
    'face_model': 1, # which face model to use 0/close-up 1/standard
    'face_blur': False, # check for face blur
    'face_blur_score': 1.5, # max score for face blur detection
    'face_range': False, # check for face dynamic range
    'face_range_score': 0.15, # min score for face dynamic range detection
    'face_restore': False, # attempt to restore face quality
    'face_upscale': False, # attempt to scale small faces
    'face_segmentation': False, # segmentation enabled
    # body processing settings
    'extract_body': False, # extract body from image
    'body_score': 0.9, # min body detection score
    'body_visibility': 0.5, # min visibility score for each detected body part
    'body_parts': 15, # min number of detected body parts with sufficient visibility
    'body_pad': 0.2, # pad body image percentage
    'body_model': 2, # body model to use 0/low 1/medium 2/high
    'body_blur': False, # check for body blur
    'body_blur_score': 1.8, # max score for body blur detection
    'body_range': False, # check for body dynamic range
    'body_range_score': 0.15, # min score for body dynamic range detection
    'body_segmentation': False, # segmentation enabled
    # similarity detection settings
    'similarity_score': 0.8, # maximum similarity score before image is discarded
    # interrogate settings
    'interrogate_model': ['clip', 'deepdanbooru'], # interrogate models
    'interrogate_captions': True, # write captions to file
    'tag_limit': 5, # number of tags to extract
})
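
# example (illustrative paths): when used as a module, any of the defaults above can be
# overridden by passing a dict to process_images(), which applies params.update(args):
#   process_images('~/input', '~/output', { 'extract_face': True, 'face_blur': True })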

face_model = None
body_model = None
segmentation_model = None


def detect_blur(image):
    # based on <https://github.com/karthik9319/Blur-Detection/>
    # zero out the low-frequency center of the shifted spectrum, invert the fft and
    # measure the mean log-magnitude of the remaining high-frequency content
    bw = ImageOps.grayscale(image)
    cx, cy = image.size[0] // 2, image.size[1] // 2
    fft = np.fft.fft2(bw)
    fft_shift = np.fft.fftshift(fft)
    fft_shift[cy - params.blur_samplesize: cy + params.blur_samplesize, cx - params.blur_samplesize: cx + params.blur_samplesize] = 0
    fft_shift = np.fft.ifftshift(fft_shift)
    recon = np.fft.ifft2(fft_shift)
    magnitude = np.log(np.abs(recon))
    mean = round(np.mean(magnitude), 2)
    return mean
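
# note: scores returned here are compared against params.face_blur_score and
# params.body_blur_score in extract_face()/extract_body(); crops that exceed the
# configured limit are skipped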


def detect_dynamicrange(image):
    # based on <https://towardsdatascience.com/measuring-enhancing-image-quality-attributes-234b0f250e10>
    # compare the image brightness histogram to a beta(2, 2) reference distribution
    # using cosine similarity: histograms shaped like the reference score closer to 1
    data = np.asarray(image)
    image = np.float32(data)
    RGB = [0.299, 0.587, 0.114]
    height, width = image.shape[:2]
    brightness_image = np.sqrt(image[..., 0] ** 2 * RGB[0] + image[..., 1] ** 2 * RGB[1] + image[..., 2] ** 2 * RGB[2])
    hist, _ = np.histogram(brightness_image, bins=256, range=(0, 255))
    img_brightness_pmf = hist / (height * width)
    dist = beta(2, 2)
    ys = dist.pdf(np.linspace(0, 1, 256))
    ref_pmf = ys / np.sum(ys)
    dot_product = np.dot(ref_pmf, img_brightness_pmf)
    squared_dist_a = np.sum(ref_pmf ** 2)
    squared_dist_b = np.sum(img_brightness_pmf ** 2)
    res = dot_product / math.sqrt(squared_dist_a * squared_dist_b)
    return round(res, 2)
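
# the score above is plain cosine similarity between the two distributions:
#   res = <ref_pmf, img_brightness_pmf> / (||ref_pmf|| * ||img_brightness_pmf||)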


images = []


def detect_similar(image):
    # reduce to a small grayscale copy and return the highest ssim score against all
    # previously accepted images
    img = image.resize((params.similarity_size, params.similarity_size))
    img = ImageOps.grayscale(img)
    data = np.array(img)
    similarity = 0
    for i in images:
        val = ssim(data, i, data_range=255, channel_axis=None, gradient=False, full=False)
        if val > similarity:
            similarity = val
    images.append(data)
    return similarity
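
# note: the images list persists for the lifetime of the process, so every new crop is
# compared against all previously accepted crops; call images.clear() when starting a
# new dataset in the same process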


def segmentation(image):
    global segmentation_model
    if segmentation_model is None:
        segmentation_model = mp.solutions.selfie_segmentation.SelfieSegmentation(model_selection=params.segmentation_model)
    data = np.array(image)
    results = segmentation_model.process(data)
    condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1
    background = np.zeros(data.shape, dtype=np.uint8)
    background[:] = params.segmentation_background
    data = np.where(condition, data, background) # consider using a joint bilateral filter instead of pure combine
    segmented = Image.fromarray(data)
    return segmented
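
# note: selfie segmentation operates on rgb arrays; crops reaching this point have
# already been converted from rgba in extract_face()/extract_body()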


def extract_face(img):
    if not params.extract_face:
        return None, True
    if img.mode == 'RGBA':
        img = img.convert('RGB')
    scale = max(img.size[0], img.size[1]) / params.target_size
    resized = img.copy()
    resized.thumbnail((params.target_size, params.target_size), Image.HAMMING)

    global face_model
    if face_model is None:
        face_model = mp.solutions.face_detection.FaceDetection(min_detection_confidence=params.face_score, model_selection=params.face_model)
    results = face_model.process(np.array(resized))
    if results.detections is None:
        return None, False
    box = results.detections[0].location_data.relative_bounding_box
    if box.xmin < 0 or box.ymin < 0 or (box.xmin + box.width) > 1 or (box.ymin + box.height) > 1: # relative box must fall fully inside the frame
        log.info({ 'process face skip': 'out of frame' })
        return None, False
    x = (box.xmin - params.face_pad / 2) * resized.width
    y = (box.ymin - params.face_pad / 2) * resized.height
    w = (box.width + params.face_pad) * resized.width
    h = (box.height + params.face_pad) * resized.height
    cx = x + w / 2
    cy = y + h / 2
    half = max(w, h) / 2
    square = [scale * (cx - half), scale * (cy - half), scale * (cx + half), scale * (cy + half)]
    square = [max(square[0], 0), max(square[1], 0), min(square[2], img.width), min(square[3], img.height)]
    cropped = img.crop(tuple(square))

    upscale = 1
    if params.face_restore or params.face_upscale:
        if (cropped.size[0] < params.target_size or cropped.size[1] < params.target_size) and params.face_upscale:
            upscale = 2
        kwargs = Map({
            'image': encode(cropped),
            'upscaler_1': 'SwinIR_4x' if params.face_upscale else None,
            'codeformer_visibility': 1.0 if params.face_restore else 0.0,
            'codeformer_weight': 0.15 if params.face_restore else 0.0,
            'upscaling_resize': upscale,
        })
        original = [cropped.size[0], cropped.size[1]]
        res = postsync('/sdapi/v1/extra-single-image', kwargs)
        if 'image' not in res:
            log.error({ 'process face': 'upscale failed' })
            raise ValueError('upscale failed')
        cropped = Image.open(io.BytesIO(base64.b64decode(res['image'])))
        kwargs.image = [cropped.size[0], cropped.size[1]] # drop the base64 payload now that it has been used
        upscaled = [cropped.size[0], cropped.size[1]]
        upscale = False if upscale == 1 else { 'original': original, 'upscaled': upscaled }
        log.info({ 'process face restore': params.face_restore, 'upscale': upscale })

    if cropped.size[0] < params.target_size and cropped.size[1] < params.target_size:
        log.info({ 'process face skip': 'low resolution', 'size': [cropped.size[0], cropped.size[1]] })
        return None, True
    cropped.thumbnail((params.target_size, params.target_size), Image.HAMMING)

    if params.square_images:
        squared = Image.new('RGB', (params.target_size, params.target_size))
        squared.paste(cropped, ((params.target_size - cropped.width) // 2, (params.target_size - cropped.height) // 2))
        if params.face_segmentation:
            squared = segmentation(squared)
    else:
        squared = cropped

    if params.face_blur:
        blur = detect_blur(squared)
        if blur > params.face_blur_score:
            log.info({ 'process face skip': 'blur check fail', 'blur': blur })
            return None, True
        else:
            log.debug({ 'process face blur': blur })

    if params.face_range:
        drange = detect_dynamicrange(squared)
        if drange < params.face_range_score:
            log.info({ 'process face skip': 'dynamic range check fail', 'range': drange })
            return None, True
        else:
            log.debug({ 'process face dynamic range': drange })

    similarity = detect_similar(squared)
    if similarity > params.similarity_score:
        log.info({ 'process face skip': 'similarity check fail', 'score': round(similarity, 2) })
        return None, True

    return squared, True
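
# return contract shared by extract_face() and extract_body(): (crop | None, ok);
# process_file() stops before body extraction when extract_face() reports ok=False,
# matching the rule in the module docstring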


def extract_body(img):
    if not params.extract_body:
        return None, True
    if img.mode == 'RGBA':
        img = img.convert('RGB')
    scale = max(img.size[0], img.size[1]) / params.target_size
    resized = img.copy()
    resized.thumbnail((params.target_size, params.target_size), Image.HAMMING)

    global body_model
    if body_model is None:
        body_model = mp.solutions.pose.Pose(static_image_mode=True, min_detection_confidence=params.body_score, model_complexity=params.body_model)
    results = body_model.process(np.array(resized))
    if results.pose_landmarks is None:
        return None, False
    x = [resized.width * (i.x - params.body_pad / 2) for i in results.pose_landmarks.landmark if i.visibility > params.body_visibility]
    y = [resized.height * (i.y - params.body_pad / 2) for i in results.pose_landmarks.landmark if i.visibility > params.body_visibility]
    if len(x) < params.body_parts:
        log.info({ 'process body skip': 'insufficient body parts', 'detected': len(x) })
        return None, True
    w = max(x) - min(x) + resized.width * params.body_pad
    h = max(y) - min(y) + resized.height * params.body_pad
    cx = min(x) + w / 2
    cy = min(y) + h / 2
    half = max(w, h) / 2
    square = [scale * (cx - half), scale * (cy - half), scale * (cx + half), scale * (cy + half)]
    square = [max(square[0], 0), max(square[1], 0), min(square[2], img.width), min(square[3], img.height)]
    cropped = img.crop(tuple(square))
    if cropped.size[0] < params.target_size and cropped.size[1] < params.target_size:
        log.info({ 'process body skip': 'low resolution', 'size': [cropped.size[0], cropped.size[1]] })
        return None, True
    cropped.thumbnail((params.target_size, params.target_size), Image.HAMMING)

    if params.square_images:
        squared = Image.new('RGB', (params.target_size, params.target_size))
        squared.paste(cropped, ((params.target_size - cropped.width) // 2, (params.target_size - cropped.height) // 2))
        if params.body_segmentation:
            squared = segmentation(squared)
    else:
        squared = cropped

    if params.body_blur:
        blur = detect_blur(squared)
        if blur > params.body_blur_score:
            log.info({ 'process body skip': 'blur check fail', 'blur': blur })
            return None, True
        else:
            log.debug({ 'process body blur': blur })

    if params.body_range:
        drange = detect_dynamicrange(squared)
        if drange < params.body_range_score:
            log.info({ 'process body skip': 'dynamic range check fail', 'range': drange })
            return None, True
        else:
            log.debug({ 'process body dynamic range': drange })

    similarity = detect_similar(squared)
    if similarity > params.similarity_score:
        log.info({ 'process body skip': 'similarity check fail', 'score': round(similarity, 2) })
        return None, True

    return squared, True


def save_original(img):
    if img.mode == 'RGBA':
        img = img.convert('RGB')
    resized = img.copy()
    resized.thumbnail((params.target_size, params.target_size), Image.HAMMING)
    if params.square_images:
        squared = Image.new('RGB', (params.target_size, params.target_size))
        squared.paste(resized, ((params.target_size - resized.width) // 2, (params.target_size - resized.height) // 2))
    else:
        squared = resized
    return squared


def encode(img):
    with io.BytesIO() as stream:
        img.save(stream, 'JPEG')
        values = stream.getvalue()
        encoded = base64.b64encode(values).decode()
        return encoded
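
# decoding counterpart, as used after the upscale call in extract_face():
#   img = Image.open(io.BytesIO(base64.b64decode(encoded)))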


def interrogate(img, fn, intag = None):
    if len(params.interrogate_model) == 0:
        return '', [] # callers unpack (caption, tags)
    caption = ''
    tags = []
    for model in params.interrogate_model:
        json = Map({ 'image': encode(img), 'model': model })
        res = postsync('/sdapi/v1/interrogate', json)
        if model == 'clip':
            caption = res.caption if 'caption' in res else ''
            caption = caption.split(',')[0].replace('a ', '') # keep the first clause and strip 'a ' articles
            if intag is not None:
                caption = intag + ', ' + caption
        if model == 'deepdanbooru':
            tag = res.caption if 'caption' in res else ''
            tags = tag.split(',')
            tags = [t.replace('(', '').replace(')', '').replace('\\', '').split(':')[0].strip() for t in tags]
            if intag is not None:
                for t in intag.split(',')[::-1]:
                    tags.insert(0, t.strip())
    if params.interrogate_captions:
        file = fn.replace(params.format, '.txt')
        with open(file, 'w') as f:
            f.write(caption)
    pos = 0 if len(tags) == 0 else 1
    words = caption.split(' ')
    if len(words) > 1: # guard against empty or single-word captions
        tags.insert(pos, words[1])
    if len(tags) > params.tag_limit:
        tags = tags[:params.tag_limit]
    log.info({ 'interrogate': caption, 'tags': tags })
    return caption, tags
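
# request sketch (shape taken from the loop above): each configured model receives
#   { 'image': <base64 jpeg>, 'model': 'clip' | 'deepdanbooru' }
# via POST /sdapi/v1/interrogate and is expected to return a 'caption' field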


i = {}
metadata = Map({})


# entry point when used as a module
def process_file(f: str, dst: str = None, preview: bool = False, offline: bool = False, txt = None, tag = None, opts = []):

    def save(img, f, what):
        i[what] = i.get(what, 0) + 1
        if dst is None:
            folder = os.path.dirname(f)
        else:
            folder = dst
        base = os.path.basename(f).split('.')[0]
        parent = os.path.basename(pathlib.Path(folder))
        basename = str(i[what]).rjust(3, '0') + '-' + what + '-' + base
        fn = basename + params.format
        # log.debug({ 'save': fn })
        caption = ''
        tags = ''
        if not preview:
            img.save(os.path.join(folder, fn))
        if not offline:
            caption, tags = interrogate(img, os.path.join(folder, fn), tag)
        metadata[os.path.join(parent, basename)] = { 'caption': caption, 'tags': ','.join(tags) }
        return fn

    # overrides
    if len(opts) > 0:
        params.keep_original = 'original' in opts
        params.extract_face = 'face' in opts
        params.extract_body = 'body' in opts
        params.face_blur = 'blur' in opts
        params.body_blur = 'blur' in opts
        params.face_range = 'range' in opts
        params.body_range = 'range' in opts
        params.face_upscale = 'upscale' in opts
        params.face_restore = 'upscale' in opts

    log.info({ 'processing': f })
    try:
        image = Image.open(f)
    except Exception as err:
        log.error({ 'image': f, 'error': err })
        return 0, {}

    image = ImageOps.exif_transpose(image) # rotate image according to EXIF orientation
    if txt is not None:
        params.interrogate_captions = txt

    if image.width < 512 or image.height < 512:
        log.info({ 'process skip': 'low resolution', 'resolution': [image.width, image.height] })
        return 0, {}
    log.debug({ 'resolution': [image.width, image.height], 'mp': round((image.width * image.height) / 1024 / 1024, 1) })

    face, ok = extract_face(image)
    if face is not None:
        fn = save(face, f, 'face')
        log.info({ 'extract face': fn })
    else:
        log.debug({ 'no face': f })

    if not ok: # no face or face out of frame: skip body extraction as well
        return 0, {}

    body, ok = extract_body(image)
    if body is not None:
        fn = save(body, f, 'body')
        log.info({ 'extract body': fn })
    else:
        log.debug({ 'no body': f })

    if params.keep_original:
        resized = save_original(image)
        fn = save(resized, f, 'original')
        log.info({ 'keep original': fn })

    image.close()
    return i, metadata
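
# module usage sketch (illustrative path; offline=True skips the interrogate api call):
#   counts, meta = process_file('~/raw/img.jpg', dst='~/processed', offline=True)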


def process_images(src: str, dst: str, args = None):
    params.src = src
    params.dst = dst
    res = 0
    if args is not None:
        params.update(args)
    log.info({ 'processing': params })
    if not os.path.isdir(src):
        log.error({ 'process': 'not a folder', 'src': src })
    else:
        if os.path.isdir(dst) and params.clear_dst:
            log.info({ 'clear dst': dst })
            stale = [os.path.join(dst, f) for f in os.listdir(dst) if os.path.isfile(os.path.join(dst, f)) and filetype.is_image(os.path.join(dst, f))]
            for f in stale:
                os.remove(f)
        pathlib.Path(dst).mkdir(parents=True, exist_ok=True)
        for root, _sub_dirs, files in os.walk(src):
            for f in files:
                res, _metadata = process_file(os.path.join(root, f), dst)
    return res


def unload_models():
    # models are lazily recreated on next use after being released here
    global face_model
    face_model = None
    global body_model
    body_model = None
    global segmentation_model
    segmentation_model = None


if __name__ == '__main__':
    # log.setLevel(logging.DEBUG)
    parser = argparse.ArgumentParser(description='dataset processor')
    parser.add_argument('--output', type=str, required=True, help='folder to store images')
    parser.add_argument('--preview', default=False, action='store_true', help='run processing but do not store results')
    parser.add_argument('--offline', default=False, action='store_true', help='run only processing steps that do not require a running server')
    parser.add_argument('--debug', default=False, action='store_true', help='enable debug logging')
    parser.add_argument('input', type=str, nargs='*')
    args = parser.parse_args()
    params.dst = args.output
    if args.debug:
        log.setLevel(logging.DEBUG)
        log.debug({ 'debug': True })
    log.info({ 'processing': params })
    if not os.path.exists(params.dst) and not args.preview:
        pathlib.Path(params.dst).mkdir(parents=True, exist_ok=True)
    files = []
    for loc in args.input:
        if os.path.isfile(loc):
            files.append(loc)
        elif os.path.isdir(loc):
            for root, _sub_dirs, dir_files in os.walk(loc):
                for f in dir_files:
                    files.append(os.path.join(root, f))
    for f in files:
        process_file(f, params.dst, args.preview, args.offline)
    log.info({ 'processed': i, 'inputs': len(files) })
    # print(json.dumps(metadata, indent=2))