sdnext/cli/modules/process.py
#!/usr/bin/env python
"""
process people images
- check image resolution
- runs detection of face and body
- extracts crop and performs checks:
- visible: is face or body detected
- in frame: for face based on box, for body based on number of visible keypoints
- resolution: is cropped image still of sufficient resolution
- blur: is image sharp enough
- dynamic range: is image bright enough
- similarity: compares image to all previously processed images to see if its unique enough
- images are resized and optionally squared
- face additionally runs through semantic segmentation to remove background
- if image passes checks
image padded and saved as extracted image
- body requires that face is detected and in-frame,
but does not have to pass all other checks as body performs its own checks
- runs clip interrogation on extracted images to generate filewords
"""
import os
import io
import math
import base64
import pathlib
import argparse
import logging
import filetype
import numpy as np
import mediapipe as mp
from PIL import Image, ImageOps
from skimage.metrics import structural_similarity as ssim
from scipy.stats import beta
from util import log, Map
from sdapi import postsync
params = Map({
    'src': '', # source folder
    'dst': '', # destination folder
    'format': '.png', # image format
    'extract_face': True, # extract face from image
    'extract_body': True, # extract body from image
    'clear_dst': True, # remove all files from destination at the start
    'target_size': 512, # target resolution
    'square_images': True, # should output images be squared
    'blur_samplesize': 60, # sample size to use for blur detection
    'face_score': 0.7, # min face detection score
    'face_pad': 0.07, # pad face image percentage
    'face_model': 1, # which face model to use 0/close-up 1/standard
    'face_blur_score': 1.5, # max score for face blur detection
    'face_range_score': 0.2, # min score for face dynamic range detection
    'body_score': 0.9, # min body detection score
    'body_visibility': 0.5, # min visibility score for each detected body part
    'body_parts': 15, # min number of detected body parts with sufficient visibility
    'body_pad': 0.2, # pad body image percentage
    'body_model': 2, # body model to use 0/low 1/medium 2/high
    'body_blur_score': 1.8, # max score for body blur detection
    'body_range_score': 0.2, # min score for body dynamic range detection
    'segmentation_face': False, # segmentation enabled for face crops
    'segmentation_body': False, # segmentation enabled for body crops
    'segmentation_model': 0, # segmentation model 0/general 1/landscape
    'segmentation_background': (192, 192, 192), # segmentation background color
    'similarity_score': 0.6, # maximum similarity score before image is discarded
    'similarity_size': 64, # base similarity detection on reduced images
    'interrogate_model': 'clip' # interrogate model
})
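
# blur score: zero out a low-frequency window at the center of the shifted fft
# spectrum, reconstruct, and take the mean log-magnitude of the high-frequency
# residual; images whose score exceeds the *_blur_score thresholds above are rejected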
def detect_blur(image):
    # based on <https://github.com/karthik9319/Blur-Detection/>
    bw = ImageOps.grayscale(image)
    cx, cy = image.size[0] // 2, image.size[1] // 2
    fft = np.fft.fft2(bw)
    fftShift = np.fft.fftshift(fft)
    fftShift[cy - params.blur_samplesize: cy + params.blur_samplesize, cx - params.blur_samplesize: cx + params.blur_samplesize] = 0
    fftShift = np.fft.ifftshift(fftShift)
    recon = np.fft.ifft2(fftShift)
    magnitude = np.log(np.abs(recon))
    mean = round(np.mean(magnitude), 2)
    return mean
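
# dynamic range score: build a brightness histogram (pmf) of the image and
# compute its cosine similarity against a beta(2,2) reference distribution,
# so well-exposed, midtone-heavy images score close to 1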
def detect_dynamicrange(image):
    # based on <https://towardsdatascience.com/measuring-enhancing-image-quality-attributes-234b0f250e10>
    data = np.asarray(image)
    image = np.float32(data)
    RGB = [0.299, 0.587, 0.114]
    height, width = image.shape[:2]
    brightness_image = np.sqrt(image[..., 0] ** 2 * RGB[0] + image[..., 1] ** 2 * RGB[1] + image[..., 2] ** 2 * RGB[2])
    hist, _ = np.histogram(brightness_image, bins=256, range=(0, 255))
    img_brightness_pmf = hist / (height * width)
    dist = beta(2, 2)
    ys = dist.pdf(np.linspace(0, 1, 256))
    ref_pmf = ys / np.sum(ys)
    dot_product = np.dot(ref_pmf, img_brightness_pmf)
    squared_dist_a = np.sum(ref_pmf ** 2)
    squared_dist_b = np.sum(img_brightness_pmf ** 2)
    res = dot_product / math.sqrt(squared_dist_a * squared_dist_b)
    return round(res, 2)
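
# similarity check: every processed image is downscaled to similarity_size,
# grayscaled and cached in `images`; a new candidate is compared against all
# cached entries with ssim and the highest score found is returned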
images = []
def detect_simmilar(image):
    img = image.resize((params.similarity_size, params.similarity_size))
    img = ImageOps.grayscale(img)
    data = np.array(img)
    similarity = 0
    for cached in images:
        val = ssim(data, cached, data_range=255, channel_axis=None, gradient=False, full=False)
        if val > similarity:
            similarity = val
    images.append(data)
    return similarity
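
# background removal: mediapipe selfie segmentation produces a per-pixel mask;
# pixels below the 0.1 confidence threshold are replaced with the flat
# segmentation_background color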
def segmentation(image):
    with mp.solutions.selfie_segmentation.SelfieSegmentation(model_selection=params.segmentation_model) as selfie_segmentation:
        data = np.array(image)
        results = selfie_segmentation.process(data)
        condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1
        background = np.zeros(data.shape, dtype=np.uint8)
        background[:] = params.segmentation_background
        data = np.where(condition, data, background) # consider using a joint bilateral filter instead of pure combine
        segmented = Image.fromarray(data)
    return segmented
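
# face extraction: detection runs on a target_size thumbnail for speed; the
# relative bounding box is padded by face_pad, squared, and mapped back to the
# full-resolution image via `scale` before cropping and running quality checks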
def extract_face(img):
    if not params.extract_face:
        return None, True
    if img.mode == 'RGBA':
        img = img.convert('RGB')
    scale = max(img.size[0], img.size[1]) / params.target_size
    resized = img.copy()
    resized.thumbnail((params.target_size, params.target_size), Image.HAMMING)
    with mp.solutions.face_detection.FaceDetection(min_detection_confidence=params.face_score, model_selection=params.face_model) as face:
        results = face.process(np.array(resized))
    if results.detections is None:
        return None, False
    box = results.detections[0].location_data.relative_bounding_box
    if box.xmin < 0 or box.ymin < 0 or (box.xmin + box.width) > 1 or (box.ymin + box.height) > 1: # any edge of the detection box outside the frame
        log.info({ 'process face skip': 'out of frame' })
        return None, False
    x = (box.xmin - params.face_pad / 2) * resized.width
    y = (box.ymin - params.face_pad / 2) * resized.height
    w = (box.width + params.face_pad) * resized.width
    h = (box.height + params.face_pad) * resized.height
    cx = x + w / 2
    cy = y + h / 2
    half = max(w, h) / 2
    square = [scale * (cx - half), scale * (cy - half), scale * (cx + half), scale * (cy + half)]
    square = [max(square[0], 0), max(square[1], 0), min(square[2], img.width), min(square[3], img.height)]
    cropped = img.crop(tuple(square))
    if cropped.size[0] < params.target_size and cropped.size[1] < params.target_size:
        log.info({ 'process face skip': 'low resolution', 'size': [cropped.size[0], cropped.size[1]] })
        return None, True
    cropped.thumbnail((params.target_size, params.target_size), Image.HAMMING)
    if params.square_images:
        squared = Image.new('RGB', (params.target_size, params.target_size))
        squared.paste(cropped, (0, 0))
        if params.segmentation_face:
            squared = segmentation(squared)
    else:
        squared = cropped
    blur = detect_blur(squared)
    if blur > params.face_blur_score:
        log.info({ 'process face skip': 'blur check fail', 'blur': blur })
        return None, True
    log.debug({ 'process face blur': blur })
    dynamic_range = detect_dynamicrange(squared)
    if dynamic_range < params.face_range_score:
        log.info({ 'process face skip': 'dynamic range check fail', 'range': dynamic_range })
        return None, True
    log.debug({ 'process face dynamic range': dynamic_range })
    similarity = detect_simmilar(squared)
    if similarity > params.similarity_score:
        log.info({ 'process face skip': 'similarity check fail', 'score': round(similarity, 2) })
        return None, True
    return squared, True
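
# body extraction: pose landmarks are filtered by per-keypoint visibility
# (body_visibility) and the crop is the padded bounding box of the remaining
# keypoints; fewer than body_parts visible keypoints skips the image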
def extract_body(img):
    if not params.extract_body:
        return None, True
    if img.mode == 'RGBA':
        img = img.convert('RGB')
    scale = max(img.size[0], img.size[1]) / params.target_size
    resized = img.copy()
    resized.thumbnail((params.target_size, params.target_size), Image.HAMMING)
    with mp.solutions.pose.Pose(static_image_mode=True, min_detection_confidence=params.body_score, model_complexity=params.body_model) as pose:
        results = pose.process(np.array(resized))
    if results.pose_landmarks is None:
        return None, False
    x = [resized.width * (i.x - params.body_pad / 2) for i in results.pose_landmarks.landmark if i.visibility > params.body_visibility]
    y = [resized.height * (i.y - params.body_pad / 2) for i in results.pose_landmarks.landmark if i.visibility > params.body_visibility]
    if len(x) < params.body_parts:
        log.info({ 'process body skip': 'insufficient body parts', 'detected': len(x) })
        return None, True
    w = max(x) - min(x) + resized.width * params.body_pad
    h = max(y) - min(y) + resized.height * params.body_pad
    cx = min(x) + w / 2
    cy = min(y) + h / 2
    half = max(w, h) / 2
    square = [scale * (cx - half), scale * (cy - half), scale * (cx + half), scale * (cy + half)]
    square = [max(square[0], 0), max(square[1], 0), min(square[2], img.width), min(square[3], img.height)]
    cropped = img.crop(tuple(square))
    if cropped.size[0] < params.target_size and cropped.size[1] < params.target_size:
        log.info({ 'process body skip': 'low resolution', 'size': [cropped.size[0], cropped.size[1]] })
        return None, True
    cropped.thumbnail((params.target_size, params.target_size), Image.HAMMING)
    if params.square_images:
        squared = Image.new('RGB', (params.target_size, params.target_size))
        squared.paste(cropped, (0, 0))
        if params.segmentation_body:
            squared = segmentation(squared)
    else:
        squared = cropped
    blur = detect_blur(squared)
    if blur > params.body_blur_score:
        log.info({ 'process body skip': 'blur check fail', 'blur': blur })
        return None, True
    log.debug({ 'process body blur': blur })
    dynamic_range = detect_dynamicrange(squared)
    if dynamic_range < params.body_range_score:
        log.info({ 'process body skip': 'dynamic range check fail', 'range': dynamic_range })
        return None, True
    log.debug({ 'process body dynamic range': dynamic_range })
    similarity = detect_simmilar(squared)
    if similarity > params.similarity_score:
        log.info({ 'process body skip': 'similarity check fail', 'score': round(similarity, 2) })
        return None, True
    return squared, True
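
# filewords: the extracted crop is jpeg-encoded, base64-wrapped and posted to
# the /sdapi/v1/interrogate endpoint of a running server; the returned caption
# is written next to the image as a .txt sidecar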
def interrogate(img, fn):
    def encode(image):
        with io.BytesIO() as stream:
            image.save(stream, 'JPEG')
            values = stream.getvalue()
            encoded = base64.b64encode(values).decode()
            return encoded
    if params.interrogate_model is None or params.interrogate_model == '':
        return
    json = Map({ 'image': encode(img), 'model': params.interrogate_model })
    res = postsync('/sdapi/v1/interrogate', json)
    caption = res.caption if 'caption' in res else ''
    log.info({ 'interrogate': caption })
    file = fn.replace(params.format, '.txt')
    with open(file, 'w', encoding='utf-8') as f:
        f.write(caption)
i = {}
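
# process a single image: open, exif-rotate, reject low-resolution inputs,
# then run face and body extraction; `i` above counts saved images per type
# and is used to sequence output filenames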
def process_file(f: str, dst: str = None, preview: bool = False, offline: bool = False):
    def save(img, f, what):
        i[what] = i.get(what, 0) + 1
        if dst is None:
            folder = os.path.dirname(f)
        else:
            folder = dst
        base = os.path.basename(f).split('.')[0]
        fn = os.path.join(folder, str(i[what]).rjust(3, '0') + '-' + what + '-' + base + params.format)
        # log.debug({ 'save': fn })
        if not preview:
            img.save(fn)
            if not offline:
                interrogate(img, fn)
        return fn
    log.info({ 'processing': f })
    try:
        image = Image.open(f)
    except Exception as err:
        log.error({ 'image': f, 'error': err })
        return i
    image = ImageOps.exif_transpose(image) # rotate image according to EXIF orientation
    if image.width < 512 or image.height < 512:
        log.info({ 'process skip': 'low resolution', 'resolution': [image.width, image.height] })
        return i
    log.debug({ 'resolution': [image.width, image.height], 'mp': round((image.width * image.height) / 1024 / 1024, 1) })
    face, ok = extract_face(image)
    if face is not None:
        fn = save(face, f, 'face')
        log.info({ 'extract face': fn })
    else:
        log.debug({ 'no face': f })
    if not ok: # face out of frame or not detected: skip body extraction entirely
        return i
    body, ok = extract_body(image)
    if body is not None:
        fn = save(body, f, 'body')
        log.info({ 'extract body': fn })
    else:
        log.debug({ 'no body': f })
    image.close()
    return i
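
# programmatic use, a minimal sketch (paths are illustrative and the import
# assumes this file is importable as `process`):
#   from process import process_images
#   process_images('/data/raw', '/data/processed', { 'interrogate_model': '' })
# passing 'interrogate_model': '' in the overrides skips the server call in interrogate()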
def process_images(src: str, dst: str, args = None):
    params.src = src
    params.dst = dst
    if args is not None:
        params.update(args)
    log.info({ 'processing': params })
    if not os.path.isdir(src):
        log.error({ 'process': 'not a folder', 'src': src })
    else:
        if os.path.isdir(dst) and params.clear_dst:
            log.info({ 'clear dst': dst })
            old = [os.path.join(dst, f) for f in os.listdir(dst) if os.path.isfile(os.path.join(dst, f)) and filetype.is_image(os.path.join(dst, f))]
            for f in old:
                os.remove(f)
        pathlib.Path(dst).mkdir(parents=True, exist_ok=True)
        for root, _sub_dirs, files in os.walk(src):
            for f in files:
                process_file(os.path.join(root, f), dst)
    return i # per-type counters of saved images
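
# cli entry point: accepts individual files and/or folders (walked recursively);
# --preview runs all checks without writing results, --offline skips steps that
# need a running server (interrogation)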
if __name__ == '__main__':
    # log.setLevel(logging.DEBUG)
    parser = argparse.ArgumentParser(description = 'process people images')
    parser.add_argument('--output', type=str, required=True, help='folder to store images')
    parser.add_argument('--preview', default=False, action='store_true', help = "run processing but do not store results")
    parser.add_argument('--offline', default=False, action='store_true', help = "run only processing steps that do not require running server")
    parser.add_argument('--debug', default=False, action='store_true', help = "enable debug logging")
    parser.add_argument('input', type=str, nargs='*')
    args = parser.parse_args()
    params.dst = args.output
    if args.debug:
        log.setLevel(logging.DEBUG)
        log.debug({ 'debug': True })
    log.info({ 'processing': params })
    if not os.path.exists(params.dst) and not args.preview:
        pathlib.Path(params.dst).mkdir(parents=True, exist_ok=True)
    files = []
    for loc in args.input:
        if os.path.isfile(loc):
            files.append(loc)
        elif os.path.isdir(loc):
            for root, _sub_dirs, dir_files in os.walk(loc):
                for f in dir_files:
                    files.append(os.path.join(root, f))
    for f in files:
        process_file(f, params.dst, args.preview, args.offline)
    log.info({ 'processed': i, 'inputs': len(files) })