from types import SimpleNamespace
import os
import time
import numpy as np
import torch
import torchvision.transforms.functional as TF
from modules import shared, devices, processing, sd_models, errors, sd_hijack_hypertile, processing_vae, sd_models_compile, hidiffusion
from modules.processing_helpers import resize_hires, calculate_base_steps, calculate_hires_steps, calculate_refiner_steps, save_intermediate, update_sampler
from modules.processing_args import set_pipeline_args
from modules.onnx_impl import preprocess_pipeline as preprocess_onnx_pipeline, check_parameters_changed as olive_check_parameters_changed

debug = shared.log.trace if os.environ.get('SD_DIFFUSERS_DEBUG', None) is not None else lambda *args, **kwargs: None
debug('Trace: DIFFUSERS')


def process_diffusers(p: processing.StableDiffusionProcessing):
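    """Run a diffusers pipeline for a single job: base pass, optional hires second
    pass, optional refiner pass, then VAE decode; returns the list of result images.

    Restores the original pipeline on interrupt/skip and before returning.
    """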
    debug(f'Process diffusers args: {vars(p)}')
    orig_pipeline = shared.sd_model
    results = []

    def is_txt2img():
        return sd_models.get_diffusers_task(shared.sd_model) == sd_models.DiffusersTaskType.TEXT_2_IMAGE

    def is_refiner_enabled():
        return p.enable_hr and p.refiner_steps > 0 and p.refiner_start > 0 and p.refiner_start < 1 and shared.sd_refiner is not None

    def update_pipeline(sd_model, p: processing.StableDiffusionProcessing):
        if sd_models.get_diffusers_task(sd_model) == sd_models.DiffusersTaskType.INPAINTING and getattr(p, 'image_mask', None) is None and p.task_args.get('image_mask', None) is None and getattr(p, 'mask', None) is None:
            shared.log.warning('Processing: mode=inpaint mask=None')
            sd_model = sd_models.set_diffuser_pipe(sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
        if shared.opts.cuda_compile_backend == "olive-ai":
            sd_model = olive_check_parameters_changed(p, is_refiner_enabled())
        if sd_model.__class__.__name__ == "OnnxRawPipeline":
            sd_model = preprocess_onnx_pipeline(p)
            nonlocal orig_pipeline
            orig_pipeline = sd_model # processed ONNX pipeline should not be replaced with the original pipeline
        if getattr(sd_model, "current_attn_name", None) != shared.opts.cross_attention_optimization:
            shared.log.info(f"Setting attention optimization: {shared.opts.cross_attention_optimization}")
            sd_models.set_diffusers_attention(sd_model)
        return sd_model
    # sanitize init_images
    if hasattr(p, 'init_images') and getattr(p, 'init_images', None) is None:
        del p.init_images
    if hasattr(p, 'init_images') and not isinstance(getattr(p, 'init_images', []), list):
        p.init_images = [p.init_images]
    if len(getattr(p, 'init_images', [])) > 0:
        while len(p.init_images) < len(p.prompts):
            p.init_images.append(p.init_images[-1])

    if shared.state.interrupted or shared.state.skipped:
        shared.sd_model = orig_pipeline
        return results

    # pipeline type is set earlier in processing, but check for sanity
    is_control = getattr(p, 'is_control', False) is True
    has_images = len(getattr(p, 'init_images', [])) > 0
    if sd_models.get_diffusers_task(shared.sd_model) != sd_models.DiffusersTaskType.TEXT_2_IMAGE and not has_images and not is_control:
        shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.TEXT_2_IMAGE) # reset pipeline
    if hasattr(shared.sd_model, 'unet') and hasattr(shared.sd_model.unet, 'config') and hasattr(shared.sd_model.unet.config, 'in_channels') and shared.sd_model.unet.config.in_channels == 9 and not is_control:
        shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.INPAINTING) # force pipeline for 9-channel inpainting unet
        if len(getattr(p, 'init_images', [])) == 0:
            p.init_images = [TF.to_pil_image(torch.rand((3, getattr(p, 'height', 512), getattr(p, 'width', 512))))] # feed a random placeholder image when none is provided

    sd_models.move_model(shared.sd_model, devices.device)
    sd_models_compile.openvino_recompile_model(p, hires=False, refiner=False) # recompile if a parameter changed

    use_refiner_start = is_txt2img() and is_refiner_enabled() and not p.is_hr_pass and p.refiner_start > 0 and p.refiner_start < 1
    use_denoise_start = not is_txt2img() and p.refiner_start > 0 and p.refiner_start < 1
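    # with refiner_start the base and refiner share one denoising schedule via
    # denoising_end/denoising_start; e.g. (hypothetical values) refiner_start=0.8
    # with 50 steps means the base stops at denoising_end=0.8 (~first 40 steps)
    # and the refiner resumes at denoising_start=0.8 for the remaining ~10 steps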
    shared.sd_model = update_pipeline(shared.sd_model, p)
    shared.log.info(f'Base: class={shared.sd_model.__class__.__name__}')
    update_sampler(p, shared.sd_model)
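    # request latents from the base pass whenever the model has a VAE so decoding
    # can be deferred to the hires/refiner pass or to the single final vae_decode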
    base_args = set_pipeline_args(
        p=p,
        model=shared.sd_model,
        prompts=p.prompts,
        negative_prompts=p.negative_prompts,
        prompts_2=[p.refiner_prompt] if len(p.refiner_prompt) > 0 else p.prompts,
        negative_prompts_2=[p.refiner_negative] if len(p.refiner_negative) > 0 else p.negative_prompts,
        num_inference_steps=calculate_base_steps(p, use_refiner_start=use_refiner_start, use_denoise_start=use_denoise_start),
        eta=shared.opts.scheduler_eta,
        guidance_scale=p.cfg_scale,
        guidance_rescale=p.diffusers_guidance_rescale,
        denoising_start=0 if use_refiner_start else p.refiner_start if use_denoise_start else None,
        denoising_end=p.refiner_start if use_refiner_start else 1 if use_denoise_start else None,
        output_type='latent' if hasattr(shared.sd_model, 'vae') else 'np',
        # output_type='pil',
        clip_skip=p.clip_skip,
        desc='Base',
    )
    shared.state.sampling_steps = base_args.get('prior_num_inference_steps', None) or p.steps or base_args.get('num_inference_steps', None)
    if shared.opts.scheduler_eta is not None and shared.opts.scheduler_eta > 0 and shared.opts.scheduler_eta < 1:
        p.extra_generation_params["Sampler Eta"] = shared.opts.scheduler_eta
    output = None
    try:
        t0 = time.time()
        sd_models_compile.check_deepcache(enable=True)
        sd_models.move_model(shared.sd_model, devices.device)
        hidiffusion.apply(p, shared.sd_model_type)
        # if 'image' in base_args:
        #     base_args['image'] = set_latents(p)
        if hasattr(shared.sd_model, 'tgate') and getattr(p, 'gate_step', -1) > 0:
            base_args['gate_step'] = p.gate_step
            output = shared.sd_model.tgate(**base_args) # pylint: disable=not-callable
        else:
            output = shared.sd_model(**base_args)
        if isinstance(output, dict):
            output = SimpleNamespace(**output)
        hidiffusion.unapply()
        sd_models_compile.openvino_post_compile(op="base") # only executes on compiled openvino models
        sd_models_compile.check_deepcache(enable=False)
        if shared.cmd_opts.profile:
            t1 = time.time()
            shared.log.debug(f'Profile: pipeline call: {t1-t0:.2f}')
        if not hasattr(output, 'images') and hasattr(output, 'frames'): # video pipelines return frames instead of images
            if hasattr(output.frames[0], 'shape'):
                shared.log.debug(f'Generated: frames={output.frames[0].shape[1]}')
            else:
                shared.log.debug(f'Generated: frames={len(output.frames[0])}')
            output.images = output.frames[0]
        if isinstance(output.images, np.ndarray):
            output.images = torch.from_numpy(output.images)
    except AssertionError as e:
        shared.log.info(e)
    except ValueError as e:
        shared.state.interrupted = True
        shared.log.error(f'Processing: args={base_args} {e}')
        if shared.cmd_opts.debug:
            errors.display(e, 'Processing')
    except RuntimeError as e:
        shared.state.interrupted = True
        shared.log.error(f'Processing: args={base_args} {e}')
        errors.display(e, 'Processing')

    if hasattr(shared.sd_model, 'embedding_db') and len(shared.sd_model.embedding_db.embeddings_used) > 0: # register used embeddings
        p.extra_generation_params['Embeddings'] = ', '.join(shared.sd_model.embedding_db.embeddings_used)

    shared.state.nextjob()
    if shared.state.interrupted or shared.state.skipped:
        shared.sd_model = orig_pipeline
        return results

    # optional second pass
    if p.enable_hr:
        p.is_hr_pass = True
        p.init_hr(p.hr_scale, p.hr_upscaler, force=p.hr_force)
        prev_job = shared.state.job
        # hires runs on the original pipeline
        if hasattr(shared.sd_model, 'restore_pipeline') and shared.sd_model.restore_pipeline is not None:
            shared.sd_model.restore_pipeline()
        # upscale
        if hasattr(p, 'height') and hasattr(p, 'width') and p.hr_resize_mode > 0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5):
            shared.log.info(f'Upscale: mode={p.hr_resize_mode} upscaler="{p.hr_upscaler}" context="{p.hr_resize_context}" resize={p.hr_resize_x}x{p.hr_resize_y} upscale={p.hr_upscale_to_x}x{p.hr_upscale_to_y}')
            p.ops.append('upscale')
            if shared.opts.samples_save and not p.do_not_save_samples and shared.opts.save_images_before_highres_fix and hasattr(shared.sd_model, 'vae'):
                save_intermediate(p, latents=output.images, suffix="-before-hires")
            shared.state.job = 'Upscale'
            output.images = resize_hires(p, latents=output.images)
            sd_hijack_hypertile.hypertile_set(p, hr=True)
        latent_upscale = shared.latent_upscale_modes.get(p.hr_upscaler, None)
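        # a true second diffusion pass runs only when a latent upscaler is selected
        # or hires is explicitly forced, and the effective denoising strength is
        # non-zero; otherwise the resize_hires upscale above is all that happens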
        if (latent_upscale is not None or p.hr_force) and getattr(p, 'hr_denoising_strength', p.denoising_strength) > 0:
            p.ops.append('hires')
            sd_models_compile.openvino_recompile_model(p, hires=True, refiner=False)
            if shared.sd_model.__class__.__name__ == "OnnxRawPipeline":
                shared.sd_model = preprocess_onnx_pipeline(p)
                p.hr_force = True
            # hires
            p.denoising_strength = getattr(p, 'hr_denoising_strength', p.denoising_strength)
            if p.hr_force and p.denoising_strength == 0:
                shared.log.warning('HiRes skip: denoising=0')
                p.hr_force = False
            if p.hr_force:
                shared.state.job_count = 2 * p.n_iter
                shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
                shared.log.info(f'HiRes: class={shared.sd_model.__class__.__name__} sampler="{p.hr_sampler_name}"')
                if 'Upscale' in shared.sd_model.__class__.__name__ or 'Flux' in shared.sd_model.__class__.__name__:
                    output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height)
                if p.is_control and hasattr(p, 'task_args') and p.task_args.get('image', None) is not None:
                    if hasattr(shared.sd_model, "vae") and output.images is not None and len(output.images) > 0:
                        output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.hr_upscale_to_x, height=p.hr_upscale_to_y) # controlnet cannot deal with latent input
                    p.task_args['image'] = output.images # replace so hires uses the new output
                sd_models.move_model(shared.sd_model, devices.device)
                orig_denoise = p.denoising_strength
                p.denoising_strength = getattr(p, 'hr_denoising_strength', p.denoising_strength)
                update_sampler(p, shared.sd_model, second_pass=True)
                hires_args = set_pipeline_args(
                    p=p,
                    model=shared.sd_model,
                    prompts=[p.refiner_prompt] if len(p.refiner_prompt) > 0 else p.prompts,
                    negative_prompts=[p.refiner_negative] if len(p.refiner_negative) > 0 else p.negative_prompts,
                    prompts_2=[p.refiner_prompt] if len(p.refiner_prompt) > 0 else p.prompts,
                    negative_prompts_2=[p.refiner_negative] if len(p.refiner_negative) > 0 else p.negative_prompts,
                    num_inference_steps=calculate_hires_steps(p),
                    eta=shared.opts.scheduler_eta,
                    guidance_scale=p.image_cfg_scale if p.image_cfg_scale is not None else p.cfg_scale,
                    guidance_rescale=p.diffusers_guidance_rescale,
                    output_type='latent' if hasattr(shared.sd_model, 'vae') else 'np',
                    clip_skip=p.clip_skip,
                    image=output.images,
                    strength=p.denoising_strength,
                    desc='Hires',
                )
                shared.state.job = 'HiRes'
                shared.state.sampling_steps = hires_args.get('prior_num_inference_steps', None) or p.steps or hires_args.get('num_inference_steps', None)
                try:
                    sd_models_compile.check_deepcache(enable=True)
                    output = shared.sd_model(**hires_args) # pylint: disable=not-callable
                    if isinstance(output, dict):
                        output = SimpleNamespace(**output)
                    sd_models_compile.check_deepcache(enable=False)
                    sd_models_compile.openvino_post_compile(op="base")
                except AssertionError as e:
                    shared.log.info(e)
                p.denoising_strength = orig_denoise
        shared.state.job = prev_job
        shared.state.nextjob()
        p.is_hr_pass = False

    # optional refiner pass or decode
    if is_refiner_enabled():
        prev_job = shared.state.job
        shared.state.job = 'Refine'
        shared.state.job_count += 1
        if shared.opts.samples_save and not p.do_not_save_samples and shared.opts.save_images_before_refiner and hasattr(shared.sd_model, 'vae'):
            save_intermediate(p, latents=output.images, suffix="-before-refiner")
        if shared.opts.diffusers_move_base:
            shared.log.debug('Moving to CPU: model=base')
            sd_models.move_model(shared.sd_model, devices.cpu)
        if shared.state.interrupted or shared.state.skipped:
            shared.sd_model = orig_pipeline
            return results
        if shared.opts.diffusers_offload_mode == "balanced":
            shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
        if shared.opts.diffusers_move_refiner:
            sd_models.move_model(shared.sd_refiner, devices.device)
        p.ops.append('refine')
        p.is_refiner_pass = True
        sd_models_compile.openvino_recompile_model(p, hires=False, refiner=True)
        shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.TEXT_2_IMAGE)
        shared.sd_refiner = sd_models.set_diffuser_pipe(shared.sd_refiner, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
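        # base pipeline returns to txt2img while the refiner always runs as img2img
        # consuming the base output (latents, or decoded pixels for upscale/flux)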
        for i in range(len(output.images)):
            image = output.images[i]
            noise_level = round(350 * p.denoising_strength)
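            # noise_level rescales denoising strength onto the 0-350 range consumed
            # only by StableDiffusionUpscalePipeline (passed via refiner_args below)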
            output_type = 'latent' if hasattr(shared.sd_refiner, 'vae') else 'np'
            if 'Upscale' in shared.sd_refiner.__class__.__name__ or 'Flux' in shared.sd_refiner.__class__.__name__:
                image = processing_vae.vae_decode(latents=image, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height)
                p.extra_generation_params['Noise level'] = noise_level
                output_type = 'np'
            if hasattr(p, 'task_args') and p.task_args.get('image', None) is not None and output is not None: # replace input with output so it can be used by hires/refine
                p.task_args['image'] = image
            shared.log.info(f'Refiner: class={shared.sd_refiner.__class__.__name__}')
            update_sampler(p, shared.sd_refiner, second_pass=True)
            refiner_args = set_pipeline_args(
                p=p,
                model=shared.sd_refiner,
                prompts=[p.refiner_prompt] if len(p.refiner_prompt) > 0 else p.prompts[i],
                negative_prompts=[p.refiner_negative] if len(p.refiner_negative) > 0 else p.negative_prompts[i],
                num_inference_steps=calculate_refiner_steps(p),
                eta=shared.opts.scheduler_eta,
                # strength=p.denoising_strength,
                noise_level=noise_level, # StableDiffusionUpscalePipeline only
                guidance_scale=p.image_cfg_scale if p.image_cfg_scale is not None else p.cfg_scale,
                guidance_rescale=p.diffusers_guidance_rescale,
                denoising_start=p.refiner_start if p.refiner_start > 0 and p.refiner_start < 1 else None,
                denoising_end=1 if p.refiner_start > 0 and p.refiner_start < 1 else None,
                image=image,
                output_type=output_type,
                clip_skip=p.clip_skip,
                desc='Refiner',
            )
            shared.state.sampling_steps = refiner_args.get('prior_num_inference_steps', None) or p.steps or refiner_args.get('num_inference_steps', None)
            try:
                if 'requires_aesthetics_score' in shared.sd_refiner.config: # sdxl base model needs false while sdxl refiner needs true
                    shared.sd_refiner.register_to_config(requires_aesthetics_score=getattr(shared.sd_refiner, 'tokenizer', None) is None)
                refiner_output = shared.sd_refiner(**refiner_args) # pylint: disable=not-callable
                if isinstance(refiner_output, dict):
                    refiner_output = SimpleNamespace(**refiner_output)
                sd_models_compile.openvino_post_compile(op="refiner")
            except AssertionError as e:
                shared.log.info(e)
            if not shared.state.interrupted and not shared.state.skipped:
                refiner_images = processing_vae.vae_decode(latents=refiner_output.images, model=shared.sd_refiner, full_quality=True, width=max(p.width, p.hr_upscale_to_x), height=max(p.height, p.hr_upscale_to_y))
                for refiner_image in refiner_images:
                    results.append(refiner_image)

        if shared.opts.diffusers_offload_mode == "balanced":
            shared.sd_refiner = sd_models.apply_balanced_offload(shared.sd_refiner)
        elif shared.opts.diffusers_move_refiner:
            shared.log.debug('Moving to CPU: model=refiner')
            sd_models.move_model(shared.sd_refiner, devices.cpu)
        shared.state.job = prev_job
        shared.state.nextjob()
        p.is_refiner_pass = False

    # final decode since there is no refiner
    if not is_refiner_enabled():
        if output is not None:
            if not hasattr(output, 'images') and hasattr(output, 'frames'):
                shared.log.debug(f'Generated: frames={len(output.frames[0])}')
                output.images = output.frames[0]
            if hasattr(shared.sd_model, "vae") and getattr(output, 'images', None) is not None and len(output.images) > 0:
                if p.hr_resize_mode > 0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5):
                    width = max(getattr(p, 'width', 0), getattr(p, 'hr_upscale_to_x', 0))
                    height = max(getattr(p, 'height', 0), getattr(p, 'hr_upscale_to_y', 0))
                else:
                    width = getattr(p, 'width', 0)
                    height = getattr(p, 'height', 0)
                results = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, width=width, height=height)
            elif hasattr(output, 'images'):
                results = output.images
            else:
                shared.log.warning('Processing returned no results')
                results = []
        else:
            shared.log.warning('Processing returned no results')
            results = []

    shared.sd_model = orig_pipeline
    return results
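

# note: process_diffusers is normally invoked from modules.processing once prompts,
# seeds and samplers have been resolved; a direct call is a hypothetical sketch and
# requires a fully populated StableDiffusionProcessing instance:
#   p = processing.StableDiffusionProcessing(...)
#   images = process_diffusers(p)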