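"""Diffusers-based processing for sdnext: orchestrates the base, hires, and refiner
passes for a StableDiffusionProcessing job and decodes the resulting latents.

Mirrored from https://github.com/vladmandic/sdnext.git
"""
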
from types import SimpleNamespace
import os
import time
import numpy as np
import torch
import torchvision.transforms.functional as TF
from PIL import Image
from modules import shared, devices, processing, sd_models, errors, sd_hijack_hypertile, processing_vae, sd_models_compile, timer, modelstats, extra_networks, attention
from modules.processing_helpers import resize_hires, calculate_base_steps, calculate_hires_steps, calculate_refiner_steps, save_intermediate, update_sampler, is_txt2img, is_refiner_enabled, get_job_name
from modules.processing_args import set_pipeline_args
from modules.onnx_impl import preprocess_pipeline as preprocess_onnx_pipeline, check_parameters_changed as olive_check_parameters_changed
from modules.lora import lora_common


debug = os.environ.get('SD_DIFFUSERS_DEBUG', None) is not None
output_type = 'np' if os.environ.get('SD_VAE_DEFAULT', None) is not None else 'latent'
last_p = None
orig_pipeline = shared.sd_model


def restore_state(p: processing.StableDiffusionProcessing):
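    """Restore the previous processing job when reprocessing is requested.

    For p.state in ['reprocess_refine', 'reprocess_detail'] the last completed job
    (module-level last_p) is reused and the corresponding passes are skipped;
    otherwise p is returned unchanged.
    """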
    if p.state in ['reprocess_refine', 'reprocess_detail']:
        # validate
        if last_p is None:
            shared.log.warning(f'Restore state: op={p.state} last state missing')
            return p
        if p.__class__ != last_p.__class__:
            shared.log.warning(f'Restore state: op={p.state} last state is different type')
            return p
        if shared.history.count == 0:
            shared.log.warning(f'Restore state: op={p.state} last latents missing')
            return p
        state = p.state

        # set ops
        if state == 'reprocess_refine':
            width, width_before, width_after, width_mask = p.width, p.width_before, p.width_after, p.width_mask
            height, height_before, height_after, height_mask = p.height, p.height_before, p.height_after, p.height_mask
            scale_by, scale_by_before, scale_by_after, scale_by_mask = p.scale_by, p.scale_by_before, p.scale_by_after, p.scale_by_mask
            resize_name, resize_name_before, resize_name_after, resize_name_mask = p.resize_name, p.resize_name_before, p.resize_name_after, p.resize_name_mask
            resize_mode, resize_mode_before, resize_mode_after, resize_mode_mask = p.resize_mode, p.resize_mode_before, p.resize_mode_after, p.resize_mode_mask
            resize_context, resize_context_before, resize_context_after, resize_context_mask = p.resize_context, p.resize_context_before, p.resize_context_after, p.resize_context_mask
            selected_scale_tab, selected_scale_tab_before, selected_scale_tab_after, selected_scale_tab_mask = p.selected_scale_tab, p.selected_scale_tab_before, p.selected_scale_tab_after, p.selected_scale_tab_mask
            hr_scale, hr_resize_mode, hr_resize_context, hr_upscaler, hr_second_pass_steps = p.hr_scale, p.hr_resize_mode, p.hr_resize_context, p.hr_upscaler, p.hr_second_pass_steps
            hr_resize_x, hr_resize_y, hr_upscale_to_x, hr_upscale_to_y, hr_denoising_strength = p.hr_resize_x, p.hr_resize_y, p.hr_upscale_to_x, p.hr_upscale_to_y, p.hr_denoising_strength

            p = last_p
            p.skip = ['encode', 'base']
            p.state = state
            p.enable_hr = True
            p.hr_force = True
            p.init_images = None

            p.width, p.width_before, p.width_after, p.width_mask = width, width_before, width_after, width_mask
            p.height, p.height_before, p.height_after, p.height_mask = height, height_before, height_after, height_mask
            p.resize_name, p.resize_name_before, p.resize_name_after, p.resize_name_mask = resize_name, resize_name_before, resize_name_after, resize_name_mask
            p.resize_mode, p.resize_mode_before, p.resize_mode_after, p.resize_mode_mask = resize_mode, resize_mode_before, resize_mode_after, resize_mode_mask
            p.resize_context, p.resize_context_before, p.resize_context_after, p.resize_context_mask = resize_context, resize_context_before, resize_context_after, resize_context_mask
            p.selected_scale_tab, p.selected_scale_tab_before, p.selected_scale_tab_after, p.selected_scale_tab_mask = selected_scale_tab, selected_scale_tab_before, selected_scale_tab_after, selected_scale_tab_mask
            p.scale_by, p.scale_by_before, p.scale_by_after, p.scale_by_mask = scale_by, scale_by_before, scale_by_after, scale_by_mask
            p.hr_scale, p.hr_resize_mode, p.hr_resize_context, p.hr_upscaler, p.hr_second_pass_steps = hr_scale, hr_resize_mode, hr_resize_context, hr_upscaler, hr_second_pass_steps
            p.hr_resize_x, p.hr_resize_y, p.hr_upscale_to_x, p.hr_upscale_to_y, p.hr_denoising_strength = hr_resize_x, hr_resize_y, hr_upscale_to_x, hr_upscale_to_y, hr_denoising_strength
        if state == 'reprocess_detail':
            p.skip = ['encode', 'base', 'hires']
            p.detailer_enabled = True
        shared.log.info(f'Restore state: op={p.state} skip={p.skip}')
    return p


def process_pre(p: processing.StableDiffusionProcessing):
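    """Apply optional per-job model modifiers (deepcache, ip-adapter, token merging,
    hidiffusion, RAS, PAG, CFG-Zero, LinFusion, cache-dit, FreeU, transformer cache,
    first-block cache, TeaCache) and switch to a modular pipeline when supported;
    the reversible modifiers are undone by process_post."""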
    from modules import ipadapter, sd_hijack_freeu, para_attention, teacache, hidiffusion, ras, pag, cfgzero, transformer_cache, token_merge, linfusion, cachedit
    shared.log.info('Processing modifiers: apply')

    try:
        # apply-with-unapply
        sd_models_compile.check_deepcache(enable=True)
        ipadapter.apply(shared.sd_model, p)
        token_merge.apply_token_merging(p.sd_model)
        hidiffusion.apply(p, shared.sd_model_type)
        ras.apply(shared.sd_model, p)
        pag.apply(p)
        cfgzero.apply(p)
        linfusion.apply(shared.sd_model)
        cachedit.apply_cache_dit(shared.sd_model)

        # apply-only
        sd_hijack_freeu.apply_freeu(p)
        transformer_cache.set_cache()
        para_attention.apply_first_block_cache()
        teacache.apply_teacache(p)
    except Exception as e:
        shared.log.error(f'Processing apply: {e}')
        errors.display(e, 'apply')

    shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
    # if hasattr(shared.sd_model, 'unet'):
    #     sd_models.move_model(shared.sd_model.unet, devices.device)
    # if hasattr(shared.sd_model, 'transformer'):
    #     sd_models.move_model(shared.sd_model.transformer, devices.device)

    from modules import modular
    if modular.is_compatible(shared.sd_model):
        modular_pipe = modular.convert_to_modular(shared.sd_model)
        if modular_pipe is not None:
            shared.sd_model = modular_pipe
    if modular.is_guider(shared.sd_model):
        from modules import modular_guiders
        modular_guiders.set_guider(p)

    timer.process.record('pre')


def process_post(p: processing.StableDiffusionProcessing):
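    """Reverse the modifiers applied in process_pre and restore the model to its base state."""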
    from modules import ipadapter, hidiffusion, ras, pag, cfgzero, token_merge, linfusion, cachedit
    shared.log.info('Processing modifiers: unapply')

    try:
        sd_models_compile.check_deepcache(enable=False)
        ipadapter.unapply(shared.sd_model, unload=getattr(p, 'ip_adapter_unload', False))
        token_merge.remove_token_merging(p.sd_model)
        hidiffusion.unapply()
        ras.unapply(shared.sd_model)
        pag.unapply()
        cfgzero.unapply()
        linfusion.unapply(shared.sd_model)
        cachedit.unapply_cache_dit(shared.sd_model)
    except Exception as e:
        shared.log.error(f'Processing unapply: {e}')
        errors.display(e, 'unapply')
    timer.process.record('post')


def process_base(p: processing.StableDiffusionProcessing):
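    """Run the base diffusion pass: build pipeline arguments, invoke the loaded model
    (or its t-gate variant), and normalize the result into an object with an
    `images` attribute; returns the raw pipeline output."""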
    jobid = shared.state.begin('Base')
    txt2img = is_txt2img()
    use_refiner_start = is_refiner_enabled(p) and (not p.is_hr_pass)
    use_denoise_start = (not txt2img) and (0 < p.refiner_start < 1)

    shared.sd_model = update_pipeline(shared.sd_model, p)
    update_sampler(p, shared.sd_model)
    timer.process.record('prepare')
    process_pre(p)
    desc = 'Base'
    if 'detailer' in p.ops:
        desc = 'Detail'
    base_args = set_pipeline_args(
        p=p,
        model=shared.sd_model,
        prompts=p.prompts,
        negative_prompts=p.negative_prompts,
        prompts_2=[p.refiner_prompt] if len(p.refiner_prompt) > 0 else p.prompts,
        negative_prompts_2=[p.refiner_negative] if len(p.refiner_negative) > 0 else p.negative_prompts,
        num_inference_steps=calculate_base_steps(p, use_refiner_start=use_refiner_start, use_denoise_start=use_denoise_start),
        eta=shared.opts.scheduler_eta,
        guidance_scale=p.cfg_scale,
        guidance_rescale=p.diffusers_guidance_rescale,
        true_cfg_scale=p.pag_scale,
        denoising_start=0 if use_refiner_start else (p.refiner_start if use_denoise_start else None),
        denoising_end=p.refiner_start if use_refiner_start else (1 if use_denoise_start else None),
        num_frames=getattr(p, 'frames', 1),
        output_type=output_type,
        clip_skip=p.clip_skip,
        desc=desc,
    )
    base_steps = base_args.get('prior_num_inference_steps', None) or p.steps or base_args.get('num_inference_steps', None)
    shared.state.update(get_job_name(p, shared.sd_model), base_steps, 1)
    if shared.opts.scheduler_eta is not None and 0 < shared.opts.scheduler_eta < 1:
        p.extra_generation_params["Sampler Eta"] = shared.opts.scheduler_eta
    output = None
    if debug:
        modelstats.analyze()
    try:
        t0 = time.time()
        extra_networks.activate(p, exclude=['text_encoder', 'text_encoder_2', 'text_encoder_3'])

        if hasattr(shared.sd_model, 'tgate') and getattr(p, 'gate_step', -1) > 0:
            base_args['gate_step'] = p.gate_step
            output = shared.sd_model.tgate(**base_args) # pylint: disable=not-callable
        else:
            taskid = shared.state.begin('Inference')
            output = shared.sd_model(**base_args)
            shared.state.end(taskid)
        if isinstance(output, dict):
            output = SimpleNamespace(**output)
        if isinstance(output, list):
            output = SimpleNamespace(images=output)
        if isinstance(output, Image.Image):
            output = SimpleNamespace(images=[output])
        if hasattr(output, 'image'):
            output.images = output.image
        if hasattr(output, 'images'):
            shared.history.add(output.images, info=processing.create_infotext(p), ops=p.ops)
        timer.process.record('pipeline')
        sd_models_compile.openvino_post_compile(op="base") # no-op unless the model was compiled with OpenVINO
        if shared.cmd_opts.profile:
            t1 = time.time()
            shared.log.debug(f'Profile: pipeline call: {t1-t0:.2f}')
        if not hasattr(output, 'images') and hasattr(output, 'frames'):
            if hasattr(output.frames[0], 'shape'):
                shared.log.debug(f'Generated: frames={output.frames[0].shape[1]}')
            else:
                shared.log.debug(f'Generated: frames={len(output.frames[0])}')
            output.images = output.frames[0]
        if hasattr(output, 'images') and isinstance(output.images, np.ndarray):
            output.images = torch.from_numpy(output.images)
    except AssertionError as e:
        shared.log.info(e)
    except ValueError as e:
        shared.state.interrupted = True
        err_args = base_args.copy()
        for k, v in base_args.items():
            if isinstance(v, torch.Tensor):
                err_args[k] = f'{v.device}:{v.dtype}:{v.shape}'
        shared.log.error(f'Processing: args={err_args} {e}')
        if shared.cmd_opts.debug:
            errors.display(e, 'Processing')
    except RuntimeError as e:
        shared.state.interrupted = True
        err_args = base_args.copy()
        for k, v in base_args.items():
            if isinstance(v, torch.Tensor):
                err_args[k] = f'{v.device}:{v.dtype}:{v.shape}'
        shared.log.error(f'Processing: step=base args={err_args} {e}')
        errors.display(e, 'Processing')
        modelstats.analyze()
    finally:
        process_post(p)

    if hasattr(shared.sd_model, 'postprocess') and callable(shared.sd_model.postprocess):
        output = shared.sd_model.postprocess(p, output)

    shared.state.end(jobid)
    shared.state.nextjob()
    return output


def process_hires(p: processing.StableDiffusionProcessing, output):
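    """Optional second (hires) pass: upscale the base output and, when hires is forced
    or a latent upscaler is selected, re-run the pipeline in img2img mode at the
    target resolution."""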
    # optional second pass
    if (output is None) or (output.images is None):
        return output
    if p.enable_hr:
        jobid = shared.state.begin('Hires')
        p.is_hr_pass = True
        if hasattr(p, 'init_hr'):
            p.init_hr(p.hr_scale, p.hr_upscaler, force=p.hr_force)
        else:
            if not p.is_hr_pass: # fake hires for img2img if not actual hr pass
                p.hr_scale = p.scale_by
                p.hr_upscaler = p.resize_name
                p.hr_resize_mode = p.resize_mode
                p.hr_resize_context = p.resize_context
            p.hr_upscale_to_x = int(p.width * p.hr_scale) if p.hr_resize_x == 0 else p.hr_resize_x
            p.hr_upscale_to_y = int(p.height * p.hr_scale) if p.hr_resize_y == 0 else p.hr_resize_y

        # hires runs on original pipeline
        if hasattr(shared.sd_model, 'restore_pipeline') and (shared.sd_model.restore_pipeline is not None) and (not shared.opts.control_hires):
            shared.sd_model.restore_pipeline()
        if (getattr(shared.sd_model, 'controlnet', None) is not None) and ((isinstance(shared.sd_model.controlnet, list) and len(shared.sd_model.controlnet) > 1) or ('Multi' in type(shared.sd_model.controlnet).__name__)):
            shared.log.warning(f'Process: control={type(shared.sd_model.controlnet)} not supported in hires')
            return output

        # upscale
        if hasattr(p, 'height') and hasattr(p, 'width') and p.hr_resize_mode > 0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5):
            shared.log.info(f'Upscale: mode={p.hr_resize_mode} upscaler="{p.hr_upscaler}" context="{p.hr_resize_context}" resize={p.hr_resize_x}x{p.hr_resize_y} upscale={p.hr_upscale_to_x}x{p.hr_upscale_to_y}')
            p.ops.append('upscale')
            if shared.opts.samples_save and not p.do_not_save_samples and shared.opts.save_images_before_highres_fix and hasattr(shared.sd_model, 'vae'):
                save_intermediate(p, latents=output.images, suffix="-before-hires")
            output.images = resize_hires(p, latents=output.images)
            sd_hijack_hypertile.hypertile_set(p, hr=True)
        elif torch.is_tensor(output.images) and output.images.shape[-1] == 3: # nhwc
            if output.images.dim() == 3:
                output.images = TF.to_pil_image(output.images.permute(2, 0, 1))
            elif output.images.dim() == 4:
                output.images = [TF.to_pil_image(output.images[i].permute(2, 0, 1)) for i in range(output.images.shape[0])]

        strength = p.hr_denoising_strength if p.hr_denoising_strength > 0 else p.denoising_strength
        if (p.hr_upscaler is not None) and (p.hr_upscaler.lower().startswith('latent') or p.hr_force) and strength > 0:
            p.ops.append('hires')
            sd_models_compile.openvino_recompile_model(p, hires=True, refiner=False)
            if shared.sd_model.__class__.__name__ == "OnnxRawPipeline":
                shared.sd_model = preprocess_onnx_pipeline(p)
            p.hr_force = True

        # hires
        if p.hr_force and strength == 0:
            shared.log.warning('Hires skip: denoising=0')
            p.hr_force = False
        if p.hr_force:
            shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
            if 'Upscale' in shared.sd_model.__class__.__name__ or 'Flux' in shared.sd_model.__class__.__name__ or 'Kandinsky' in shared.sd_model.__class__.__name__:
                output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.width, height=p.height)
            if p.is_control and hasattr(p, 'task_args') and p.task_args.get('image', None) is not None:
                if hasattr(shared.sd_model, "vae") and output.images is not None and len(output.images) > 0:
                    output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.hr_upscale_to_x, height=p.hr_upscale_to_y) # controlnet cannot deal with latent input
            update_sampler(p, shared.sd_model, second_pass=True)
            orig_denoise = p.denoising_strength
            p.denoising_strength = strength
            orig_image = p.task_args.pop('image', None) # remove image override from hires
            process_pre(p)
            hires_args = set_pipeline_args(
                p=p,
                model=shared.sd_model,
                prompts=len(output.images) * [p.refiner_prompt] if len(p.refiner_prompt) > 0 else p.prompts,
                negative_prompts=len(output.images) * [p.refiner_negative] if len(p.refiner_negative) > 0 else p.negative_prompts,
                prompts_2=len(output.images) * [p.refiner_prompt] if len(p.refiner_prompt) > 0 else p.prompts,
                negative_prompts_2=len(output.images) * [p.refiner_negative] if len(p.refiner_negative) > 0 else p.negative_prompts,
                num_inference_steps=calculate_hires_steps(p),
                eta=shared.opts.scheduler_eta,
                guidance_scale=p.image_cfg_scale if p.image_cfg_scale is not None else p.cfg_scale,
                guidance_rescale=p.diffusers_guidance_rescale,
                output_type=output_type,
                clip_skip=p.clip_skip,
                image=output.images,
                strength=strength,
                desc='Hires',
            )
            hires_steps = hires_args.get('prior_num_inference_steps', None) or p.hr_second_pass_steps or hires_args.get('num_inference_steps', None)
            shared.state.update(get_job_name(p, shared.sd_model), hires_steps, 1)
            try:
                if 'base' in p.skip:
                    extra_networks.activate(p)
                taskid = shared.state.begin('Inference')
                output = shared.sd_model(**hires_args) # pylint: disable=not-callable
                shared.state.end(taskid)
                if isinstance(output, dict):
                    output = SimpleNamespace(**output)
                if hasattr(output, 'images'):
                    shared.history.add(output.images, info=processing.create_infotext(p), ops=p.ops)
                sd_models_compile.check_deepcache(enable=False)
                sd_models_compile.openvino_post_compile(op="base")
            except AssertionError as e:
                shared.log.info(e)
            except RuntimeError as e:
                shared.state.interrupted = True
                shared.log.error(f'Processing step=hires: args={hires_args} {e}')
                errors.display(e, 'Processing')
                modelstats.analyze()
            finally:
                process_post(p)
            if hasattr(shared.sd_model, 'postprocess') and callable(shared.sd_model.postprocess):
                output = shared.sd_model.postprocess(p, output)
            if orig_image is not None:
                p.task_args['image'] = orig_image
            p.denoising_strength = orig_denoise
        shared.state.end(jobid)
        shared.state.nextjob()
        p.is_hr_pass = False
    timer.process.record('hires')
    return output


def process_refine(p: processing.StableDiffusionProcessing, output):
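    """Optional refiner pass: run each base image through shared.sd_refiner in img2img
    mode, handling model offload and device placement before and after."""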
    # optional refiner pass or decode
    if (output is None) or (output.images is None):
        return output
    if is_refiner_enabled(p):
        if shared.opts.samples_save and not p.do_not_save_samples and shared.opts.save_images_before_refiner and hasattr(shared.sd_model, 'vae'):
            save_intermediate(p, latents=output.images, suffix="-before-refiner")
        if shared.opts.diffusers_move_base:
            shared.log.debug('Moving to CPU: model=base')
            sd_models.move_model(shared.sd_model, devices.cpu)
        if shared.state.interrupted or shared.state.skipped:
            shared.sd_model = orig_pipeline
            return output
        jobid = shared.state.begin('Refine')
        shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
        if shared.opts.diffusers_move_refiner:
            sd_models.move_model(shared.sd_refiner, devices.device)
            if hasattr(shared.sd_refiner, 'unet'):
                sd_models.move_model(shared.sd_refiner.unet, devices.device)
            if hasattr(shared.sd_refiner, 'transformer'):
                sd_models.move_model(shared.sd_refiner.transformer, devices.device)
        p.ops.append('refine')
        p.is_refiner_pass = True
        sd_models_compile.openvino_recompile_model(p, hires=False, refiner=True)
        shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.TEXT_2_IMAGE)
        shared.sd_refiner = sd_models.set_diffuser_pipe(shared.sd_refiner, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
        for i in range(len(output.images)):
            image = output.images[i]
            noise_level = round(350 * p.denoising_strength)
            refiner_output_type = output_type
            if 'Upscale' in shared.sd_refiner.__class__.__name__ or 'Flux' in shared.sd_refiner.__class__.__name__ or 'Kandinsky' in shared.sd_refiner.__class__.__name__:
                image = processing_vae.vae_decode(latents=image, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.width, height=p.height)
                p.extra_generation_params['Noise level'] = noise_level
                refiner_output_type = 'np'
            update_sampler(p, shared.sd_refiner, second_pass=True)
            shared.opts.prompt_attention = 'fixed'
            refiner_args = set_pipeline_args(
                p=p,
                model=shared.sd_refiner,
                prompts=[p.refiner_prompt] if len(p.refiner_prompt) > 0 else p.prompts[i],
                negative_prompts=[p.refiner_negative] if len(p.refiner_negative) > 0 else p.negative_prompts[i],
                num_inference_steps=calculate_refiner_steps(p),
                eta=shared.opts.scheduler_eta,
                # strength=p.denoising_strength,
                noise_level=noise_level, # StableDiffusionUpscalePipeline only
                guidance_scale=p.image_cfg_scale if p.image_cfg_scale is not None else p.cfg_scale,
                guidance_rescale=p.diffusers_guidance_rescale,
                denoising_start=p.refiner_start if 0 < p.refiner_start < 1 else None,
                denoising_end=1 if 0 < p.refiner_start < 1 else None,
                image=image,
                output_type=refiner_output_type,
                clip_skip=p.clip_skip,
                prompt_attention='fixed',
                desc='Refiner',
            )
            refiner_steps = refiner_args.get('prior_num_inference_steps', None) or p.steps or refiner_args.get('num_inference_steps', None)
            shared.state.update(get_job_name(p, shared.sd_refiner), refiner_steps, 1)
            try:
                if 'requires_aesthetics_score' in shared.sd_refiner.config: # sdxl base model needs false and sdxl refiner needs true
                    shared.sd_refiner.register_to_config(requires_aesthetics_score=getattr(shared.sd_refiner, 'tokenizer', None) is None)
                output = shared.sd_refiner(**refiner_args) # pylint: disable=not-callable
                if isinstance(output, dict):
                    output = SimpleNamespace(**output)
                if hasattr(output, 'images'):
                    shared.history.add(output.images, info=processing.create_infotext(p), ops=p.ops)
                sd_models_compile.openvino_post_compile(op="refiner")
            except AssertionError as e:
                shared.log.info(e)
            except RuntimeError as e:
                shared.state.interrupted = True
                shared.log.error(f'Processing step=refine: args={refiner_args} {e}')
                errors.display(e, 'Processing')
                modelstats.analyze()

        if shared.opts.diffusers_offload_mode == "balanced":
            shared.sd_refiner = sd_models.apply_balanced_offload(shared.sd_refiner)
        elif shared.opts.diffusers_move_refiner:
            shared.log.debug('Moving to CPU: model=refiner')
            sd_models.move_model(shared.sd_refiner, devices.cpu)
        shared.state.end(jobid)
        shared.state.nextjob()
        p.is_refiner_pass = False
    timer.process.record('refine')
    return output


def process_decode(p: processing.StableDiffusionProcessing, output):
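    """Decode latents into images using the model (or refiner) VAE; passes through
    outputs that are already PIL images, raw bytes, or video frames."""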
    shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model, exclude=['vae'])
    if output is not None:
        if hasattr(output, 'bytes') and output.bytes is not None:
            shared.log.debug(f'Generated: bytes={len(output.bytes)}')
            return output
        if not hasattr(output, 'images') and hasattr(output, 'frames'):
            shared.log.debug(f'Generated: frames={len(output.frames[0])}')
            output.images = output.frames[0]
        if output.images is not None and len(output.images) > 0 and isinstance(output.images[0], Image.Image):
            return output.images
        model = shared.sd_model if not is_refiner_enabled(p) else shared.sd_refiner
        if not hasattr(model, 'vae'):
            if hasattr(model, 'pipe') and hasattr(model.pipe, 'vae'):
                model = model.pipe
        if (hasattr(model, "vae") or hasattr(model, "vqgan")) and (output.images is not None) and (len(output.images) > 0):
            if p.hr_resize_mode > 0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5):
                width = max(getattr(p, 'width', 0), getattr(p, 'hr_upscale_to_x', 0))
                height = max(getattr(p, 'height', 0), getattr(p, 'hr_upscale_to_y', 0))
            else:
                width = getattr(p, 'width', 0)
                height = getattr(p, 'height', 0)
            frames = p.task_args.get('num_frames', None) or getattr(p, 'frames', None)
            if isinstance(output.images, list):
                results = []
                for i in range(len(output.images)):
                    result_batch = processing_vae.vae_decode(
                        latents=output.images[i],
                        model=model,
                        vae_type=p.vae_type,
                        width=width,
                        height=height,
                        frames=frames,
                    )
                    for result in list(result_batch):
                        results.append(result)
            else:
                results = processing_vae.vae_decode(
                    latents=output.images,
                    model=model,
                    vae_type=p.vae_type,
                    width=width,
                    height=height,
                    frames=frames,
                )
            if not isinstance(results, list):
                results = list(results)
        elif hasattr(output, 'images'):
            results = output.images
        else:
            shared.log.warning('Processing: no results')
            results = []
    else:
        shared.log.warning('Processing: no results')
        results = []
    return results


def update_pipeline(sd_model, p: processing.StableDiffusionProcessing):
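    """Sanity-check and adjust the active pipeline before the base pass: fall back from
    inpainting to img2img when no mask is set, handle the Olive/ONNX backends, and
    apply the configured cross-attention optimization."""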
    if sd_models.get_diffusers_task(sd_model) == sd_models.DiffusersTaskType.INPAINTING and getattr(p, 'image_mask', None) is None and p.task_args.get('image_mask', None) is None and getattr(p, 'mask', None) is None:
        shared.log.warning('Processing: mode=inpaint mask=None')
        sd_model = sd_models.set_diffuser_pipe(sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
    if shared.opts.cuda_compile_backend == "olive-ai":
        sd_model = olive_check_parameters_changed(p, is_refiner_enabled(p))
    if sd_model.__class__.__name__ == "OnnxRawPipeline":
        sd_model = preprocess_onnx_pipeline(p)
        global orig_pipeline # pylint: disable=global-statement
        orig_pipeline = sd_model # processed ONNX pipeline should not be replaced with the original pipeline
    if getattr(sd_model, "current_attn_name", None) != shared.opts.cross_attention_optimization:
        shared.log.info(f"Setting attention optimization: {shared.opts.cross_attention_optimization}")
        attention.set_diffusers_attention(sd_model)
    return sd_model


def validate_pipeline(p: processing.StableDiffusionProcessing):
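    """Return False when a video model is paired with a non-video processing class or
    vice-versa; Wan pipelines are treated as video regardless of the request type."""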
    from modules.video_models.models_def import models as video_models
    models_cls = []
    for family in video_models:
        for m in video_models[family]:
            if m.repo_cls is not None:
                models_cls.append(m.repo_cls.__name__)
            if m.custom is not None:
                models_cls.append(m.custom)
    is_video_model = shared.sd_model.__class__.__name__ in models_cls
    override_video_pipelines = ['WanPipeline', 'WanImageToVideoPipeline', 'WanVACEPipeline']
    is_video_pipeline = ('video' in p.__class__.__name__.lower()) or (shared.sd_model.__class__.__name__ in override_video_pipelines)
    if is_video_model and not is_video_pipeline:
        shared.log.error(f'Mismatch: type={shared.sd_model_type} cls={shared.sd_model.__class__.__name__} request={p.__class__.__name__} video model with non-video pipeline')
        return False
    elif not is_video_model and is_video_pipeline:
        shared.log.error(f'Mismatch: type={shared.sd_model_type} cls={shared.sd_model.__class__.__name__} request={p.__class__.__name__} non-video model with video pipeline')
        return False
    return True


def process_diffusers(p: processing.StableDiffusionProcessing):
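    """Main entrypoint: run the base, hires, and refiner passes for p and return decoded images.

    A minimal sketch of the flow (stage names follow the entries of p.skip):

        p = restore_state(p)                # reuse last job for reprocess ops
        output = process_base(p)            # unless 'base' in p.skip
        output = process_hires(p, output)   # unless 'hires' in p.skip
        output = process_refine(p, output)  # unless 'refine' in p.skip
        return process_decode(p, output)    # VAE decode to PIL images
    """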
    results = []
    if debug:
        shared.log.trace(f'Process diffusers args: {vars(p)}')
    if not validate_pipeline(p):
        return results

    p = restore_state(p)
    global orig_pipeline # pylint: disable=global-statement
    orig_pipeline = shared.sd_model

    if shared.state.interrupted or shared.state.skipped:
        shared.sd_model = orig_pipeline
        return results

    # sanitize init_images
    if hasattr(p, 'init_images') and not isinstance(getattr(p, 'init_images', []), list):
        p.init_images = [p.init_images]
    if hasattr(p, 'init_images') and isinstance(getattr(p, 'init_images', []), list):
        p.init_images = [i for i in p.init_images if i is not None]
    if len(getattr(p, 'init_images', [])) > 0:
        while len(p.init_images) < len(p.prompts):
            p.init_images.append(p.init_images[-1])

    # pipeline type is set earlier in processing, but check for sanity
    is_control = getattr(p, 'is_control', False) is True
    has_images = len(getattr(p, 'init_images', [])) > 0
    if (sd_models.get_diffusers_task(shared.sd_model) != sd_models.DiffusersTaskType.TEXT_2_IMAGE) and (not has_images) and (not is_control):
        shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.TEXT_2_IMAGE) # reset pipeline
    if hasattr(shared.sd_model, 'unet') and hasattr(shared.sd_model.unet, 'config') and hasattr(shared.sd_model.unet.config, 'in_channels') and shared.sd_model.unet.config.in_channels == 9 and not is_control:
        shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.INPAINTING) # force pipeline
        if len(getattr(p, 'init_images', [])) == 0:
            p.init_images = [TF.to_pil_image(torch.rand((3, getattr(p, 'height', 512), getattr(p, 'width', 512))))]
    if p.prompts is None or len(p.prompts) == 0:
        p.prompts = p.all_prompts[p.iteration * p.batch_size:(p.iteration+1) * p.batch_size]
    if p.negative_prompts is None or len(p.negative_prompts) == 0:
        p.negative_prompts = p.all_negative_prompts[p.iteration * p.batch_size:(p.iteration+1) * p.batch_size]

    sd_models_compile.openvino_recompile_model(p, hires=False, refiner=False) # recompile if a parameter changes

    if hasattr(p, 'dummy'):
        images = [Image.new(mode='RGB', size=(p.width, p.height))]
        return images
    if 'base' not in p.skip:
        output = process_base(p)
    else:
        images, _index = shared.history.selected
        output = SimpleNamespace(images=images)

    if (output is None or (hasattr(output, 'images') and len(output.images) == 0)) and has_images:
        if output is not None:
            shared.log.debug('Processing: using input as base output')
            output.images = p.init_images

    if shared.state.interrupted or shared.state.skipped:
        shared.sd_model = orig_pipeline
        return results

    if 'hires' not in p.skip:
        output = process_hires(p, output)
    if shared.state.interrupted or shared.state.skipped:
        shared.sd_model = orig_pipeline
        return results

    if 'refine' not in p.skip:
        output = process_refine(p, output)
    if shared.state.interrupted or shared.state.skipped:
        shared.sd_model = orig_pipeline
        return results

    extra_networks.deactivate(p)
    timer.process.add('lora', lora_common.timer.total)
    lora_common.timer.clear(complete=True)

    results = process_decode(p, output)
    timer.process.record('decode')

    shared.sd_model = orig_pipeline

    if p.state == '':
        global last_p # pylint: disable=global-statement
        last_p = p
    return results