# sdnext/modules/processing_diffusers.py
from types import SimpleNamespace
import os
import time
import numpy as np
import torch
import torchvision.transforms.functional as TF
from PIL import Image
from modules import shared, devices, processing, sd_models, errors, sd_hijack_hypertile, processing_vae, sd_models_compile, timer, modelstats, extra_networks, attention
from modules.processing_helpers import resize_hires, calculate_base_steps, calculate_hires_steps, calculate_refiner_steps, save_intermediate, update_sampler, is_txt2img, is_refiner_enabled, get_job_name
from modules.processing_args import set_pipeline_args
from modules.onnx_impl import preprocess_pipeline as preprocess_onnx_pipeline, check_parameters_changed as olive_check_parameters_changed
from modules.lora import lora_common
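
# module-level flags and state:
# - debug: extra diffusers debug logging, enabled via the SD_DIFFUSERS_DEBUG env var
# - output_type: pipelines return decoded numpy images when SD_VAE_DEFAULT is set, otherwise raw latents
# - last_p: last processed object, kept so reprocess operations can restore it
# - orig_pipeline: original pipeline, restored after processing completes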
debug = os.environ.get('SD_DIFFUSERS_DEBUG', None) is not None
output_type = 'np' if os.environ.get('SD_VAE_DEFAULT', None) is not None else 'latent'
last_p = None
orig_pipeline = shared.sd_model
def restore_state(p: processing.StableDiffusionProcessing):
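    """Restore a previous processing state for reprocess operations.

    For op 'reprocess_refine' the previously processed object `last_p` is reused
    with the current resize/hires parameters; for 'reprocess_detail' the base and
    hires passes are skipped so only the detailer runs. Returns `p` unchanged
    when no compatible previous state exists.
    """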
    if p.state in ['reprocess_refine', 'reprocess_detail']:
        # validate
        if last_p is None:
            shared.log.warning(f'Restore state: op={p.state} last state missing')
            return p
        if p.__class__ != last_p.__class__:
            shared.log.warning(f'Restore state: op={p.state} last state is different type')
            return p
        if shared.history.count == 0:
            shared.log.warning(f'Restore state: op={p.state} last latents missing')
            return p
        state = p.state
        # set ops
        if state == 'reprocess_refine':
            width, width_before, width_after, width_mask = p.width, p.width_before, p.width_after, p.width_mask
            height, height_before, height_after, height_mask = p.height, p.height_before, p.height_after, p.height_mask
            scale_by, scale_by_before, scale_by_after, scale_by_mask = p.scale_by, p.scale_by_before, p.scale_by_after, p.scale_by_mask
            resize_name, resize_name_before, resize_name_after, resize_name_mask = p.resize_name, p.resize_name_before, p.resize_name_after, p.resize_name_mask
            resize_mode, resize_mode_before, resize_mode_after, resize_mode_mask = p.resize_mode, p.resize_mode_before, p.resize_mode_after, p.resize_mode_mask
            resize_context, resize_context_before, resize_context_after, resize_context_mask = p.resize_context, p.resize_context_before, p.resize_context_after, p.resize_context_mask
            selected_scale_tab, selected_scale_tab_before, selected_scale_tab_after, selected_scale_tab_mask = p.selected_scale_tab, p.selected_scale_tab_before, p.selected_scale_tab_after, p.selected_scale_tab_mask
            hr_scale, hr_resize_mode, hr_resize_context, hr_upscaler, hr_second_pass_steps = p.hr_scale, p.hr_resize_mode, p.hr_resize_context, p.hr_upscaler, p.hr_second_pass_steps
            hr_resize_x, hr_resize_y, hr_upscale_to_x, hr_upscale_to_y, hr_denoising_strength = p.hr_resize_x, p.hr_resize_y, p.hr_upscale_to_x, p.hr_upscale_to_y, p.hr_denoising_strength
            p = last_p
            p.skip = ['encode', 'base']
            p.state = state
            p.enable_hr = True
            p.hr_force = True
            p.init_images = None
            p.width, p.width_before, p.width_after, p.width_mask = width, width_before, width_after, width_mask
            p.height, p.height_before, p.height_after, p.height_mask = height, height_before, height_after, height_mask
            p.resize_name, p.resize_name_before, p.resize_name_after, p.resize_name_mask = resize_name, resize_name_before, resize_name_after, resize_name_mask
            p.resize_mode, p.resize_mode_before, p.resize_mode_after, p.resize_mode_mask = resize_mode, resize_mode_before, resize_mode_after, resize_mode_mask
            p.resize_context, p.resize_context_before, p.resize_context_after, p.resize_context_mask = resize_context, resize_context_before, resize_context_after, resize_context_mask
            p.selected_scale_tab, p.selected_scale_tab_before, p.selected_scale_tab_after, p.selected_scale_tab_mask = selected_scale_tab, selected_scale_tab_before, selected_scale_tab_after, selected_scale_tab_mask
            p.scale_by, p.scale_by_before, p.scale_by_after, p.scale_by_mask = scale_by, scale_by_before, scale_by_after, scale_by_mask
            p.hr_scale, p.hr_resize_mode, p.hr_resize_context, p.hr_upscaler, p.hr_second_pass_steps = hr_scale, hr_resize_mode, hr_resize_context, hr_upscaler, hr_second_pass_steps
            p.hr_resize_x, p.hr_resize_y, p.hr_upscale_to_x, p.hr_upscale_to_y, p.hr_denoising_strength = hr_resize_x, hr_resize_y, hr_upscale_to_x, hr_upscale_to_y, hr_denoising_strength
        if state == 'reprocess_detail':
            p.skip = ['encode', 'base', 'hires']
            p.detailer_enabled = True
        shared.log.info(f'Restore state: op={p.state} skip={p.skip}')
    return p


def process_pre(p: processing.StableDiffusionProcessing):
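    """Apply optional pipeline modifiers before the main pipeline call.

    Covers paired apply/unapply modifiers (deepcache, ip-adapter, token merging,
    hidiffusion, RAS, PAG, CFG-zero, linfusion, cache-dit) and apply-only ones
    (freeu, transformer cache, first-block cache, teacache), then converts the
    pipeline to its modular variant when supported.
    """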
    from modules import ipadapter, sd_hijack_freeu, para_attention, teacache, hidiffusion, ras, pag, cfgzero, transformer_cache, token_merge, linfusion, cachedit
    shared.log.info('Processing modifiers: apply')
    try:
        # apply-with-unapply
        sd_models_compile.check_deepcache(enable=True)
        ipadapter.apply(shared.sd_model, p)
        token_merge.apply_token_merging(p.sd_model)
        hidiffusion.apply(p, shared.sd_model_type)
        ras.apply(shared.sd_model, p)
        pag.apply(p)
        cfgzero.apply(p)
        linfusion.apply(shared.sd_model)
        cachedit.apply_cache_dit(shared.sd_model)
        # apply-only
        sd_hijack_freeu.apply_freeu(p)
        transformer_cache.set_cache()
        para_attention.apply_first_block_cache()
        teacache.apply_teacache(p)
    except Exception as e:
        shared.log.error(f'Processing apply: {e}')
        errors.display(e, 'apply')
    shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
    # if hasattr(shared.sd_model, 'unet'):
    #     sd_models.move_model(shared.sd_model.unet, devices.device)
    # if hasattr(shared.sd_model, 'transformer'):
    #     sd_models.move_model(shared.sd_model.transformer, devices.device)
    from modules import modular
    if modular.is_compatible(shared.sd_model):
        modular_pipe = modular.convert_to_modular(shared.sd_model)
        if modular_pipe is not None:
            shared.sd_model = modular_pipe
    if modular.is_guider(shared.sd_model):
        from modules import modular_guiders
        modular_guiders.set_guider(p)
    timer.process.record('pre')


def process_post(p: processing.StableDiffusionProcessing):
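    """Unapply the paired pipeline modifiers enabled in process_pre."""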
    from modules import ipadapter, hidiffusion, ras, pag, cfgzero, token_merge, linfusion, cachedit
    shared.log.info('Processing modifiers: unapply')
    try:
        sd_models_compile.check_deepcache(enable=False)
        ipadapter.unapply(shared.sd_model, unload=getattr(p, 'ip_adapter_unload', False))
        token_merge.remove_token_merging(p.sd_model)
        hidiffusion.unapply()
        ras.unapply(shared.sd_model)
        pag.unapply()
        cfgzero.unapply()
        linfusion.unapply(shared.sd_model)
        cachedit.unapply_cache_dit(shared.sd_model)
    except Exception as e:
        shared.log.error(f'Processing unapply: {e}')
        errors.display(e, 'unapply')
    timer.process.record('post')


def process_base(p: processing.StableDiffusionProcessing):
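    """Run the base pass of the pipeline and return its raw output.

    Output images are latents by default (see module-level `output_type`), so they
    normally still require a VAE decode in process_decode.
    """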
    jobid = shared.state.begin('Base')
    txt2img = is_txt2img()
    use_refiner_start = is_refiner_enabled(p) and (not p.is_hr_pass)
    use_denoise_start = not txt2img and p.refiner_start > 0 and p.refiner_start < 1
    shared.sd_model = update_pipeline(shared.sd_model, p)
    update_sampler(p, shared.sd_model)
    timer.process.record('prepare')
    process_pre(p)
    desc = 'Base'
    if 'detailer' in p.ops:
        desc = 'Detail'
    base_args = set_pipeline_args(
        p=p,
        model=shared.sd_model,
        prompts=p.prompts,
        negative_prompts=p.negative_prompts,
        prompts_2=[p.refiner_prompt] if len(p.refiner_prompt) > 0 else p.prompts,
        negative_prompts_2=[p.refiner_negative] if len(p.refiner_negative) > 0 else p.negative_prompts,
        num_inference_steps=calculate_base_steps(p, use_refiner_start=use_refiner_start, use_denoise_start=use_denoise_start),
        eta=shared.opts.scheduler_eta,
        guidance_scale=p.cfg_scale,
        guidance_rescale=p.diffusers_guidance_rescale,
        true_cfg_scale=p.pag_scale,
        denoising_start=0 if use_refiner_start else p.refiner_start if use_denoise_start else None,
        denoising_end=p.refiner_start if use_refiner_start else 1 if use_denoise_start else None,
        num_frames=getattr(p, 'frames', 1),
        output_type=output_type,
        clip_skip=p.clip_skip,
        desc=desc,
    )
    base_steps = base_args.get('prior_num_inference_steps', None) or p.steps or base_args.get('num_inference_steps', None)
    shared.state.update(get_job_name(p, shared.sd_model), base_steps, 1)
    if shared.opts.scheduler_eta is not None and shared.opts.scheduler_eta > 0 and shared.opts.scheduler_eta < 1:
        p.extra_generation_params["Sampler Eta"] = shared.opts.scheduler_eta
    output = None
    if debug:
        modelstats.analyze()
    try:
        t0 = time.time()
        extra_networks.activate(p, exclude=['text_encoder', 'text_encoder_2', 'text_encoder_3'])
        if hasattr(shared.sd_model, 'tgate') and getattr(p, 'gate_step', -1) > 0:
            base_args['gate_step'] = p.gate_step
            output = shared.sd_model.tgate(**base_args) # pylint: disable=not-callable
        else:
            taskid = shared.state.begin('Inference')
            output = shared.sd_model(**base_args)
            shared.state.end(taskid)
        if isinstance(output, dict):
            output = SimpleNamespace(**output)
        if isinstance(output, list):
            output = SimpleNamespace(images=output)
        if isinstance(output, Image.Image):
            output = SimpleNamespace(images=[output])
        if hasattr(output, 'image'):
            output.images = output.image
        if hasattr(output, 'images'):
            shared.history.add(output.images, info=processing.create_infotext(p), ops=p.ops)
        timer.process.record('pipeline')
        sd_models_compile.openvino_post_compile(op="base") # only executes on compiled OpenVINO models
        if shared.cmd_opts.profile:
            t1 = time.time()
            shared.log.debug(f'Profile: pipeline call: {t1-t0:.2f}')
        if not hasattr(output, 'images') and hasattr(output, 'frames'):
            if hasattr(output.frames[0], 'shape'):
                shared.log.debug(f'Generated: frames={output.frames[0].shape[1]}')
            else:
                shared.log.debug(f'Generated: frames={len(output.frames[0])}')
            output.images = output.frames[0]
        if hasattr(output, 'images') and isinstance(output.images, np.ndarray):
            output.images = torch.from_numpy(output.images)
    except AssertionError as e:
        shared.log.info(e)
    except ValueError as e:
        shared.state.interrupted = True
        err_args = base_args.copy()
        for k, v in base_args.items():
            if isinstance(v, torch.Tensor):
                err_args[k] = f'{v.device}:{v.dtype}:{v.shape}'
        shared.log.error(f'Processing: args={err_args} {e}')
        if shared.cmd_opts.debug:
            errors.display(e, 'Processing')
    except RuntimeError as e:
        shared.state.interrupted = True
        err_args = base_args.copy()
        for k, v in base_args.items():
            if isinstance(v, torch.Tensor):
                err_args[k] = f'{v.device}:{v.dtype}:{v.shape}'
        shared.log.error(f'Processing: step=base args={err_args} {e}')
        errors.display(e, 'Processing')
        modelstats.analyze()
    finally:
        process_post(p)
    if hasattr(shared.sd_model, 'postprocess') and callable(shared.sd_model.postprocess):
        output = shared.sd_model.postprocess(p, output)
    shared.state.end(jobid)
    shared.state.nextjob()
    return output


def process_hires(p: processing.StableDiffusionProcessing, output):
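    """Optional second pass: upscale the base output and, when forced or a latent
    upscaler is selected, re-run img2img ('hires') at the target resolution.
    """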
    # optional second pass
    if (output is None) or (output.images is None):
        return output
    if p.enable_hr:
        jobid = shared.state.begin('Hires')
        p.is_hr_pass = True
        if hasattr(p, 'init_hr'):
            p.init_hr(p.hr_scale, p.hr_upscaler, force=p.hr_force)
        else:
            if not p.is_hr_pass: # fake hires for img2img if not actual hr pass
                p.hr_scale = p.scale_by
                p.hr_upscaler = p.resize_name
                p.hr_resize_mode = p.resize_mode
                p.hr_resize_context = p.resize_context
        p.hr_upscale_to_x = int(p.width * p.hr_scale) if p.hr_resize_x == 0 else p.hr_resize_x
        p.hr_upscale_to_y = int(p.height * p.hr_scale) if p.hr_resize_y == 0 else p.hr_resize_y
        # hires runs on original pipeline
        if hasattr(shared.sd_model, 'restore_pipeline') and (shared.sd_model.restore_pipeline is not None) and (not shared.opts.control_hires):
            shared.sd_model.restore_pipeline()
        if (getattr(shared.sd_model, 'controlnet', None) is not None) and ((isinstance(shared.sd_model.controlnet, list) and len(shared.sd_model.controlnet) > 1) or ('Multi' in type(shared.sd_model.controlnet).__name__)):
            shared.log.warning(f'Process: control={type(shared.sd_model.controlnet)} not supported in hires')
            return output
        # upscale
        if hasattr(p, 'height') and hasattr(p, 'width') and p.hr_resize_mode > 0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5):
            shared.log.info(f'Upscale: mode={p.hr_resize_mode} upscaler="{p.hr_upscaler}" context="{p.hr_resize_context}" resize={p.hr_resize_x}x{p.hr_resize_y} upscale={p.hr_upscale_to_x}x{p.hr_upscale_to_y}')
            p.ops.append('upscale')
            if shared.opts.samples_save and not p.do_not_save_samples and shared.opts.save_images_before_highres_fix and hasattr(shared.sd_model, 'vae'):
                save_intermediate(p, latents=output.images, suffix="-before-hires")
            output.images = resize_hires(p, latents=output.images)
            sd_hijack_hypertile.hypertile_set(p, hr=True)
        elif torch.is_tensor(output.images) and output.images.shape[-1] == 3: # nhwc
            if output.images.dim() == 3:
                output.images = TF.to_pil_image(output.images.permute(2, 0, 1))
            elif output.images.dim() == 4:
                output.images = [TF.to_pil_image(output.images[i].permute(2, 0, 1)) for i in range(output.images.shape[0])]
        strength = p.hr_denoising_strength if p.hr_denoising_strength > 0 else p.denoising_strength
        if (p.hr_upscaler is not None) and (p.hr_upscaler.lower().startswith('latent') or p.hr_force) and strength > 0:
            p.ops.append('hires')
            sd_models_compile.openvino_recompile_model(p, hires=True, refiner=False)
            if shared.sd_model.__class__.__name__ == "OnnxRawPipeline":
                shared.sd_model = preprocess_onnx_pipeline(p)
            p.hr_force = True
        # hires
        if p.hr_force and strength == 0:
            shared.log.warning('Hires skip: denoising=0')
            p.hr_force = False
        if p.hr_force:
            shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
            if 'Upscale' in shared.sd_model.__class__.__name__ or 'Flux' in shared.sd_model.__class__.__name__ or 'Kandinsky' in shared.sd_model.__class__.__name__:
                output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.width, height=p.height)
            if p.is_control and hasattr(p, 'task_args') and p.task_args.get('image', None) is not None:
                if hasattr(shared.sd_model, "vae") and output.images is not None and len(output.images) > 0:
                    output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.hr_upscale_to_x, height=p.hr_upscale_to_y) # controlnet cannot deal with latent input
            update_sampler(p, shared.sd_model, second_pass=True)
            orig_denoise = p.denoising_strength
            p.denoising_strength = strength
            orig_image = p.task_args.pop('image', None) # remove image override from hires
            process_pre(p)
            hires_args = set_pipeline_args(
                p=p,
                model=shared.sd_model,
                prompts=len(output.images) * [p.refiner_prompt] if len(p.refiner_prompt) > 0 else p.prompts,
                negative_prompts=len(output.images) * [p.refiner_negative] if len(p.refiner_negative) > 0 else p.negative_prompts,
                prompts_2=len(output.images) * [p.refiner_prompt] if len(p.refiner_prompt) > 0 else p.prompts,
                negative_prompts_2=len(output.images) * [p.refiner_negative] if len(p.refiner_negative) > 0 else p.negative_prompts,
                num_inference_steps=calculate_hires_steps(p),
                eta=shared.opts.scheduler_eta,
                guidance_scale=p.image_cfg_scale if p.image_cfg_scale is not None else p.cfg_scale,
                guidance_rescale=p.diffusers_guidance_rescale,
                output_type=output_type,
                clip_skip=p.clip_skip,
                image=output.images,
                strength=strength,
                desc='Hires',
            )
            hires_steps = hires_args.get('prior_num_inference_steps', None) or p.hr_second_pass_steps or hires_args.get('num_inference_steps', None)
            shared.state.update(get_job_name(p, shared.sd_model), hires_steps, 1)
            try:
                if 'base' in p.skip:
                    extra_networks.activate(p)
                taskid = shared.state.begin('Inference')
                output = shared.sd_model(**hires_args) # pylint: disable=not-callable
                shared.state.end(taskid)
                if isinstance(output, dict):
                    output = SimpleNamespace(**output)
                if hasattr(output, 'images'):
                    shared.history.add(output.images, info=processing.create_infotext(p), ops=p.ops)
                sd_models_compile.check_deepcache(enable=False)
                sd_models_compile.openvino_post_compile(op="base")
            except AssertionError as e:
                shared.log.info(e)
            except RuntimeError as e:
                shared.state.interrupted = True
                shared.log.error(f'Processing step=hires: args={hires_args} {e}')
                errors.display(e, 'Processing')
                modelstats.analyze()
            finally:
                process_post(p)
            if hasattr(shared.sd_model, 'postprocess') and callable(shared.sd_model.postprocess):
                output = shared.sd_model.postprocess(p, output)
            if orig_image is not None:
                p.task_args['image'] = orig_image
            p.denoising_strength = orig_denoise
        shared.state.end(jobid)
        shared.state.nextjob()
        p.is_hr_pass = False
        timer.process.record('hires')
    return output


def process_refine(p: processing.StableDiffusionProcessing, output):
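    """Optional refiner pass: run each base output image through `shared.sd_refiner`."""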
    # optional refiner pass or decode
    if (output is None) or (output.images is None):
        return output
    if is_refiner_enabled(p):
        if shared.opts.samples_save and not p.do_not_save_samples and shared.opts.save_images_before_refiner and hasattr(shared.sd_model, 'vae'):
            save_intermediate(p, latents=output.images, suffix="-before-refiner")
        if shared.opts.diffusers_move_base:
            shared.log.debug('Moving to CPU: model=base')
            sd_models.move_model(shared.sd_model, devices.cpu)
        if shared.state.interrupted or shared.state.skipped:
            shared.sd_model = orig_pipeline
            return output
        jobid = shared.state.begin('Refine')
        shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
        if shared.opts.diffusers_move_refiner:
            sd_models.move_model(shared.sd_refiner, devices.device)
            if hasattr(shared.sd_refiner, 'unet'):
                sd_models.move_model(shared.sd_refiner.unet, devices.device)
            if hasattr(shared.sd_refiner, 'transformer'):
                sd_models.move_model(shared.sd_refiner.transformer, devices.device)
        p.ops.append('refine')
        p.is_refiner_pass = True
        sd_models_compile.openvino_recompile_model(p, hires=False, refiner=True)
        shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.TEXT_2_IMAGE)
        shared.sd_refiner = sd_models.set_diffuser_pipe(shared.sd_refiner, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
        for i in range(len(output.images)):
            image = output.images[i]
            noise_level = round(350 * p.denoising_strength)
            refiner_output_type = output_type
            if 'Upscale' in shared.sd_refiner.__class__.__name__ or 'Flux' in shared.sd_refiner.__class__.__name__ or 'Kandinsky' in shared.sd_refiner.__class__.__name__:
                image = processing_vae.vae_decode(latents=image, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.width, height=p.height)
                p.extra_generation_params['Noise level'] = noise_level
                refiner_output_type = 'np'
            update_sampler(p, shared.sd_refiner, second_pass=True)
            shared.opts.prompt_attention = 'fixed'
            refiner_args = set_pipeline_args(
                p=p,
                model=shared.sd_refiner,
                prompts=[p.refiner_prompt] if len(p.refiner_prompt) > 0 else p.prompts[i],
                negative_prompts=[p.refiner_negative] if len(p.refiner_negative) > 0 else p.negative_prompts[i],
                num_inference_steps=calculate_refiner_steps(p),
                eta=shared.opts.scheduler_eta,
                # strength=p.denoising_strength,
                noise_level=noise_level, # StableDiffusionUpscalePipeline only
                guidance_scale=p.image_cfg_scale if p.image_cfg_scale is not None else p.cfg_scale,
                guidance_rescale=p.diffusers_guidance_rescale,
                denoising_start=p.refiner_start if p.refiner_start > 0 and p.refiner_start < 1 else None,
                denoising_end=1 if p.refiner_start > 0 and p.refiner_start < 1 else None,
                image=image,
                output_type=refiner_output_type,
                clip_skip=p.clip_skip,
                prompt_attention='fixed',
                desc='Refiner',
            )
            refiner_steps = refiner_args.get('prior_num_inference_steps', None) or p.steps or refiner_args.get('num_inference_steps', None)
            shared.state.update(get_job_name(p, shared.sd_refiner), refiner_steps, 1)
            try:
                if 'requires_aesthetics_score' in shared.sd_refiner.config: # sdxl-model needs false and sdxl-refiner needs true
                    shared.sd_refiner.register_to_config(requires_aesthetics_score=getattr(shared.sd_refiner, 'tokenizer', None) is None)
                output = shared.sd_refiner(**refiner_args) # pylint: disable=not-callable
                if isinstance(output, dict):
                    output = SimpleNamespace(**output)
                if hasattr(output, 'images'):
                    shared.history.add(output.images, info=processing.create_infotext(p), ops=p.ops)
                sd_models_compile.openvino_post_compile(op="refiner")
            except AssertionError as e:
                shared.log.info(e)
            except RuntimeError as e:
                shared.state.interrupted = True
                shared.log.error(f'Processing step=refine: args={refiner_args} {e}')
                errors.display(e, 'Processing')
                modelstats.analyze()
        if shared.opts.diffusers_offload_mode == "balanced":
            shared.sd_refiner = sd_models.apply_balanced_offload(shared.sd_refiner)
        elif shared.opts.diffusers_move_refiner:
            shared.log.debug('Moving to CPU: model=refiner')
            sd_models.move_model(shared.sd_refiner, devices.cpu)
        shared.state.end(jobid)
        shared.state.nextjob()
        p.is_refiner_pass = False
    timer.process.record('refine')
    return output


def process_decode(p: processing.StableDiffusionProcessing, output):
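    """Decode output latents into a list of images using the model or refiner VAE.

    Outputs that are already PIL images, raw bytes, or frames are passed through
    without decoding.
    """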
    shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model, exclude=['vae'])
    if output is not None:
        if hasattr(output, 'bytes') and output.bytes is not None:
            shared.log.debug(f'Generated: bytes={len(output.bytes)}')
            return output
        if not hasattr(output, 'images') and hasattr(output, 'frames'):
            shared.log.debug(f'Generated: frames={len(output.frames[0])}')
            output.images = output.frames[0]
        if output.images is not None and len(output.images) > 0 and isinstance(output.images[0], Image.Image):
            return output.images
        model = shared.sd_model if not is_refiner_enabled(p) else shared.sd_refiner
        if not hasattr(model, 'vae'):
            if hasattr(model, 'pipe') and hasattr(model.pipe, 'vae'):
                model = model.pipe
        if (hasattr(model, "vae") or hasattr(model, "vqgan")) and (output.images is not None) and (len(output.images) > 0):
            if p.hr_resize_mode > 0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5):
                width = max(getattr(p, 'width', 0), getattr(p, 'hr_upscale_to_x', 0))
                height = max(getattr(p, 'height', 0), getattr(p, 'hr_upscale_to_y', 0))
            else:
                width = getattr(p, 'width', 0)
                height = getattr(p, 'height', 0)
            frames = p.task_args.get('num_frames', None) or getattr(p, 'frames', None)
            if isinstance(output.images, list):
                results = []
                for i in range(len(output.images)):
                    result_batch = processing_vae.vae_decode(
                        latents=output.images[i],
                        model=model,
                        vae_type=p.vae_type,
                        width=width,
                        height=height,
                        frames=frames,
                    )
                    for result in list(result_batch):
                        results.append(result)
            else:
                results = processing_vae.vae_decode(
                    latents=output.images,
                    model=model,
                    vae_type=p.vae_type,
                    width=width,
                    height=height,
                    frames=frames,
                )
                if not isinstance(results, list):
                    results = list(results)
        elif hasattr(output, 'images'):
            results = output.images
        else:
            shared.log.warning('Processing: no results')
            results = []
    else:
        shared.log.warning('Processing: no results')
        results = []
    return results


def update_pipeline(sd_model, p: processing.StableDiffusionProcessing):
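    """Sanity-check and adjust the pipeline before the base pass: fall back from
    inpaint to img2img when no mask is set, handle olive/onnx compile backends,
    and apply the configured cross-attention optimization.
    """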
    if sd_models.get_diffusers_task(sd_model) == sd_models.DiffusersTaskType.INPAINTING and getattr(p, 'image_mask', None) is None and p.task_args.get('image_mask', None) is None and getattr(p, 'mask', None) is None:
        shared.log.warning('Processing: mode=inpaint mask=None')
        sd_model = sd_models.set_diffuser_pipe(sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
    if shared.opts.cuda_compile_backend == "olive-ai":
        sd_model = olive_check_parameters_changed(p, is_refiner_enabled(p))
    if sd_model.__class__.__name__ == "OnnxRawPipeline":
        sd_model = preprocess_onnx_pipeline(p)
        global orig_pipeline # pylint: disable=global-statement
        orig_pipeline = sd_model # processed ONNX pipeline should not be replaced with original pipeline
    if getattr(sd_model, "current_attn_name", None) != shared.opts.cross_attention_optimization:
        shared.log.info(f"Setting attention optimization: {shared.opts.cross_attention_optimization}")
        attention.set_diffusers_attention(sd_model)
    return sd_model


def validate_pipeline(p: processing.StableDiffusionProcessing):
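    """Reject mismatched combinations of video models with non-video processing
    requests and vice versa.
    """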
    from modules.video_models.models_def import models as video_models
    models_cls = []
    for family in video_models:
        for m in video_models[family]:
            if m.repo_cls is not None:
                models_cls.append(m.repo_cls.__name__)
            if m.custom is not None:
                models_cls.append(m.custom)
    is_video_model = shared.sd_model.__class__.__name__ in models_cls
    override_video_pipelines = ['WanPipeline', 'WanImageToVideoPipeline', 'WanVACEPipeline']
    is_video_pipeline = ('video' in p.__class__.__name__.lower()) or (shared.sd_model.__class__.__name__ in override_video_pipelines)
    if is_video_model and not is_video_pipeline:
        shared.log.error(f'Mismatch: type={shared.sd_model_type} cls={shared.sd_model.__class__.__name__} request={p.__class__.__name__} video model with non-video pipeline')
        return False
    elif not is_video_model and is_video_pipeline:
        shared.log.error(f'Mismatch: type={shared.sd_model_type} cls={shared.sd_model.__class__.__name__} request={p.__class__.__name__} non-video model with video pipeline')
        return False
    return True


def process_diffusers(p: processing.StableDiffusionProcessing):
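    """Main entrypoint for diffusers processing.

    Validates the pipeline, restores reprocess state, sanitizes inputs, then runs
    the base, hires and refine passes and decodes the result into a list of images.
    """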
    results = []
    if debug:
        shared.log.trace(f'Process diffusers args: {vars(p)}')
    if not validate_pipeline(p):
        return results
    p = restore_state(p)
    global orig_pipeline # pylint: disable=global-statement
    orig_pipeline = shared.sd_model
    if shared.state.interrupted or shared.state.skipped:
        shared.sd_model = orig_pipeline
        return results
    # sanitize init_images
    if hasattr(p, 'init_images') and not isinstance(getattr(p, 'init_images', []), list):
        p.init_images = [p.init_images]
    if hasattr(p, 'init_images') and isinstance(getattr(p, 'init_images', []), list):
        p.init_images = [i for i in p.init_images if i is not None]
        if len(getattr(p, 'init_images', [])) > 0:
            while len(p.init_images) < len(p.prompts):
                p.init_images.append(p.init_images[-1])
    # pipeline type is set earlier in processing, but check for sanity
    is_control = getattr(p, 'is_control', False) is True
    has_images = len(getattr(p, 'init_images', [])) > 0
    if (sd_models.get_diffusers_task(shared.sd_model) != sd_models.DiffusersTaskType.TEXT_2_IMAGE) and (not has_images) and (not is_control):
        shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.TEXT_2_IMAGE) # reset pipeline
    if hasattr(shared.sd_model, 'unet') and hasattr(shared.sd_model.unet, 'config') and hasattr(shared.sd_model.unet.config, 'in_channels') and shared.sd_model.unet.config.in_channels == 9 and not is_control:
        shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.INPAINTING) # force pipeline
        if len(getattr(p, 'init_images', [])) == 0:
            p.init_images = [TF.to_pil_image(torch.rand((3, getattr(p, 'height', 512), getattr(p, 'width', 512))))]
    if p.prompts is None or len(p.prompts) == 0:
        p.prompts = p.all_prompts[p.iteration * p.batch_size:(p.iteration+1) * p.batch_size]
    if p.negative_prompts is None or len(p.negative_prompts) == 0:
        p.negative_prompts = p.all_negative_prompts[p.iteration * p.batch_size:(p.iteration+1) * p.batch_size]
    sd_models_compile.openvino_recompile_model(p, hires=False, refiner=False) # recompile if a parameter changes
    if hasattr(p, 'dummy'):
        images = [Image.new(mode='RGB', size=(p.width, p.height))]
        return images
    if 'base' not in p.skip:
        output = process_base(p)
    else:
        images, _index = shared.history.selected
        output = SimpleNamespace(images=images)
    if (output is None or (hasattr(output, 'images') and len(output.images) == 0)) and has_images:
        if output is not None:
            shared.log.debug('Processing: using input as base output')
            output.images = p.init_images
    if shared.state.interrupted or shared.state.skipped:
        shared.sd_model = orig_pipeline
        return results
    if 'hires' not in p.skip:
        output = process_hires(p, output)
    if shared.state.interrupted or shared.state.skipped:
        shared.sd_model = orig_pipeline
        return results
    if 'refine' not in p.skip:
        output = process_refine(p, output)
    if shared.state.interrupted or shared.state.skipped:
        shared.sd_model = orig_pipeline
        return results
    extra_networks.deactivate(p)
    timer.process.add('lora', lora_common.timer.total)
    lora_common.timer.clear(complete=True)
    results = process_decode(p, output)
    timer.process.record('decode')
    shared.sd_model = orig_pipeline
    if p.state == '':
        global last_p # pylint: disable=global-statement
        last_p = p
    return results
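

# usage sketch: process_diffusers is called from the main processing loop with a
# fully initialized StableDiffusionProcessing object, e.g.:
#   results = process_diffusers(p) # list of PIL images (or passthrough output for bytes/frames)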