from types import SimpleNamespace
import os
import time
import numpy as np
import torch
import torchvision.transforms.functional as TF
from modules import shared, devices, processing, sd_models, errors, sd_hijack_hypertile, processing_vae, sd_models_compile, hidiffusion
from modules.processing_helpers import resize_hires, calculate_base_steps, calculate_hires_steps, calculate_refiner_steps, save_intermediate, update_sampler
from modules.processing_args import set_pipeline_args
from modules.onnx_impl import preprocess_pipeline as preprocess_onnx_pipeline, check_parameters_changed as olive_check_parameters_changed


debug = shared.log.trace if os.environ.get('SD_DIFFUSERS_DEBUG', None) is not None else lambda *args, **kwargs: None
debug('Trace: DIFFUSERS')


def process_diffusers(p: processing.StableDiffusionProcessing):
    debug(f'Process diffusers args: {vars(p)}')
    orig_pipeline = shared.sd_model
    results = []

    def is_txt2img():
        return sd_models.get_diffusers_task(shared.sd_model) == sd_models.DiffusersTaskType.TEXT_2_IMAGE

    def is_refiner_enabled():
        return p.enable_hr and p.refiner_steps > 0 and p.refiner_start > 0 and p.refiner_start < 1 and shared.sd_refiner is not None

    def update_pipeline(sd_model, p: processing.StableDiffusionProcessing):
        if sd_models.get_diffusers_task(sd_model) == sd_models.DiffusersTaskType.INPAINTING and getattr(p, 'image_mask', None) is None and p.task_args.get('image_mask', None) is None and getattr(p, 'mask', None) is None:
            shared.log.warning('Processing: mode=inpaint mask=None')
            sd_model = sd_models.set_diffuser_pipe(sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
        if shared.opts.cuda_compile_backend == "olive-ai":
            sd_model = olive_check_parameters_changed(p, is_refiner_enabled())
        if sd_model.__class__.__name__ == "OnnxRawPipeline":
            sd_model = preprocess_onnx_pipeline(p)
            nonlocal orig_pipeline
            orig_pipeline = sd_model  # processed ONNX pipeline should not be replaced with original pipeline
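        # re-apply the attention processor if the optimization setting changed after the pipeline was created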
if getattr(sd_model, "current_attn_name", None) != shared.opts.cross_attention_optimization: shared.log.info(f"Setting attention optimization: {shared.opts.cross_attention_optimization}") sd_models.set_diffusers_attention(sd_model) return sd_model # sanitize init_images if hasattr(p, 'init_images') and getattr(p, 'init_images', None) is None: del p.init_images if hasattr(p, 'init_images') and not isinstance(getattr(p, 'init_images', []), list): p.init_images = [p.init_images] if len(getattr(p, 'init_images', [])) > 0: while len(p.init_images) < len(p.prompts): p.init_images.append(p.init_images[-1]) if shared.state.interrupted or shared.state.skipped: shared.sd_model = orig_pipeline return results # pipeline type is set earlier in processing, but check for sanity is_control = getattr(p, 'is_control', False) is True has_images = len(getattr(p, 'init_images' ,[])) > 0 if sd_models.get_diffusers_task(shared.sd_model) != sd_models.DiffusersTaskType.TEXT_2_IMAGE and not has_images and not is_control: shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.TEXT_2_IMAGE) # reset pipeline if hasattr(shared.sd_model, 'unet') and hasattr(shared.sd_model.unet, 'config') and hasattr(shared.sd_model.unet.config, 'in_channels') and shared.sd_model.unet.config.in_channels == 9 and not is_control: shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.INPAINTING) # force pipeline if len(getattr(p, 'init_images', [])) == 0: p.init_images = [TF.to_pil_image(torch.rand((3, getattr(p, 'height', 512), getattr(p, 'width', 512))))] sd_models.move_model(shared.sd_model, devices.device) sd_models_compile.openvino_recompile_model(p, hires=False, refiner=False) # recompile if a parameter changes use_refiner_start = is_txt2img() and is_refiner_enabled() and not p.is_hr_pass and p.refiner_start > 0 and p.refiner_start < 1 use_denoise_start = not is_txt2img() and p.refiner_start > 0 and p.refiner_start < 1 shared.sd_model = update_pipeline(shared.sd_model, p) shared.log.info(f'Base: class={shared.sd_model.__class__.__name__}') update_sampler(p, shared.sd_model) base_args = set_pipeline_args( p=p, model=shared.sd_model, prompts=p.prompts, negative_prompts=p.negative_prompts, prompts_2=[p.refiner_prompt] if len(p.refiner_prompt) > 0 else p.prompts, negative_prompts_2=[p.refiner_negative] if len(p.refiner_negative) > 0 else p.negative_prompts, num_inference_steps=calculate_base_steps(p, use_refiner_start=use_refiner_start, use_denoise_start=use_denoise_start), eta=shared.opts.scheduler_eta, guidance_scale=p.cfg_scale, guidance_rescale=p.diffusers_guidance_rescale, denoising_start=0 if use_refiner_start else p.refiner_start if use_denoise_start else None, denoising_end=p.refiner_start if use_refiner_start else 1 if use_denoise_start else None, output_type='latent' if hasattr(shared.sd_model, 'vae') else 'np', # output_type='pil', clip_skip=p.clip_skip, desc='Base', ) shared.state.sampling_steps = base_args.get('prior_num_inference_steps', None) or p.steps or base_args.get('num_inference_steps', None) if shared.opts.scheduler_eta is not None and shared.opts.scheduler_eta > 0 and shared.opts.scheduler_eta < 1: p.extra_generation_params["Sampler Eta"] = shared.opts.scheduler_eta output = None try: t0 = time.time() sd_models_compile.check_deepcache(enable=True) sd_models.move_model(shared.sd_model, devices.device) hidiffusion.apply(p, shared.sd_model_type) # if 'image' in base_args: # base_args['image'] = set_latents(p) if hasattr(shared.sd_model, 'tgate') 
        if hasattr(shared.sd_model, 'tgate') and getattr(p, 'gate_step', -1) > 0:
            base_args['gate_step'] = p.gate_step
            output = shared.sd_model.tgate(**base_args)  # pylint: disable=not-callable
        else:
            output = shared.sd_model(**base_args)
        if isinstance(output, dict):
            output = SimpleNamespace(**output)
        hidiffusion.unapply()
        sd_models_compile.openvino_post_compile(op="base")  # only executes on compiled vino models
        sd_models_compile.check_deepcache(enable=False)
        if shared.cmd_opts.profile:
            t1 = time.time()
            shared.log.debug(f'Profile: pipeline call: {t1-t0:.2f}')
        if not hasattr(output, 'images') and hasattr(output, 'frames'):
            if hasattr(output.frames[0], 'shape'):
                shared.log.debug(f'Generated: frames={output.frames[0].shape[1]}')
            else:
                shared.log.debug(f'Generated: frames={len(output.frames[0])}')
            output.images = output.frames[0]
        if isinstance(output.images, np.ndarray):
            output.images = torch.from_numpy(output.images)
    except AssertionError as e:
        shared.log.info(e)
    except ValueError as e:
        shared.state.interrupted = True
        shared.log.error(f'Processing: args={base_args} {e}')
        if shared.cmd_opts.debug:
            errors.display(e, 'Processing')
    except RuntimeError as e:
        shared.state.interrupted = True
        shared.log.error(f'Processing: args={base_args} {e}')
        errors.display(e, 'Processing')

    if hasattr(shared.sd_model, 'embedding_db') and len(shared.sd_model.embedding_db.embeddings_used) > 0:  # register used embeddings
        p.extra_generation_params['Embeddings'] = ', '.join(shared.sd_model.embedding_db.embeddings_used)

    shared.state.nextjob()
    if shared.state.interrupted or shared.state.skipped:
        shared.sd_model = orig_pipeline
        return results

    # optional second pass
    if p.enable_hr:
        p.is_hr_pass = True
        p.init_hr(p.hr_scale, p.hr_upscaler, force=p.hr_force)
        prev_job = shared.state.job
        # hires runs on original pipeline
        if hasattr(shared.sd_model, 'restore_pipeline') and shared.sd_model.restore_pipeline is not None:
            shared.sd_model.restore_pipeline()

        # upscale
        if hasattr(p, 'height') and hasattr(p, 'width') and p.hr_resize_mode > 0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5):
            shared.log.info(f'Upscale: mode={p.hr_resize_mode} upscaler="{p.hr_upscaler}" context="{p.hr_resize_context}" resize={p.hr_resize_x}x{p.hr_resize_y} upscale={p.hr_upscale_to_x}x{p.hr_upscale_to_y}')
            p.ops.append('upscale')
            if shared.opts.save and not p.do_not_save_samples and shared.opts.save_images_before_highres_fix and hasattr(shared.sd_model, 'vae'):
                save_intermediate(p, latents=output.images, suffix="-before-hires")
            shared.state.job = 'Upscale'
            output.images = resize_hires(p, latents=output.images)
            sd_hijack_hypertile.hypertile_set(p, hr=True)

        latent_upscale = shared.latent_upscale_modes.get(p.hr_upscaler, None)
        if (latent_upscale is not None or p.hr_force) and getattr(p, 'hr_denoising_strength', p.denoising_strength) > 0:
            p.ops.append('hires')
            sd_models_compile.openvino_recompile_model(p, hires=True, refiner=False)
            if shared.sd_model.__class__.__name__ == "OnnxRawPipeline":
                shared.sd_model = preprocess_onnx_pipeline(p)
                p.hr_force = True
            # hires
            p.denoising_strength = getattr(p, 'hr_denoising_strength', p.denoising_strength)
            if p.hr_force and p.denoising_strength == 0:
                shared.log.warning('HiRes skip: denoising=0')
                p.hr_force = False
            if p.hr_force:
                shared.state.job_count = 2 * p.n_iter
                shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
                shared.log.info(f'HiRes: class={shared.sd_model.__class__.__name__} sampler="{p.hr_sampler_name}"')
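                # upscale and flux pipelines appear to require image rather than latent input, hence the decode to pil before the hires pass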
                if 'Upscale' in shared.sd_model.__class__.__name__ or 'Flux' in shared.sd_model.__class__.__name__:
                    output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height)
                if p.is_control and hasattr(p, 'task_args') and p.task_args.get('image', None) is not None:
                    if hasattr(shared.sd_model, "vae") and output.images is not None and len(output.images) > 0:
                        output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.hr_upscale_to_x, height=p.hr_upscale_to_y)  # controlnet cannot deal with latent input
                    p.task_args['image'] = output.images  # replace so hires uses new output
                sd_models.move_model(shared.sd_model, devices.device)
                orig_denoise = p.denoising_strength
                p.denoising_strength = getattr(p, 'hr_denoising_strength', p.denoising_strength)
                update_sampler(p, shared.sd_model, second_pass=True)
                hires_args = set_pipeline_args(
                    p=p,
                    model=shared.sd_model,
                    prompts=[p.refiner_prompt] if len(p.refiner_prompt) > 0 else p.prompts,
                    negative_prompts=[p.refiner_negative] if len(p.refiner_negative) > 0 else p.negative_prompts,
                    prompts_2=[p.refiner_prompt] if len(p.refiner_prompt) > 0 else p.prompts,
                    negative_prompts_2=[p.refiner_negative] if len(p.refiner_negative) > 0 else p.negative_prompts,
                    num_inference_steps=calculate_hires_steps(p),
                    eta=shared.opts.scheduler_eta,
                    guidance_scale=p.image_cfg_scale if p.image_cfg_scale is not None else p.cfg_scale,
                    guidance_rescale=p.diffusers_guidance_rescale,
                    output_type='latent' if hasattr(shared.sd_model, 'vae') else 'np',
                    clip_skip=p.clip_skip,
                    image=output.images,
                    strength=p.denoising_strength,
                    desc='Hires',
                )
                shared.state.job = 'HiRes'
                shared.state.sampling_steps = hires_args.get('prior_num_inference_steps', None) or p.steps or hires_args.get('num_inference_steps', None)
                try:
                    sd_models_compile.check_deepcache(enable=True)
                    output = shared.sd_model(**hires_args)  # pylint: disable=not-callable
                    if isinstance(output, dict):
                        output = SimpleNamespace(**output)
                    sd_models_compile.check_deepcache(enable=False)
                    sd_models_compile.openvino_post_compile(op="base")
                except AssertionError as e:
                    shared.log.info(e)
                p.denoising_strength = orig_denoise
        shared.state.job = prev_job
        shared.state.nextjob()
        p.is_hr_pass = False

    # optional refiner pass or decode
    if is_refiner_enabled():
        prev_job = shared.state.job
        shared.state.job = 'Refine'
        shared.state.job_count += 1
        if shared.opts.save and not p.do_not_save_samples and shared.opts.save_images_before_refiner and hasattr(shared.sd_model, 'vae'):
            save_intermediate(p, latents=output.images, suffix="-before-refiner")
        if shared.opts.diffusers_move_base:
            shared.log.debug('Moving to CPU: model=base')
            sd_models.move_model(shared.sd_model, devices.cpu)
        if shared.state.interrupted or shared.state.skipped:
            shared.sd_model = orig_pipeline
            return results

        if shared.opts.diffusers_offload_mode == "balanced":
            shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
        if shared.opts.diffusers_move_refiner:
            sd_models.move_model(shared.sd_refiner, devices.device)
        p.ops.append('refine')
        p.is_refiner_pass = True
        sd_models_compile.openvino_recompile_model(p, hires=False, refiner=True)
        shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.TEXT_2_IMAGE)
        shared.sd_refiner = sd_models.set_diffuser_pipe(shared.sd_refiner, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
        for i in range(len(output.images)):
            image = output.images[i]
            noise_level = round(350 * p.denoising_strength)
            output_type = 'latent' if hasattr(shared.sd_refiner, 'vae') else 'np'
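            # upscale and flux refiners consume decoded images rather than latents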
            if 'Upscale' in shared.sd_refiner.__class__.__name__ or 'Flux' in shared.sd_refiner.__class__.__name__:
                image = processing_vae.vae_decode(latents=image, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height)
                p.extra_generation_params['Noise level'] = noise_level
                output_type = 'np'
            if hasattr(p, 'task_args') and p.task_args.get('image', None) is not None and output is not None:  # replace input with output so it can be used by hires/refine
                p.task_args['image'] = image
            shared.log.info(f'Refiner: class={shared.sd_refiner.__class__.__name__}')
            update_sampler(p, shared.sd_refiner, second_pass=True)
            refiner_args = set_pipeline_args(
                p=p,
                model=shared.sd_refiner,
                prompts=[p.refiner_prompt] if len(p.refiner_prompt) > 0 else p.prompts[i],
                negative_prompts=[p.refiner_negative] if len(p.refiner_negative) > 0 else p.negative_prompts[i],
                num_inference_steps=calculate_refiner_steps(p),
                eta=shared.opts.scheduler_eta,
                # strength=p.denoising_strength,
                noise_level=noise_level,  # StableDiffusionUpscalePipeline only
                guidance_scale=p.image_cfg_scale if p.image_cfg_scale is not None else p.cfg_scale,
                guidance_rescale=p.diffusers_guidance_rescale,
                denoising_start=p.refiner_start if p.refiner_start > 0 and p.refiner_start < 1 else None,
                denoising_end=1 if p.refiner_start > 0 and p.refiner_start < 1 else None,
                image=image,
                output_type=output_type,
                clip_skip=p.clip_skip,
                desc='Refiner',
            )
            shared.state.sampling_steps = refiner_args.get('prior_num_inference_steps', None) or p.steps or refiner_args.get('num_inference_steps', None)
            try:
                if 'requires_aesthetics_score' in shared.sd_refiner.config:  # sdxl-model needs false and sdxl-refiner needs true
                    shared.sd_refiner.register_to_config(requires_aesthetics_score=getattr(shared.sd_refiner, 'tokenizer', None) is None)
                refiner_output = shared.sd_refiner(**refiner_args)  # pylint: disable=not-callable
                if isinstance(refiner_output, dict):
                    refiner_output = SimpleNamespace(**refiner_output)
                sd_models_compile.openvino_post_compile(op="refiner")
            except AssertionError as e:
                shared.log.info(e)

            if not shared.state.interrupted and not shared.state.skipped:
                refiner_images = processing_vae.vae_decode(latents=refiner_output.images, model=shared.sd_refiner, full_quality=True, width=max(p.width, p.hr_upscale_to_x), height=max(p.height, p.hr_upscale_to_y))
                for refiner_image in refiner_images:
                    results.append(refiner_image)

        if shared.opts.diffusers_offload_mode == "balanced":
            shared.sd_refiner = sd_models.apply_balanced_offload(shared.sd_refiner)
        elif shared.opts.diffusers_move_refiner:
            shared.log.debug('Moving to CPU: model=refiner')
            sd_models.move_model(shared.sd_refiner, devices.cpu)

        shared.state.job = prev_job
        shared.state.nextjob()
        p.is_refiner_pass = False
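    # note: if hires ran, the latents are larger than the requested size, so the final decode below picks the max of base and upscale dimensions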
    # final decode since there is no refiner
    if not is_refiner_enabled():
        if output is not None:
            if not hasattr(output, 'images') and hasattr(output, 'frames'):
                shared.log.debug(f'Generated: frames={len(output.frames[0])}')
                output.images = output.frames[0]
            if hasattr(shared.sd_model, "vae") and output.images is not None and len(output.images) > 0:
                if p.hr_resize_mode > 0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5):
                    width = max(getattr(p, 'width', 0), getattr(p, 'hr_upscale_to_x', 0))
                    height = max(getattr(p, 'height', 0), getattr(p, 'hr_upscale_to_y', 0))
                else:
                    width = getattr(p, 'width', 0)
                    height = getattr(p, 'height', 0)
                results = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, width=width, height=height)
            elif hasattr(output, 'images'):
                results = output.images
            else:
                shared.log.warning('Processing returned no results')
                results = []
        else:
            shared.log.warning('Processing returned no results')
            results = []

    shared.sd_model = orig_pipeline
    return results