1
0
mirror of https://github.com/vladmandic/sdnext.git synced 2026-01-27 15:02:48 +03:00
Files
sdnext/scripts/mulan.py
Vladimir Mandic e8b5ea3847 major refactor: remove backend original
Signed-off-by: Vladimir Mandic <mandic00@live.com>
2025-07-05 13:16:46 -04:00

136 lines
6.0 KiB
Python

# https://github.com/mulanai/MuLan
# https://huggingface.co/mulanai/mulan-lang-adapter
# https://huggingface.co/OpenGVLab/InternVL-14B-224px
"""
- [MuLan](https://github.com/mulanai/MuLan) Multi-langunage prompts - wirte your prompts in ~110 auto-detected languages!
Compatible with SD15 and SDXL
Enable in scripts -> MuLan and set encoder to `InternVL-14B-224px` encoder
(that is currently only supported encoder, but others will be added)
Note: Model will be auto-downloaded on first use: note its huge size of 27GB
Even executing it in FP16 context will require ~16GB of VRAM for text encoder alone
*Note*: Uses fixed prompt parser, so no prompt attention will be used
Examples:
- English: photo of a beautiful woman wearing a white bikini on a beach with a city skyline in the background
- Croatian: fotografija lijepe žene u bijelom bikiniju na plaži s gradskim obzorom u pozadini
- Italian: Foto di una bella donna che indossa un bikini bianco su una spiaggia con lo skyline di una città sullo sfondo
- Spanish: Foto de una hermosa mujer con un bikini blanco en una playa con un horizonte de la ciudad en el fondo
- German: Foto einer schönen Frau in einem weißen Bikini an einem Strand mit einer Skyline der Stadt im Hintergrund
- Arabic: صورة لامرأة جميلة ترتدي بيكيني أبيض على شاطئ مع أفق المدينة في الخلفية
- Japanese: 街のスカイラインを背景にビーチで白いビキニを着た美しい女性の写真
- Chinese: 一个美丽的女人在海滩上穿着白色比基尼的照片, 背景是城市天际线
- Korean: 도시의 스카이라인을 배경으로 해변에서 흰색 비키니를 입은 아름 다운 여성의 사진
"""
import gradio as gr
from modules import shared, scripts_manager, processing, devices
ENCODERS =[
# 'None',
'OpenGVLab/InternVL-14B-224px',
# 'OpenGVLab/InternViT-6B-224px',
# 'OpenGVLab/InternViT-6B-448px-V1-0',
# 'OpenGVLab/InternViT-6B-448px-V1-2',
# 'OpenGVLab/InternViT-6B-448px-V1-5',
]
GITPATH = 'git+https://github.com/mulanai/MuLan'
pipe_type = None
adapter = None
text_encoder = None
tokenizer = None
text_encoder_path = None
class Script(scripts_manager.Script):
def title(self):
return 'MuLan: Multi Language Prompts'
def show(self, is_img2img):
return True
def ui(self, _is_img2img):
with gr.Row():
gr.HTML('<a href="https://github.com/mulanai/MuLan">&nbsp MuLan: Multi Language Prompts</a><br>')
with gr.Row():
selected_encoder = gr.Dropdown(label='Encoder', choices=ENCODERS, value=ENCODERS[0])
return [selected_encoder]
def run(self, p: processing.StableDiffusionProcessing, selected_encoder): # pylint: disable=arguments-differ
global pipe_type, adapter, text_encoder, tokenizer, text_encoder_path # pylint: disable=global-statement
if not selected_encoder or selected_encoder == 'None':
return None
# create pipeline
if shared.sd_model_type != 'sd' and shared.sd_model_type != 'sdxl':
shared.log.error(f'MuLan: incorrect base model: {shared.sd_model.__class__.__name__}')
return None
adapter_path = None
if shared.sd_model_type == 'sd':
adapter_path = 'mulanai/mulan-lang-adapter::sd15_aesthetic.pth'
if shared.sd_model_type == 'sdxl':
adapter_path = 'mulanai/mulan-lang-adapter::sdxl_aesthetic.pth'
if adapter_path is None:
return None
# install-on-demand
import installer
installer.install(GITPATH, 'mulankit')
import mulankit
# backup pipeline and params
orig_pipeline = shared.sd_model
orig_prompt_attention = shared.opts.prompt_attention
# mulan only works with single image, single prompt and in fixed attention
p.batch_size = 1
p.n_iter = 1
shared.opts.prompt_attention = 'fixed'
if isinstance(p.prompt, list):
p.prompt = p.prompt[0]
p.task_args['prompt'] = p.prompt
if isinstance(p.negative_prompt, list):
p.prompt = p.negative_prompt[0]
p.task_args['negative_prompt'] = p.negative_prompt
if pipe_type != ('sd15' if shared.sd_model_type == 'sd' else 'sdxl'):
pipe_type = 'sd15' if shared.sd_model_type == 'sd' else 'sdxl'
adapter = None
if text_encoder is None or tokenizer is None or text_encoder_path != selected_encoder:
text_encoder_path = selected_encoder
shared.log.debug(f'MuLan loading: encoder="{text_encoder_path}"')
text_encoder = None
tokenizer = None
devices.torch_gc(force=True)
text_encoder, tokenizer = mulankit.api.load_internvl(text_encoder_path, text_encoder, tokenizer, torch_dtype=shared.sd_model.text_encoder.dtype)
devices.torch_gc(force=True)
if adapter is None:
shared.log.debug(f'MuLan loading: adapter="{adapter_path}"')
adapter = None
devices.torch_gc(force=True)
adapter = mulankit.api.load_adapter(adapter_path, type=pipe_type)
devices.torch_gc(force=True)
if not getattr(shared.sd_model, 'mulan', False):
shared.log.info(f'MuLan apply: adapter="{adapter_path}" encoder="{text_encoder_path}"')
# mulankit.setup(force_sdxl_zero_empty_prompt=False, force_sdxl_zero_pool_prompt=False)
shared.sd_model = mulankit.transform(shared.sd_model,
adapter=adapter,
adapter_path=adapter_path,
text_encoder=text_encoder,
text_encoder_path=text_encoder_path,
pipe_type=pipe_type,
replace=False)
shared.sd_model.mulan = True
devices.torch_gc(force=True)
processing.fix_seed(p)
processed: processing.Processed = processing.process_images(p) # runs processing using main loop
# restore pipeline and params
shared.opts.data['prompt_attention'] = orig_prompt_attention
shared.sd_model = orig_pipeline
return processed