# https://github.com/mulanai/MuLan
# https://huggingface.co/mulanai/mulan-lang-adapter
# https://huggingface.co/OpenGVLab/InternVL-14B-224px
"""
- [MuLan](https://github.com/mulanai/MuLan) Multi-language prompts - write your prompts in ~110 auto-detected languages!
  Compatible with SD15 and SDXL
  Enable in scripts -> MuLan and set encoder to `InternVL-14B-224px` (currently the only supported encoder; others will be added)
  Note: the model is auto-downloaded on first use; mind its large size of ~27GB
  Even when executed in FP16, it requires ~16GB of VRAM for the text encoder alone
  *Note*: uses the fixed prompt parser, so prompt attention syntax is ignored
  Examples:
  - English: photo of a beautiful woman wearing a white bikini on a beach with a city skyline in the background
  - Croatian: fotografija lijepe žene u bijelom bikiniju na plaži s gradskim obzorom u pozadini
  - Italian: Foto di una bella donna che indossa un bikini bianco su una spiaggia con lo skyline di una città sullo sfondo
  - Spanish: Foto de una hermosa mujer con un bikini blanco en una playa con un horizonte de la ciudad en el fondo
  - German: Foto einer schönen Frau in einem weißen Bikini an einem Strand mit einer Skyline der Stadt im Hintergrund
  - Arabic: صورة لامرأة جميلة ترتدي بيكيني أبيض على شاطئ مع أفق المدينة في الخلفية
  - Japanese: 街のスカイラインを背景にビーチで白いビキニを着た美しい女性の写真
  - Chinese: 一个美丽的女人在海滩上穿着白色比基尼的照片, 背景是城市天际线
  - Korean: 도시의 스카이라인을 배경으로 해변에서 흰색 비키니를 입은 아름다운 여성의 사진
"""
import gradio as gr
from modules import shared, scripts_manager, processing, devices


ENCODERS = [
    # 'None',
    'OpenGVLab/InternVL-14B-224px',
    # 'OpenGVLab/InternViT-6B-224px',
    # 'OpenGVLab/InternViT-6B-448px-V1-0',
    # 'OpenGVLab/InternViT-6B-448px-V1-2',
    # 'OpenGVLab/InternViT-6B-448px-V1-5',
]
GITPATH = 'git+https://github.com/mulanai/MuLan'

# encoder, tokenizer and adapter are cached in module globals so repeated runs do not reload the ~27GB model
pipe_type = None
adapter = None
text_encoder = None
tokenizer = None
text_encoder_path = None


class Script(scripts_manager.Script):
    def title(self):
        return 'MuLan: Multi Language Prompts'

    def show(self, is_img2img):
        return True

    def ui(self, _is_img2img):
        with gr.Row():
            gr.HTML('&nbsp MuLan: Multi Language Prompts<br>')
        with gr.Row():
            selected_encoder = gr.Dropdown(label='Encoder', choices=ENCODERS, value=ENCODERS[0])
        return [selected_encoder]
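
    # run() wraps the main processing loop:
    #   1. validate that the loaded base model is SD15 or SDXL and pick the matching MuLan adapter
    #   2. install mulankit on demand and back up the current pipeline and prompt-attention setting
    #   3. force single-image, single-prompt, fixed-attention processing (MuLan limitations)
    #   4. load the InternVL text encoder and the language adapter, cached in module globals
    #   5. transform the pipeline via mulankit.transform, run processing, then restore the originals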
    def run(self, p: processing.StableDiffusionProcessing, selected_encoder): # pylint: disable=arguments-differ
        global pipe_type, adapter, text_encoder, tokenizer, text_encoder_path # pylint: disable=global-statement
        if not selected_encoder or selected_encoder == 'None':
            return None

        # validate base model and select the matching language adapter
        if shared.sd_model_type not in ('sd', 'sdxl'):
            shared.log.error(f'MuLan: incorrect base model: {shared.sd_model.__class__.__name__}')
            return None
        adapter_path = None
        if shared.sd_model_type == 'sd':
            adapter_path = 'mulanai/mulan-lang-adapter::sd15_aesthetic.pth'
        if shared.sd_model_type == 'sdxl':
            adapter_path = 'mulanai/mulan-lang-adapter::sdxl_aesthetic.pth'
        if adapter_path is None:
            return None

        # install-on-demand
        import installer
        installer.install(GITPATH, 'mulankit')
        import mulankit

        # backup pipeline and params
        orig_pipeline = shared.sd_model
        orig_prompt_attention = shared.opts.prompt_attention

        # mulan only works with a single image, a single prompt and fixed attention
        p.batch_size = 1
        p.n_iter = 1
        shared.opts.prompt_attention = 'fixed'
        if isinstance(p.prompt, list):
            p.prompt = p.prompt[0]
            p.task_args['prompt'] = p.prompt
        if isinstance(p.negative_prompt, list):
            p.negative_prompt = p.negative_prompt[0]
            p.task_args['negative_prompt'] = p.negative_prompt

        # reload encoder and adapter only when the base model type or selected encoder changes
        if pipe_type != ('sd15' if shared.sd_model_type == 'sd' else 'sdxl'):
            pipe_type = 'sd15' if shared.sd_model_type == 'sd' else 'sdxl'
            adapter = None
        if text_encoder is None or tokenizer is None or text_encoder_path != selected_encoder:
            text_encoder_path = selected_encoder
            shared.log.debug(f'MuLan loading: encoder="{text_encoder_path}"')
            text_encoder = None
            tokenizer = None
            devices.torch_gc(force=True)
            text_encoder, tokenizer = mulankit.api.load_internvl(text_encoder_path, text_encoder, tokenizer, torch_dtype=shared.sd_model.text_encoder.dtype)
            devices.torch_gc(force=True)
        if adapter is None:
            shared.log.debug(f'MuLan loading: adapter="{adapter_path}"')
            devices.torch_gc(force=True)
            adapter = mulankit.api.load_adapter(adapter_path, type=pipe_type)
            devices.torch_gc(force=True)
        if not getattr(shared.sd_model, 'mulan', False):
            shared.log.info(f'MuLan apply: adapter="{adapter_path}" encoder="{text_encoder_path}"')
            # mulankit.setup(force_sdxl_zero_empty_prompt=False, force_sdxl_zero_pool_prompt=False)
            shared.sd_model = mulankit.transform(shared.sd_model, adapter=adapter, adapter_path=adapter_path, text_encoder=text_encoder, text_encoder_path=text_encoder_path, pipe_type=pipe_type, replace=False)
            shared.sd_model.mulan = True
            devices.torch_gc(force=True)

        processing.fix_seed(p)
        processed: processing.Processed = processing.process_images(p) # runs processing using the main loop

        # restore pipeline and params
        shared.opts.data['prompt_attention'] = orig_prompt_attention
        shared.sd_model = orig_pipeline
        return processed
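

# For reference only: a minimal standalone sketch of the mulankit calls this script wires into
# the webui pipeline. `pipe` is assumed to be an already-loaded SD15 diffusers pipeline; the
# call signatures below mirror the ones used in run() above, but the snippet is an illustration,
# not part of the script:
#
#   import mulankit
#   text_encoder, tokenizer = mulankit.api.load_internvl('OpenGVLab/InternVL-14B-224px', None, None, torch_dtype=pipe.text_encoder.dtype)
#   adapter = mulankit.api.load_adapter('mulanai/mulan-lang-adapter::sd15_aesthetic.pth', type='sd15')
#   pipe = mulankit.transform(pipe, adapter=adapter, adapter_path='mulanai/mulan-lang-adapter::sd15_aesthetic.pth',
#                             text_encoder=text_encoder, text_encoder_path='OpenGVLab/InternVL-14B-224px',
#                             pipe_type='sd15', replace=False)
#   image = pipe('fotografija lijepe žene u bijelom bikiniju na plaži').images[0]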