diff --git a/example_workflows/wanvideo_1_3B_UniLumos_relight_example_01.json b/example_workflows/wanvideo_1_3B_UniLumos_relight_example_01.json
new file mode 100644
index 0000000..bf0da46
--- /dev/null
+++ b/example_workflows/wanvideo_1_3B_UniLumos_relight_example_01.json
@@ -0,0 +1,1938 @@
+{
+ "id": "206247b6-9fec-4ed2-8927-e4f388c674d4",
+ "revision": 0,
+ "last_node_id": 153,
+ "last_link_id": 279,
+ "nodes": [
+ {
+ "id": 35,
+ "type": "WanVideoTorchCompileSettings",
+ "pos": [
+ -664.2244873046875,
+ -1274.048828125
+ ],
+ "size": [
+ 390.5999755859375,
+ 250
+ ],
+ "flags": {},
+ "order": 0,
+ "mode": 0,
+ "inputs": [],
+ "outputs": [
+ {
+ "name": "torch_compile_args",
+ "type": "WANCOMPILEARGS",
+ "slot_index": 0,
+ "links": []
+ }
+ ],
+ "properties": {
+ "cnr_id": "ComfyUI-WanVideoWrapper",
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
+ "Node name for S&R": "WanVideoTorchCompileSettings"
+ },
+ "widgets_values": [
+ "inductor",
+ false,
+ "default",
+ false,
+ 64,
+ true,
+ 128,
+ false,
+ false
+ ],
+ "color": "#223",
+ "bgcolor": "#335"
+ },
+ {
+ "id": 11,
+ "type": "LoadWanVideoT5TextEncoder",
+ "pos": [
+ -443.7521667480469,
+ -535.9060668945312
+ ],
+ "size": [
+ 377.1661376953125,
+ 130
+ ],
+ "flags": {},
+ "order": 1,
+ "mode": 0,
+ "inputs": [],
+ "outputs": [
+ {
+ "name": "wan_t5_model",
+ "type": "WANTEXTENCODER",
+ "slot_index": 0,
+ "links": [
+ 162
+ ]
+ }
+ ],
+ "properties": {
+ "cnr_id": "ComfyUI-WanVideoWrapper",
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
+ "Node name for S&R": "LoadWanVideoT5TextEncoder"
+ },
+ "widgets_values": [
+ "umt5-xxl-enc-bf16.safetensors",
+ "bf16",
+ "offload_device",
+ "disabled"
+ ],
+ "color": "#332922",
+ "bgcolor": "#593930"
+ },
+ {
+ "id": 121,
+ "type": "WanVideoDecode",
+ "pos": [
+ 1233.328125,
+ -938.7442626953125
+ ],
+ "size": [
+ 315,
+ 198
+ ],
+ "flags": {},
+ "order": 24,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "vae",
+ "type": "WANVAE",
+ "link": 241
+ },
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "link": 196
+ }
+ ],
+ "outputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "slot_index": 0,
+ "links": [
+ 214
+ ]
+ }
+ ],
+ "properties": {
+ "cnr_id": "ComfyUI-WanVideoWrapper",
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
+ "Node name for S&R": "WanVideoDecode"
+ },
+ "widgets_values": [
+ false,
+ 272,
+ 272,
+ 144,
+ 128,
+ "default"
+ ],
+ "color": "#322",
+ "bgcolor": "#533"
+ },
+ {
+ "id": 84,
+ "type": "MarkdownNote",
+ "pos": [
+ -1130.9693603515625,
+ -908.9747314453125
+ ],
+ "size": [
+ 408.80950927734375,
+ 131.6868133544922
+ ],
+ "flags": {},
+ "order": 2,
+ "mode": 0,
+ "inputs": [],
+ "outputs": [],
+ "properties": {},
+ "widgets_values": [
+ "Model links:\n\n[https://huggingface.co/Kijai/WanVideo_comfy/tree/main/UniLumos](https://huggingface.co/Kijai/WanVideo_comfy/tree/main/UniLumos)\n\nText encoder:\n\n[https://huggingface.co/Kijai/WanVideo_comfy/blob/main/umt5-xxl-enc-bf16.safetensors](https://huggingface.co/Kijai/WanVideo_comfy/blob/main/umt5-xxl-enc-bf16.safetensors)\n"
+ ],
+ "color": "#432",
+ "bgcolor": "#653"
+ },
+ {
+ "id": 140,
+ "type": "WanVideoVAELoader",
+ "pos": [
+ -469.9214988109594,
+ -739.4890567309355
+ ],
+ "size": [
+ 270,
+ 82
+ ],
+ "flags": {},
+ "order": 3,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "compile_args",
+ "shape": 7,
+ "type": "WANCOMPILEARGS",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "vae",
+ "type": "WANVAE",
+ "links": [
+ 239,
+ 240,
+ 241
+ ]
+ }
+ ],
+ "properties": {
+ "cnr_id": "ComfyUI-WanVideoWrapper",
+ "ver": "d3f33a9f09e774e43500eed1abf7d975997dd1ee",
+ "Node name for S&R": "WanVideoVAELoader"
+ },
+ "widgets_values": [
+ "wanvideo\\Wan2_1_VAE_bf16.safetensors",
+ "bf16"
+ ],
+ "color": "#322",
+ "bgcolor": "#533"
+ },
+ {
+ "id": 129,
+ "type": "WanVideoUniLumosEmbeds",
+ "pos": [
+ 632.0287517443114,
+ -249.0236941522209
+ ],
+ "size": [
+ 291.791015625,
+ 126
+ ],
+ "flags": {},
+ "order": 22,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "foreground_latents",
+ "shape": 7,
+ "type": "LATENT",
+ "link": 253
+ },
+ {
+ "name": "background_latents",
+ "shape": 7,
+ "type": "LATENT",
+ "link": 247
+ },
+ {
+ "name": "width",
+ "type": "INT",
+ "widget": {
+ "name": "width"
+ },
+ "link": 270
+ },
+ {
+ "name": "height",
+ "type": "INT",
+ "widget": {
+ "name": "height"
+ },
+ "link": 271
+ },
+ {
+ "name": "num_frames",
+ "type": "INT",
+ "widget": {
+ "name": "num_frames"
+ },
+ "link": 272
+ }
+ ],
+ "outputs": [
+ {
+ "name": "image_embeds",
+ "type": "WANVIDIMAGE_EMBEDS",
+ "links": [
+ 218
+ ]
+ }
+ ],
+ "properties": {
+ "cnr_id": "ComfyUI-WanVideoWrapper",
+ "ver": "d3f33a9f09e774e43500eed1abf7d975997dd1ee",
+ "Node name for S&R": "WanVideoUniLumosEmbeds"
+ },
+ "widgets_values": [
+ 832,
+ 480,
+ 81
+ ]
+ },
+ {
+ "id": 131,
+ "type": "TransparentBGSession+",
+ "pos": [
+ -1397.6703345939957,
+ 299.3425442320567
+ ],
+ "size": [
+ 299.1265625,
+ 82
+ ],
+ "flags": {},
+ "order": 4,
+ "mode": 0,
+ "inputs": [],
+ "outputs": [
+ {
+ "name": "REMBG_SESSION",
+ "type": "REMBG_SESSION",
+ "links": [
+ 220
+ ]
+ }
+ ],
+ "properties": {
+ "aux_id": "kijai/ComfyUI_essentials",
+ "ver": "76e9d1e4399bd025ce8b12c290753d58f9f53e93",
+ "Node name for S&R": "TransparentBGSession+"
+ },
+ "widgets_values": [
+ "base",
+ true
+ ]
+ },
+ {
+ "id": 132,
+ "type": "ImageRemoveBackground+",
+ "pos": [
+ -1070.6544436657757,
+ 172.50010261795865
+ ],
+ "size": [
+ 236.5494140625,
+ 46
+ ],
+ "flags": {},
+ "order": 15,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "rembg_session",
+ "type": "REMBG_SESSION",
+ "link": 220
+ },
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 251
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": null
+ },
+ {
+ "name": "MASK",
+ "type": "MASK",
+ "links": [
+ 248
+ ]
+ }
+ ],
+ "properties": {
+ "aux_id": "kijai/ComfyUI_essentials",
+ "ver": "76e9d1e4399bd025ce8b12c290753d58f9f53e93",
+ "Node name for S&R": "ImageRemoveBackground+"
+ },
+ "widgets_values": []
+ },
+ {
+ "id": 143,
+ "type": "InvertMask",
+ "pos": [
+ -953.89299150393,
+ 265.9695110814195
+ ],
+ "size": [
+ 140,
+ 26
+ ],
+ "flags": {
+ "collapsed": true
+ },
+ "order": 16,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": 248
+ }
+ ],
+ "outputs": [
+ {
+ "name": "MASK",
+ "type": "MASK",
+ "links": [
+ 249,
+ 275
+ ]
+ }
+ ],
+ "properties": {
+ "cnr_id": "comfy-core",
+ "ver": "0.3.68",
+ "Node name for S&R": "InvertMask"
+ },
+ "widgets_values": []
+ },
+ {
+ "id": 138,
+ "type": "WanVideoEncode",
+ "pos": [
+ 41.81258209963073,
+ -175.52415894294688
+ ],
+ "size": [
+ 270,
+ 242
+ ],
+ "flags": {},
+ "order": 19,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "vae",
+ "type": "WANVAE",
+ "link": 240
+ },
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 257
+ },
+ {
+ "name": "mask",
+ "shape": 7,
+ "type": "MASK",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "links": [
+ 253
+ ]
+ }
+ ],
+ "properties": {
+ "cnr_id": "ComfyUI-WanVideoWrapper",
+ "ver": "d3f33a9f09e774e43500eed1abf7d975997dd1ee",
+ "Node name for S&R": "WanVideoEncode"
+ },
+ "widgets_values": [
+ false,
+ 272,
+ 272,
+ 144,
+ 128,
+ 0,
+ 1
+ ]
+ },
+ {
+ "id": 142,
+ "type": "DrawMaskOnImage",
+ "pos": [
+ -396.8575317980403,
+ 159.54059918670168
+ ],
+ "size": [
+ 270,
+ 102
+ ],
+ "flags": {},
+ "order": 17,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 252
+ },
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": 249
+ }
+ ],
+ "outputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "links": [
+ 257
+ ]
+ }
+ ],
+ "properties": {
+ "cnr_id": "comfyui-kjnodes",
+ "ver": "c661baadd9683c0033cd2a6ad90157c6d099a6c2",
+ "Node name for S&R": "DrawMaskOnImage"
+ },
+ "widgets_values": [
+ "127,127,127",
+ "cpu"
+ ]
+ },
+ {
+ "id": 137,
+ "type": "ImageResizeKJv2",
+ "pos": [
+ -757.5118370533156,
+ -188.2085103246569
+ ],
+ "size": [
+ 270,
+ 336
+ ],
+ "flags": {},
+ "order": 14,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 250
+ },
+ {
+ "name": "mask",
+ "shape": 7,
+ "type": "MASK",
+ "link": null
+ },
+ {
+ "name": "width",
+ "type": "INT",
+ "widget": {
+ "name": "width"
+ },
+ "link": 231
+ },
+ {
+ "name": "height",
+ "type": "INT",
+ "widget": {
+ "name": "height"
+ },
+ "link": 232
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 251,
+ 252,
+ 276
+ ]
+ },
+ {
+ "name": "width",
+ "type": "INT",
+ "links": [
+ 270
+ ]
+ },
+ {
+ "name": "height",
+ "type": "INT",
+ "links": [
+ 271
+ ]
+ },
+ {
+ "name": "mask",
+ "type": "MASK",
+ "links": null
+ }
+ ],
+ "properties": {
+ "cnr_id": "comfyui-kjnodes",
+ "ver": "3fcd22f2fe2be69c3229f192362b91888277cbcb",
+ "Node name for S&R": "ImageResizeKJv2"
+ },
+ "widgets_values": [
+ 640,
+ 640,
+ "lanczos",
+ "crop",
+ "0, 0, 0",
+ "center",
+ 2,
+ "cpu",
+ "\n| Output: | 49 x 832 x 480 | 223.95MB |\n"
+ ]
+ },
+ {
+ "id": 116,
+ "type": "ImageResizeKJv2",
+ "pos": [
+ -913.566903523739,
+ 605.914177132552
+ ],
+ "size": [
+ 270,
+ 336
+ ],
+ "flags": {},
+ "order": 13,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 206
+ },
+ {
+ "name": "mask",
+ "shape": 7,
+ "type": "MASK",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 268
+ ]
+ },
+ {
+ "name": "width",
+ "type": "INT",
+ "links": [
+ 231
+ ]
+ },
+ {
+ "name": "height",
+ "type": "INT",
+ "links": [
+ 232
+ ]
+ },
+ {
+ "name": "mask",
+ "type": "MASK",
+ "links": null
+ }
+ ],
+ "properties": {
+ "cnr_id": "comfyui-kjnodes",
+ "ver": "3fcd22f2fe2be69c3229f192362b91888277cbcb",
+ "Node name for S&R": "ImageResizeKJv2"
+ },
+ "widgets_values": [
+ 832,
+ 480,
+ "lanczos",
+ "crop",
+ "0, 0, 0",
+ "center",
+ 2,
+ "cpu",
+ "| Output: | 49 x 832 x 480 | 223.95MB |\n"
+ ]
+ },
+ {
+ "id": 130,
+ "type": "WanVideoEncode",
+ "pos": [
+ -92.55150017434968,
+ 616.0030055996451
+ ],
+ "size": [
+ 270,
+ 242
+ ],
+ "flags": {},
+ "order": 20,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "vae",
+ "type": "WANVAE",
+ "link": 239
+ },
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 269
+ },
+ {
+ "name": "mask",
+ "shape": 7,
+ "type": "MASK",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "links": [
+ 247
+ ]
+ }
+ ],
+ "properties": {
+ "cnr_id": "ComfyUI-WanVideoWrapper",
+ "ver": "d3f33a9f09e774e43500eed1abf7d975997dd1ee",
+ "Node name for S&R": "WanVideoEncode"
+ },
+ "widgets_values": [
+ false,
+ 272,
+ 272,
+ 144,
+ 128,
+ 0,
+ 1
+ ]
+ },
+ {
+ "id": 148,
+ "type": "DrawGaussianNoiseOnImage",
+ "pos": [
+ -496.2003871215788,
+ 611.1061525629224
+ ],
+ "size": [
+ 300.6509765625,
+ 126
+ ],
+ "flags": {},
+ "order": 18,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 268
+ },
+ {
+ "name": "mask",
+ "type": "MASK",
+ "link": 275
+ }
+ ],
+ "outputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "links": [
+ 269,
+ 277
+ ]
+ }
+ ],
+ "properties": {
+ "cnr_id": "ComfyUI-WanVideoWrapper",
+ "ver": "68392684b5d6c8948b7e051690bfb2a935ed31c9",
+ "Node name for S&R": "DrawGaussianNoiseOnImage"
+ },
+ "widgets_values": [
+ "cpu",
+ 42,
+ "fixed"
+ ]
+ },
+ {
+ "id": 141,
+ "type": "VHS_LoadVideo",
+ "pos": [
+ -1375.0499073306823,
+ -235.4550546032312
+ ],
+ "size": [
+ 247.455078125,
+ 455.2626221778634
+ ],
+ "flags": {},
+ "order": 5,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "meta_batch",
+ "shape": 7,
+ "type": "VHS_BatchManager",
+ "link": null
+ },
+ {
+ "name": "vae",
+ "shape": 7,
+ "type": "VAE",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 250
+ ]
+ },
+ {
+ "name": "frame_count",
+ "type": "INT",
+ "links": [
+ 272
+ ]
+ },
+ {
+ "name": "audio",
+ "type": "AUDIO",
+ "links": null
+ },
+ {
+ "name": "video_info",
+ "type": "VHS_VIDEOINFO",
+ "links": null
+ }
+ ],
+ "properties": {
+ "cnr_id": "comfyui-videohelpersuite",
+ "ver": "0edce8ef7ce173ac97a3ed3d6f4636029d1a4530",
+ "Node name for S&R": "VHS_LoadVideo"
+ },
+ "widgets_values": {
+ "video": "example_3.mp4",
+ "force_rate": 0,
+ "custom_width": 0,
+ "custom_height": 0,
+ "frame_load_cap": 0,
+ "skip_first_frames": 0,
+ "select_every_nth": 1,
+ "format": "AnimateDiff",
+ "choose video to upload": "image",
+ "videopreview": {
+ "hidden": false,
+ "paused": false,
+ "params": {
+ "filename": "example_3.mp4",
+ "type": "input",
+ "format": "video/mp4",
+ "force_rate": 0,
+ "custom_width": 0,
+ "custom_height": 0,
+ "frame_load_cap": 0,
+ "skip_first_frames": 0,
+ "select_every_nth": 1
+ }
+ }
+ }
+ },
+ {
+ "id": 22,
+ "type": "WanVideoModelLoader",
+ "pos": [
+ -72.96549987792969,
+ -1239.5343017578125
+ ],
+ "size": [
+ 624.1026611328125,
+ 338
+ ],
+ "flags": {},
+ "order": 6,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "compile_args",
+ "shape": 7,
+ "type": "WANCOMPILEARGS",
+ "link": null
+ },
+ {
+ "name": "block_swap_args",
+ "shape": 7,
+ "type": "BLOCKSWAPARGS",
+ "link": null
+ },
+ {
+ "name": "lora",
+ "shape": 7,
+ "type": "WANVIDLORA",
+ "link": null
+ },
+ {
+ "name": "vram_management_args",
+ "shape": 7,
+ "type": "VRAM_MANAGEMENTARGS",
+ "link": null
+ },
+ {
+ "name": "extra_model",
+ "shape": 7,
+ "type": "VACEPATH",
+ "link": null
+ },
+ {
+ "name": "fantasytalking_model",
+ "shape": 7,
+ "type": "FANTASYTALKINGMODEL",
+ "link": null
+ },
+ {
+ "name": "multitalk_model",
+ "shape": 7,
+ "type": "MULTITALKMODEL",
+ "link": null
+ },
+ {
+ "name": "fantasyportrait_model",
+ "shape": 7,
+ "type": "FANTASYPORTRAITMODEL",
+ "link": null
+ },
+ {
+ "name": "vace_model",
+ "shape": 7,
+ "type": "VACEPATH",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "model",
+ "type": "WANVIDEOMODEL",
+ "slot_index": 0,
+ "links": [
+ 163,
+ 258
+ ]
+ }
+ ],
+ "properties": {
+ "cnr_id": "ComfyUI-WanVideoWrapper",
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
+ "Node name for S&R": "WanVideoModelLoader"
+ },
+ "widgets_values": [
+ "WanVideo\\UniLumos\\Wan2_1_UniLumos_1_3B_bf16.safetensors",
+ "bf16",
+ "disabled",
+ "offload_device",
+ "sdpa",
+ "default"
+ ],
+ "color": "#223",
+ "bgcolor": "#335"
+ },
+ {
+ "id": 149,
+ "type": "MarkdownNote",
+ "pos": [
+ -55.83642934666729,
+ -1394.8578370053212
+ ],
+ "size": [
+ 482.3308796642473,
+ 88
+ ],
+ "flags": {},
+ "order": 7,
+ "mode": 0,
+ "inputs": [],
+ "outputs": [],
+ "properties": {},
+ "widgets_values": [
+ "[https://huggingface.co/Kijai/WanVideo_comfy/blob/main/UniLumos/Wan2_1_UniLumos_1_3B_bf16.safetensors](https://huggingface.co/Kijai/WanVideo_comfy/blob/main/UniLumos/Wan2_1_UniLumos_1_3B_bf16.safetensors)"
+ ],
+ "color": "#432",
+ "bgcolor": "#653"
+ },
+ {
+ "id": 150,
+ "type": "ImageConcatMulti",
+ "pos": [
+ 1593.3320649456534,
+ -1189.6308807064013
+ ],
+ "size": [
+ 270,
+ 170
+ ],
+ "flags": {},
+ "order": 21,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image_1",
+ "type": "IMAGE",
+ "link": 276
+ },
+ {
+ "name": "image_2",
+ "shape": 7,
+ "type": "IMAGE",
+ "link": 277
+ }
+ ],
+ "outputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "links": [
+ 278
+ ]
+ }
+ ],
+ "properties": {
+ "cnr_id": "comfyui-kjnodes",
+ "ver": "3fcd22f2fe2be69c3229f192362b91888277cbcb"
+ },
+ "widgets_values": [
+ 2,
+ "down",
+ true,
+ null
+ ]
+ },
+ {
+ "id": 126,
+ "type": "GetImageSizeAndCount",
+ "pos": [
+ 1618.2845458984375,
+ -665.3915405273438
+ ],
+ "size": [
+ 190.86484375,
+ 86
+ ],
+ "flags": {},
+ "order": 25,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "link": 214
+ }
+ ],
+ "outputs": [
+ {
+ "name": "image",
+ "type": "IMAGE",
+ "links": [
+ 279
+ ]
+ },
+ {
+ "label": "832 width",
+ "name": "width",
+ "type": "INT",
+ "links": null
+ },
+ {
+ "label": "480 height",
+ "name": "height",
+ "type": "INT",
+ "links": null
+ },
+ {
+ "label": "49 count",
+ "name": "count",
+ "type": "INT",
+ "links": null
+ }
+ ],
+ "properties": {
+ "cnr_id": "comfyui-kjnodes",
+ "ver": "3fcd22f2fe2be69c3229f192362b91888277cbcb",
+ "Node name for S&R": "GetImageSizeAndCount"
+ },
+ "widgets_values": []
+ },
+ {
+ "id": 117,
+ "type": "ImageConcatMulti",
+ "pos": [
+ 1593.6346855323757,
+ -950.6464694929421
+ ],
+ "size": [
+ 270,
+ 170
+ ],
+ "flags": {},
+ "order": 26,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "image_1",
+ "type": "IMAGE",
+ "link": 279
+ },
+ {
+ "name": "image_2",
+ "shape": 7,
+ "type": "IMAGE",
+ "link": 278
+ }
+ ],
+ "outputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "links": [
+ 264
+ ]
+ }
+ ],
+ "properties": {
+ "cnr_id": "comfyui-kjnodes",
+ "ver": "3fcd22f2fe2be69c3229f192362b91888277cbcb"
+ },
+ "widgets_values": [
+ 2,
+ "left",
+ true,
+ null
+ ]
+ },
+ {
+ "id": 146,
+ "type": "WanVideoLoraSelect",
+ "pos": [
+ 701.5992933320292,
+ -1474.8119218889833
+ ],
+ "size": [
+ 388.7068013400708,
+ 212
+ ],
+ "flags": {},
+ "order": 8,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "prev_lora",
+ "shape": 7,
+ "type": "WANVIDLORA",
+ "link": null
+ },
+ {
+ "name": "blocks",
+ "shape": 7,
+ "type": "SELECTEDBLOCKS",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "lora",
+ "type": "WANVIDLORA",
+ "links": [
+ 260
+ ]
+ }
+ ],
+ "properties": {
+ "cnr_id": "ComfyUI-WanVideoWrapper",
+ "ver": "d3f33a9f09e774e43500eed1abf7d975997dd1ee",
+ "Node name for S&R": "WanVideoLoraSelect"
+ },
+ "widgets_values": [
+ "Wan21_CausVid_bidirect2_T2V_1_3B_lora_rank32.safetensors",
+ 1,
+ false,
+ false,
+ "Metadata\n| Metadata |\n| ss_network_alpha | Dynamic |\n| ss_training_comment | Dynamic resize with sv_fro: 0.9 from 265; |\n| ss_network_dim | Dynamic |\n"
+ ]
+ },
+ {
+ "id": 153,
+ "type": "Note",
+ "pos": [
+ 1152.161577728121,
+ -1358.6514950636351
+ ],
+ "size": [
+ 293.1480353732686,
+ 136.57397644805678
+ ],
+ "flags": {},
+ "order": 9,
+ "mode": 0,
+ "inputs": [],
+ "outputs": [],
+ "properties": {},
+ "widgets_values": [
+ "Without distill LoRA, default settings were cfg 6.0, 25 steps"
+ ],
+ "color": "#432",
+ "bgcolor": "#653"
+ },
+ {
+ "id": 145,
+ "type": "WanVideoSetLoRAs",
+ "pos": [
+ 846.8432925305337,
+ -1099.7290129043226
+ ],
+ "size": [
+ 174.5337890625,
+ 46
+ ],
+ "flags": {},
+ "order": 12,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "WANVIDEOMODEL",
+ "link": 258
+ },
+ {
+ "name": "lora",
+ "shape": 7,
+ "type": "WANVIDLORA",
+ "link": 260
+ }
+ ],
+ "outputs": [
+ {
+ "name": "model",
+ "type": "WANVIDEOMODEL",
+ "links": [
+ 259
+ ]
+ }
+ ],
+ "properties": {
+ "cnr_id": "ComfyUI-WanVideoWrapper",
+ "ver": "d3f33a9f09e774e43500eed1abf7d975997dd1ee",
+ "Node name for S&R": "WanVideoSetLoRAs"
+ },
+ "widgets_values": []
+ },
+ {
+ "id": 27,
+ "type": "WanVideoSampler",
+ "pos": [
+ 1221.549560546875,
+ -644.4454956054688
+ ],
+ "size": [
+ 315,
+ 874.1923076923076
+ ],
+ "flags": {},
+ "order": 23,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "model",
+ "type": "WANVIDEOMODEL",
+ "link": 259
+ },
+ {
+ "name": "image_embeds",
+ "type": "WANVIDIMAGE_EMBEDS",
+ "link": 218
+ },
+ {
+ "name": "text_embeds",
+ "shape": 7,
+ "type": "WANVIDEOTEXTEMBEDS",
+ "link": 166
+ },
+ {
+ "name": "samples",
+ "shape": 7,
+ "type": "LATENT",
+ "link": null
+ },
+ {
+ "name": "feta_args",
+ "shape": 7,
+ "type": "FETAARGS",
+ "link": null
+ },
+ {
+ "name": "context_options",
+ "shape": 7,
+ "type": "WANVIDCONTEXT",
+ "link": null
+ },
+ {
+ "name": "cache_args",
+ "shape": 7,
+ "type": "CACHEARGS",
+ "link": null
+ },
+ {
+ "name": "flowedit_args",
+ "shape": 7,
+ "type": "FLOWEDITARGS",
+ "link": null
+ },
+ {
+ "name": "slg_args",
+ "shape": 7,
+ "type": "SLGARGS",
+ "link": null
+ },
+ {
+ "name": "loop_args",
+ "shape": 7,
+ "type": "LOOPARGS",
+ "link": null
+ },
+ {
+ "name": "experimental_args",
+ "shape": 7,
+ "type": "EXPERIMENTALARGS",
+ "link": null
+ },
+ {
+ "name": "sigmas",
+ "shape": 7,
+ "type": "SIGMAS",
+ "link": null
+ },
+ {
+ "name": "unianimate_poses",
+ "shape": 7,
+ "type": "UNIANIMATE_POSE",
+ "link": null
+ },
+ {
+ "name": "fantasytalking_embeds",
+ "shape": 7,
+ "type": "FANTASYTALKING_EMBEDS",
+ "link": null
+ },
+ {
+ "name": "uni3c_embeds",
+ "shape": 7,
+ "type": "UNI3C_EMBEDS",
+ "link": null
+ },
+ {
+ "name": "multitalk_embeds",
+ "shape": 7,
+ "type": "MULTITALK_EMBEDS",
+ "link": null
+ },
+ {
+ "name": "freeinit_args",
+ "shape": 7,
+ "type": "FREEINITARGS",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "samples",
+ "type": "LATENT",
+ "slot_index": 0,
+ "links": [
+ 196
+ ]
+ },
+ {
+ "name": "denoised_samples",
+ "type": "LATENT",
+ "links": null
+ }
+ ],
+ "properties": {
+ "cnr_id": "ComfyUI-WanVideoWrapper",
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
+ "Node name for S&R": "WanVideoSampler"
+ },
+ "widgets_values": [
+ 6,
+ 1,
+ 8,
+ 74,
+ "fixed",
+ true,
+ "lcm",
+ 0,
+ 1,
+ false,
+ "comfy",
+ 0,
+ -1,
+ false
+ ]
+ },
+ {
+ "id": 104,
+ "type": "WanVideoTextEncode",
+ "pos": [
+ 35.80370644531885,
+ -784.2830795966373
+ ],
+ "size": [
+ 541.4558526699016,
+ 453.9207161620674
+ ],
+ "flags": {},
+ "order": 11,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "t5",
+ "shape": 7,
+ "type": "WANTEXTENCODER",
+ "link": 162
+ },
+ {
+ "name": "model_to_offload",
+ "shape": 7,
+ "type": "WANVIDEOMODEL",
+ "link": 163
+ }
+ ],
+ "outputs": [
+ {
+ "name": "text_embeds",
+ "type": "WANVIDEOTEXTEMBEDS",
+ "slot_index": 0,
+ "links": [
+ 166
+ ]
+ }
+ ],
+ "properties": {
+ "cnr_id": "ComfyUI-WanVideoWrapper",
+ "ver": "d9b1f4d1a5aea91d101ae97a54714a5861af3f50",
+ "Node name for S&R": "WanVideoTextEncode"
+ },
+ "widgets_values": [
+ "A man with a beard, wearing a dark shirt and a light-colored hat, is playing a black electric bass guitar in a medium shot. The bass guitar features a white pickguard with a glittery texture, four strings, a black bridge, and four control knobs. The headstock is black with gold tuning pegs and has the brand name \"Spector\" written in white. The man's left hand is on the fretboard, while his right hand is plucking the strings. He is wearing a simple gold ring with a small, round, clear stone. The background is a plain, light-colored wall, providing a neutral backdrop that highlights the musician and his instrument. Direction of Light: Back Light, Light Source Type: Artificial Light, Light Intensity: Moderate, Color Temperature: Warm Tone, Light Changes in Time: Dynamic Light (Intensity Changing Light), Optical Phenomena: None.\t",
+ "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards",
+ true,
+ true,
+ "gpu"
+ ],
+ "color": "#332922",
+ "bgcolor": "#593930"
+ },
+ {
+ "id": 123,
+ "type": "VHS_LoadVideo",
+ "pos": [
+ -1379.3857312668438,
+ 568.6701788113254
+ ],
+ "size": [
+ 247.455078125,
+ 455.2626221778634
+ ],
+ "flags": {},
+ "order": 10,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "meta_batch",
+ "shape": 7,
+ "type": "VHS_BatchManager",
+ "link": null
+ },
+ {
+ "name": "vae",
+ "shape": 7,
+ "type": "VAE",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "IMAGE",
+ "type": "IMAGE",
+ "links": [
+ 206
+ ]
+ },
+ {
+ "name": "frame_count",
+ "type": "INT",
+ "links": []
+ },
+ {
+ "name": "audio",
+ "type": "AUDIO",
+ "links": null
+ },
+ {
+ "name": "video_info",
+ "type": "VHS_VIDEOINFO",
+ "links": null
+ }
+ ],
+ "properties": {
+ "cnr_id": "comfyui-videohelpersuite",
+ "ver": "0edce8ef7ce173ac97a3ed3d6f4636029d1a4530",
+ "Node name for S&R": "VHS_LoadVideo"
+ },
+ "widgets_values": {
+ "video": "bg_video_5 (1).mp4",
+ "force_rate": 0,
+ "custom_width": 0,
+ "custom_height": 0,
+ "frame_load_cap": 0,
+ "skip_first_frames": 0,
+ "select_every_nth": 1,
+ "format": "AnimateDiff",
+ "choose video to upload": "image",
+ "videopreview": {
+ "hidden": false,
+ "paused": false,
+ "params": {
+ "filename": "bg_video_5 (1).mp4",
+ "type": "input",
+ "format": "video/mp4",
+ "force_rate": 0,
+ "custom_width": 0,
+ "custom_height": 0,
+ "frame_load_cap": 0,
+ "skip_first_frames": 0,
+ "select_every_nth": 1
+ }
+ }
+ }
+ },
+ {
+ "id": 30,
+ "type": "VHS_VideoCombine",
+ "pos": [
+ 1897.1763350423657,
+ -703.6082482669302
+ ],
+ "size": [
+ 1972.1238276510217,
+ 1098.8168567888545
+ ],
+ "flags": {},
+ "order": 27,
+ "mode": 0,
+ "inputs": [
+ {
+ "name": "images",
+ "type": "IMAGE",
+ "link": 264
+ },
+ {
+ "name": "audio",
+ "shape": 7,
+ "type": "AUDIO",
+ "link": null
+ },
+ {
+ "name": "meta_batch",
+ "shape": 7,
+ "type": "VHS_BatchManager",
+ "link": null
+ },
+ {
+ "name": "vae",
+ "shape": 7,
+ "type": "VAE",
+ "link": null
+ }
+ ],
+ "outputs": [
+ {
+ "name": "Filenames",
+ "type": "VHS_FILENAMES",
+ "links": null
+ }
+ ],
+ "properties": {
+ "cnr_id": "comfyui-videohelpersuite",
+ "ver": "0a75c7958fe320efcb052f1d9f8451fd20c730a8",
+ "Node name for S&R": "VHS_VideoCombine"
+ },
+ "widgets_values": {
+ "frame_rate": 16,
+ "loop_count": 0,
+ "filename_prefix": "WanVideoWrapper_UniLumos",
+ "format": "video/h264-mp4",
+ "pix_fmt": "yuv420p",
+ "crf": 19,
+ "save_metadata": true,
+ "trim_to_audio": false,
+ "pingpong": false,
+ "save_output": false,
+ "videopreview": {
+ "hidden": false,
+ "paused": false,
+ "params": {
+ "filename": "WanVideoWrapper_UniLumos_00023.mp4",
+ "subfolder": "",
+ "type": "temp",
+ "format": "video/h264-mp4",
+ "frame_rate": 16,
+ "workflow": "WanVideoWrapper_UniLumos_00023.png",
+ "fullpath": "N:\\AI\\ComfyUI\\temp\\WanVideoWrapper_UniLumos_00023.mp4"
+ }
+ }
+ }
+ }
+ ],
+ "links": [
+ [
+ 162,
+ 11,
+ 0,
+ 104,
+ 0,
+ "WANTEXTENCODER"
+ ],
+ [
+ 163,
+ 22,
+ 0,
+ 104,
+ 1,
+ "WANVIDEOMODEL"
+ ],
+ [
+ 166,
+ 104,
+ 0,
+ 27,
+ 2,
+ "WANVIDEOTEXTEMBEDS"
+ ],
+ [
+ 196,
+ 27,
+ 0,
+ 121,
+ 1,
+ "LATENT"
+ ],
+ [
+ 206,
+ 123,
+ 0,
+ 116,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 214,
+ 121,
+ 0,
+ 126,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 218,
+ 129,
+ 0,
+ 27,
+ 1,
+ "WANVIDIMAGE_EMBEDS"
+ ],
+ [
+ 220,
+ 131,
+ 0,
+ 132,
+ 0,
+ "REMBG_SESSION"
+ ],
+ [
+ 231,
+ 116,
+ 1,
+ 137,
+ 2,
+ "INT"
+ ],
+ [
+ 232,
+ 116,
+ 2,
+ 137,
+ 3,
+ "INT"
+ ],
+ [
+ 239,
+ 140,
+ 0,
+ 130,
+ 0,
+ "WANVAE"
+ ],
+ [
+ 240,
+ 140,
+ 0,
+ 138,
+ 0,
+ "WANVAE"
+ ],
+ [
+ 241,
+ 140,
+ 0,
+ 121,
+ 0,
+ "WANVAE"
+ ],
+ [
+ 247,
+ 130,
+ 0,
+ 129,
+ 1,
+ "LATENT"
+ ],
+ [
+ 248,
+ 132,
+ 1,
+ 143,
+ 0,
+ "MASK"
+ ],
+ [
+ 249,
+ 143,
+ 0,
+ 142,
+ 1,
+ "MASK"
+ ],
+ [
+ 250,
+ 141,
+ 0,
+ 137,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 251,
+ 137,
+ 0,
+ 132,
+ 1,
+ "IMAGE"
+ ],
+ [
+ 252,
+ 137,
+ 0,
+ 142,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 253,
+ 138,
+ 0,
+ 129,
+ 0,
+ "LATENT"
+ ],
+ [
+ 257,
+ 142,
+ 0,
+ 138,
+ 1,
+ "IMAGE"
+ ],
+ [
+ 258,
+ 22,
+ 0,
+ 145,
+ 0,
+ "WANVIDEOMODEL"
+ ],
+ [
+ 259,
+ 145,
+ 0,
+ 27,
+ 0,
+ "WANVIDEOMODEL"
+ ],
+ [
+ 260,
+ 146,
+ 0,
+ 145,
+ 1,
+ "WANVIDLORA"
+ ],
+ [
+ 264,
+ 117,
+ 0,
+ 30,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 268,
+ 116,
+ 0,
+ 148,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 269,
+ 148,
+ 0,
+ 130,
+ 1,
+ "IMAGE"
+ ],
+ [
+ 270,
+ 137,
+ 1,
+ 129,
+ 2,
+ "INT"
+ ],
+ [
+ 271,
+ 137,
+ 2,
+ 129,
+ 3,
+ "INT"
+ ],
+ [
+ 272,
+ 141,
+ 1,
+ 129,
+ 4,
+ "INT"
+ ],
+ [
+ 275,
+ 143,
+ 0,
+ 148,
+ 1,
+ "MASK"
+ ],
+ [
+ 276,
+ 137,
+ 0,
+ 150,
+ 0,
+ "IMAGE"
+ ],
+ [
+ 277,
+ 148,
+ 0,
+ 150,
+ 1,
+ "IMAGE"
+ ],
+ [
+ 278,
+ 150,
+ 0,
+ 117,
+ 1,
+ "IMAGE"
+ ],
+ [
+ 279,
+ 126,
+ 0,
+ 117,
+ 0,
+ "IMAGE"
+ ]
+ ],
+ "groups": [
+ {
+ "id": 1,
+ "title": "Background",
+ "bounding": [
+ -1550.1677854575216,
+ 478.44687498202524,
+ 1932.592752464471,
+ 648.3538858901612
+ ],
+ "color": "#3f789e",
+ "font_size": 24,
+ "flags": {}
+ },
+ {
+ "id": 2,
+ "title": "Foreground",
+ "bounding": [
+ -1557.3622468309832,
+ -297.3440571225005,
+ 1954.1756974764817,
+ 719.0982517735347
+ ],
+ "color": "#3f789e",
+ "font_size": 24,
+ "flags": {}
+ }
+ ],
+ "config": {},
+ "extra": {
+ "ds": {
+ "scale": 0.5054470284993371,
+ "offset": [
+ 534.93840363206,
+ 1416.8404480907056
+ ]
+ },
+ "frontendVersion": "1.32.0",
+ "node_versions": {
+ "ComfyUI-WanVideoWrapper": "5a2383621a05825d0d0437781afcb8552d9590fd",
+ "comfy-core": "0.3.26",
+ "ComfyUI-KJNodes": "a5bd3c86c8ed6b83c55c2d0e7a59515b15a0137f",
+ "ComfyUI-VideoHelperSuite": "0a75c7958fe320efcb052f1d9f8451fd20c730a8"
+ },
+ "VHS_latentpreview": true,
+ "VHS_latentpreviewrate": 0,
+ "VHS_MetadataImage": true,
+ "VHS_KeepIntermediate": true
+ },
+ "version": 0.4
+}
\ No newline at end of file
diff --git a/nodes.py b/nodes.py
index c6fbd55..0a611e9 100644
--- a/nodes.py
+++ b/nodes.py
@@ -1246,6 +1246,46 @@ class WanVideoAnimateEmbeds:
}
return (image_embeds,)
+
+# region UniLumos
+# Node that packs optional foreground/background video latents, plus the latent
+# target shape derived from width/height/num_frames, into an image_embeds dict.
+class WanVideoUniLumosEmbeds:
+    @classmethod
+    def INPUT_TYPES(s):
+        # Widget order (width, height, num_frames) is preserved from the original definition.
+        required_inputs = {
+            "width": ("INT", {"default": 832, "min": 64, "max": 8096, "step": 8, "tooltip": "Width of the image to encode"}),
+            "height": ("INT", {"default": 480, "min": 64, "max": 8096, "step": 8, "tooltip": "Height of the image to encode"}),
+            "num_frames": ("INT", {"default": 81, "min": 1, "max": 10000, "step": 4, "tooltip": "Number of frames to encode"}),
+        }
+        optional_inputs = {
+            "foreground_latents": ("LATENT", {"tooltip": "Video foreground latents"}),
+            "background_latents": ("LATENT", {"tooltip": "Video background latents"}),
+        }
+        return {"required": required_inputs, "optional": optional_inputs}
+
+    RETURN_TYPES = ("WANVIDIMAGE_EMBEDS", )
+    RETURN_NAMES = ("image_embeds",)
+    FUNCTION = "process"
+    CATEGORY = "WanVideoWrapper"
+
+    def process(self, num_frames, width, height, foreground_latents=None, background_latents=None):
+        """Build the UniLumos embeds dict.
+
+        Returns a 1-tuple holding a dict with "target_shape", "num_frames",
+        "foreground_latents" and "background_latents". A latent input that is not
+        connected is replaced by a float32 CPU zero tensor of target_shape;
+        a connected one contributes the first entry of its "samples" tensor.
+        """
+        # Latent dimensions: 16 channels, then frames/height/width divided by the VAE strides.
+        latent_frames = (num_frames - 1) // VAE_STRIDE[0] + 1
+        latent_height = height // VAE_STRIDE[1]
+        latent_width = width // VAE_STRIDE[2]
+        target_shape = (16, latent_frames, latent_height, latent_width)
+
+        embeds = {
+            "target_shape": target_shape,
+            "num_frames": num_frames,
+        }
+
+        # Foreground first, then background, so the dict keeps the original key order.
+        conditioning_inputs = (
+            ("foreground_latents", foreground_latents),
+            ("background_latents", background_latents),
+        )
+        for embed_key, latent in conditioning_inputs:
+            if latent is None:
+                embeds[embed_key] = torch.zeros(target_shape, device=torch.device("cpu"), dtype=torch.float32)
+            else:
+                embeds[embed_key] = latent["samples"][0]
+
+        return (embeds,)
class WanVideoEmptyEmbeds:
@classmethod
@@ -2296,6 +2336,7 @@ NODE_CLASS_MAPPINGS = {
"WanVideoSchedulerSA_ODE": WanVideoSchedulerSA_ODE,
"WanVideoAddBindweaveEmbeds": WanVideoAddBindweaveEmbeds,
"TextImageEncodeQwenVL": TextImageEncodeQwenVL,
+ "WanVideoUniLumosEmbeds": WanVideoUniLumosEmbeds,
}
NODE_DISPLAY_NAME_MAPPINGS = {
@@ -2336,4 +2377,5 @@ NODE_DISPLAY_NAME_MAPPINGS = {
"WanVideoAddLucyEditLatents": "WanVideo Add LucyEdit Latents",
"WanVideoSchedulerSA_ODE": "WanVideo Scheduler SA-ODE",
"WanVideoAddBindweaveEmbeds": "WanVideo Add Bindweave Embeds",
+ "WanVideoUniLumosEmbeds": "WanVideo UniLumos Embeds",
}
diff --git a/nodes_model_loading.py b/nodes_model_loading.py
index 88e4958..97ae7f9 100644
--- a/nodes_model_loading.py
+++ b/nodes_model_loading.py
@@ -1088,6 +1088,14 @@ class WanVideoModelLoader:
sd, reader = load_gguf(model_path)
gguf_reader.append(reader)
+ # Ovi
+ extra_audio_model = False
+ if any(key.startswith("video_model.") for key in sd.keys()):
+ sd = {key.replace("video_model.", "", 1).replace("modulation.modulation", "modulation"): value for key, value in sd.items()}
+ if any(key.startswith("audio_model.") for key in sd.keys()) and any(key.startswith("blocks.") for key in sd.keys()):
+ extra_audio_model = True
+
+
is_wananimate = "pose_patch_embedding.weight" in sd
# rename WanAnimate face fuser block keys to insert into main blocks instead
if is_wananimate:
@@ -1140,7 +1148,6 @@ class WanVideoModelLoader:
raise ValueError("You are attempting to load a VACE module as a WanVideo model, instead you should use the vace_model input and matching T2V base model")
# currently this can be VACE, MTV-Crafter, Lynx or Ovi-audio weights
- extra_audio_model = False
if extra_model is not None:
for _model in extra_model:
print("Loading extra model: ", _model["path"])
diff --git a/nodes_sampler.py b/nodes_sampler.py
index be138aa..b8cd80a 100644
--- a/nodes_sampler.py
+++ b/nodes_sampler.py
@@ -1141,6 +1141,16 @@ class WanVideoSampler:
lynx_embeds["ref_buffer_uncond"] = lynx_ref_buffer_uncond if not math.isclose(cfg[0], 1.0) else None
mm.soft_empty_cache()
+ # UniLumos
+ foreground_latents = image_embeds.get("foreground_latents", None)
+ if foreground_latents is not None:
+ log.info(f"UniLumos foreground latent input shape: {foreground_latents.shape}")
+ foreground_latents = foreground_latents.to(device, dtype)
+ background_latents = image_embeds.get("background_latents", None)
+ if background_latents is not None:
+ log.info(f"UniLumos background latent input shape: {background_latents.shape}")
+ background_latents = background_latents.to(device, dtype)
+
#region model pred
def predict_with_cfg(z, cfg_scale, positive_embeds, negative_embeds, timestep, idx, image_cond=None, clip_fea=None,
control_latents=None, vace_data=None, unianim_data=None, audio_proj=None, control_camera_latents=None,
@@ -1361,6 +1371,9 @@ class WanVideoSampler:
else:
self.noise_front_pad_num = 0
+ if background_latents is not None or foreground_latents is not None:
+ z = torch.cat([z, foreground_latents.to(z), background_latents.to(z)], dim=0)
+
base_params = {
'x': [z], # latent
'y': [image_cond_input] if image_cond_input is not None else None, # image cond
diff --git a/nodes_utility.py b/nodes_utility.py
index 14f9090..55cd9ce 100644
--- a/nodes_utility.py
+++ b/nodes_utility.py
@@ -1,6 +1,8 @@
import torch
import numpy as np
from comfy.utils import common_upscale
+from comfy import model_management
+from tqdm import tqdm
from .utils import log
from einops import rearrange
@@ -12,6 +14,9 @@ except:
VAE_STRIDE = (4, 8, 8)
PATCH_SIZE = (1, 2, 2)
+main_device = model_management.get_torch_device()
+offload_device = model_management.unet_offload_device()
+
class WanVideoImageResizeToClosest:
@classmethod
def INPUT_TYPES(s):
@@ -660,6 +665,96 @@ class FaceMaskFromPoseKeypoints:
cv2.fillPoly(canvas, pts=[outer_contour], color=part_color)
return canvas
+
+
+class DrawGaussianNoiseOnImage:
+    """Fill the area outside a mask with Gaussian noise matched to the masked subject.
+
+    Per-channel mean/std are measured over pixels whose mask value exceeds 0.5; noise drawn
+    with those statistics is clamped to [0, 1] and blended in as image*mask + noise*(1-mask).
+    """
+
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {
+                    "image": ("IMAGE", ),
+                    "mask": ("MASK", ),
+                },
+                "optional": {
+                    "device": (["cpu", "gpu"], {"default": "cpu", "tooltip": "Device to use for processing"}),
+                    "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
+                }
+        }
+
+    RETURN_TYPES = ("IMAGE", )
+    RETURN_NAMES = ("images",)
+    FUNCTION = "apply"
+    CATEGORY = "KJNodes/masking"
+    DESCRIPTION = "Fills the background (masked area) with Gaussian noise sampled using the mean and variance of the subject (unmasked) region."
+
+    def apply(self, image, mask, device="cpu", seed=0):
+        """Blend subject-matched Gaussian noise into the background of every image.
+
+        Args:
+            image: tensor unpacked as (B, H, W, C).
+            mask: tensor unpacked as (BM, HM, WM); values above 0.5 mark the subject.
+            device: "gpu" processes on the module-level main_device, anything else on CPU.
+            seed: seed of the noise generator, so identical inputs give identical output.
+
+        Returns:
+            One-element tuple holding the (B, H, W, C) result, moved to the CPU.
+
+        Raises:
+            ValueError: if images are supplied but the mask batch is empty.
+        """
+        B, H, W, C = image.shape
+        BM, HM, WM = mask.shape
+
+        # Nothing to process: return an empty batch that keeps the input channel count.
+        if B == 0:
+            return (torch.zeros((0, H, W, C), dtype=image.dtype),)
+        if BM == 0:
+            raise ValueError("DrawGaussianNoiseOnImage: mask batch is empty")
+
+        processing_device = main_device if device == "gpu" else torch.device("cpu")
+        in_masks = mask.to(processing_device)
+        in_images = image.to(processing_device)
+
+        # Resize the device-resident mask (not the raw input) to the image resolution.
+        if HM != H or WM != W:
+            in_masks = torch.nn.functional.interpolate(in_masks.unsqueeze(1), size=(H, W), mode='nearest-exact').squeeze(1)
+
+        # Match batch sizes: tile a short mask batch, truncate a long one.
+        if B > BM:
+            in_masks = in_masks.repeat((B + BM - 1) // BM, 1, 1)[:B]
+        elif BM > B:
+            in_masks = in_masks[:B]
+
+        # Seeded generator for reproducible noise.
+        generator = torch.Generator(device=processing_device).manual_seed(seed)
+        output_images = []
+
+        for i in tqdm(range(B), desc="DrawGaussianNoiseOnImage batch"):
+            curr_mask = in_masks[i]
+            curr_image = in_images[i]
+            subject_mask = curr_mask > 0.5
+            # A trailing singleton axis broadcasts over any channel count (RGB, RGBA, ...).
+            mask_expanded = curr_mask.unsqueeze(-1)
+
+            # Noise stays zero when the mask selects no subject pixels.
+            noise = torch.zeros_like(curr_image)
+            if subject_mask.any():
+                for c in range(C):
+                    subject_pixels = curr_image[:, :, c][subject_mask]
+                    # std() of a single sample is NaN, not a usable standard deviation.
+                    std = subject_pixels.std().item() if subject_pixels.numel() > 1 else 0.0
+                    noise[:, :, c] = torch.normal(mean=subject_pixels.mean().item(), std=std,
+                                                  size=(H, W), generator=generator,
+                                                  device=processing_device)
+            noise = torch.clamp(noise, 0.0, 1.0)
+            output_images.append(curr_image * mask_expanded + noise * (1 - mask_expanded))
+
+        return (torch.stack(output_images, dim=0).cpu(), )
NODE_CLASS_MAPPINGS = {
"WanVideoImageResizeToClosest": WanVideoImageResizeToClosest,
@@ -673,6 +768,7 @@ NODE_CLASS_MAPPINGS = {
"NormalizeAudioLoudness": NormalizeAudioLoudness,
"WanVideoPassImagesFromSamples": WanVideoPassImagesFromSamples,
"FaceMaskFromPoseKeypoints": FaceMaskFromPoseKeypoints,
+ "DrawGaussianNoiseOnImage": DrawGaussianNoiseOnImage,
}
NODE_DISPLAY_NAME_MAPPINGS = {
"WanVideoImageResizeToClosest": "WanVideo Image Resize To Closest",
@@ -686,4 +782,5 @@ NODE_DISPLAY_NAME_MAPPINGS = {
"NormalizeAudioLoudness": "Normalize Audio Loudness",
"WanVideoPassImagesFromSamples": "WanVideo Pass Images From Samples",
"FaceMaskFromPoseKeypoints": "Face Mask From Pose Keypoints",
+ "DrawGaussianNoiseOnImage": "Draw Gaussian Noise On Image",
}
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 507faf6..4bcd4d7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
[project]
name = "ComfyUI-WanVideoWrapper"
description = "ComfyUI wrapper nodes for WanVideo"
-version = "1.3.8"
+version = "1.3.9"
license = {file = "LICENSE"}
dependencies = ["accelerate >= 1.2.1", "diffusers >= 0.33.0", "peft >= 0.17.0", "ftfy", "gguf >= 0.17.1", "pyloudnorm"]