diff --git a/scripts/convert_hunyuan_video1_5_to_diffusers.py b/scripts/convert_hunyuan_video1_5_to_diffusers.py
index c5f9515c6b..7546a909df 100644
--- a/scripts/convert_hunyuan_video1_5_to_diffusers.py
+++ b/scripts/convert_hunyuan_video1_5_to_diffusers.py
@@ -31,24 +31,6 @@ import argparse
 import os
 
 TRANSFORMER_CONFIGS = {
-    "480p_i2v": {
-        "in_channels": 65,
-        "out_channels": 32,
-        "num_attention_heads": 16,
-        "attention_head_dim": 128,
-        "num_layers": 54,
-        "num_refiner_layers": 2,
-        "mlp_ratio": 4.0,
-        "patch_size": 1,
-        "patch_size_t": 1,
-        "qk_norm": "rms_norm",
-        "text_embed_dim": 3584,
-        "text_embed_2_dim": 1472,
-        "image_embed_dim": 1152,
-        "rope_theta": 256.0,
-        "rope_axes_dim": (16, 56, 56),
-        "use_meanflow": False,
-    },
     "480p_t2v": {
         "in_channels": 65,
         "out_channels": 32,
@@ -66,29 +48,128 @@ TRANSFORMER_CONFIGS = {
         "rope_theta": 256.0,
         "rope_axes_dim": (16, 56, 56),
         "use_meanflow": False,
+        "target_size": 640,  # NOTE(review): 640 on the populated 480p entries, 960 on "720p_t2v"; consumer not visible here
+        "task_type": "t2v",  # matches the task suffix of the key
     },
+    "480p_i2v": {
+        "in_channels": 65,
+        "out_channels": 32,
+        "num_attention_heads": 16,
+        "attention_head_dim": 128,
+        "num_layers": 54,
+        "num_refiner_layers": 2,
+        "mlp_ratio": 4.0,
+        "patch_size": 1,
+        "patch_size_t": 1,
+        "qk_norm": "rms_norm",
+        "text_embed_dim": 3584,
+        "text_embed_2_dim": 1472,
+        "image_embed_dim": 1152,
+        "rope_theta": 256.0,
+        "rope_axes_dim": (16, 56, 56),
+        "use_meanflow": False,
+        "target_size": 640,
+        "task_type": "i2v",
+    },
+    "720p_t2v": {  # differs from "480p_t2v_distilled" only in target_size (960 vs 640)
+        "in_channels": 65,
+        "out_channels": 32,
+        "num_attention_heads": 16,
+        "attention_head_dim": 128,
+        "num_layers": 54,
+        "num_refiner_layers": 2,
+        "mlp_ratio": 4.0,
+        "patch_size": 1,
+        "patch_size_t": 1,
+        "qk_norm": "rms_norm",
+        "text_embed_dim": 3584,
+        "text_embed_2_dim": 1472,
+        "image_embed_dim": 1152,
+        "rope_theta": 256.0,
+        "rope_axes_dim": (16, 56, 56),
+        "use_meanflow": False,
+        "target_size": 960,
+        "task_type": "t2v",
+    },
+    "720p_i2v": {},  # TODO(review): empty placeholder — no transformer hyperparameters defined for this variant
+    "480p_t2v_distilled": {  # NOTE(review): same values as "480p_i2v" except task_type — confirm distillation needs no other changes
+        "in_channels": 65,
+        "out_channels": 32,
+        "num_attention_heads": 16,
+        "attention_head_dim": 128,
+        "num_layers": 54,
+        "num_refiner_layers": 2,
+        "mlp_ratio": 4.0,
+        "patch_size": 1,
+        "patch_size_t": 1,
+        "qk_norm": "rms_norm",
+        "text_embed_dim": 3584,
+        "text_embed_2_dim": 1472,
+        "image_embed_dim": 1152,
+        "rope_theta": 256.0,
+        "rope_axes_dim": (16, 56, 56),
+        "use_meanflow": False,
+        "target_size": 640,
+        "task_type": "t2v",
+    },
+    "480p_i2v_distilled": {},  # TODO(review): empty placeholder — fill in before this variant is selectable
+    "720p_t2v_distilled": {},  # TODO(review): empty placeholder — fill in before this variant is selectable
+    "720p_i2v_distilled": {},  # TODO(review): empty placeholder — fill in before this variant is selectable
 }
 
 SCHEDULER_CONFIGS = {
+    "480p_t2v": {  # keys are the same variant names used by TRANSFORMER_CONFIGS and GUIDANCE_CONFIGS
+        "shift": 5.0,  # presumably forwarded to the scheduler — usage is outside this view, confirm
+    },
     "480p_i2v": {
         "shift": 5.0,
     },
-    "480p_t2v": {
+    "720p_t2v": {
+        "shift": 9.0,
+    },
+    "720p_i2v": {
+        "shift": 7.0,  # NOTE(review): differs from "720p_t2v" (9.0) — confirm this is intentional
+    },
+    "480p_t2v_distilled": {  # each "*_distilled" entry repeats the shift of its non-distilled counterpart
         "shift": 5.0,
     },
+    "480p_i2v_distilled": {
+        "shift": 5.0,
+    },
+    "720p_t2v_distilled": {
+        "shift": 9.0,
+    },
+    "720p_i2v_distilled": {
+        "shift": 7.0,
+    },
 }
 
 GUIDANCE_CONFIGS = {
-    "480p_i2v": {
-        "guidance_scale": 6.0,
-        "embedded_guidance_scale": None,
-    },
     "480p_t2v": {
         "guidance_scale": 6.0,
-        "embedded_guidance_scale": None,
     },
-
-    }
+    "480p_i2v": {  # all four non-distilled variants use guidance_scale 6.0
+        "guidance_scale": 6.0,
+    },
+    "720p_t2v": {
+        "guidance_scale": 6.0,
+    },
+    "720p_i2v": {
+        "guidance_scale": 6.0,
+    },
+    "480p_t2v_distilled": {  # all four "*_distilled" variants use guidance_scale 1.0
+        "guidance_scale": 1.0,  # NOTE(review): 1.0 presumably means no classifier-free guidance — confirm against the guider
+    },
+    "480p_i2v_distilled": {
+        "guidance_scale": 1.0,
+    },
+    "720p_t2v_distilled": {
+        "guidance_scale": 1.0,
+    },
+    "720p_i2v_distilled": {
+        "guidance_scale": 1.0,
+    },
+}
 
 def swap_scale_shift(weight):
     shift, scale = weight.chunk(2, dim=0)