diff --git a/examples/cogvideo/test_cogvideox_lora.py b/examples/cogvideo/test_cogvideox_lora.py index 7ac702735e..c2d3982d48 100644 --- a/examples/cogvideo/test_cogvideox_lora.py +++ b/examples/cogvideo/test_cogvideox_lora.py @@ -18,10 +18,10 @@ import shutil import sys import tempfile -from PIL import Image +import pytest +from huggingface_hub import snapshot_download from diffusers import CogVideoXTransformer3DModel, DiffusionPipeline -from diffusers.utils import export_to_video sys.path.append("..") @@ -36,41 +36,36 @@ logger.addHandler(stream_handler) class CogVideoXLoRA(ExamplesTestsAccelerate): + dataset_name = "hf-internal-testing/tiny-video-dataset" instance_data_dir = "videos/" - caption_column = "prompts.txt" + caption_column = "captions.txt" video_column = "videos.txt" - video_filename = "00001.mp4" - instance_prompt = "A panda playing a guitar" + instance_prompt = "A hiker standing at the peak of mountain" + max_num_frames = 9 pretrained_model_name_or_path = "hf-internal-testing/tiny-cogvideox-pipe" script_path = "examples/cogvideo/train_cogvideox_lora.py" - def prepare_dummy_inputs(self, instance_data_root: str, num_frames: int = 8): - caption = "A panda playing a guitar" + dataset_path = None - # We create a longer video to also verify if the max_num_frames parameter is working correctly - video = [Image.new("RGB", (32, 32), color=0)] * (num_frames * 2) + @pytest.fixture(scope="class", autouse=True) + def prepare_dummy_inputs(self, request): + tmpdir = tempfile.mkdtemp() - print(os.path.join(instance_data_root, self.caption_column)) - with open(os.path.join(instance_data_root, self.caption_column), "w") as file: - file.write(caption) + try: + if request.cls.dataset_path is None: + request.cls.dataset_path = snapshot_download(self.dataset_name, repo_type="dataset", cache_dir=tmpdir) - with open(os.path.join(instance_data_root, self.video_column), "w") as file: - file.write(f"{self.instance_data_dir}/{self.video_filename}") - - video_dir = os.path.join(instance_data_root, self.instance_data_dir) - os.makedirs(video_dir, exist_ok=True) - export_to_video(video, os.path.join(video_dir, self.video_filename), fps=8) + yield + finally: + shutil.rmtree(tmpdir) def test_lora(self): with tempfile.TemporaryDirectory() as tmpdir: - max_num_frames = 9 - self.prepare_dummy_inputs(tmpdir, num_frames=max_num_frames) - test_args = f""" {self.script_path} --pretrained_model_name_or_path {self.pretrained_model_name_or_path} - --instance_data_root {tmpdir} + --instance_data_root {self.dataset_path} --caption_column {self.caption_column} --video_column {self.video_column} --rank 1 @@ -79,7 +74,7 @@ class CogVideoXLoRA(ExamplesTestsAccelerate): --height 32 --width 32 --fps 8 - --max_num_frames {max_num_frames} + --max_num_frames {self.max_num_frames} --train_batch_size 1 --gradient_accumulation_steps 1 --max_train_steps 2 @@ -99,13 +94,10 @@ class CogVideoXLoRA(ExamplesTestsAccelerate): # max_train_steps == 4, checkpointing_steps == 2 # Should create checkpoints at steps 2, 4 - max_num_frames = 9 - self.prepare_dummy_inputs(tmpdir, num_frames=max_num_frames) - initial_run_args = f""" {self.script_path} --pretrained_model_name_or_path {self.pretrained_model_name_or_path} - --instance_data_root {tmpdir} + --instance_data_root {self.dataset_path} --caption_column {self.caption_column} --video_column {self.video_column} --rank 1 @@ -114,7 +106,7 @@ class CogVideoXLoRA(ExamplesTestsAccelerate): --height 32 --width 32 --fps 8 - --max_num_frames 9 + --max_num_frames {self.max_num_frames} --train_batch_size 1 --gradient_accumulation_steps 1 --learning_rate 1e-3 @@ -164,7 +156,7 @@ class CogVideoXLoRA(ExamplesTestsAccelerate): resume_run_args = f""" {self.script_path} --pretrained_model_name_or_path {self.pretrained_model_name_or_path} - --instance_data_root {tmpdir} + --instance_data_root {self.dataset_path} --caption_column {self.caption_column} --video_column {self.video_column} --rank 1 @@ -173,7 +165,7 @@ class CogVideoXLoRA(ExamplesTestsAccelerate): --height 32 --width 32 --fps 8 - --max_num_frames 9 + --max_num_frames {self.max_num_frames} --train_batch_size 1 --gradient_accumulation_steps 1 --learning_rate 1e-3 @@ -207,13 +199,10 @@ class CogVideoXLoRA(ExamplesTestsAccelerate): def test_lora_checkpointing_checkpoints_total_limit(self): with tempfile.TemporaryDirectory() as tmpdir: - max_num_frames = 9 - self.prepare_dummy_inputs(tmpdir, num_frames=max_num_frames) - test_args = f""" {self.script_path} --pretrained_model_name_or_path {self.pretrained_model_name_or_path} - --instance_data_root {tmpdir} + --instance_data_root {self.dataset_path} --caption_column {self.caption_column} --video_column {self.video_column} --rank 1 @@ -222,7 +211,7 @@ class CogVideoXLoRA(ExamplesTestsAccelerate): --height 32 --width 32 --fps 8 - --max_num_frames 9 + --max_num_frames {self.max_num_frames} --train_batch_size 1 --gradient_accumulation_steps 1 --learning_rate 1e-3 @@ -244,13 +233,10 @@ class CogVideoXLoRA(ExamplesTestsAccelerate): def test_lora_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints(self): with tempfile.TemporaryDirectory() as tmpdir: - max_num_frames = 9 - self.prepare_dummy_inputs(tmpdir, num_frames=max_num_frames) - test_args = f""" {self.script_path} --pretrained_model_name_or_path {self.pretrained_model_name_or_path} - --instance_data_root {tmpdir} + --instance_data_root {self.dataset_path} --caption_column {self.caption_column} --video_column {self.video_column} --rank 1 @@ -259,7 +245,7 @@ class CogVideoXLoRA(ExamplesTestsAccelerate): --height 32 --width 32 --fps 8 - --max_num_frames 9 + --max_num_frames {self.max_num_frames} --train_batch_size 1 --gradient_accumulation_steps 1 --learning_rate 1e-3 @@ -281,7 +267,7 @@ class CogVideoXLoRA(ExamplesTestsAccelerate): resume_run_args = f""" {self.script_path} --pretrained_model_name_or_path {self.pretrained_model_name_or_path} - --instance_data_root {tmpdir} + --instance_data_root {self.dataset_path} --caption_column {self.caption_column} --video_column {self.video_column} --rank 1 @@ -290,7 +276,7 @@ class CogVideoXLoRA(ExamplesTestsAccelerate): --height 32 --width 32 --fps 8 - --max_num_frames 9 + --max_num_frames {self.max_num_frames} --train_batch_size 1 --gradient_accumulation_steps 1 --learning_rate 1e-3