From 5fbb4d32d50d8aa411b06fb6b2b08203ddc54b0b Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Thu, 25 Jul 2024 16:00:43 +0530 Subject: [PATCH] [CI] Slow Test Updates (#8870) * update * update * update --- .github/workflows/nightly_tests.yml | 101 +++--------------- .github/workflows/push_tests.yml | 72 ++----------- docker/diffusers-onnxruntime-cuda/Dockerfile | 1 + .../diffusers-pytorch-compile-cuda/Dockerfile | 1 + docker/diffusers-pytorch-cuda/Dockerfile | 1 + .../Dockerfile | 1 + {scripts => utils}/log_reports.py | 0 7 files changed, 29 insertions(+), 148 deletions(-) rename {scripts => utils}/log_reports.py (100%) diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml index 3862cfc7d7..986dd83527 100644 --- a/.github/workflows/nightly_tests.yml +++ b/.github/workflows/nightly_tests.yml @@ -7,7 +7,7 @@ on: env: DIFFUSERS_IS_CI: yes - HF_HOME: /mnt/cache + HF_HUB_ENABLE_HF_TRANSFER: 1 OMP_NUM_THREADS: 8 MKL_NUM_THREADS: 8 PYTEST_TIMEOUT: 600 @@ -27,10 +27,6 @@ jobs: uses: actions/checkout@v3 with: fetch-depth: 2 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: "3.8" - name: Install dependencies run: | pip install -e . @@ -50,16 +46,17 @@ jobs: path: reports run_nightly_tests_for_torch_pipelines: - name: Torch Pipelines CUDA Nightly Tests + name: Nightly Torch Pipelines CUDA Tests needs: setup_torch_cuda_pipeline_matrix strategy: fail-fast: false + max-parallel: 8 matrix: module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }} runs-on: [single-gpu, nvidia-gpu, t4, ci] container: image: diffusers/diffusers-pytorch-cuda - options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0 + options: --shm-size "16gb" --ipc host --gpus 0 steps: - name: Checkout diffusers uses: actions/checkout@v3 @@ -67,19 +64,16 @@ jobs: fetch-depth: 2 - name: NVIDIA-SMI run: nvidia-smi - - name: Install dependencies run: | python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH" python -m uv pip install -e [quality,test] python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install pytest-reportlog - - name: Environment run: | python utils/print_env.py - - - name: Nightly PyTorch CUDA checkpoint (pipelines) tests + - name: Pipeline CUDA Test env: HF_TOKEN: ${{ secrets.HF_TOKEN }} # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms @@ -90,38 +84,36 @@ jobs: --make-reports=tests_pipeline_${{ matrix.module }}_cuda \ --report-log=tests_pipeline_${{ matrix.module }}_cuda.log \ tests/pipelines/${{ matrix.module }} - - name: Failure short reports if: ${{ failure() }} run: | cat reports/tests_pipeline_${{ matrix.module }}_cuda_stats.txt cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt - - name: Test suite reports artifacts if: ${{ always() }} uses: actions/upload-artifact@v2 with: name: pipeline_${{ matrix.module }}_test_reports path: reports - - name: Generate Report and Notify Channel if: always() run: | pip install slack_sdk tabulate - python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY + python utils/log_reports.py >> $GITHUB_STEP_SUMMARY run_nightly_tests_for_other_torch_modules: - name: Torch Non-Pipelines CUDA Nightly Tests + name: Nightly Torch CUDA Tests runs-on: [single-gpu, nvidia-gpu, t4, ci] container: image: diffusers/diffusers-pytorch-cuda - options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0 + options: --shm-size "16gb" --ipc host --gpus 0 defaults: run: shell: bash strategy: matrix: - module: [models, schedulers, others, examples] + max-parallel: 2 + module: [models, schedulers, lora, others, single_file, examples] steps: - name: Checkout diffusers uses: actions/checkout@v3 @@ -133,8 +125,8 @@ jobs: python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH" python -m uv pip install -e [quality,test] python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git + python -m uv pip install peft@git+https://github.com/huggingface/peft.git python -m uv pip install pytest-reportlog - - name: Environment run: python utils/print_env.py @@ -158,7 +150,6 @@ jobs: # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms CUBLAS_WORKSPACE_CONFIG: :16:8 run: | - python -m uv pip install peft@git+https://github.com/huggingface/peft.git python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ -s -v --make-reports=examples_torch_cuda \ --report-log=examples_torch_cuda.log \ @@ -181,64 +172,7 @@ jobs: if: always() run: | pip install slack_sdk tabulate - python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY - - run_lora_nightly_tests: - name: Nightly LoRA Tests with PEFT and TORCH - runs-on: [single-gpu, nvidia-gpu, t4, ci] - container: - image: diffusers/diffusers-pytorch-cuda - options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0 - defaults: - run: - shell: bash - steps: - - name: Checkout diffusers - uses: actions/checkout@v3 - with: - fetch-depth: 2 - - - name: Install dependencies - run: | - python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH" - python -m uv pip install -e [quality,test] - python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git - python -m uv pip install peft@git+https://github.com/huggingface/peft.git - python -m uv pip install pytest-reportlog - - - name: Environment - run: python utils/print_env.py - - - name: Run nightly LoRA tests with PEFT and Torch - env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} - # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms - CUBLAS_WORKSPACE_CONFIG: :16:8 - run: | - python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ - -s -v -k "not Flax and not Onnx" \ - --make-reports=tests_torch_lora_cuda \ - --report-log=tests_torch_lora_cuda.log \ - tests/lora - - - name: Failure short reports - if: ${{ failure() }} - run: | - cat reports/tests_torch_lora_cuda_stats.txt - cat reports/tests_torch_lora_cuda_failures_short.txt - - - name: Test suite reports artifacts - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: torch_lora_cuda_test_reports - path: reports - - - name: Generate Report and Notify Channel - if: always() - run: | - pip install slack_sdk tabulate - python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY + python utils/log_reports.py >> $GITHUB_STEP_SUMMARY run_flax_tpu_tests: name: Nightly Flax TPU Tests @@ -294,14 +228,14 @@ jobs: if: always() run: | pip install slack_sdk tabulate - python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY + python utils/log_reports.py >> $GITHUB_STEP_SUMMARY run_nightly_onnx_tests: name: Nightly ONNXRuntime CUDA tests on Ubuntu runs-on: [single-gpu, nvidia-gpu, t4, ci] container: image: diffusers/diffusers-onnxruntime-cuda - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ + options: --gpus 0 --shm-size "16gb" --ipc host steps: - name: Checkout diffusers @@ -318,11 +252,10 @@ jobs: python -m uv pip install -e [quality,test] python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git python -m uv pip install pytest-reportlog - - name: Environment run: python utils/print_env.py - - name: Run nightly ONNXRuntime CUDA tests + - name: Run Nightly ONNXRuntime CUDA tests env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | @@ -349,7 +282,7 @@ jobs: if: always() run: | pip install slack_sdk tabulate - python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY + python utils/log_reports.py >> $GITHUB_STEP_SUMMARY run_nightly_tests_apple_m1: name: Nightly PyTorch MPS tests on MacOS @@ -411,4 +344,4 @@ jobs: if: always() run: | pip install slack_sdk tabulate - python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY + python utils/log_reports.py >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml index 19deecb2be..e12ead72f9 100644 --- a/.github/workflows/push_tests.yml +++ b/.github/workflows/push_tests.yml @@ -11,11 +11,9 @@ on: env: DIFFUSERS_IS_CI: yes - HF_HOME: /mnt/cache OMP_NUM_THREADS: 8 MKL_NUM_THREADS: 8 PYTEST_TIMEOUT: 600 - RUN_SLOW: yes PIPELINE_USAGE_CUTOFF: 50000 jobs: @@ -52,7 +50,7 @@ jobs: path: reports torch_pipelines_cuda_tests: - name: Torch Pipelines CUDA Slow Tests + name: Torch Pipelines CUDA Tests needs: setup_torch_cuda_pipeline_matrix strategy: fail-fast: false @@ -62,7 +60,7 @@ jobs: runs-on: [single-gpu, nvidia-gpu, t4, ci] container: image: diffusers/diffusers-pytorch-cuda - options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0 + options: --shm-size "16gb" --ipc host --gpus 0 steps: - name: Checkout diffusers uses: actions/checkout@v3 @@ -106,7 +104,7 @@ jobs: runs-on: [single-gpu, nvidia-gpu, t4, ci] container: image: diffusers/diffusers-pytorch-cuda - options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0 + options: --shm-size "16gb" --ipc host --gpus 0 defaults: run: shell: bash @@ -124,12 +122,13 @@ jobs: python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH" python -m uv pip install -e [quality,test] python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git + python -m uv pip install peft@git+https://github.com/huggingface/peft.git - name: Environment run: | python utils/print_env.py - - name: Run slow PyTorch CUDA tests + - name: Run PyTorch CUDA tests env: HF_TOKEN: ${{ secrets.HF_TOKEN }} # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms @@ -153,61 +152,6 @@ jobs: name: torch_cuda_test_reports path: reports - peft_cuda_tests: - name: PEFT CUDA Tests - runs-on: [single-gpu, nvidia-gpu, t4, ci] - container: - image: diffusers/diffusers-pytorch-cuda - options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0 - defaults: - run: - shell: bash - steps: - - name: Checkout diffusers - uses: actions/checkout@v3 - with: - fetch-depth: 2 - - - name: Install dependencies - run: | - python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH" - python -m uv pip install -e [quality,test] - python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git - python -m pip install -U peft@git+https://github.com/huggingface/peft.git - - - name: Environment - run: | - python utils/print_env.py - - - name: Run slow PEFT CUDA tests - env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} - # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms - CUBLAS_WORKSPACE_CONFIG: :16:8 - run: | - python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ - -s -v -k "not Flax and not Onnx and not PEFTLoRALoading" \ - --make-reports=tests_peft_cuda \ - tests/lora/ - python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ - -s -v -k "lora and not Flax and not Onnx and not PEFTLoRALoading" \ - --make-reports=tests_peft_cuda_models_lora \ - tests/models/ - - - name: Failure short reports - if: ${{ failure() }} - run: | - cat reports/tests_peft_cuda_stats.txt - cat reports/tests_peft_cuda_failures_short.txt - cat reports/tests_peft_cuda_models_lora_failures_short.txt - - - name: Test suite reports artifacts - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: torch_peft_test_reports - path: reports - flax_tpu_tests: name: Flax TPU Tests runs-on: docker-tpu @@ -309,7 +253,7 @@ jobs: container: image: diffusers/diffusers-pytorch-compile-cuda - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + options: --gpus 0 --shm-size "16gb" --ipc host steps: - name: Checkout diffusers @@ -351,7 +295,7 @@ jobs: container: image: diffusers/diffusers-pytorch-xformers-cuda - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + options: --gpus 0 --shm-size "16gb" --ipc host steps: - name: Checkout diffusers @@ -392,7 +336,7 @@ jobs: container: image: diffusers/diffusers-pytorch-cuda - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + options: --gpus 0 --shm-size "16gb" --ipc host steps: - name: Checkout diffusers diff --git a/docker/diffusers-onnxruntime-cuda/Dockerfile b/docker/diffusers-onnxruntime-cuda/Dockerfile index 2019217553..3364698fe9 100644 --- a/docker/diffusers-onnxruntime-cuda/Dockerfile +++ b/docker/diffusers-onnxruntime-cuda/Dockerfile @@ -38,6 +38,7 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \ datasets \ hf-doc-builder \ huggingface-hub \ + hf_transfer \ Jinja2 \ librosa \ numpy==1.26.4 \ diff --git a/docker/diffusers-pytorch-compile-cuda/Dockerfile b/docker/diffusers-pytorch-compile-cuda/Dockerfile index c278dbc4d2..a5454328b8 100644 --- a/docker/diffusers-pytorch-compile-cuda/Dockerfile +++ b/docker/diffusers-pytorch-compile-cuda/Dockerfile @@ -38,6 +38,7 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \ datasets \ hf-doc-builder \ huggingface-hub \ + hf_transfer \ Jinja2 \ librosa \ numpy==1.26.4 \ diff --git a/docker/diffusers-pytorch-cuda/Dockerfile b/docker/diffusers-pytorch-cuda/Dockerfile index c9413053de..8b5439ffb6 100644 --- a/docker/diffusers-pytorch-cuda/Dockerfile +++ b/docker/diffusers-pytorch-cuda/Dockerfile @@ -38,6 +38,7 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \ datasets \ hf-doc-builder \ huggingface-hub \ + hf_transfer \ Jinja2 \ librosa \ numpy==1.26.4 \ diff --git a/docker/diffusers-pytorch-xformers-cuda/Dockerfile b/docker/diffusers-pytorch-xformers-cuda/Dockerfile index 7dffe9e9a8..7a3408c486 100644 --- a/docker/diffusers-pytorch-xformers-cuda/Dockerfile +++ b/docker/diffusers-pytorch-xformers-cuda/Dockerfile @@ -38,6 +38,7 @@ RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \ datasets \ hf-doc-builder \ huggingface-hub \ + hf_transfer \ Jinja2 \ librosa \ numpy==1.26.4 \ diff --git a/scripts/log_reports.py b/utils/log_reports.py similarity index 100% rename from scripts/log_reports.py rename to utils/log_reports.py