1
0
mirror of https://github.com/huggingface/diffusers.git synced 2026-01-27 17:22:53 +03:00

[CI] Framework and hardware-specific CI tests (#997)

* [WIP][CI] Framework and hardware-specific docker images for CI tests

* username

* fix cpu

* try out the image

* push latest

* update workspace

* no root isolation for actions

* add a flax image

* flax and onnx matrix

* fix runners

* add reports

* onnxruntime image

* retry tpu

* fix

* fix

* build onnxruntime

* naming

* onnxruntime-gpu image

* onnxruntime-gpu image, slow tests

* latest jax version

* trigger flax

* run flax tests in one thread

* fast flax tests on cpu

* fast flax tests on cpu

* trigger slow tests

* rebuild torch cuda

* force cuda provider

* fix onnxruntime tests

* trigger slow

* don't specify gpu for tpu

* optimize

* memory limit

* fix flax tests

* disable docker cache
This commit is contained in:
Anton Lozhkov
2022-11-02 14:07:07 +01:00
committed by GitHub
parent b1ec61ee45
commit 4e59bcc680
16 changed files with 540 additions and 67 deletions

View File

@@ -0,0 +1,50 @@
name: Build Docker images (nightly)
on:
workflow_dispatch:
schedule:
- cron: "0 0 * * *" # every day at midnight
concurrency:
group: docker-image-builds
cancel-in-progress: false
env:
REGISTRY: diffusers
jobs:
build-docker-images:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
strategy:
fail-fast: false
matrix:
image-name:
- diffusers-pytorch-cpu
- diffusers-pytorch-cuda
- diffusers-flax-cpu
- diffusers-flax-tpu
- diffusers-onnxruntime-cpu
- diffusers-onnxruntime-cuda
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Login to Docker Hub
uses: docker/login-action@v2
with:
username: ${{ env.REGISTRY }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Build and push
uses: docker/build-push-action@v3
with:
no-cache: true
context: ./docker/${{ matrix.image-name }}
push: true
tags: ${{ env.REGISTRY }}/${{ matrix.image-name }}:latest

View File

@@ -11,19 +11,45 @@ concurrency:
env:
DIFFUSERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
OMP_NUM_THREADS: 4
MKL_NUM_THREADS: 4
PYTEST_TIMEOUT: 60
MPS_TORCH_VERSION: 1.13.0
jobs:
run_tests_cpu:
name: CPU tests on Ubuntu
runs-on: [ self-hosted, docker-gpu ]
run_fast_tests:
strategy:
fail-fast: false
matrix:
config:
- name: Fast PyTorch CPU tests on Ubuntu
framework: pytorch
runner: docker-cpu
image: diffusers/diffusers-pytorch-cpu
report: torch_cpu
- name: Fast Flax CPU tests on Ubuntu
framework: flax
runner: docker-cpu
image: diffusers/diffusers-flax-cpu
report: flax_cpu
- name: Fast ONNXRuntime CPU tests on Ubuntu
framework: onnxruntime
runner: docker-cpu
image: diffusers/diffusers-onnxruntime-cpu
report: onnx_cpu
name: ${{ matrix.config.name }}
runs-on: ${{ matrix.config.runner }}
container:
image: python:3.7
image: ${{ matrix.config.image }}
options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
defaults:
run:
shell: bash
steps:
- name: Checkout diffusers
uses: actions/checkout@v3
@@ -32,8 +58,6 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cpu
python -m pip install -e .[quality,test]
python -m pip install git+https://github.com/huggingface/accelerate
@@ -41,25 +65,49 @@ jobs:
run: |
python utils/print_env.py
- name: Run all fast tests on CPU
- name: Run fast PyTorch CPU tests
if: ${{ matrix.config.framework == 'pytorch' }}
env:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
run: |
python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=tests_torch_cpu tests/
python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile \
-s -v -k "not Flax and not Onnx" \
--make-reports=tests_${{ matrix.config.report }} \
tests/
- name: Run fast Flax TPU tests
if: ${{ matrix.config.framework == 'flax' }}
env:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
run: |
python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile \
-s -v -k "Flax" \
--make-reports=tests_${{ matrix.config.report }} \
tests/
- name: Run fast ONNXRuntime CPU tests
if: ${{ matrix.config.framework == 'onnxruntime' }}
env:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
run: |
python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile \
-s -v -k "Onnx" \
--make-reports=tests_${{ matrix.config.report }} \
tests/
- name: Failure short reports
if: ${{ failure() }}
run: cat reports/tests_torch_cpu_failures_short.txt
run: cat reports/tests_${{ matrix.config.report }}_failures_short.txt
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v2
with:
name: pr_torch_cpu_test_reports
name: pr_${{ matrix.config.report }}_test_reports
path: reports
run_tests_apple_m1:
name: MPS tests on Apple M1
run_fast_tests_apple_m1:
name: Fast PyTorch MPS tests on MacOS
runs-on: [ self-hosted, apple-m1 ]
steps:
@@ -91,7 +139,7 @@ jobs:
run: |
${CONDA_RUN} python utils/print_env.py
- name: Run all fast tests on MPS
- name: Run fast PyTorch tests on M1 (MPS)
shell: arch -arch arm64 bash {0}
env:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

View File

@@ -14,12 +14,38 @@ env:
RUN_SLOW: yes
jobs:
run_tests_single_gpu:
name: Diffusers tests
runs-on: [ self-hosted, docker-gpu, single-gpu ]
run_slow_tests:
strategy:
fail-fast: false
matrix:
config:
- name: Slow PyTorch CUDA tests on Ubuntu
framework: pytorch
runner: docker-gpu
image: diffusers/diffusers-pytorch-cuda
report: torch_cuda
- name: Slow Flax TPU tests on Ubuntu
framework: flax
runner: docker-tpu
image: diffusers/diffusers-flax-tpu
report: flax_tpu
- name: Slow ONNXRuntime CUDA tests on Ubuntu
framework: onnxruntime
runner: docker-gpu
image: diffusers/diffusers-onnxruntime-cuda
report: onnx_cuda
name: ${{ matrix.config.name }}
runs-on: ${{ matrix.config.runner }}
container:
image: nvcr.io/nvidia/pytorch:22.07-py3
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache
image: ${{ matrix.config.image }}
options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ ${{ matrix.config.runner == 'docker-tpu' && '--privileged' || '--gpus 0'}}
defaults:
run:
shell: bash
steps:
- name: Checkout diffusers
@@ -28,14 +54,12 @@ jobs:
fetch-depth: 2
- name: NVIDIA-SMI
if : ${{ matrix.config.runner == 'docker-gpu' }}
run: |
nvidia-smi
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip uninstall -y torch torchvision torchtext
python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu117
python -m pip install -e .[quality,test]
python -m pip install git+https://github.com/huggingface/accelerate
@@ -43,29 +67,55 @@ jobs:
run: |
python utils/print_env.py
- name: Run all (incl. slow) tests on GPU
- name: Run slow PyTorch CUDA tests
if: ${{ matrix.config.framework == 'pytorch' }}
env:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
run: |
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=tests_torch_gpu tests/
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-s -v -k "not Flax and not Onnx" \
--make-reports=tests_${{ matrix.config.report }} \
tests/
- name: Run slow Flax TPU tests
if: ${{ matrix.config.framework == 'flax' }}
env:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
run: |
python -m pytest -n 0 \
-s -v -k "Flax" \
--make-reports=tests_${{ matrix.config.report }} \
tests/
- name: Run slow ONNXRuntime CUDA tests
if: ${{ matrix.config.framework == 'onnxruntime' }}
env:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
run: |
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
-s -v -k "Onnx" \
--make-reports=tests_${{ matrix.config.report }} \
tests/
- name: Failure short reports
if: ${{ failure() }}
run: cat reports/tests_torch_gpu_failures_short.txt
run: cat reports/tests_${{ matrix.config.report }}_failures_short.txt
- name: Test suite reports artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v2
with:
name: torch_test_reports
name: ${{ matrix.config.report }}_test_reports
path: reports
run_examples_single_gpu:
name: Examples tests
runs-on: [ self-hosted, docker-gpu, single-gpu ]
run_examples_tests:
name: Examples PyTorch CUDA tests on Ubuntu
runs-on: docker-gpu
container:
image: nvcr.io/nvidia/pytorch:22.07-py3
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache
image: diffusers/diffusers-pytorch-cuda
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
steps:
- name: Checkout diffusers
@@ -79,9 +129,6 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip uninstall -y torch torchvision torchtext
python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu117
python -m pip install -e .[quality,test,training]
python -m pip install git+https://github.com/huggingface/accelerate
@@ -93,11 +140,11 @@ jobs:
env:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
run: |
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=examples_torch_gpu examples/
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=examples_torch_cuda examples/
- name: Failure short reports
if: ${{ failure() }}
run: cat reports/examples_torch_gpu_failures_short.txt
run: cat reports/examples_torch_cuda_failures_short.txt
- name: Test suite reports artifacts
if: ${{ always() }}