From cc22bda5f6ced5c6c810744fcf59d017e3278d2b Mon Sep 17 00:00:00 2001
From: Anton Lozhkov
Date: Mon, 5 Dec 2022 11:50:24 +0100
Subject: [PATCH] [CI] Add slow MPS tests (#1104)

* [CI] Add slow MPS tests

* fix yml

* temporarily resolve caching

* Tests: fix mps crashes.

* Skip test_load_pipeline_from_git on mps. Not compatible with float16.

* Increase tolerance, use CPU generator, alt. slices.

* Move to nightly

* style

Co-authored-by: Pedro Cuenca
---
 .github/workflows/nightly_tests.yml   | 66 +++++++++++++++++++
 .github/workflows/pr_tests.yml        |  6 +-
 .github/workflows/push_tests.yml      |  2 +-
 tests/models/test_models_vae.py       | 22 ++++---
 .../test_latent_diffusion_uncond.py   |  8 ++-
 5 files changed, 89 insertions(+), 15 deletions(-)
 create mode 100644 .github/workflows/nightly_tests.yml

diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml
new file mode 100644
index 0000000000..c46612bddc
--- /dev/null
+++ b/.github/workflows/nightly_tests.yml
@@ -0,0 +1,66 @@
+name: Nightly integration tests
+
+on:
+  schedule:
+    - cron: "0 0 * * *"  # every day at midnight
+
+env:
+  DIFFUSERS_IS_CI: yes
+  HF_HOME: /mnt/cache
+  OMP_NUM_THREADS: 8
+  MKL_NUM_THREADS: 8
+  PYTEST_TIMEOUT: 1000
+  RUN_SLOW: yes
+
+jobs:
+  run_slow_tests_apple_m1:
+    name: Slow PyTorch MPS tests on MacOS
+    runs-on: [ self-hosted, apple-m1 ]
+
+    steps:
+      - name: Checkout diffusers
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 2
+
+      - name: Clean checkout
+        shell: arch -arch arm64 bash {0}
+        run: |
+          git clean -fxd
+
+      - name: Setup miniconda
+        uses: ./.github/actions/setup-miniconda
+        with:
+          python-version: 3.9
+
+      - name: Install dependencies
+        shell: arch -arch arm64 bash {0}
+        run: |
+          ${CONDA_RUN} python -m pip install --upgrade pip
+          ${CONDA_RUN} python -m pip install -e .[quality,test]
+          ${CONDA_RUN} python -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
+          ${CONDA_RUN} python -m pip install git+https://github.com/huggingface/accelerate
+
+      - name: Environment
+        shell: arch -arch arm64 bash {0}
+        run: |
+          ${CONDA_RUN} python utils/print_env.py
+
+      - name: Run slow PyTorch tests on M1 (MPS)
+        shell: arch -arch arm64 bash {0}
+        env:
+          HF_HOME: /System/Volumes/Data/mnt/cache
+          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+        run: |
+          ${CONDA_RUN} python -m pytest -n 1 -s -v --make-reports=tests_torch_mps tests/
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        run: cat reports/tests_torch_mps_failures_short.txt
+
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: torch_mps_test_reports
+          path: reports
\ No newline at end of file
diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml
index 55a9bd68de..60e65b3706 100644
--- a/.github/workflows/pr_tests.yml
+++ b/.github/workflows/pr_tests.yml
@@ -14,7 +14,6 @@ env:
   OMP_NUM_THREADS: 4
   MKL_NUM_THREADS: 4
   PYTEST_TIMEOUT: 60
-  MPS_TORCH_VERSION: 1.13.0
 
 jobs:
   run_fast_tests:
@@ -126,7 +125,7 @@ jobs:
       run: |
         ${CONDA_RUN} python -m pip install --upgrade pip
         ${CONDA_RUN} python -m pip install -e .[quality,test]
-        ${CONDA_RUN} python -m pip install --pre torch==${MPS_TORCH_VERSION} --extra-index-url https://download.pytorch.org/whl/test/cpu
+        ${CONDA_RUN} python -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
         ${CONDA_RUN} python -m pip install git+https://github.com/huggingface/accelerate
         ${CONDA_RUN} python -m pip install -U git+https://github.com/huggingface/transformers
@@ -137,6 +136,9 @@ jobs:
 
     - name: Run fast PyTorch tests on M1 (MPS)
       shell: arch -arch arm64 bash {0}
+      env:
+        HF_HOME: /System/Volumes/Data/mnt/cache
+        HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
       run: |
         ${CONDA_RUN} python -m pytest -n 0 -s -v --make-reports=tests_torch_mps tests/
diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml
index 4bab00b7ee..ad1821621f 100644
--- a/.github/workflows/push_tests.yml
+++ b/.github/workflows/push_tests.yml
@@ -153,4 +153,4 @@ jobs:
       uses: actions/upload-artifact@v2
       with:
         name: examples_test_reports
-        path: reports
+        path: reports
\ No newline at end of file
diff --git a/tests/models/test_models_vae.py b/tests/models/test_models_vae.py
index 1693657561..2948151e3d 100644
--- a/tests/models/test_models_vae.py
+++ b/tests/models/test_models_vae.py
@@ -165,17 +165,19 @@ class AutoencoderKLIntegrationTests(unittest.TestCase):
         return model
 
     def get_generator(self, seed=0):
+        if torch_device == "mps":
+            return torch.Generator().manual_seed(seed)
         return torch.Generator(device=torch_device).manual_seed(seed)
 
     @parameterized.expand(
         [
             # fmt: off
-            [33, [-0.1603, 0.9878, -0.0495, -0.0790, -0.2709, 0.8375, -0.2060, -0.0824]],
-            [47, [-0.2376, 0.1168, 0.1332, -0.4840, -0.2508, -0.0791, -0.0493, -0.4089]],
+            [33, [-0.1603, 0.9878, -0.0495, -0.0790, -0.2709, 0.8375, -0.2060, -0.0824], [-0.2395, 0.0098, 0.0102, -0.0709, -0.2840, -0.0274, -0.0718, -0.1824]],
+            [47, [-0.2376, 0.1168, 0.1332, -0.4840, -0.2508, -0.0791, -0.0493, -0.4089], [0.0350, 0.0847, 0.0467, 0.0344, -0.0842, -0.0547, -0.0633, -0.1131]],
             # fmt: on
         ]
     )
-    def test_stable_diffusion(self, seed, expected_slice):
+    def test_stable_diffusion(self, seed, expected_slice, expected_slice_mps):
         model = self.get_sd_vae_model()
         image = self.get_sd_image(seed)
         generator = self.get_generator(seed)
@@ -186,7 +188,7 @@ class AutoencoderKLIntegrationTests(unittest.TestCase):
         assert sample.shape == image.shape
 
         output_slice = sample[-1, -2:, -2:, :2].flatten().float().cpu()
-        expected_output_slice = torch.tensor(expected_slice)
+        expected_output_slice = torch.tensor(expected_slice_mps if torch_device == "mps" else expected_slice)
 
         assert torch_all_close(output_slice, expected_output_slice, atol=1e-3)
@@ -217,12 +219,12 @@ class AutoencoderKLIntegrationTests(unittest.TestCase):
     @parameterized.expand(
         [
             # fmt: off
-            [33, [-0.1609, 0.9866, -0.0487, -0.0777, -0.2716, 0.8368, -0.2055, -0.0814]],
-            [47, [-0.2377, 0.1147, 0.1333, -0.4841, -0.2506, -0.0805, -0.0491, -0.4085]],
+            [33, [-0.1609, 0.9866, -0.0487, -0.0777, -0.2716, 0.8368, -0.2055, -0.0814], [-0.2395, 0.0098, 0.0102, -0.0709, -0.2840, -0.0274, -0.0718, -0.1824]],
+            [47, [-0.2377, 0.1147, 0.1333, -0.4841, -0.2506, -0.0805, -0.0491, -0.4085], [0.0350, 0.0847, 0.0467, 0.0344, -0.0842, -0.0547, -0.0633, -0.1131]],
             # fmt: on
         ]
     )
-    def test_stable_diffusion_mode(self, seed, expected_slice):
+    def test_stable_diffusion_mode(self, seed, expected_slice, expected_slice_mps):
         model = self.get_sd_vae_model()
         image = self.get_sd_image(seed)
@@ -232,7 +234,7 @@ class AutoencoderKLIntegrationTests(unittest.TestCase):
         assert sample.shape == image.shape
 
         output_slice = sample[-1, -2:, -2:, :2].flatten().float().cpu()
-        expected_output_slice = torch.tensor(expected_slice)
+        expected_output_slice = torch.tensor(expected_slice_mps if torch_device == "mps" else expected_slice)
 
         assert torch_all_close(output_slice, expected_output_slice, atol=1e-3)
@@ -267,6 +269,7 @@ class AutoencoderKLIntegrationTests(unittest.TestCase):
             # fmt: on
         ]
     )
+    @require_torch_gpu
     def test_stable_diffusion_decode_fp16(self, seed, expected_slice):
         model = self.get_sd_vae_model(fp16=True)
         encoding = self.get_sd_image(seed, shape=(3, 4, 64, 64), fp16=True)
@@ -303,4 +306,5 @@ class AutoencoderKLIntegrationTests(unittest.TestCase):
         output_slice = sample[0, -1, -3:, -3:].flatten().cpu()
         expected_output_slice = torch.tensor(expected_slice)
 
-        assert torch_all_close(output_slice, expected_output_slice, atol=1e-3)
+        tolerance = 1e-3 if torch_device != "mps" else 1e-2
+        assert torch_all_close(output_slice, expected_output_slice, atol=tolerance)
diff --git a/tests/pipelines/latent_diffusion/test_latent_diffusion_uncond.py b/tests/pipelines/latent_diffusion/test_latent_diffusion_uncond.py
index dea2971cbb..f063d6759e 100644
--- a/tests/pipelines/latent_diffusion/test_latent_diffusion_uncond.py
+++ b/tests/pipelines/latent_diffusion/test_latent_diffusion_uncond.py
@@ -97,8 +97,9 @@ class LDMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         assert image.shape == (1, 64, 64, 3)
         expected_slice = np.array([0.8512, 0.818, 0.6411, 0.6808, 0.4465, 0.5618, 0.46, 0.6231, 0.5172])
 
-        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
-        assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
+        tolerance = 1e-2 if torch_device != "mps" else 3e-2
+        assert np.abs(image_slice.flatten() - expected_slice).max() < tolerance
+        assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < tolerance
 
 
 @slow
@@ -116,4 +117,5 @@ class LDMPipelineIntegrationTests(unittest.TestCase):
         assert image.shape == (1, 256, 256, 3)
         expected_slice = np.array([0.4399, 0.44975, 0.46825, 0.474, 0.4359, 0.4581, 0.45095, 0.4341, 0.4447])
 
-        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
+        tolerance = 1e-2 if torch_device != "mps" else 3e-2
+        assert np.abs(image_slice.flatten() - expected_slice).max() < tolerance
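
The two fixes this patch repeats across the test files — seeding a CPU torch.Generator when torch_device is "mps" (torch.Generator(device="mps") was not supported at the time), and widening comparison tolerances on MPS — can be captured in a pair of helpers. A minimal standalone sketch, not part of the patch; the inline torch_device probe stands in for the constant the diffusers tests import from their testing utilities:

    import torch

    # Simplified stand-in for the test suite's `torch_device` constant.
    torch_device = "mps" if torch.backends.mps.is_available() else (
        "cuda" if torch.cuda.is_available() else "cpu"
    )


    def get_generator(seed=0):
        # MPS device generators were unsupported when this patch landed,
        # so MPS runs fall back to a (still deterministic) CPU generator.
        if torch_device == "mps":
            return torch.Generator().manual_seed(seed)
        return torch.Generator(device=torch_device).manual_seed(seed)


    def atol_for_device(default_atol, mps_atol):
        # MPS kernels drift a little more numerically than CPU/CUDA,
        # hence the looser bound the patch applies on that backend.
        return mps_atol if torch_device == "mps" else default_atol


    # Usage mirroring the patched assertions: draw noise on the generator's
    # device, then compare with the device-appropriate tolerance.
    sample_device = "cpu" if torch_device == "mps" else torch_device
    sample = torch.randn(8, device=sample_device, generator=get_generator(seed=33))
    assert torch.allclose(sample, sample.clone(), atol=atol_for_device(1e-3, 1e-2))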
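The patch handles per-device reference values by threading a third expected_slice_mps argument through each parameterized.expand row; an alternative would be a small per-device lookup. A hypothetical sketch (EXPECTED and expected_for are illustrative names, not from the codebase; the seed-33 values are copied from the patch above):

    import torch

    # seed -> {device: reference slice}; seed-33 values taken from the patch.
    EXPECTED = {
        33: {
            "default": [-0.1603, 0.9878, -0.0495, -0.0790, -0.2709, 0.8375, -0.2060, -0.0824],
            "mps": [-0.2395, 0.0098, 0.0102, -0.0709, -0.2840, -0.0274, -0.0718, -0.1824],
        },
    }


    def expected_for(seed, device):
        # Fall back to the CUDA/CPU reference when no MPS-specific slice exists.
        per_seed = EXPECTED[seed]
        return torch.tensor(per_seed.get(device, per_seed["default"]))


    # In a test body this would replace the ternary on expected_slice_mps:
    # assert torch_all_close(output_slice, expected_for(33, torch_device), atol=1e-3)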