From ac7ff7d4a319320c1bd5e428119fa1986f3f38ce Mon Sep 17 00:00:00 2001
From: Sayak Paul
Date: Wed, 10 Apr 2024 08:28:49 +0530
Subject: [PATCH] add utilities for updating diffusers pipeline metadata. (#7573)

* add utilities for updating diffusers pipeline metadata.

* style

* remove first empty line
---
 .github/workflows/update_metadata.yml |  30 +++++++
 utils/update_metadata.py              | 115 ++++++++++++++++++++++++++
 2 files changed, 145 insertions(+)
 create mode 100644 .github/workflows/update_metadata.yml
 create mode 100644 utils/update_metadata.py

diff --git a/.github/workflows/update_metadata.yml b/.github/workflows/update_metadata.yml
new file mode 100644
index 0000000000..33d162ef8d
--- /dev/null
+++ b/.github/workflows/update_metadata.yml
@@ -0,0 +1,30 @@
+name: Update Diffusers metadata
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+      - update_diffusers_metadata*
+
+jobs:
+  update_metadata:
+    runs-on: ubuntu-22.04
+    defaults:
+      run:
+        shell: bash -l {0}
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Setup environment
+        run: |
+          pip install --upgrade pip
+          pip install datasets pandas
+          pip install .[torch]
+
+      - name: Update metadata
+        env:
+          HUGGING_FACE_HUB_TOKEN: ${{ secrets.DIFFUSERS_BOT_TOKEN }}
+        run: |
+          python utils/update_metadata.py --commit_sha ${{ github.sha }}
diff --git a/utils/update_metadata.py b/utils/update_metadata.py
new file mode 100644
index 0000000000..840e4be784
--- /dev/null
+++ b/utils/update_metadata.py
@@ -0,0 +1,115 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Utility that updates the metadata of the Diffusers library in the repository `huggingface/diffusers-metadata`.
+
+Usage for an update (as used by the GitHub action `update_metadata`):
+
+```bash
+python utils/update_metadata.py --commit_sha <commit_sha>
+```
+
+Script modified from:
+https://github.com/huggingface/transformers/blob/main/utils/update_metadata.py
+"""
+import argparse
+import os
+import tempfile
+from typing import Optional
+
+import pandas as pd
+from datasets import Dataset
+from huggingface_hub import upload_folder
+
+from diffusers.pipelines.auto_pipeline import (
+    AUTO_IMAGE2IMAGE_PIPELINES_MAPPING,
+    AUTO_INPAINT_PIPELINES_MAPPING,
+    AUTO_TEXT2IMAGE_PIPELINES_MAPPING,
+)
+
+
+def get_supported_pipeline_table() -> dict:
+    """
+    Generates a dictionary containing the supported auto classes for each pipeline type,
+    using the content of the auto modules.
+
+    Returns:
+        `dict`: Three lists of equal length, stored under the keys `pipeline_class`, `pipeline_tag` and
+        `auto_class`. Entry `i` of each list belongs to the same row; rows are unique and sorted.
+    """
+    # All supported pipelines for automatic mapping.
+    all_supported_pipeline_classes = [
+        (pipeline_class.__name__, "text-to-image", "AutoPipelineForText2Image")
+        for pipeline_class in AUTO_TEXT2IMAGE_PIPELINES_MAPPING.values()
+    ]
+    all_supported_pipeline_classes += [
+        (pipeline_class.__name__, "image-to-image", "AutoPipelineForImage2Image")
+        for pipeline_class in AUTO_IMAGE2IMAGE_PIPELINES_MAPPING.values()
+    ]
+    all_supported_pipeline_classes += [
+        (pipeline_class.__name__, "image-to-image", "AutoPipelineForInpainting")
+        for pipeline_class in AUTO_INPAINT_PIPELINES_MAPPING.values()
+    ]
+    # Deduplicate first and sort last: a `set` has no defined order, so sorting before the deduplication
+    # would be undone and the rows of the uploaded file could be reordered from one run to the next.
+    all_supported_pipeline_classes = sorted(set(all_supported_pipeline_classes))
+
+    data = {}
+    data["pipeline_class"] = [sample[0] for sample in all_supported_pipeline_classes]
+    data["pipeline_tag"] = [sample[1] for sample in all_supported_pipeline_classes]
+    data["auto_class"] = [sample[2] for sample in all_supported_pipeline_classes]
+
+    return data
+
+
+def update_metadata(commit_sha: Optional[str] = None) -> None:
+    """
+    Update the metadata for the Diffusers repo in `huggingface/diffusers-metadata`.
+
+    Args:
+        commit_sha (`str`, *optional*):
+            The commit SHA on Diffusers corresponding to this update. When `None`, a generic commit message
+            is used instead of one linking to the commit.
+    """
+    pipelines_table = get_supported_pipeline_table()
+    pipelines_table = pd.DataFrame(pipelines_table)
+    pipelines_dataset = Dataset.from_pandas(pipelines_table)
+
+    # The dataset is written to a temporary folder that is uploaded as a whole and removed afterwards.
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        pipelines_dataset.to_json(os.path.join(tmp_dir, "pipeline_tags.json"))
+
+        if commit_sha is not None:
+            commit_message = (
+                f"Update with commit {commit_sha}\n\nSee: "
+                f"https://github.com/huggingface/diffusers/commit/{commit_sha}"
+            )
+        else:
+            commit_message = "Update"
+
+        upload_folder(
+            repo_id="huggingface/diffusers-metadata",
+            folder_path=tmp_dir,
+            repo_type="dataset",
+            commit_message=commit_message,
+        )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--commit_sha", default=None, type=str, help="The sha of the commit going with this update.")
+    args = parser.parse_args()
+
+    update_metadata(args.commit_sha)