diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index 9c93b35956..5670cac797 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -120,3 +120,24 @@ git pull ``` Your Python environment will find the `main` version of 🤗 Diffusers on the next run. + +## Notice on telemetry logging + +Our library gathers telemetry information during `from_pretrained()` requests. +This data includes the version of Diffusers and PyTorch/Flax, the requested model or pipeline class, +and the path to a pretrained checkpoint if it is hosted on the Hub. +This usage data helps us debug issues and prioritize new features. +No private data, such as paths to models saved locally on disk, is ever collected. + +We understand that not everyone wants to share additional information, and we respect your privacy, +so you can disable telemetry collection by setting the `DISABLE_TELEMETRY` environment variable from your terminal: + +On Linux/MacOS: +```bash +export DISABLE_TELEMETRY=YES +``` + +On Windows: +```bash +set DISABLE_TELEMETRY=YES +``` \ No newline at end of file diff --git a/src/diffusers/hub_utils.py b/src/diffusers/hub_utils.py index a1772d8f70..fbf97ecb0f 100644 --- a/src/diffusers/hub_utils.py +++ b/src/diffusers/hub_utils.py @@ -20,10 +20,11 @@ from pathlib import Path from typing import Dict, Optional, Union from uuid import uuid4 +import requests from huggingface_hub import HfFolder, whoami from . 
import __version__ -from .utils import ENV_VARS_TRUE_VALUES, logging +from .utils import ENV_VARS_TRUE_VALUES, HUGGINGFACE_CO_RESOLVE_ENDPOINT, logging from .utils.import_utils import ( _flax_version, _jax_version, @@ -45,7 +46,9 @@ logger = logging.get_logger(__name__) MODEL_CARD_TEMPLATE_PATH = Path(__file__).parent / "utils" / "model_card_template.md" SESSION_ID = uuid4().hex +HF_HUB_OFFLINE = os.getenv("HF_HUB_OFFLINE", "").upper() in ENV_VARS_TRUE_VALUES DISABLE_TELEMETRY = os.getenv("DISABLE_TELEMETRY", "").upper() in ENV_VARS_TRUE_VALUES +HUGGINGFACE_CO_TELEMETRY = HUGGINGFACE_CO_RESOLVE_ENDPOINT + "/api/telemetry/" def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str: @@ -72,6 +75,27 @@ def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str: return ua +def send_telemetry(data: Dict, name: str): + """ + Sends logs to the Hub telemetry endpoint. + + Args: + data: the fields to track, e.g. {"example_name": "dreambooth"} + name: a unique name to differentiate the telemetry logs, e.g. "diffusers_examples" or "diffusers_notebooks" + """ + if DISABLE_TELEMETRY or HF_HUB_OFFLINE: + return + + headers = {"user-agent": http_user_agent(data)} + endpoint = HUGGINGFACE_CO_TELEMETRY + name + try: + r = requests.head(endpoint, headers=headers) + r.raise_for_status() + except Exception: + # We don't want to error in case of connection errors of any kind. + pass + + def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None): if token is None: token = HfFolder.get_token() diff --git a/src/diffusers/modeling_flax_utils.py b/src/diffusers/modeling_flax_utils.py index 857fdd1b0b..fcada33773 100644 --- a/src/diffusers/modeling_flax_utils.py +++ b/src/diffusers/modeling_flax_utils.py @@ -28,6 +28,7 @@ from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, R from requests import HTTPError from . 
import __version__, is_torch_available +from .hub_utils import send_telemetry from .modeling_flax_pytorch_utils import convert_pytorch_state_dict_to_flax from .utils import ( CONFIG_NAME, @@ -339,6 +340,10 @@ class FlaxModelMixin: f"Error no file named {FLAX_WEIGHTS_NAME} or {WEIGHTS_NAME} found in directory " f"{pretrained_path_with_subfolder}." ) + send_telemetry( + {"model_class": cls.__name__, "model_path": "local", "framework": "flax"}, + name="diffusers_from_pretrained", + ) else: try: model_file = hf_hub_download( @@ -354,6 +359,10 @@ class FlaxModelMixin: subfolder=subfolder, revision=revision, ) + send_telemetry( + {"model_class": cls.__name__, "model_path": "hub", "framework": "flax"}, + name="diffusers_from_pretrained", + ) except RepositoryNotFoundError: raise EnvironmentError( diff --git a/src/diffusers/modeling_utils.py b/src/diffusers/modeling_utils.py index e270f75e05..280dca0005 100644 --- a/src/diffusers/modeling_utils.py +++ b/src/diffusers/modeling_utils.py @@ -26,6 +26,7 @@ from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, R from requests import HTTPError from . 
import __version__ +from .hub_utils import send_telemetry from .utils import ( CONFIG_NAME, DIFFUSERS_CACHE, @@ -400,7 +401,7 @@ class ModelMixin(torch.nn.Module): model_file = None if is_safetensors_available(): try: - model_file = _get_model_file( + model_file = cls._get_model_file( pretrained_model_name_or_path, weights_name=SAFETENSORS_WEIGHTS_NAME, cache_dir=cache_dir, @@ -416,7 +417,7 @@ class ModelMixin(torch.nn.Module): except: pass if model_file is None: - model_file = _get_model_file( + model_file = cls._get_model_file( pretrained_model_name_or_path, weights_name=WEIGHTS_NAME, cache_dir=cache_dir, @@ -531,6 +532,100 @@ class ModelMixin(torch.nn.Module): return model + @classmethod + def _get_model_file( + cls, + pretrained_model_name_or_path, + *, + weights_name, + subfolder, + cache_dir, + force_download, + proxies, + resume_download, + local_files_only, + use_auth_token, + user_agent, + revision, + ): + pretrained_model_name_or_path = str(pretrained_model_name_or_path) + if os.path.isdir(pretrained_model_name_or_path): + if os.path.isfile(os.path.join(pretrained_model_name_or_path, weights_name)): + # Load from a PyTorch checkpoint + model_file = os.path.join(pretrained_model_name_or_path, weights_name) + elif subfolder is not None and os.path.isfile( + os.path.join(pretrained_model_name_or_path, subfolder, weights_name) + ): + model_file = os.path.join(pretrained_model_name_or_path, subfolder, weights_name) + else: + raise EnvironmentError( + f"Error no file named {weights_name} found in directory {pretrained_model_name_or_path}." 
+ ) + send_telemetry( + {"model_class": cls.__name__, "model_path": "local", "framework": "pytorch"}, + name="diffusers_from_pretrained", + ) + return model_file + else: + try: + # Load from URL or cache if already cached + model_file = hf_hub_download( + pretrained_model_name_or_path, + filename=weights_name, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + resume_download=resume_download, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + user_agent=user_agent, + subfolder=subfolder, + revision=revision, + ) + send_telemetry( + {"model_class": cls.__name__, "model_path": "hub", "framework": "pytorch"}, + name="diffusers_from_pretrained", + ) + return model_file + + except RepositoryNotFoundError: + raise EnvironmentError( + f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier " + "listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a " + "token having permission to this repo with `use_auth_token` or log in with `huggingface-cli " + "login`." + ) + except RevisionNotFoundError: + raise EnvironmentError( + f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for " + "this model name. Check the model page at " + f"'https://huggingface.co/{pretrained_model_name_or_path}' for available revisions." + ) + except EntryNotFoundError: + raise EnvironmentError( + f"{pretrained_model_name_or_path} does not appear to have a file named {weights_name}." 
+ ) + except HTTPError as err: + raise EnvironmentError( + "There was a specific connection error when trying to load" + f" {pretrained_model_name_or_path}:\n{err}" + ) + except ValueError: + raise EnvironmentError( + f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it" + f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a" + f" directory containing a file named {weights_name} or" + " \nCheckout your internet connection or see how to run the library in" + " offline mode at 'https://huggingface.co/docs/diffusers/installation#offline-mode'." + ) + except EnvironmentError: + raise EnvironmentError( + f"Can't load the model for '{pretrained_model_name_or_path}'. If you were trying to load it from " + "'https://huggingface.co/models', make sure you don't have a local directory with the same name. " + f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory " + f"containing a file named {weights_name}" + ) + @classmethod def _load_pretrained_model( cls, diff --git a/src/diffusers/pipeline_flax_utils.py b/src/diffusers/pipeline_flax_utils.py index f8fd304776..8ad30bb932 100644 --- a/src/diffusers/pipeline_flax_utils.py +++ b/src/diffusers/pipeline_flax_utils.py @@ -29,7 +29,7 @@ from PIL import Image from tqdm.auto import tqdm from .configuration_utils import ConfigMixin -from .hub_utils import http_user_agent +from .hub_utils import http_user_agent, send_telemetry from .modeling_flax_utils import FLAX_WEIGHTS_NAME, FlaxModelMixin from .schedulers.scheduling_utils_flax import SCHEDULER_CONFIG_NAME, FlaxSchedulerMixin from .utils import CONFIG_NAME, DIFFUSERS_CACHE, BaseOutput, is_transformers_available, logging @@ -346,8 +346,16 @@ class FlaxDiffusionPipeline(ConfigMixin): ignore_patterns=ignore_patterns, user_agent=user_agent, ) + send_telemetry( + {"pipeline_class": requested_pipeline_class, "pipeline_path": "hub", "framework": "flax"}, + 
name="diffusers_from_pretrained", + ) else: cached_folder = pretrained_model_name_or_path + send_telemetry( + {"pipeline_class": cls.__name__, "pipeline_path": "local", "framework": "flax"}, + name="diffusers_from_pretrained", + ) config_dict = cls.load_config(cached_folder) diff --git a/src/diffusers/pipeline_utils.py b/src/diffusers/pipeline_utils.py index fb3ebef0a2..f76a4da149 100644 --- a/src/diffusers/pipeline_utils.py +++ b/src/diffusers/pipeline_utils.py @@ -33,7 +33,7 @@ from tqdm.auto import tqdm from .configuration_utils import ConfigMixin from .dynamic_modules_utils import get_class_from_dynamic_module -from .hub_utils import http_user_agent +from .hub_utils import http_user_agent, send_telemetry from .modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT from .schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME from .utils import ( @@ -477,7 +477,7 @@ class DiffusionPipeline(ConfigMixin): else: requested_pipeline_class = config_dict.get("_class_name", cls.__name__) user_agent = {"pipeline_class": requested_pipeline_class} - if custom_pipeline is not None: + if custom_pipeline is not None and not custom_pipeline.endswith(".py"): user_agent["custom_pipeline"] = custom_pipeline user_agent = http_user_agent(user_agent) @@ -504,8 +504,16 @@ class DiffusionPipeline(ConfigMixin): ignore_patterns=ignore_patterns, user_agent=user_agent, ) + send_telemetry( + {"pipeline_class": requested_pipeline_class, "pipeline_path": "hub", "framework": "pytorch"}, + name="diffusers_from_pretrained", + ) else: cached_folder = pretrained_model_name_or_path + send_telemetry( + {"pipeline_class": cls.__name__, "pipeline_path": "local", "framework": "pytorch"}, + name="diffusers_from_pretrained", + ) config_dict = cls.load_config(cached_folder)