1
0
mirror of https://github.com/huggingface/diffusers.git synced 2026-01-27 17:22:53 +03:00

Add from_pretrained telemetry (#1461)

* Add from_pretrained usage logging

* Add classes

* add a telemetry notice

* macos
This commit is contained in:
Anton Lozhkov
2022-12-07 11:56:21 +01:00
committed by GitHub
parent 4f3ddb6cca
commit bb2d7cacc0
6 changed files with 171 additions and 6 deletions

View File

@@ -120,3 +120,24 @@ git pull
```
Your Python environment will find the `main` version of 🤗 Diffusers on the next run.
## Notice on telemetry logging
Our library gathers telemetry information during `from_pretrained()` requests.
This data includes the version of Diffusers and PyTorch/Flax, the requested model or pipeline class,
and the path to a pretrained checkpoint if it is hosted on the Hub.
This usage data helps us debug issues and prioritize new features.
No private data, such as paths to models saved locally on disk, is ever collected.
We understand that not everyone wants to share additional information, and we respect your privacy,
so you can disable telemetry collection by setting the `DISABLE_TELEMETRY` environment variable from your terminal:
On Linux/macOS:
```bash
export DISABLE_TELEMETRY=YES
```
On Windows:
```bash
set DISABLE_TELEMETRY=YES
```

View File

@@ -20,10 +20,11 @@ from pathlib import Path
from typing import Dict, Optional, Union
from uuid import uuid4
import requests
from huggingface_hub import HfFolder, whoami
from . import __version__
from .utils import ENV_VARS_TRUE_VALUES, logging
from .utils import ENV_VARS_TRUE_VALUES, HUGGINGFACE_CO_RESOLVE_ENDPOINT, logging
from .utils.import_utils import (
_flax_version,
_jax_version,
@@ -45,7 +46,9 @@ logger = logging.get_logger(__name__)
MODEL_CARD_TEMPLATE_PATH = Path(__file__).parent / "utils" / "model_card_template.md"
SESSION_ID = uuid4().hex
HF_HUB_OFFLINE = os.getenv("HF_HUB_OFFLINE", "").upper() in ENV_VARS_TRUE_VALUES
DISABLE_TELEMETRY = os.getenv("DISABLE_TELEMETRY", "").upper() in ENV_VARS_TRUE_VALUES
HUGGINGFACE_CO_TELEMETRY = HUGGINGFACE_CO_RESOLVE_ENDPOINT + "/api/telemetry/"
def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str:
@@ -72,6 +75,27 @@ def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str:
return ua
def send_telemetry(data: Dict, name: str):
    """
    Sends logs to the Hub telemetry endpoint.

    Args:
        data: the fields to track, e.g. {"example_name": "dreambooth"}
        name: a unique name to differentiate the telemetry logs, e.g. "diffusers_examples" or "diffusers_notebooks"
    """
    # Respect the user's opt-out: exit before any network activity when
    # telemetry is disabled or the Hub is in offline mode. (The original code
    # used `pass` here, which fell through and sent the request anyway.)
    if DISABLE_TELEMETRY or HF_HUB_OFFLINE:
        return

    # The tracked fields travel inside the user-agent header; the request body
    # is empty, so a HEAD request is sufficient.
    headers = {"user-agent": http_user_agent(data)}
    endpoint = HUGGINGFACE_CO_TELEMETRY + name
    try:
        r = requests.head(endpoint, headers=headers)
        r.raise_for_status()
    except Exception:
        # We don't want to error in case of connection errors of any kind.
        pass
def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
if token is None:
token = HfFolder.get_token()

View File

@@ -28,6 +28,7 @@ from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, R
from requests import HTTPError
from . import __version__, is_torch_available
from .hub_utils import send_telemetry
from .modeling_flax_pytorch_utils import convert_pytorch_state_dict_to_flax
from .utils import (
CONFIG_NAME,
@@ -339,6 +340,10 @@ class FlaxModelMixin:
f"Error no file named {FLAX_WEIGHTS_NAME} or {WEIGHTS_NAME} found in directory "
f"{pretrained_path_with_subfolder}."
)
send_telemetry(
{"model_class": cls.__name__, "model_path": "local", "framework": "flax"},
name="diffusers_from_pretrained",
)
else:
try:
model_file = hf_hub_download(
@@ -354,6 +359,10 @@ class FlaxModelMixin:
subfolder=subfolder,
revision=revision,
)
send_telemetry(
{"model_class": cls.__name__, "model_path": "hub", "framework": "flax"},
name="diffusers_from_pretrained",
)
except RepositoryNotFoundError:
raise EnvironmentError(

View File

@@ -26,6 +26,7 @@ from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, R
from requests import HTTPError
from . import __version__
from .hub_utils import send_telemetry
from .utils import (
CONFIG_NAME,
DIFFUSERS_CACHE,
@@ -400,7 +401,7 @@ class ModelMixin(torch.nn.Module):
model_file = None
if is_safetensors_available():
try:
model_file = _get_model_file(
model_file = cls._get_model_file(
pretrained_model_name_or_path,
weights_name=SAFETENSORS_WEIGHTS_NAME,
cache_dir=cache_dir,
@@ -416,7 +417,7 @@ class ModelMixin(torch.nn.Module):
except:
pass
if model_file is None:
model_file = _get_model_file(
model_file = cls._get_model_file(
pretrained_model_name_or_path,
weights_name=WEIGHTS_NAME,
cache_dir=cache_dir,
@@ -531,6 +532,100 @@ class ModelMixin(torch.nn.Module):
return model
@classmethod
def _get_model_file(
    cls,
    pretrained_model_name_or_path,
    *,
    weights_name,
    subfolder,
    cache_dir,
    force_download,
    proxies,
    resume_download,
    local_files_only,
    use_auth_token,
    user_agent,
    revision,
):
    """
    Resolve the path to a weights file named `weights_name`.

    Checks a local directory first (directly, then under `subfolder`); otherwise
    treats `pretrained_model_name_or_path` as a Hub repo id and downloads the
    file (or reuses the cached copy). Emits a `send_telemetry` event tagged
    "local" or "hub" accordingly, and translates Hub download failures into
    `EnvironmentError`s with actionable messages.
    """
    pretrained_model_name_or_path = str(pretrained_model_name_or_path)
    if not os.path.isdir(pretrained_model_name_or_path):
        try:
            # Load from URL or cache if already cached
            resolved_file = hf_hub_download(
                pretrained_model_name_or_path,
                filename=weights_name,
                cache_dir=cache_dir,
                force_download=force_download,
                proxies=proxies,
                resume_download=resume_download,
                local_files_only=local_files_only,
                use_auth_token=use_auth_token,
                user_agent=user_agent,
                subfolder=subfolder,
                revision=revision,
            )
            send_telemetry(
                {"model_class": cls.__name__, "model_path": "hub", "framework": "pytorch"},
                name="diffusers_from_pretrained",
            )
            return resolved_file
        except RepositoryNotFoundError:
            raise EnvironmentError(
                f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier "
                "listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a "
                "token having permission to this repo with `use_auth_token` or log in with `huggingface-cli "
                "login`."
            )
        except RevisionNotFoundError:
            raise EnvironmentError(
                f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for "
                "this model name. Check the model page at "
                f"'https://huggingface.co/{pretrained_model_name_or_path}' for available revisions."
            )
        except EntryNotFoundError:
            raise EnvironmentError(
                f"{pretrained_model_name_or_path} does not appear to have a file named {weights_name}."
            )
        except HTTPError as err:
            raise EnvironmentError(
                "There was a specific connection error when trying to load"
                f" {pretrained_model_name_or_path}:\n{err}"
            )
        except ValueError:
            # hf_hub_download raises ValueError when offline and the file is
            # not present in the local cache.
            raise EnvironmentError(
                f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it"
                f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a"
                f" directory containing a file named {weights_name} or"
                " \nCheckout your internet connection or see how to run the library in"
                " offline mode at 'https://huggingface.co/docs/diffusers/installation#offline-mode'."
            )
        except EnvironmentError:
            raise EnvironmentError(
                f"Can't load the model for '{pretrained_model_name_or_path}'. If you were trying to load it from "
                "'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
                f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
                f"containing a file named {weights_name}"
            )

    # Local directory: look for the weights file directly, then under the subfolder.
    direct_path = os.path.join(pretrained_model_name_or_path, weights_name)
    if os.path.isfile(direct_path):
        # Load from a PyTorch checkpoint
        resolved_file = direct_path
    else:
        nested_path = (
            os.path.join(pretrained_model_name_or_path, subfolder, weights_name)
            if subfolder is not None
            else None
        )
        if nested_path is not None and os.path.isfile(nested_path):
            resolved_file = nested_path
        else:
            raise EnvironmentError(
                f"Error no file named {weights_name} found in directory {pretrained_model_name_or_path}."
            )
    send_telemetry(
        {"model_class": cls.__name__, "model_path": "local", "framework": "pytorch"},
        name="diffusers_from_pretrained",
    )
    return resolved_file
@classmethod
def _load_pretrained_model(
cls,

View File

@@ -29,7 +29,7 @@ from PIL import Image
from tqdm.auto import tqdm
from .configuration_utils import ConfigMixin
from .hub_utils import http_user_agent
from .hub_utils import http_user_agent, send_telemetry
from .modeling_flax_utils import FLAX_WEIGHTS_NAME, FlaxModelMixin
from .schedulers.scheduling_utils_flax import SCHEDULER_CONFIG_NAME, FlaxSchedulerMixin
from .utils import CONFIG_NAME, DIFFUSERS_CACHE, BaseOutput, is_transformers_available, logging
@@ -346,8 +346,16 @@ class FlaxDiffusionPipeline(ConfigMixin):
ignore_patterns=ignore_patterns,
user_agent=user_agent,
)
send_telemetry(
{"pipeline_class": requested_pipeline_class, "pipeline_path": "hub", "framework": "flax"},
name="diffusers_from_pretrained",
)
else:
cached_folder = pretrained_model_name_or_path
send_telemetry(
{"pipeline_class": cls.__name__, "pipeline_path": "local", "framework": "flax"},
name="diffusers_from_pretrained",
)
config_dict = cls.load_config(cached_folder)

View File

@@ -33,7 +33,7 @@ from tqdm.auto import tqdm
from .configuration_utils import ConfigMixin
from .dynamic_modules_utils import get_class_from_dynamic_module
from .hub_utils import http_user_agent
from .hub_utils import http_user_agent, send_telemetry
from .modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT
from .schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME
from .utils import (
@@ -477,7 +477,7 @@ class DiffusionPipeline(ConfigMixin):
else:
requested_pipeline_class = config_dict.get("_class_name", cls.__name__)
user_agent = {"pipeline_class": requested_pipeline_class}
if custom_pipeline is not None:
if custom_pipeline is not None and not custom_pipeline.endswith(".py"):
user_agent["custom_pipeline"] = custom_pipeline
user_agent = http_user_agent(user_agent)
@@ -504,8 +504,16 @@ class DiffusionPipeline(ConfigMixin):
ignore_patterns=ignore_patterns,
user_agent=user_agent,
)
send_telemetry(
{"pipeline_class": requested_pipeline_class, "pipeline_path": "hub", "framework": "pytorch"},
name="diffusers_from_pretrained",
)
else:
cached_folder = pretrained_model_name_or_path
send_telemetry(
{"pipeline_class": cls.__name__, "pipeline_path": "local", "framework": "pytorch"},
name="diffusers_from_pretrained",
)
config_dict = cls.load_config(cached_folder)