mirror of
https://github.com/huggingface/diffusers.git
synced 2026-01-27 17:22:53 +03:00
Add from_pretrained telemetry (#1461)
* Add from_pretrained usage logging * Add classes * add a telemetry notice * macos
This commit is contained in:
@@ -120,3 +120,24 @@ git pull
|
||||
```
|
||||
|
||||
Your Python environment will find the `main` version of 🤗 Diffusers on the next run.
|
||||
|
||||
## Notice on telemetry logging
|
||||
|
||||
Our library gathers telemetry information during `from_pretrained()` requests.
|
||||
This data includes the version of Diffusers and PyTorch/Flax, the requested model or pipeline class,
|
||||
and the path to a pretrained checkpoint if it is hosted on the Hub.
|
||||
This usage data helps us debug issues and prioritize new features.
|
||||
No private data, such as paths to models saved locally on disk, is ever collected.
|
||||
|
||||
We understand that not everyone wants to share additional information, and we respect your privacy,
|
||||
so you can disable telemetry collection by setting the `DISABLE_TELEMETRY` environment variable from your terminal:
|
||||
|
||||
On Linux/MacOS:
|
||||
```bash
|
||||
export DISABLE_TELEMETRY=YES
|
||||
```
|
||||
|
||||
On Windows:
|
||||
```bash
|
||||
set DISABLE_TELEMETRY=YES
|
||||
```
|
||||
@@ -20,10 +20,11 @@ from pathlib import Path
|
||||
from typing import Dict, Optional, Union
|
||||
from uuid import uuid4
|
||||
|
||||
import requests
|
||||
from huggingface_hub import HfFolder, whoami
|
||||
|
||||
from . import __version__
|
||||
from .utils import ENV_VARS_TRUE_VALUES, logging
|
||||
from .utils import ENV_VARS_TRUE_VALUES, HUGGINGFACE_CO_RESOLVE_ENDPOINT, logging
|
||||
from .utils.import_utils import (
|
||||
_flax_version,
|
||||
_jax_version,
|
||||
@@ -45,7 +46,9 @@ logger = logging.get_logger(__name__)
|
||||
|
||||
MODEL_CARD_TEMPLATE_PATH = Path(__file__).parent / "utils" / "model_card_template.md"
|
||||
SESSION_ID = uuid4().hex
|
||||
HF_HUB_OFFLINE = os.getenv("HF_HUB_OFFLINE", "").upper() in ENV_VARS_TRUE_VALUES
|
||||
DISABLE_TELEMETRY = os.getenv("DISABLE_TELEMETRY", "").upper() in ENV_VARS_TRUE_VALUES
|
||||
HUGGINGFACE_CO_TELEMETRY = HUGGINGFACE_CO_RESOLVE_ENDPOINT + "/api/telemetry/"
|
||||
|
||||
|
||||
def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str:
|
||||
@@ -72,6 +75,27 @@ def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str:
|
||||
return ua
|
||||
|
||||
|
||||
def send_telemetry(data: Dict, name: str):
|
||||
"""
|
||||
Sends logs to the Hub telemetry endpoint.
|
||||
|
||||
Args:
|
||||
data: the fields to track, e.g. {"example_name": "dreambooth"}
|
||||
name: a unique name to differentiate the telemetry logs, e.g. "diffusers_examples" or "diffusers_notebooks"
|
||||
"""
|
||||
if DISABLE_TELEMETRY or HF_HUB_OFFLINE:
|
||||
pass
|
||||
|
||||
headers = {"user-agent": http_user_agent(data)}
|
||||
endpoint = HUGGINGFACE_CO_TELEMETRY + name
|
||||
try:
|
||||
r = requests.head(endpoint, headers=headers)
|
||||
r.raise_for_status()
|
||||
except Exception:
|
||||
# We don't want to error in case of connection errors of any kind.
|
||||
pass
|
||||
|
||||
|
||||
def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
|
||||
if token is None:
|
||||
token = HfFolder.get_token()
|
||||
|
||||
@@ -28,6 +28,7 @@ from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, R
|
||||
from requests import HTTPError
|
||||
|
||||
from . import __version__, is_torch_available
|
||||
from .hub_utils import send_telemetry
|
||||
from .modeling_flax_pytorch_utils import convert_pytorch_state_dict_to_flax
|
||||
from .utils import (
|
||||
CONFIG_NAME,
|
||||
@@ -339,6 +340,10 @@ class FlaxModelMixin:
|
||||
f"Error no file named {FLAX_WEIGHTS_NAME} or {WEIGHTS_NAME} found in directory "
|
||||
f"{pretrained_path_with_subfolder}."
|
||||
)
|
||||
send_telemetry(
|
||||
{"model_class": cls.__name__, "model_path": "local", "framework": "flax"},
|
||||
name="diffusers_from_pretrained",
|
||||
)
|
||||
else:
|
||||
try:
|
||||
model_file = hf_hub_download(
|
||||
@@ -354,6 +359,10 @@ class FlaxModelMixin:
|
||||
subfolder=subfolder,
|
||||
revision=revision,
|
||||
)
|
||||
send_telemetry(
|
||||
{"model_class": cls.__name__, "model_path": "hub", "framework": "flax"},
|
||||
name="diffusers_from_pretrained",
|
||||
)
|
||||
|
||||
except RepositoryNotFoundError:
|
||||
raise EnvironmentError(
|
||||
|
||||
@@ -26,6 +26,7 @@ from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, R
|
||||
from requests import HTTPError
|
||||
|
||||
from . import __version__
|
||||
from .hub_utils import send_telemetry
|
||||
from .utils import (
|
||||
CONFIG_NAME,
|
||||
DIFFUSERS_CACHE,
|
||||
@@ -400,7 +401,7 @@ class ModelMixin(torch.nn.Module):
|
||||
model_file = None
|
||||
if is_safetensors_available():
|
||||
try:
|
||||
model_file = _get_model_file(
|
||||
model_file = cls._get_model_file(
|
||||
pretrained_model_name_or_path,
|
||||
weights_name=SAFETENSORS_WEIGHTS_NAME,
|
||||
cache_dir=cache_dir,
|
||||
@@ -416,7 +417,7 @@ class ModelMixin(torch.nn.Module):
|
||||
except:
|
||||
pass
|
||||
if model_file is None:
|
||||
model_file = _get_model_file(
|
||||
model_file = cls._get_model_file(
|
||||
pretrained_model_name_or_path,
|
||||
weights_name=WEIGHTS_NAME,
|
||||
cache_dir=cache_dir,
|
||||
@@ -531,6 +532,100 @@ class ModelMixin(torch.nn.Module):
|
||||
|
||||
return model
|
||||
|
||||
@classmethod
|
||||
def _get_model_file(
|
||||
cls,
|
||||
pretrained_model_name_or_path,
|
||||
*,
|
||||
weights_name,
|
||||
subfolder,
|
||||
cache_dir,
|
||||
force_download,
|
||||
proxies,
|
||||
resume_download,
|
||||
local_files_only,
|
||||
use_auth_token,
|
||||
user_agent,
|
||||
revision,
|
||||
):
|
||||
pretrained_model_name_or_path = str(pretrained_model_name_or_path)
|
||||
if os.path.isdir(pretrained_model_name_or_path):
|
||||
if os.path.isfile(os.path.join(pretrained_model_name_or_path, weights_name)):
|
||||
# Load from a PyTorch checkpoint
|
||||
model_file = os.path.join(pretrained_model_name_or_path, weights_name)
|
||||
elif subfolder is not None and os.path.isfile(
|
||||
os.path.join(pretrained_model_name_or_path, subfolder, weights_name)
|
||||
):
|
||||
model_file = os.path.join(pretrained_model_name_or_path, subfolder, weights_name)
|
||||
else:
|
||||
raise EnvironmentError(
|
||||
f"Error no file named {weights_name} found in directory {pretrained_model_name_or_path}."
|
||||
)
|
||||
send_telemetry(
|
||||
{"model_class": cls.__name__, "model_path": "local", "framework": "pytorch"},
|
||||
name="diffusers_from_pretrained",
|
||||
)
|
||||
return model_file
|
||||
else:
|
||||
try:
|
||||
# Load from URL or cache if already cached
|
||||
model_file = hf_hub_download(
|
||||
pretrained_model_name_or_path,
|
||||
filename=weights_name,
|
||||
cache_dir=cache_dir,
|
||||
force_download=force_download,
|
||||
proxies=proxies,
|
||||
resume_download=resume_download,
|
||||
local_files_only=local_files_only,
|
||||
use_auth_token=use_auth_token,
|
||||
user_agent=user_agent,
|
||||
subfolder=subfolder,
|
||||
revision=revision,
|
||||
)
|
||||
send_telemetry(
|
||||
{"model_class": cls.__name__, "model_path": "hub", "framework": "pytorch"},
|
||||
name="diffusers_from_pretrained",
|
||||
)
|
||||
return model_file
|
||||
|
||||
except RepositoryNotFoundError:
|
||||
raise EnvironmentError(
|
||||
f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier "
|
||||
"listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a "
|
||||
"token having permission to this repo with `use_auth_token` or log in with `huggingface-cli "
|
||||
"login`."
|
||||
)
|
||||
except RevisionNotFoundError:
|
||||
raise EnvironmentError(
|
||||
f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for "
|
||||
"this model name. Check the model page at "
|
||||
f"'https://huggingface.co/{pretrained_model_name_or_path}' for available revisions."
|
||||
)
|
||||
except EntryNotFoundError:
|
||||
raise EnvironmentError(
|
||||
f"{pretrained_model_name_or_path} does not appear to have a file named {weights_name}."
|
||||
)
|
||||
except HTTPError as err:
|
||||
raise EnvironmentError(
|
||||
"There was a specific connection error when trying to load"
|
||||
f" {pretrained_model_name_or_path}:\n{err}"
|
||||
)
|
||||
except ValueError:
|
||||
raise EnvironmentError(
|
||||
f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it"
|
||||
f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a"
|
||||
f" directory containing a file named {weights_name} or"
|
||||
" \nCheckout your internet connection or see how to run the library in"
|
||||
" offline mode at 'https://huggingface.co/docs/diffusers/installation#offline-mode'."
|
||||
)
|
||||
except EnvironmentError:
|
||||
raise EnvironmentError(
|
||||
f"Can't load the model for '{pretrained_model_name_or_path}'. If you were trying to load it from "
|
||||
"'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
|
||||
f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
|
||||
f"containing a file named {weights_name}"
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _load_pretrained_model(
|
||||
cls,
|
||||
|
||||
@@ -29,7 +29,7 @@ from PIL import Image
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
from .configuration_utils import ConfigMixin
|
||||
from .hub_utils import http_user_agent
|
||||
from .hub_utils import http_user_agent, send_telemetry
|
||||
from .modeling_flax_utils import FLAX_WEIGHTS_NAME, FlaxModelMixin
|
||||
from .schedulers.scheduling_utils_flax import SCHEDULER_CONFIG_NAME, FlaxSchedulerMixin
|
||||
from .utils import CONFIG_NAME, DIFFUSERS_CACHE, BaseOutput, is_transformers_available, logging
|
||||
@@ -346,8 +346,16 @@ class FlaxDiffusionPipeline(ConfigMixin):
|
||||
ignore_patterns=ignore_patterns,
|
||||
user_agent=user_agent,
|
||||
)
|
||||
send_telemetry(
|
||||
{"pipeline_class": requested_pipeline_class, "pipeline_path": "hub", "framework": "flax"},
|
||||
name="diffusers_from_pretrained",
|
||||
)
|
||||
else:
|
||||
cached_folder = pretrained_model_name_or_path
|
||||
send_telemetry(
|
||||
{"pipeline_class": cls.__name__, "pipeline_path": "local", "framework": "flax"},
|
||||
name="diffusers_from_pretrained",
|
||||
)
|
||||
|
||||
config_dict = cls.load_config(cached_folder)
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ from tqdm.auto import tqdm
|
||||
|
||||
from .configuration_utils import ConfigMixin
|
||||
from .dynamic_modules_utils import get_class_from_dynamic_module
|
||||
from .hub_utils import http_user_agent
|
||||
from .hub_utils import http_user_agent, send_telemetry
|
||||
from .modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT
|
||||
from .schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME
|
||||
from .utils import (
|
||||
@@ -477,7 +477,7 @@ class DiffusionPipeline(ConfigMixin):
|
||||
else:
|
||||
requested_pipeline_class = config_dict.get("_class_name", cls.__name__)
|
||||
user_agent = {"pipeline_class": requested_pipeline_class}
|
||||
if custom_pipeline is not None:
|
||||
if custom_pipeline is not None and not custom_pipeline.endswith(".py"):
|
||||
user_agent["custom_pipeline"] = custom_pipeline
|
||||
|
||||
user_agent = http_user_agent(user_agent)
|
||||
@@ -504,8 +504,16 @@ class DiffusionPipeline(ConfigMixin):
|
||||
ignore_patterns=ignore_patterns,
|
||||
user_agent=user_agent,
|
||||
)
|
||||
send_telemetry(
|
||||
{"pipeline_class": requested_pipeline_class, "pipeline_path": "hub", "framework": "pytorch"},
|
||||
name="diffusers_from_pretrained",
|
||||
)
|
||||
else:
|
||||
cached_folder = pretrained_model_name_or_path
|
||||
send_telemetry(
|
||||
{"pipeline_class": cls.__name__, "pipeline_path": "local", "framework": "pytorch"},
|
||||
name="diffusers_from_pretrained",
|
||||
)
|
||||
|
||||
config_dict = cls.load_config(cached_folder)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user