import logging
import os
import socket
import sys
import threading
import time
import urllib.error
import urllib.parse
import urllib.request
from collections import namedtuple

from cachetools.func import lru_cache
from flask import g, request
from prometheus_client import REGISTRY, Counter, Gauge, Histogram, push_to_gateway

logger = logging.getLogger(__name__)


# DB connections
db_pooled_connections_in_use = Gauge(
    "quay_db_pooled_connections_in_use", "number of pooled db connections in use"
)
db_pooled_connections_available = Gauge(
    "quay_db_pooled_connections_available", "number of pooled db connections available"
)
db_connect_calls = Counter(
    "quay_db_connect_calls",
    "number of connect() calls made to db",
)
db_close_calls = Counter(
    "quay_db_close_calls",
    "number of close() calls made to db",
)
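# Usage sketch for the DB metrics above (illustrative values; the actual call
# sites live wherever the application manages its connection pool):
#   db_connect_calls.inc()
#   db_close_calls.inc()
#   db_pooled_connections_in_use.set(3)
#   db_pooled_connections_available.set(7)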

request_duration = Histogram(
    "quay_request_duration_seconds",
    "seconds taken to process a request",
    labelnames=["method", "route", "status"],
)
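# Example observation (illustrative values only):
#   request_duration.labels("GET", "api.get_repository", 200).observe(0.123)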

# GC: DB table rows
gc_table_rows_deleted = Counter(
    "quay_gc_table_rows_deleted", "number of table rows deleted by GC", labelnames=["table"]
)
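# Example increment (hypothetical table name):
#   gc_table_rows_deleted.labels(table="tag").inc(3)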

# GC: Storage blob
gc_storage_blobs_deleted = Counter(
    "quay_gc_storage_blobs_deleted", "number of storage blobs deleted"
)

# GC iterations
gc_repos_purged = Counter(
    "quay_gc_repos_purged",
    "number of repositories purged by the RepositoryGCWorker or NamespaceGCWorker",
)
gc_namespaces_purged = Counter(
    "quay_gc_namespaces_purged", "number of namespaces purged by the NamespaceGCWorker"
)
gc_iterations = Counter("quay_gc_iterations", "number of iterations by the GCWorker")

secscan_request_duration = Histogram(
    "quay_secscan_request_duration_seconds",
    "seconds taken to make an index request to the secscan service",
    labelnames=["method", "action", "status"],
)
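# Example observation (hypothetical label values):
#   secscan_request_duration.labels("POST", "index", "200").observe(0.42)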

secscan_index_layer_size = Histogram(
    "quay_secscan_index_layer_size_bytes",
    "bytes submitted for indexing to the secscan service",
)

INF = float("inf")
SECSCAN_RESULT_BUCKETS = (60, 300, 600, 900, 1200, 1500, 1800, 2100, 2400, INF)
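# Buckets: 1 minute, then 5-minute steps from 5 to 40 minutes, plus +Inf for anything slower.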

secscan_result_duration = Histogram(
    "quay_secscan_result_duration_seconds",
    "how long it takes to receive scan results after pushing an image",
    buckets=SECSCAN_RESULT_BUCKETS,
)

PROMETHEUS_PUSH_INTERVAL_SECONDS = 30
ONE_DAY_IN_SECONDS = 60 * 60 * 24


@lru_cache(maxsize=1)
def process_grouping_key():
    """
    Builds a grouping key from the hostname, the last argument used to run the current process,
    and the pid.

    https://github.com/prometheus/client_python#exporting-to-a-pushgateway
    """
    return {
        "host": socket.gethostname(),
        "process_name": os.path.basename(sys.argv[-1]),
        "pid": str(os.getpid()),
    }
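
# Example return value of process_grouping_key() (illustrative values only):
#   {"host": "quay-host-1", "process_name": "registry.py", "pid": "42"}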


class PrometheusPlugin(object):
    """
    Application plugin for reporting metrics to Prometheus.
    """

    def __init__(self, app=None):
        self.app = app
        if app is not None:
            self.state = self.init_app(app)
        else:
            self.state = None

    def init_app(self, app):
        pusher = ThreadPusher(app)
        pusher.start()

        # register extension with app
        app.extensions = getattr(app, "extensions", {})
        app.extensions["prometheus"] = pusher
        return pusher

    def __getattr__(self, name):
        return getattr(self.state, name, None)
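
# Wiring sketch (assumes a Flask app object named `app`; illustrative only):
#   prometheus = PrometheusPlugin(app)
# The plugin starts a ThreadPusher, which periodically pushes this process's
# metrics registry to the configured PROMETHEUS_PUSHGATEWAY_URL.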


class ThreadPusher(threading.Thread):
    def __init__(self, app):
        super(ThreadPusher, self).__init__()
        self.daemon = True
        self._app = app

    def run(self):
        agg_url = self._app.config.get("PROMETHEUS_PUSHGATEWAY_URL")
        while True:
            # Practically disable this worker if there is no pushgateway.
            if agg_url is None or os.getenv("TEST", "false").lower() == "true":
                time.sleep(ONE_DAY_IN_SECONDS)
                continue

            time.sleep(PROMETHEUS_PUSH_INTERVAL_SECONDS)
            try:
                push_to_gateway(
                    agg_url,
                    job=self._app.config.get("PROMETHEUS_NAMESPACE", "quay"),
                    registry=REGISTRY,
                    grouping_key=process_grouping_key(),
                )
                logger.debug(
                    "pushed registry to pushgateway at %s with grouping key %s",
                    agg_url,
                    process_grouping_key(),
                )
            except urllib.error.URLError:
                # There are many scenarios when the gateway might not be running.
                # These could be testing scenarios or simply processes racing to start.
                # Rather than try to guess all of them, keep it simple and let it fail.
                if os.getenv("DEBUGLOG", "false").lower() == "true":
                    logger.exception(
                        "failed to push registry to pushgateway at %s with grouping key %s",
                        agg_url,
                        process_grouping_key(),
                    )
                else:
                    pass


def timed_blueprint(bp):
    """
    Decorates a blueprint to have its request duration tracked by Prometheus.
    """

    def _time_before_request():
        g._request_start_time = time.time()

    bp.before_request(_time_before_request)

    def _time_after_request():
        def f(r):
            start = getattr(g, "_request_start_time", None)
            if start is None:
                return r
            dur = time.time() - start
            request_duration.labels(request.method, request.endpoint, r.status_code).observe(dur)
            return r

        return f

    bp.after_request(_time_after_request())
    return bp
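
# Usage sketch (assumes a flask.Blueprint; names are illustrative only):
#   api_bp = timed_blueprint(Blueprint("api", __name__))
# Requests handled by the blueprint are then observed in the
# quay_request_duration_seconds histogram, labeled with method, endpoint, and status code.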