1
0
mirror of https://github.com/minio/docs.git synced 2025-04-18 10:04:02 +03:00

DOCS-987: Fixing prometheus metrics to pull from Minio Server docs list (#1003)

---------

Co-authored-by: Andrea Longo <feorlen@users.noreply.github.com>
This commit is contained in:
Ravind Kumar 2023-09-15 12:57:30 -04:00 committed by GitHub
parent 92f45471cd
commit a077c6b9c5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 63 additions and 555 deletions

1
.gitignore vendored
View File

@ -20,4 +20,5 @@ source/developers/java/*.md
source/developers/javascript/*.md
source/developers/python/*.md
source/operations/monitoring/*.md
source/includes/common-metrics-*.md
*.inv

View File

@ -70,8 +70,7 @@ linux:
@echo "Building for $@ Platform"
@echo "--------------------------------------"
@cp source/default-conf.py source/conf.py
@make sync-minio-version
@make sync-kes-version
@make sync-deps
ifeq ($(SYNC_SDK),TRUE)
@make sync-sdks
else
@ -86,8 +85,7 @@ windows:
@echo "Building for $@ Platform"
@echo "--------------------------------------"
@cp source/default-conf.py source/conf.py
@make sync-minio-version
@make sync-kes-version
@make sync-deps
@npm run build
@$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)/$(GITDIR)/$@" $(SPHINXOPTS) $(O) -t $@
@echo -e "Building $@ Complete\n--------------------------------------\n"
@ -97,8 +95,7 @@ macos:
@echo "Building for $@ Platform"
@echo "--------------------------------------"
@cp source/default-conf.py source/conf.py
@make sync-minio-version
@make sync-kes-version
@make sync-deps
@npm run build
@$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)/$(GITDIR)/$@" $(SPHINXOPTS) $(O) -t $@
@echo -e "Building $@ Complete\n--------------------------------------\n"
@ -109,8 +106,7 @@ k8s:
@echo "--------------------------------------"
@cp source/default-conf.py source/conf.py
@make sync-operator-version
@make sync-minio-version
@make sync-kes-version
@make sync-deps
@npm run build
@$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)/$(GITDIR)/$@" $(SPHINXOPTS) $(O) -t $@
@echo -e "Building $@ Complete\n--------------------------------------\n"
@ -121,8 +117,7 @@ openshift:
@echo "--------------------------------------"
@cp source/default-conf.py source/conf.py
@make sync-operator-version
@make sync-minio-version
@make sync-kes-version
@make sync-deps
@npm run build
@$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)/$(GITDIR)/$@" $(SPHINXOPTS) $(O) -t $@ -t k8s
@echo -e "Building $@ Complete\n--------------------------------------\n"
@ -133,8 +128,7 @@ eks:
@echo "--------------------------------------"
@cp source/default-conf.py source/conf.py
@make sync-operator-version
@make sync-minio-version
@make sync-kes-version
@make sync-deps
@npm run build
@$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)/$(GITDIR)/$@" $(SPHINXOPTS) $(O) -t $@ -t k8s
@echo -e "Building $@ Complete\n--------------------------------------\n"
@ -145,8 +139,7 @@ gke:
@echo "--------------------------------------"
@cp source/default-conf.py source/conf.py
@make sync-operator-version
@make sync-minio-version
@make sync-kes-version
@make sync-deps
@npm run build
@$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)/$(GITDIR)/$@" $(SPHINXOPTS) $(O) -t $@ -t k8s
@echo -e "Building $@ Complete\n--------------------------------------\n"
@ -157,8 +150,7 @@ aks:
@echo "--------------------------------------"
@cp source/default-conf.py source/conf.py
@make sync-operator-version
@make sync-minio-version
@make sync-kes-version
@make sync-deps
@npm run build
@$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)/$(GITDIR)/$@" $(SPHINXOPTS) $(O) -t $@ -t k8s
@echo -e "Building $@ Complete\n--------------------------------------\n"
@ -168,8 +160,7 @@ container:
@echo "Building for $@ Platform"
@echo "--------------------------------------"
@cp source/default-conf.py source/conf.py
@make sync-minio-version
@make sync-kes-version
@make sync-deps
@npm run build
@$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)/$(GITDIR)/$@" $(SPHINXOPTS) $(O) -t $@
@echo -e "Building $@ Complete\n--------------------------------------\n"
@ -211,6 +202,10 @@ sync-kes-version:
;; \
esac
# Refresh the metrics reference pages that are sourced from the MinIO server
# repository. The work is delegated to ./sync-minio-server-docs.sh, resolved
# relative to the directory make is run from.
sync-minio-server-docs:
	@echo "Retrieving select docs from github.com/minio/minio/docs"
	@./sync-minio-server-docs.sh
sync-minio-version:
@echo "Retrieving current MinIO version"
$(eval DEB = $(shell curl -s https://min.io/assets/downloads-minio.json | jq '.Linux."MinIO Server".amd64.DEB.download' | sed "s|linux-amd64|linux-amd64/archive|g"))
@ -247,6 +242,7 @@ sync-deps:
@echo "Synchronizing all external dependencies"
@make sync-minio-version
@make sync-kes-version
@make sync-minio-server-docs
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).

View File

@ -15,15 +15,37 @@ Metrics and Alerts
MinIO publishes cluster and node metrics using the :prometheus-docs:`Prometheus Data Model <data_model/>`.
You can use any scraping tool to pull metrics data from MinIO for further analysis and alerting.
MinIO provides a scraping endpoint for cluster-level metrics:
MinIO provides scraping endpoints for the following metric groups:
.. code-block:: shell
:class: copyable
.. tab-set::
http://minio.example.net:9000/minio/v2/metrics/cluster
.. tab-item:: Cluster Metrics
You can scrape :ref:`cluster-level metrics <minio-available-cluster-metrics>` using the following URL endpoint:
.. code-block:: shell
:class: copyable
http://HOSTNAME:PORT/minio/v2/metrics/cluster
Replace ``HOSTNAME:PORT`` with the :abbr:`FQDN (Fully Qualified Domain Name)` and port of the MinIO deployment.
For deployments with a load balancer managing connections between MinIO nodes, specify the address of the load balancer.
.. tab-item:: Bucket Metrics
.. versionchanged:: RELEASE.2023-08-31T15-31-16Z
You can scrape :ref:`bucket-level metrics <minio-available-bucket-metrics>` using the following URL endpoint:
.. code-block:: shell
:class: copyable
http://HOSTNAME:PORT/minio/v2/metrics/bucket
Replace ``HOSTNAME:PORT`` with the :abbr:`FQDN (Fully Qualified Domain Name)` and port of the MinIO deployment.
For deployments with a load balancer managing connections between MinIO nodes, specify the address of the load balancer.
Replace ``http://minio.example.net`` with the hostname of any node in the MinIO deployment.
For deployments with a load balancer managing connections between MinIO nodes, specify the address of the load balancer.
MinIO by default requires authentication for scraping the metrics endpoints.
Use the :mc-cmd:`mc admin prometheus generate` command to generate the necessary bearer tokens.
@ -86,541 +108,18 @@ Each metric includes a label for the MinIO server which generated that metric.
Bucket metrics have moved to use their own, separate endpoint.
- :ref:`Cluster Metrics <minio_available_cluster_metrics>`
- :ref:`Node Metrics <minio_available_node_metrics>`
- :ref:`Bucket Metrics <minio_available_bucket_metrics>`
- :ref:`Cluster Metrics <minio-available-cluster-metrics>`
- :ref:`Bucket Metrics <minio-available-bucket-metrics>`
.. _minio_available_cluster_metrics:
.. _minio-available-cluster-metrics:
Cluster Metrics
~~~~~~~~~~~~~~~
.. include:: /includes/common-metrics-cluster.md
:parser: myst_parser.sphinx_
Each metric includes the following labels:
.. _minio-available-bucket-metrics:
- Server that generated the metric.
- Server that calculated the metric.
These metrics can be obtained from any MinIO server once per collection.
Audit Metrics
+++++++++++++
``minio_audit_failed_messages``
Total number of messages that failed to send since start.
``minio_audit_target_queue_length``
Number of unsent messages in queue for target.
``minio_audit_total_messages``
Total number of messages sent since start.
Cluster Capacity Metrics
++++++++++++++++++++++++
``minio_cluster_capacity_raw_free_bytes``
Total free capacity online in the cluster.
``minio_cluster_capacity_raw_total_bytes``
Total capacity online in the cluster.
``minio_cluster_capacity_usable_free_bytes``
Total free usable capacity online in the cluster.
``minio_cluster_capacity_usable_total_bytes``
Total usable capacity online in the cluster.
Cluster Usage Metrics
+++++++++++++++++++++
``minio_cluster_objects_size_distribution``
Distribution of object sizes across a cluster.
``minio_cluster_objects_version_distribution``
Distribution of object versions across a cluster.
``minio_cluster_usage_object_total``
Total number of objects in a cluster.
``minio_cluster_usage_total_bytes``
Total cluster usage in bytes.
``minio_cluster_usage_version_total``
Total number of versions (includes delete marker) in a cluster.
``minio_cluster_usage_deletemarker_total``
Total number of delete markers in a cluster.
``minio_cluster_usage_total_bytes``
Total cluster usage in bytes.
``minio_cluster_buckets_total``
Total number of buckets in the cluster.
Drive Metrics
+++++++++++++
``minio_cluster_drive_offline_total``
Total drives offline in this cluster.
``minio_cluster_drive_online_total``
Total drives online in this cluster.
``minio_cluster_drive_total``
Total drives in this cluster.
ILM Metrics
+++++++++++
``minio_cluster_ilm_transitioned_bytes``
Total bytes transitioned to a tier.
``minio_cluster_ilm_transitioned_objects``
Total number of objects transitioned to a tier.
``minio_cluster_ilm_transitioned_versions``
Total number of versions transitioned to a tier.
``minio_node_ilm_expiry_active_tasks``
Total number of active :ref:`object expiration <minio-lifecycle-management-expiration>` tasks.
Key Management Metrics
++++++++++++++++++++++
``minio_cluster_kms_online``
Reports whether the KMS is online (1) or offline (0).
``minio_cluster_kms_request_error``
Number of KMS requests that failed due to some error.
(HTTP 4xx status code).
``minio_cluster_kms_request_failure``
Number of KMS requests that failed due to some internal failure.
(HTTP 5xx status code).
``minio_cluster_kms_request_success``
Number of KMS requests that succeeded.
``minio_cluster_kms_uptime``
The time the KMS has been up and running in seconds.
Cluster Health Metrics
++++++++++++++++++++++
``minio_cluster_nodes_offline_total``
Total number of MinIO nodes offline.
``minio_cluster_nodes_online_total``
Total number of MinIO nodes online.
``minio_cluster_write_quorum``
Maximum write quorum across all pools and sets.
``minio_cluster_health_status``
Get current cluster health status.
``minio_heal_objects_errors_total``
Objects for which healing failed in current self healing run.
``minio_heal_objects_heal_total``
Objects healed in current self healing run.
``minio_heal_objects_total``
Objects scanned in current self healing run.
``minio_heal_time_last_activity_nano_seconds``
Time elapsed (in nano seconds) since last self healing activity.
``minio_minio_update_percent``
Total percentage cache usage.
``minio_software_commit_info``
Git commit hash for the MinIO release.
``minio_software_version_info``
MinIO Release tag for the server.
``minio_usage_last_activity_nano_seconds``
Time elapsed (in nano seconds) since last scan activity.
Inter Node Metrics
++++++++++++++++++
``minio_inter_node_traffic_dial_avg_time``
Average time of internode TCP dial calls.
``minio_inter_node_traffic_dial_errors``
Total number of internode TCP dial timeouts and errors.
``minio_inter_node_traffic_errors_total``
Total number of failed internode calls.
``minio_inter_node_traffic_received_bytes``
Total number of bytes received from other peer nodes.
``minio_inter_node_traffic_sent_bytes``
Total number of bytes sent to the other peer nodes.
S3 Request Metrics
++++++++++++++++++
``minio_s3_requests_4xx_errors_total``
Total number S3 requests with (4xx) errors.
``minio_s3_requests_5xx_errors_total``
Total number S3 requests with (5xx) errors.
``minio_s3_requests_canceled_total``
Total number S3 requests canceled by the client.
``minio_s3_requests_errors_total``
Total number S3 requests with (4xx and 5xx) errors.
``minio_s3_requests_incoming_total``
Volatile number of total incoming S3 requests.
``minio_s3_requests_inflight_total``
Total number of S3 requests currently in flight.
``minio_s3_requests_rejected_auth_total``
Total number S3 requests rejected for auth failure.
``minio_s3_requests_rejected_header_total``
Total number S3 requests rejected for invalid header.
``minio_s3_requests_rejected_invalid_total``
Total number S3 invalid requests.
``minio_s3_requests_rejected_timestamp_total``
Total number S3 requests rejected for invalid timestamp.
``minio_s3_requests_total``
Total number S3 requests.
``minio_s3_requests_waiting_total``
Number of S3 requests in the waiting queue.
``minio_s3_requests_ttfb_seconds_distribution``
Distribution of the time to first byte across API calls.
``minio_s3_traffic_received_bytes``
Total number of s3 bytes received.
``minio_s3_traffic_sent_bytes``
Total number of s3 bytes sent.
Lock Metrics
++++++++++++
``minio_locks_total``
Total number of current locks on the peer.
``minio_locks_write_total``
Number of current WRITE locks on the peer.
``minio_locks_read_total``
Number of current READ locks on the peer.
Webhook Metrics
+++++++++++++++
``minio_cluster_webhook_failed_messages``
Number of messages that failed to send.
``minio_cluster_webhook_online``
Reports whether the webhook endpoint is online (1) or offline (0).
``minio_cluster_webhook_queue_length``
Number of messages in the webhook queue.
``minio_cluster_webhook_total_messages``
Number of messages sent to this webhook endpoint.
.. _minio_available_node_metrics:
Node Metrics
~~~~~~~~~~~~
Each metric includes the following labels:
- Server that generated the metric.
- Server that calculated the metric.
These metrics can be obtained from any MinIO server once per collection.
Drive Metrics
+++++++++++++
``minio_node_drive_free_bytes``
Total storage available on a drive in this node.
``minio_node_drive_free_inodes``
Total free inodes in this node.
``minio_node_drive_latency_us``
Average last minute latency in µs for drive API storage operations.
``minio_node_drive_offline_total``
Total drives offline in this node.
``minio_node_drive_online_total``
Total drives online in this node.
``minio_node_drive_total``
Total drives in this node.
``minio_node_drive_total_bytes``
Total storage on a drive in this node.
``minio_node_drive_used_bytes``
Total storage used on a drive in this node.
``minio_node_drive_errors_timeout``
Total number of timeout errors since server start in this node.
``minio_node_drive_errors_availability``
Total number of I/O errors, permission denied, and timeouts since server start in this node.
File Metrics
++++++++++++
``minio_node_file_descriptor_limit_total``
Limit on total number of open file descriptors for the MinIO Server process.
``minio_node_file_descriptor_open_total``
Total number of open file descriptors by the MinIO Server process.
Go Metrics
++++++++++
``minio_node_go_routine_total``
Total number of go routines running.
Access Management (IAM) Metrics
+++++++++++++++++++++++++++++++
``minio_node_iam_last_sync_duration_millis``
Last successful IAM data sync duration in milliseconds.
``minio_node_iam_since_last_sync_millis``
Time (in milliseconds) since last successful IAM data sync.
``minio_node_iam_sync_failures``
Number of failed IAM data syncs since server start.
``minio_node_iam_sync_successes``
Number of successful IAM data syncs since server start.
Lifecycle Management (ILM) Metrics
++++++++++++++++++++++++++++++++++
``minio_node_ilm_expiry_pending_tasks``
Number of pending ILM expiry tasks in the queue.
``minio_node_ilm_transition_active_tasks``
Number of active ILM transition tasks.
``minio_node_ilm_transition_pending_tasks``
Number of pending ILM transition tasks in the queue.
``minio_node_ilm_versions_scanned``
Total number of object versions checked for ilm actions since server start.
I/O Metrics
+++++++++++
``minio_node_io_rchar_bytes``
Total bytes read by the process from the underlying storage system including cache, ``/proc/[pid]/io`` rchar.
``minio_node_io_read_bytes``
Total bytes read by the process from the underlying storage system, ``/proc/[pid]/io`` read_bytes.
``minio_node_io_wchar_bytes``
Total bytes written by the process to the underlying storage system including page cache, ``/proc/[pid]/io`` wchar.
``minio_node_io_write_bytes``
Total bytes written by the process to the underlying storage system, ``/proc/[pid]/io`` write_bytes.
Process Metrics
+++++++++++++++
``minio_node_process_cpu_total_seconds``
Total user and system CPU time spent in seconds.
``minio_node_process_resident_memory_bytes``
Resident memory size in bytes.
``minio_node_process_starttime_seconds``
Start time for MinIO process per node, time in seconds since Unix epoch.
``minio_node_process_uptime_seconds``
Uptime for MinIO process per node in seconds.
Scanner Metrics
+++++++++++++++
``minio_node_scanner_bucket_scans_finished``
Total number of bucket scans finished since server start.
``minio_node_scanner_bucket_scans_started``
Total number of bucket scans started since server start.
``minio_node_scanner_directories_scanned``
Total number of directories scanned since server start.
``minio_node_scanner_objects_scanned``
Total number of unique objects scanned since server start.
``minio_node_scanner_versions_scanned``
Total number of object versions scanned since server start.
Read and Write Metrics
++++++++++++++++++++++
``minio_node_syscall_read_total``
Total read SysCalls to the kernel.
``/proc/[pid]/io`` syscr.
``minio_node_syscall_write_total``
Total write SysCalls to the kernel.
``/proc/[pid]/io`` syscw.
Notification Metrics
++++++++++++++++++++
``minio_notify_current_send_in_progress``
Number of concurrent async Send calls active to all targets.
``minio_notify_target_queue_length``
Number of unsent notifications in queue for target.
IAM Plugin Metrics
++++++++++++++++++
.. note::
The metrics in this section require that you have configured the :ref:`MinIO External Identity Management Plugin <minio-external-identity-management-plugin>`.
``minio_node_iam_plugin_authn_service_last_succ_seconds``
Time (in seconds) since last successful request to the external IDP service.
``minio_node_iam_plugin_authn_service_last_fail_seconds``
Time (in seconds) since last failed request to the external IDP service.
``minio_node_iam_plugin_authn_service_total_requests_minute``
Total requests count to the external IDP service in the last full minute.
``minio_node_iam_plugin_authn_service_failed_requests_minute``
Count of the failed requests to the external IDP service in the last full minute.
``minio_node_iam_plugin_authn_service_succ_avg_rtt_ms_minute``
Average round trip time (RTT) of successful requests to the IDP service in the last full minute.
``minio_node_iam_plugin_authn_service_succ_max_rtt_ms_minute``
Maximum round trip time (RTT) of successful requests to the IDP service in the last full minute.
.. _minio_available_bucket_metrics:
Bucket Metrics
~~~~~~~~~~~~~~
Each bucket metric includes the following labels:
- The server that calculated the metric.
- The server that generated the metric.
- The bucket the metric is for.
These metrics can be obtained from any MinIO server once per collection.
Distribution Metrics
++++++++++++++++++++
``minio_bucket_objects_size_distribution``
Distribution of object sizes in the bucket, includes label for the bucket name.
``minio_bucket_objects_version_distribution``
Distribution of object sizes in a bucket, by number of versions.
``minio_bucket_quota_total_bytes``
Total bucket quota size in bytes.
Replication Metrics
+++++++++++++++++++
.. note::
The metrics for bucket replication only populate for MinIO clusters with :ref:`minio-bucket-replication-serverside` enabled.
``minio_bucket_replication_failed_count``
Total number of objects which failed replication.
``minio_bucket_replication_latency_ms``
Replication latency in milliseconds.
``minio_bucket_replication_received_bytes``
Total number of bytes replicated to this bucket from another source bucket.
``minio_bucket_replication_sent_bytes``
Total number of bytes replicated to the target bucket.
``minio_bucket_replication_failed_bytes``
Total number of bytes that failed at least once to replicate for a given bucket.
You can identify the bucket using the ``{ bucket="STRING" }`` label.
``minio_bucket_replication_pending_bytes``
Total number of bytes pending to replicate for a given bucket.
You can identify the bucket using the ``{ bucket="STRING" }`` label.
``minio_bucket_replication_pending_count``
Total number of replication operations pending for a given bucket.
You can identify the bucket using the ``{ bucket="STRING" }`` label.
Traffic Metrics
+++++++++++++++
``minio_bucket_traffic_received_bytes``
Total number of S3 bytes received for this bucket.
``minio_bucket_traffic_sent_bytes``
Total number of S3 bytes sent for this bucket.
Usage Metrics
+++++++++++++
``minio_bucket_usage_object_total``
Total number of objects.
``minio_bucket_usage_version_total``
Total number of versions (includes delete marker).
``minio_bucket_usage_deletemarker_total``
Total number of delete markers.
``minio_bucket_usage_total_bytes``
Total bucket size in bytes.
Requests Metrics
++++++++++++++++
``minio_bucket_requests_4xx_errors_total``
Total number of S3 requests with (4xx) errors on a bucket.
``minio_bucket_requests_5xx_errors_total``
Total number of S3 requests with (5xx) errors on a bucket.
``minio_bucket_requests_inflight_total``
Total number of S3 requests currently in flight on a bucket.
``minio_bucket_requests_total``
Total number of S3 requests on a bucket.
``minio_bucket_requests_canceled_total``
Total number S3 requests canceled by the client.
``minio_bucket_requests_ttfb_seconds_distribution``
Distribution of time to first byte across API calls per bucket.
.. include:: /includes/common-metrics-bucket.md
:parser: myst_parser.sphinx_
.. toctree::
:titlesonly:

12
sync-minio-server-docs.sh Executable file
View File

@ -0,0 +1,12 @@
#!/bin/bash
#
# Download the Prometheus metrics list from the MinIO server repository and
# split it into the cluster and bucket include files used by the docs build.
#
# Output paths are relative to the current working directory:
#   source/includes/common-metrics-cluster.md  (everything before "# Bucket Metrics")
#   source/includes/common-metrics-bucket.md   ("# Bucket Metrics" to end of file)

set -e
# Without pipefail a failed download is masked by csplit's exit status and
# only shows up as a confusing "match not found" error.
set -o pipefail
set -x

function main() {
    local list_url="https://raw.githubusercontent.com/minio/minio/master/docs/metrics/prometheus/list.md"
    local workdir

    # Split inside a private scratch directory so csplit's xx00/xx01 pieces
    # never collide with, or get left behind in, the working tree.
    workdir="$(mktemp -d "${TMPDIR:-/tmp}/minio-server-docs.XXXXXX")"
    # Expand the path now: the local variable is out of scope when EXIT fires.
    trap "rm -rf -- '${workdir}'" EXIT

    # -f: treat HTTP errors (404, 5xx) as failures instead of piping the error
    #     page into csplit.
    # -S: still print the error message even though -s hides the progress bar.
    curl --retry 10 -fsSL "${list_url}" \
        | csplit -f "${workdir}/xx" - '/# Bucket Metrics/'

    mv "${workdir}/xx00" source/includes/common-metrics-cluster.md
    mv "${workdir}/xx01" source/includes/common-metrics-bucket.md
}

main "$@"