mirror of
https://github.com/prometheus-community/bind_exporter.git
synced 2025-04-20 00:02:14 +03:00
Make it possible to completely overwrite the existing stats groups and also add a verification of duplicated stats groups listing. This removes the option to repeat the -bind.stats-groups flags.
488 lines
15 KiB
Go
488 lines
15 KiB
Go
package main
|
|
|
|
import (
|
|
"flag"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"math"
|
|
"net/http"
|
|
_ "net/http/pprof"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/digitalocean/bind_exporter/bind"
|
|
"github.com/digitalocean/bind_exporter/bind/auto"
|
|
"github.com/digitalocean/bind_exporter/bind/v2"
|
|
"github.com/digitalocean/bind_exporter/bind/v3"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/common/log"
|
|
"github.com/prometheus/common/version"
|
|
)
|
|
|
|
const (
|
|
namespace = "bind"
|
|
resolver = "resolver"
|
|
)
|
|
|
|
var (
|
|
up = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "up"),
|
|
"Was the Bind instance query successful?",
|
|
nil, nil,
|
|
)
|
|
bootTime = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "boot_time_seconds"),
|
|
"Start time of the BIND process since unix epoch in seconds.",
|
|
nil, nil,
|
|
)
|
|
configTime = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "config_time_seconds"),
|
|
"Time of the last reconfiguration since unix epoch in seconds.",
|
|
nil, nil,
|
|
)
|
|
incomingQueries = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "incoming_queries_total"),
|
|
"Number of incomming DNS queries.",
|
|
[]string{"type"}, nil,
|
|
)
|
|
incomingRequests = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "incoming_requests_total"),
|
|
"Number of incomming DNS requests.",
|
|
[]string{"opcode"}, nil,
|
|
)
|
|
resolverCache = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, resolver, "cache_rrsets"),
|
|
"Number of RRSets in Cache database.",
|
|
[]string{"view", "type"}, nil,
|
|
)
|
|
resolverQueries = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, resolver, "queries_total"),
|
|
"Number of outgoing DNS queries.",
|
|
[]string{"view", "type"}, nil,
|
|
)
|
|
resolverQueryDuration = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, resolver, "query_duration_seconds"),
|
|
"Resolver query round-trip time in seconds.",
|
|
[]string{"view"}, nil,
|
|
)
|
|
resolverQueryErrors = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, resolver, "query_errors_total"),
|
|
"Number of resolver queries failed.",
|
|
[]string{"view", "error"}, nil,
|
|
)
|
|
resolverResponseErrors = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, resolver, "response_errors_total"),
|
|
"Number of resolver response errors received.",
|
|
[]string{"view", "error"}, nil,
|
|
)
|
|
resolverDNSSECSucess = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, resolver, "dnssec_validation_success_total"),
|
|
"Number of DNSSEC validation attempts succeeded.",
|
|
[]string{"view", "result"}, nil,
|
|
)
|
|
resolverMetricStats = map[string]*prometheus.Desc{
|
|
"Lame": prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, resolver, "response_lame_total"),
|
|
"Number of lame delegation responses received.",
|
|
[]string{"view"}, nil,
|
|
),
|
|
"EDNS0Fail": prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, resolver, "query_edns0_errors_total"),
|
|
"Number of EDNS(0) query errors.",
|
|
[]string{"view"}, nil,
|
|
),
|
|
"Mismatch": prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, resolver, "response_mismatch_total"),
|
|
"Number of mismatch responses received.",
|
|
[]string{"view"}, nil,
|
|
),
|
|
"Retry": prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, resolver, "query_retries_total"),
|
|
"Number of resolver query retries.",
|
|
[]string{"view"}, nil,
|
|
),
|
|
"Truncated": prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, resolver, "response_truncated_total"),
|
|
"Number of truncated responses received.",
|
|
[]string{"view"}, nil,
|
|
),
|
|
"ValFail": prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, resolver, "dnssec_validation_errors_total"),
|
|
"Number of DNSSEC validation attempt errors.",
|
|
[]string{"view"}, nil,
|
|
),
|
|
}
|
|
resolverLabelStats = map[string]*prometheus.Desc{
|
|
"QueryAbort": resolverQueryErrors,
|
|
"QuerySockFail": resolverQueryErrors,
|
|
"QueryTimeout": resolverQueryErrors,
|
|
"NXDOMAIN": resolverResponseErrors,
|
|
"SERVFAIL": resolverResponseErrors,
|
|
"FORMERR": resolverResponseErrors,
|
|
"OtherError": resolverResponseErrors,
|
|
"ValOk": resolverDNSSECSucess,
|
|
"ValNegOk": resolverDNSSECSucess,
|
|
}
|
|
serverQueryErrors = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "query_errors_total"),
|
|
"Number of query failures.",
|
|
[]string{"error"}, nil,
|
|
)
|
|
serverReponses = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "responses_total"),
|
|
"Number of responses sent.",
|
|
[]string{"result"}, nil,
|
|
)
|
|
serverLabelStats = map[string]*prometheus.Desc{
|
|
"QryDropped": serverQueryErrors,
|
|
"QryFailure": serverQueryErrors,
|
|
"QrySuccess": serverReponses,
|
|
"QryReferral": serverReponses,
|
|
"QryNxrrset": serverReponses,
|
|
"QrySERVFAIL": serverReponses,
|
|
"QryFORMERR": serverReponses,
|
|
"QryNXDOMAIN": serverReponses,
|
|
}
|
|
serverMetricStats = map[string]*prometheus.Desc{
|
|
"QryDuplicate": prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "query_duplicates_total"),
|
|
"Number of duplicated queries received.",
|
|
nil, nil,
|
|
),
|
|
"QryRecursion": prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "query_recursions_total"),
|
|
"Number of queries causing recursion.",
|
|
nil, nil,
|
|
),
|
|
}
|
|
tasksRunning = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "tasks_running"),
|
|
"Number of running tasks.",
|
|
nil, nil,
|
|
)
|
|
workerThreads = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "worker_threads"),
|
|
"Total number of available worker threads.",
|
|
nil, nil,
|
|
)
|
|
)
|
|
|
|
type collectorConstructor func(*bind.Statistics) prometheus.Collector
|
|
|
|
type serverCollector struct {
|
|
stats *bind.Statistics
|
|
}
|
|
|
|
// newServerCollector implements collectorConstructor.
|
|
func newServerCollector(s *bind.Statistics) prometheus.Collector {
|
|
return &serverCollector{stats: s}
|
|
}
|
|
|
|
// Describe implements prometheus.Collector.
|
|
func (c *serverCollector) Describe(ch chan<- *prometheus.Desc) {
|
|
ch <- bootTime
|
|
ch <- configTime
|
|
ch <- incomingQueries
|
|
ch <- incomingRequests
|
|
ch <- serverQueryErrors
|
|
ch <- serverReponses
|
|
for _, desc := range serverMetricStats {
|
|
ch <- desc
|
|
}
|
|
}
|
|
|
|
// Collect implements prometheus.Collector.
|
|
func (c *serverCollector) Collect(ch chan<- prometheus.Metric) {
|
|
ch <- prometheus.MustNewConstMetric(
|
|
bootTime, prometheus.GaugeValue, float64(c.stats.Server.BootTime.Unix()),
|
|
)
|
|
if !c.stats.Server.ConfigTime.IsZero() {
|
|
ch <- prometheus.MustNewConstMetric(
|
|
configTime, prometheus.GaugeValue, float64(c.stats.Server.ConfigTime.Unix()),
|
|
)
|
|
}
|
|
for _, s := range c.stats.Server.IncomingQueries {
|
|
ch <- prometheus.MustNewConstMetric(
|
|
incomingQueries, prometheus.CounterValue, float64(s.Counter), s.Name,
|
|
)
|
|
}
|
|
for _, s := range c.stats.Server.IncomingRequests {
|
|
ch <- prometheus.MustNewConstMetric(
|
|
incomingRequests, prometheus.CounterValue, float64(s.Counter), s.Name,
|
|
)
|
|
}
|
|
for _, s := range c.stats.Server.NameServerStats {
|
|
if desc, ok := serverLabelStats[s.Name]; ok {
|
|
r := strings.TrimPrefix(s.Name, "Qry")
|
|
ch <- prometheus.MustNewConstMetric(
|
|
desc, prometheus.CounterValue, float64(s.Counter), r,
|
|
)
|
|
}
|
|
if desc, ok := serverMetricStats[s.Name]; ok {
|
|
ch <- prometheus.MustNewConstMetric(
|
|
desc, prometheus.CounterValue, float64(s.Counter),
|
|
)
|
|
}
|
|
}
|
|
}
|
|
|
|
type viewCollector struct {
|
|
stats *bind.Statistics
|
|
}
|
|
|
|
// newViewCollector implements collectorConstructor.
|
|
func newViewCollector(s *bind.Statistics) prometheus.Collector {
|
|
return &viewCollector{stats: s}
|
|
}
|
|
|
|
// Describe implements prometheus.Collector.
|
|
func (c *viewCollector) Describe(ch chan<- *prometheus.Desc) {
|
|
ch <- resolverDNSSECSucess
|
|
ch <- resolverQueries
|
|
ch <- resolverQueryDuration
|
|
ch <- resolverQueryErrors
|
|
ch <- resolverResponseErrors
|
|
for _, desc := range resolverMetricStats {
|
|
ch <- desc
|
|
}
|
|
}
|
|
|
|
// Collect implements prometheus.Collector.
|
|
func (c *viewCollector) Collect(ch chan<- prometheus.Metric) {
|
|
for _, v := range c.stats.Views {
|
|
for _, s := range v.Cache {
|
|
ch <- prometheus.MustNewConstMetric(
|
|
resolverCache, prometheus.GaugeValue, float64(s.Gauge), v.Name, s.Name,
|
|
)
|
|
}
|
|
for _, s := range v.ResolverQueries {
|
|
ch <- prometheus.MustNewConstMetric(
|
|
resolverQueries, prometheus.CounterValue, float64(s.Counter), v.Name, s.Name,
|
|
)
|
|
}
|
|
for _, s := range v.ResolverStats {
|
|
if desc, ok := resolverMetricStats[s.Name]; ok {
|
|
ch <- prometheus.MustNewConstMetric(
|
|
desc, prometheus.CounterValue, float64(s.Counter), v.Name,
|
|
)
|
|
}
|
|
if desc, ok := resolverLabelStats[s.Name]; ok {
|
|
ch <- prometheus.MustNewConstMetric(
|
|
desc, prometheus.CounterValue, float64(s.Counter), v.Name, s.Name,
|
|
)
|
|
}
|
|
}
|
|
if buckets, count, err := histogram(v.ResolverStats); err == nil {
|
|
ch <- prometheus.MustNewConstHistogram(
|
|
resolverQueryDuration, count, math.NaN(), buckets, v.Name,
|
|
)
|
|
} else {
|
|
log.Warn("Error parsing RTT:", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
type taskCollector struct {
|
|
stats *bind.Statistics
|
|
}
|
|
|
|
// newTaskCollector implements collectorConstructor.
|
|
func newTaskCollector(s *bind.Statistics) prometheus.Collector {
|
|
return &taskCollector{stats: s}
|
|
}
|
|
|
|
// Describe implements prometheus.Collector.
|
|
func (c *taskCollector) Describe(ch chan<- *prometheus.Desc) {
|
|
ch <- tasksRunning
|
|
ch <- workerThreads
|
|
}
|
|
|
|
// Collect implements prometheus.Collector.
|
|
func (c *taskCollector) Collect(ch chan<- prometheus.Metric) {
|
|
threadModel := c.stats.TaskManager.ThreadModel
|
|
ch <- prometheus.MustNewConstMetric(
|
|
tasksRunning, prometheus.GaugeValue, float64(threadModel.TasksRunning),
|
|
)
|
|
ch <- prometheus.MustNewConstMetric(
|
|
workerThreads, prometheus.GaugeValue, float64(threadModel.WorkerThreads),
|
|
)
|
|
}
|
|
|
|
// Exporter collects Binds stats from the given server and exports them using
|
|
// the prometheus metrics package.
|
|
type Exporter struct {
|
|
client bind.Client
|
|
collectors []collectorConstructor
|
|
groups []bind.StatisticGroup
|
|
}
|
|
|
|
// NewExporter returns an initialized Exporter.
|
|
func NewExporter(version, url string, timeout time.Duration, g []bind.StatisticGroup) *Exporter {
|
|
var c bind.Client
|
|
switch version {
|
|
case "xml.v2":
|
|
c = v2.NewClient(url, &http.Client{Timeout: timeout})
|
|
case "xml.v3":
|
|
c = v3.NewClient(url, &http.Client{Timeout: timeout})
|
|
default:
|
|
c = auto.NewClient(url, &http.Client{Timeout: timeout})
|
|
}
|
|
|
|
var cs []collectorConstructor
|
|
for _, g := range g {
|
|
switch g {
|
|
case bind.ServerStats:
|
|
cs = append(cs, newServerCollector)
|
|
case bind.ViewStats:
|
|
cs = append(cs, newViewCollector)
|
|
case bind.TaskStats:
|
|
cs = append(cs, newTaskCollector)
|
|
}
|
|
}
|
|
|
|
return &Exporter{client: c, collectors: cs, groups: g}
|
|
}
|
|
|
|
// Describe describes all the metrics ever exported by the bind exporter. It
|
|
// implements prometheus.Collector.
|
|
func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
|
|
ch <- up
|
|
for _, c := range e.collectors {
|
|
c(&bind.Statistics{}).Describe(ch)
|
|
}
|
|
}
|
|
|
|
// Collect fetches the stats from configured bind location and delivers them as
|
|
// Prometheus metrics. It implements prometheus.Collector.
|
|
func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
|
|
status := 0.
|
|
if stats, err := e.client.Stats(e.groups...); err == nil {
|
|
for _, c := range e.collectors {
|
|
c(&stats).Collect(ch)
|
|
}
|
|
status = 1
|
|
} else {
|
|
log.Error("Couldn't retrieve BIND stats: ", err)
|
|
}
|
|
ch <- prometheus.MustNewConstMetric(up, prometheus.GaugeValue, status)
|
|
}
|
|
|
|
func histogram(stats []bind.Counter) (map[float64]uint64, uint64, error) {
|
|
buckets := map[float64]uint64{}
|
|
var count uint64
|
|
|
|
for _, s := range stats {
|
|
if strings.HasPrefix(s.Name, bind.QryRTT) {
|
|
b := math.Inf(0)
|
|
if !strings.HasSuffix(s.Name, "+") {
|
|
var err error
|
|
rrt := strings.TrimPrefix(s.Name, bind.QryRTT)
|
|
b, err = strconv.ParseFloat(rrt, 32)
|
|
if err != nil {
|
|
return buckets, 0, fmt.Errorf("could not parse RTT: %s", rrt)
|
|
}
|
|
}
|
|
|
|
buckets[b/1000] = count + uint64(s.Counter)
|
|
count += uint64(s.Counter)
|
|
}
|
|
}
|
|
return buckets, count, nil
|
|
}
|
|
|
|
type statisticGroups []bind.StatisticGroup
|
|
|
|
// String implements flag.Value.
|
|
func (s *statisticGroups) String() string {
|
|
groups := []string{}
|
|
for _, g := range *s {
|
|
groups = append(groups, string(g))
|
|
}
|
|
return fmt.Sprintf("%q", strings.Join(groups, ","))
|
|
}
|
|
|
|
// Set implements flag.Value.
|
|
func (s *statisticGroups) Set(value string) error {
|
|
*s = []bind.StatisticGroup{}
|
|
if len(value) == 0 {
|
|
return nil
|
|
}
|
|
var sg bind.StatisticGroup
|
|
for _, dt := range strings.Split(value, ",") {
|
|
switch dt {
|
|
case string(bind.ServerStats):
|
|
sg = bind.ServerStats
|
|
case string(bind.ViewStats):
|
|
sg = bind.ViewStats
|
|
case string(bind.TaskStats):
|
|
sg = bind.TaskStats
|
|
default:
|
|
return fmt.Errorf("unknown stats group %q", dt)
|
|
}
|
|
for _, existing := range *s {
|
|
if existing == sg {
|
|
return fmt.Errorf("duplicated stats group %q", sg)
|
|
}
|
|
}
|
|
*s = append(*s, sg)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func main() {
|
|
var (
|
|
bindURI = flag.String("bind.stats-url", "http://localhost:8053/", "HTTP XML API address of an Bind server.")
|
|
bindTimeout = flag.Duration("bind.timeout", 10*time.Second, "Timeout for trying to get stats from Bind.")
|
|
bindPidFile = flag.String("bind.pid-file", "", "Path to Bind's pid file to export process information.")
|
|
bindVersion = flag.String("bind.stats-version", "auto", "BIND statistics version. Can be detected automatically. Available: [xml.v2, xml.v3, auto]")
|
|
showVersion = flag.Bool("version", false, "Print version information.")
|
|
listenAddress = flag.String("web.listen-address", ":9119", "Address to listen on for web interface and telemetry.")
|
|
metricsPath = flag.String("web.telemetry-path", "/metrics", "Path under which to expose metrics.")
|
|
|
|
groups = statisticGroups{bind.ServerStats, bind.ViewStats}
|
|
)
|
|
flag.Var(&groups, "bind.stats-groups", "Comma-separated list of statistics to collect. Available: [server, view, tasks]")
|
|
flag.Parse()
|
|
|
|
if *showVersion {
|
|
fmt.Fprintln(os.Stdout, version.Print("bind_exporter"))
|
|
os.Exit(0)
|
|
}
|
|
log.Infoln("Starting bind_exporter", version.Info())
|
|
log.Infoln("Build context", version.BuildContext())
|
|
log.Infoln("Configured to collect statistics", groups.String())
|
|
|
|
prometheus.MustRegister(NewExporter(*bindVersion, *bindURI, *bindTimeout, groups))
|
|
if *bindPidFile != "" {
|
|
procExporter := prometheus.NewProcessCollectorPIDFn(
|
|
func() (int, error) {
|
|
content, err := ioutil.ReadFile(*bindPidFile)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("Can't read pid file: %s", err)
|
|
}
|
|
value, err := strconv.Atoi(strings.TrimSpace(string(content)))
|
|
if err != nil {
|
|
return 0, fmt.Errorf("Can't parse pid file: %s", err)
|
|
}
|
|
return value, nil
|
|
}, namespace)
|
|
prometheus.MustRegister(procExporter)
|
|
}
|
|
|
|
log.Info("Starting Server: ", *listenAddress)
|
|
http.Handle(*metricsPath, prometheus.Handler())
|
|
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Write([]byte(`<html>
|
|
<head><title>Bind Exporter</title></head>
|
|
<body>
|
|
<h1>Bind Exporter</h1>
|
|
<p><a href='` + *metricsPath + `'>Metrics</a></p>
|
|
</body>
|
|
</html>`))
|
|
})
|
|
log.Fatal(http.ListenAndServe(*listenAddress, nil))
|
|
}
|