From 3864bbc9cdbf9ac8df30b8369525a0c7b5c4561e Mon Sep 17 00:00:00 2001 From: Ben Kochie Date: Thu, 24 Dec 2020 16:26:58 +0100 Subject: [PATCH 1/9] Update queries.yml (#433) Update query for pg_stat_user_tables: * Split up to multi-line format to make it easier to read. * Remove duplicate of column `COALESCE(last_vacuum, '1970-01-01Z')`. Signed-off-by: Ben Kochie --- queries.yaml | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/queries.yaml b/queries.yaml index 18abd65b..d0fdc520 100644 --- a/queries.yaml +++ b/queries.yaml @@ -15,7 +15,32 @@ pg_postmaster: description: "Time at which postmaster started" pg_stat_user_tables: - query: "SELECT current_database() datname, schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze, COALESCE(last_vacuum, '1970-01-01Z'), COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, COALESCE(last_analyze, '1970-01-01Z') as last_analyze, COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, vacuum_count, autovacuum_count, analyze_count, autoanalyze_count FROM pg_stat_user_tables" + query: | + SELECT + current_database() datname, + schemaname, + relname, + seq_scan, + seq_tup_read, + idx_scan, + idx_tup_fetch, + n_tup_ins, + n_tup_upd, + n_tup_del, + n_tup_hot_upd, + n_live_tup, + n_dead_tup, + n_mod_since_analyze, + COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, + COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, + COALESCE(last_analyze, '1970-01-01Z') as last_analyze, + COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, + vacuum_count, + autovacuum_count, + analyze_count, + autoanalyze_count + FROM + pg_stat_user_tables metrics: - datname: usage: "LABEL" From 3fd1c2c0ebc3b712f788481bd67e3d688112681c Mon Sep 17 00:00:00 2001 From: Corin Lawson Date: Fri, 25 Dec 2020 02:34:26 +1100 Subject: [PATCH 2/9] Introduce histogram support (#435) * Introduce histogram support Prior to this change, the custom queries were restricted to counters and gauges. This change introduces a new ColumnUsage, namely HISTOGRAM, that expects the column to contain an array of upper inclusive bounds for each observation bucket in the emitted metric. It also expects three more columns to be present with the suffixes: - `_bucket`, containing an array of cumulative counters for the observation buckets; - `_sum`, the total sum of all observed values; and - `_count`, the count of events that have been observed. A flag has been added to the MetricMap struct to easily identify metrics that should emit a histogram and the construction of a histogram metric is aided by the pg.Array function and a new helper dbToUint64 function. Finally, and example of usage is given in queries.yaml. fixes #402 Signed-off-by: Corin Lawson * Introduces tests for histogram support Prior to this change, the histogram support was untested. This change introduces a new integration test that reads a user query containing a number of histogram metrics. Also, additional checks have been added to TestBooleanConversionToValueAndString to test dbToUint64. 
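To make the column convention above concrete, the following is a minimal, self-contained Go sketch of how a HISTOGRAM column and its `_bucket`, `_sum` and `_count` companions combine into one Prometheus const histogram. The metric name, bucket bounds and values are invented for illustration; prometheus.NewDesc and prometheus.MustNewConstHistogram are the client_golang calls the new code path relies on, and in the patch itself the `_count` column is parsed with the new dbToUint64 helper while the arrays are scanned with pq.Array.

    package main

    import (
        "fmt"

        "github.com/prometheus/client_golang/prometheus"
    )

    func main() {
        // Hypothetical values as a custom query could return them:
        //   metric        -> upper inclusive bound of each observation bucket
        //   metric_bucket -> cumulative counter for each bucket
        //   metric_sum    -> total sum of all observed values
        //   metric_count  -> count of events that have been observed
        upperBounds := []float64{1, 2, 4, 8}
        cumulative := []uint64{10, 25, 70, 100}
        sum := 312.5
        count := uint64(100)

        // Pair each upper bound with its cumulative count, as the collector does.
        buckets := make(map[float64]uint64, len(upperBounds))
        for i, le := range upperBounds {
            buckets[le] = cumulative[i]
        }

        desc := prometheus.NewDesc("example_histogram", "illustration only", nil, nil)
        metric := prometheus.MustNewConstHistogram(desc, count, sum, buckets)
        fmt.Println(metric.Desc())
    }
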
Signed-off-by: Corin Lawson --- cmd/postgres_exporter/postgres_exporter.go | 133 +++++++++++++++++- .../postgres_exporter_integration_test.go | 23 +++ .../postgres_exporter_test.go | 73 +++++++++- .../tests/user_queries_test.yaml | 51 +++++++ queries.yaml | 44 ++++++ 5 files changed, 317 insertions(+), 7 deletions(-) create mode 100644 cmd/postgres_exporter/tests/user_queries_test.yaml diff --git a/cmd/postgres_exporter/postgres_exporter.go b/cmd/postgres_exporter/postgres_exporter.go index 0f941616..9151ba3d 100644 --- a/cmd/postgres_exporter/postgres_exporter.go +++ b/cmd/postgres_exporter/postgres_exporter.go @@ -80,6 +80,7 @@ const ( GAUGE ColumnUsage = iota // Use this column as a gauge MAPPEDMETRIC ColumnUsage = iota // Use this column with the supplied mapping of text values DURATION ColumnUsage = iota // This column should be interpreted as a text duration (and converted to milliseconds) + HISTOGRAM ColumnUsage = iota // Use this column as a histogram ) // UnmarshalYAML implements the yaml.Unmarshaller interface. @@ -169,6 +170,7 @@ type MetricMapNamespace struct { // be mapped to by the collector type MetricMap struct { discard bool // Should metric be discarded during mapping? + histogram bool // Should metric be treated as a histogram? vtype prometheus.ValueType // Prometheus valuetype desc *prometheus.Desc // Prometheus descriptor conversion func(interface{}) (float64, bool) // Conversion function to turn PG result into float64 @@ -650,6 +652,27 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri return dbToFloat64(in) }, } + case HISTOGRAM: + thisMap[columnName] = MetricMap{ + histogram: true, + vtype: prometheus.UntypedValue, + desc: prometheus.NewDesc(fmt.Sprintf("%s_%s", namespace, columnName), columnMapping.description, variableLabels, serverLabels), + conversion: func(in interface{}) (float64, bool) { + return dbToFloat64(in) + }, + } + thisMap[columnName+"_bucket"] = MetricMap{ + histogram: true, + discard: true, + } + thisMap[columnName+"_sum"] = MetricMap{ + histogram: true, + discard: true, + } + thisMap[columnName+"_count"] = MetricMap{ + histogram: true, + discard: true, + } case MAPPEDMETRIC: thisMap[columnName] = MetricMap{ vtype: prometheus.GaugeValue, @@ -721,6 +744,9 @@ func stringToColumnUsage(s string) (ColumnUsage, error) { case "GAUGE": u = GAUGE + case "HISTOGRAM": + u = HISTOGRAM + case "MAPPEDMETRIC": u = MAPPEDMETRIC @@ -772,6 +798,46 @@ func dbToFloat64(t interface{}) (float64, bool) { } } +// Convert database.sql types to uint64 for Prometheus consumption. Null types are mapped to 0. string and []byte +// types are mapped as 0 and !ok +func dbToUint64(t interface{}) (uint64, bool) { + switch v := t.(type) { + case uint64: + return v, true + case int64: + return uint64(v), true + case float64: + return uint64(v), true + case time.Time: + return uint64(v.Unix()), true + case []byte: + // Try and convert to string and then parse to a uint64 + strV := string(v) + result, err := strconv.ParseUint(strV, 10, 64) + if err != nil { + log.Infoln("Could not parse []byte:", err) + return 0, false + } + return result, true + case string: + result, err := strconv.ParseUint(v, 10, 64) + if err != nil { + log.Infoln("Could not parse string:", err) + return 0, false + } + return result, true + case bool: + if v { + return 1, true + } + return 0, true + case nil: + return 0, true + default: + return 0, false + } +} + // Convert database.sql to string for Prometheus labels. Null types are mapped to empty strings. 
func dbToString(t interface{}) (string, bool) { switch v := t.(type) { @@ -1304,13 +1370,68 @@ func queryNamespaceMapping(server *Server, namespace string, mapping MetricMapNa continue } - value, ok := dbToFloat64(columnData[idx]) - if !ok { - nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unexpected error parsing column: ", namespace, columnName, columnData[idx]))) - continue + if metricMapping.histogram { + var keys []float64 + err = pq.Array(&keys).Scan(columnData[idx]) + if err != nil { + return []prometheus.Metric{}, []error{}, errors.New(fmt.Sprintln("Error retrieving", columnName, "buckets:", namespace, err)) + } + + var values []int64 + valuesIdx, ok := columnIdx[columnName+"_bucket"] + if !ok { + nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Missing column: ", namespace, columnName+"_bucket"))) + continue + } + err = pq.Array(&values).Scan(columnData[valuesIdx]) + if err != nil { + return []prometheus.Metric{}, []error{}, errors.New(fmt.Sprintln("Error retrieving", columnName, "bucket values:", namespace, err)) + } + + buckets := make(map[float64]uint64, len(keys)) + for i, key := range keys { + if i >= len(values) { + break + } + buckets[key] = uint64(values[i]) + } + + idx, ok = columnIdx[columnName+"_sum"] + if !ok { + nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Missing column: ", namespace, columnName+"_sum"))) + continue + } + sum, ok := dbToFloat64(columnData[idx]) + if !ok { + nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unexpected error parsing column: ", namespace, columnName+"_sum", columnData[idx]))) + continue + } + + idx, ok = columnIdx[columnName+"_count"] + if !ok { + nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Missing column: ", namespace, columnName+"_count"))) + continue + } + count, ok := dbToUint64(columnData[idx]) + if !ok { + nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unexpected error parsing column: ", namespace, columnName+"_count", columnData[idx]))) + continue + } + + metric = prometheus.MustNewConstHistogram( + metricMapping.desc, + count, sum, buckets, + labels..., + ) + } else { + value, ok := dbToFloat64(columnData[idx]) + if !ok { + nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unexpected error parsing column: ", namespace, columnName, columnData[idx]))) + continue + } + // Generate the metric + metric = prometheus.MustNewConstMetric(metricMapping.desc, metricMapping.vtype, value, labels...) } - // Generate the metric - metric = prometheus.MustNewConstMetric(metricMapping.desc, metricMapping.vtype, value, labels...) } else { // Unknown metric. Report as untyped if scan to float64 works, else note an error too. metricLabel := fmt.Sprintf("%s_%s", namespace, columnName) diff --git a/cmd/postgres_exporter/postgres_exporter_integration_test.go b/cmd/postgres_exporter/postgres_exporter_integration_test.go index 0363af96..d575692a 100644 --- a/cmd/postgres_exporter/postgres_exporter_integration_test.go +++ b/cmd/postgres_exporter/postgres_exporter_integration_test.go @@ -126,3 +126,26 @@ func (s *IntegrationSuite) TestUnknownMetricParsingDoesntCrash(c *C) { // scrape the exporter and make sure it works exporter.scrape(ch) } + +// TestExtendQueriesDoesntCrash tests that specifying extend.query-path doesn't +// crash. 
+func (s *IntegrationSuite) TestExtendQueriesDoesntCrash(c *C) { + // Setup a dummy channel to consume metrics + ch := make(chan prometheus.Metric, 100) + go func() { + for range ch { + } + }() + + dsn := os.Getenv("DATA_SOURCE_NAME") + c.Assert(dsn, Not(Equals), "") + + exporter := NewExporter( + strings.Split(dsn, ","), + WithUserQueriesPath("../user_queries_test.yaml"), + ) + c.Assert(exporter, NotNil) + + // scrape the exporter and make sure it works + exporter.scrape(ch) +} diff --git a/cmd/postgres_exporter/postgres_exporter_test.go b/cmd/postgres_exporter/postgres_exporter_test.go index 8222bf9d..0a471750 100644 --- a/cmd/postgres_exporter/postgres_exporter_test.go +++ b/cmd/postgres_exporter/postgres_exporter_test.go @@ -4,9 +4,11 @@ package main import ( "io/ioutil" + "math" "os" "reflect" "testing" + "time" "github.com/blang/semver" "github.com/prometheus/client_golang/prometheus" @@ -287,6 +289,22 @@ func UnsetEnvironment(c *C, d string) { c.Assert(err, IsNil) } +type isNaNChecker struct { + *CheckerInfo +} + +var IsNaN Checker = &isNaNChecker{ + &CheckerInfo{Name: "IsNaN", Params: []string{"value"}}, +} + +func (checker *isNaNChecker) Check(params []interface{}, names []string) (result bool, error string) { + param, ok := (params[0]).(float64) + if !ok { + return false, "obtained value type is not a float" + } + return math.IsNaN(param), "" +} + // test boolean metric type gets converted to float func (s *FunctionalSuite) TestBooleanConversionToValueAndString(c *C) { @@ -294,6 +312,7 @@ func (s *FunctionalSuite) TestBooleanConversionToValueAndString(c *C) { input interface{} expectedString string expectedValue float64 + expectedCount uint64 expectedOK bool } @@ -302,19 +321,71 @@ func (s *FunctionalSuite) TestBooleanConversionToValueAndString(c *C) { input: true, expectedString: "true", expectedValue: 1.0, + expectedCount: 1, expectedOK: true, }, { input: false, expectedString: "false", expectedValue: 0.0, + expectedCount: 0, + expectedOK: true, + }, + { + input: nil, + expectedString: "", + expectedValue: math.NaN(), + expectedCount: 0, + expectedOK: true, + }, + { + input: TestCase{}, + expectedString: "", + expectedValue: math.NaN(), + expectedCount: 0, + expectedOK: false, + }, + { + input: 123.0, + expectedString: "123", + expectedValue: 123.0, + expectedCount: 123, + expectedOK: true, + }, + { + input: "123", + expectedString: "123", + expectedValue: 123.0, + expectedCount: 123, + expectedOK: true, + }, + { + input: []byte("123"), + expectedString: "123", + expectedValue: 123.0, + expectedCount: 123, + expectedOK: true, + }, + { + input: time.Unix(1600000000, 0), + expectedString: "1600000000", + expectedValue: 1600000000.0, + expectedCount: 1600000000, expectedOK: true, }, } for _, cs := range cases { value, ok := dbToFloat64(cs.input) - c.Assert(value, Equals, cs.expectedValue) + if math.IsNaN(cs.expectedValue) { + c.Assert(value, IsNaN) + } else { + c.Assert(value, Equals, cs.expectedValue) + } + c.Assert(ok, Equals, cs.expectedOK) + + count, ok := dbToUint64(cs.input) + c.Assert(count, Equals, cs.expectedCount) c.Assert(ok, Equals, cs.expectedOK) str, ok := dbToString(cs.input) diff --git a/cmd/postgres_exporter/tests/user_queries_test.yaml b/cmd/postgres_exporter/tests/user_queries_test.yaml new file mode 100644 index 00000000..c9a39655 --- /dev/null +++ b/cmd/postgres_exporter/tests/user_queries_test.yaml @@ -0,0 +1,51 @@ +random: + query: | + WITH data AS (SELECT floor(random()*10) AS d FROM generate_series(1,100)), + metrics AS (SELECT SUM(d) AS sum, COUNT(*) AS 
count FROM data), + buckets AS (SELECT le, SUM(CASE WHEN d <= le THEN 1 ELSE 0 END) AS d + FROM data, UNNEST(ARRAY[1, 2, 4, 8]) AS le GROUP BY le) + SELECT + sum AS histogram_sum, + count AS histogram_count, + ARRAY_AGG(le) AS histogram, + ARRAY_AGG(d) AS histogram_bucket, + ARRAY_AGG(le) AS missing, + ARRAY_AGG(le) AS missing_sum, + ARRAY_AGG(d) AS missing_sum_bucket, + ARRAY_AGG(le) AS missing_count, + ARRAY_AGG(d) AS missing_count_bucket, + sum AS missing_count_sum, + ARRAY_AGG(le) AS unexpected_sum, + ARRAY_AGG(d) AS unexpected_sum_bucket, + 'data' AS unexpected_sum_sum, + ARRAY_AGG(le) AS unexpected_count, + ARRAY_AGG(d) AS unexpected_count_bucket, + sum AS unexpected_count_sum, + 'nan'::varchar AS unexpected_count_count, + ARRAY_AGG(le) AS unexpected_bytes, + ARRAY_AGG(d) AS unexpected_bytes_bucket, + sum AS unexpected_bytes_sum, + 'nan'::bytea AS unexpected_bytes_count + FROM metrics, buckets GROUP BY 1,2 + metrics: + - histogram: + usage: "HISTOGRAM" + description: "Random data" + - missing: + usage: "HISTOGRAM" + description: "nonfatal error" + - missing_sum: + usage: "HISTOGRAM" + description: "nonfatal error" + - missing_count: + usage: "HISTOGRAM" + description: "nonfatal error" + - unexpected_sum: + usage: "HISTOGRAM" + description: "nonfatal error" + - unexpected_count: + usage: "HISTOGRAM" + description: "nonfatal error" + - unexpected_bytes: + usage: "HISTOGRAM" + description: "nonfatal error" diff --git a/queries.yaml b/queries.yaml index d0fdc520..24abb9a9 100644 --- a/queries.yaml +++ b/queries.yaml @@ -228,3 +228,47 @@ pg_stat_statements: - blk_write_time_seconds: usage: "COUNTER" description: "Total time the statement spent writing blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)" + +pg_stat_activity: + query: | + WITH + metrics AS ( + SELECT + application_name, + SUM(EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change))::bigint)::float AS process_idle_seconds_sum, + COUNT(*) AS process_idle_seconds_count + FROM pg_stat_activity + WHERE state = 'idle' + GROUP BY application_name + ), + buckets AS ( + SELECT + application_name, + le, + SUM( + CASE WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change)) <= le + THEN 1 + ELSE 0 + END + )::bigint AS bucket + FROM + pg_stat_activity, + UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le + GROUP BY application_name, le + ORDER BY application_name, le + ) + SELECT + application_name, + process_idle_seconds_sum, + process_idle_seconds_count, + ARRAY_AGG(le) AS process_idle_seconds, + ARRAY_AGG(bucket) AS process_idle_seconds_bucket + FROM metrics JOIN buckets USING (application_name) + GROUP BY 1, 2, 3 + metrics: + - application_name: + usage: "LABEL" + description: "Application Name" + - process_idle_seconds: + usage: "HISTOGRAM" + description: "Idle time of server processes" From c55a3b3c5b95dad4513e96a1218d9ec2e3916ef8 Mon Sep 17 00:00:00 2001 From: Ajay Bhat Date: Thu, 24 Dec 2020 21:06:18 +0530 Subject: [PATCH 3/9] Fixes (#364) (#387) Have a custom prefix for each of the default metrics created by postgres_exporter Co-authored-by: Will Rouesnel --- README.md | 3 +++ cmd/postgres_exporter/postgres_exporter.go | 3 +++ 2 files changed, 6 insertions(+) diff --git a/README.md b/README.md index ff5174b3..72879ac4 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,9 @@ The following environment variables configure the exporter: * `PG_EXPORTER_EXCLUDE_DATABASES` A comma-separated list of databases to remove when autoDiscoverDatabases is enabled. Default is empty string. 
+* `PG_EXPORTER_METRIC_PREFIX` + A prefix to use for each of the default metrics exported by postgres-exporter. Default is `pg` + Settings set by environment variables starting with `PG_` will be overwritten by the corresponding CLI flag if given. ### Setting the Postgres server's data source name diff --git a/cmd/postgres_exporter/postgres_exporter.go b/cmd/postgres_exporter/postgres_exporter.go index 9151ba3d..1fe0d833 100644 --- a/cmd/postgres_exporter/postgres_exporter.go +++ b/cmd/postgres_exporter/postgres_exporter.go @@ -53,6 +53,7 @@ var ( onlyDumpMaps = kingpin.Flag("dumpmaps", "Do not run, simply dump the maps.").Bool() constantLabelsList = kingpin.Flag("constantLabels", "A list of label=value separated by comma(,).").Default("").Envar("PG_EXPORTER_CONSTANT_LABELS").String() excludeDatabases = kingpin.Flag("exclude-databases", "A list of databases to remove when autoDiscoverDatabases is enabled").Default("").Envar("PG_EXPORTER_EXCLUDE_DATABASES").String() + metricPrefix = kingpin.Flag("metric-prefix", "A metric prefix can be used to have non-default (not \"pg\") prefixes for each of the metrics").Default("pg").Envar("PG_EXPORTER_METRIC_PREFIX").String() ) // Metric name parts. @@ -626,6 +627,8 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri } } + namespace := strings.Replace(namespace, "pg", *metricPrefix, 1) + // Determine how to convert the column based on its usage. // nolint: dupl switch columnMapping.usage { From 8c27e97b776fd495420f002a967dda8b3dbfaa3a Mon Sep 17 00:00:00 2001 From: Yann Soubeyrand Date: Thu, 24 Dec 2020 16:37:31 +0100 Subject: [PATCH 4/9] Do not try to return metric descriptors in Describe (#416) Since we cannot know in advance the metrics which the exporter will generate, the workaround is to run a Collect and return the metric descriptors. This is problematic when the connection to the PostgreSQL instance cannot be established straight from the start. This patch makes Describe return no descriptors, effectively turning the collector in an unchecked one, which we're in the typical use case here: https://pkg.go.dev/github.com/prometheus/client_golang/prometheus?tab=doc#hdr-Custom_Collectors_and_constant_Metrics. Signed-off-by: Yann Soubeyrand --- cmd/postgres_exporter/postgres_exporter.go | 23 ---------------------- 1 file changed, 23 deletions(-) diff --git a/cmd/postgres_exporter/postgres_exporter.go b/cmd/postgres_exporter/postgres_exporter.go index 1fe0d833..3a20c7b3 100644 --- a/cmd/postgres_exporter/postgres_exporter.go +++ b/cmd/postgres_exporter/postgres_exporter.go @@ -1236,29 +1236,6 @@ func (e *Exporter) setupInternalMetrics() { // Describe implements prometheus.Collector. func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { - // We cannot know in advance what metrics the exporter will generate - // from Postgres. So we use the poor man's describe method: Run a collect - // and send the descriptors of all the collected metrics. The problem - // here is that we need to connect to the Postgres DB. If it is currently - // unavailable, the descriptors will be incomplete. Since this is a - // stand-alone exporter and not used as a library within other code - // implementing additional metrics, the worst that can happen is that we - // don't detect inconsistent metrics created by this exporter - // itself. Also, a change in the monitored Postgres instance may change the - // exported metrics during the runtime of the exporter. 
- metricCh := make(chan prometheus.Metric) - doneCh := make(chan struct{}) - - go func() { - for m := range metricCh { - ch <- m.Desc() - } - close(doneCh) - }() - - e.Collect(metricCh) - close(metricCh) - <-doneCh } // Collect implements prometheus.Collector. From bfd0707e375aeac85b102dcaa640df7717df581f Mon Sep 17 00:00:00 2001 From: alexey-gavrilov-flant <53515419+alexey-gavrilov-flant@users.noreply.github.com> Date: Thu, 24 Dec 2020 18:39:07 +0300 Subject: [PATCH 5/9] Fixed "Scrape Duration" if psql down (#426) Co-authored-by: Will Rouesnel --- cmd/postgres_exporter/postgres_exporter.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/postgres_exporter/postgres_exporter.go b/cmd/postgres_exporter/postgres_exporter.go index 3a20c7b3..5e4387dc 100644 --- a/cmd/postgres_exporter/postgres_exporter.go +++ b/cmd/postgres_exporter/postgres_exporter.go @@ -1046,7 +1046,7 @@ func (s *Servers) GetServer(dsn string) (*Server, error) { var err error var ok bool errCount := 0 // start at zero because we increment before doing work - retries := 3 + retries := 1 var server *Server for { if errCount++; errCount > retries { From f188bdef53ee8c71ec4070172d7b186a03fcd04a Mon Sep 17 00:00:00 2001 From: Jakov Sosic Date: Thu, 24 Dec 2020 16:39:43 +0100 Subject: [PATCH 6/9] Improve PostgreSQL replication lag detection (#395) In some cases master can show pg_last_xact_replay_timestamp() from past, which can cause the exporter to show ever-growing value for the lag. By checking if the instance is in recovery we can avoid reporting some huge number for master instance. --- queries.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/queries.yaml b/queries.yaml index 24abb9a9..800e62bd 100644 --- a/queries.yaml +++ b/queries.yaml @@ -1,5 +1,5 @@ pg_replication: - query: "SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp())) as lag" + query: "SELECT CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag" master: true metrics: - lag: From aea6fae7d64e874e3b8416c6d4200382b9337a6c Mon Sep 17 00:00:00 2001 From: Yann Soubeyrand Date: Thu, 24 Dec 2020 16:41:05 +0100 Subject: [PATCH 7/9] Recover when connection cannot be established straight at startup (#415) When the connection to the PostgreSQL instance cannot be established straight at startup, a race condition can happen when autoDiscoverDatabases is true. If discoverDatabaseDSNs fails, no dsn is set as the master database, and, if scrapeDSN succeeds, checkMapVersions will have omitted the default metrics in the server metric map. The metric map won't be updated unless the version returned by the PostgreSQL instance changes. With this patch, scrapeDSN won't be run unless discoverDatabaseDSNs succeeded and thus the race condition is eliminated. 
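As a rough illustration of the reordering the hunk below applies, here is a small, self-contained Go sketch that records a discovered DSN only after a connection to it could be opened. The connect helper and the DSN strings are invented stand-ins for e.servers.GetServer and real data source names, not the exporter's actual code.

    package main

    import "log"

    // connect stands in for e.servers.GetServer; the first DSN is treated as
    // unreachable, as a database might be at exporter startup.
    func connect(dsn string) bool {
        return dsn != "postgresql://unreachable/postgres"
    }

    func main() {
        candidates := []string{
            "postgresql://unreachable/postgres",
            "postgresql://reachable/postgres",
        }

        dsns := make(map[string]struct{})
        for _, dsn := range candidates {
            if !connect(dsn) {
                // Before the change the DSN was added to the set ahead of this
                // check, so a failed connection still left an entry behind.
                log.Printf("skipping %s: connection failed", dsn)
                continue
            }
            // With the change, only databases that could actually be reached
            // are remembered for scraping.
            dsns[dsn] = struct{}{}
        }
        log.Printf("keeping %d of %d discovered DSN(s)", len(dsns), len(candidates))
    }
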
Signed-off-by: Yann Soubeyrand --- cmd/postgres_exporter/postgres_exporter.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/postgres_exporter/postgres_exporter.go b/cmd/postgres_exporter/postgres_exporter.go index 5e4387dc..a73dcc84 100644 --- a/cmd/postgres_exporter/postgres_exporter.go +++ b/cmd/postgres_exporter/postgres_exporter.go @@ -1624,12 +1624,12 @@ func (e *Exporter) discoverDatabaseDSNs() []string { continue } - dsns[dsn] = struct{}{} server, err := e.servers.GetServer(dsn) if err != nil { log.Errorf("Error opening connection to database (%s): %v", loggableDSN(dsn), err) continue } + dsns[dsn] = struct{}{} // If autoDiscoverDatabases is true, set first dsn as master database (Default: false) server.master = true From 1ba1100a722d36f1f5db21fb8fe2de0c57231b4b Mon Sep 17 00:00:00 2001 From: Ivan Shapovalov Date: Sun, 31 Jan 2021 14:21:38 +0300 Subject: [PATCH 8/9] Support connstring syntax when discovering databases (#473) * Support connstring syntax when discovering databases Support connstring DSNs (`host=... user=... password=... dbname=...`) in addition to URIs (`postgresql://user:pass@host/dbname`) for purposes of database discovery. Connstring syntax is needed to support accessing PostgreSQL via Unix domain sockets (`host=/run/postgres`), which is not really possible with URI syntax. * Appease gometalinter, don't shadow namespace --- cmd/postgres_exporter/postgres_exporter.go | 38 ++++++++++++++++++---- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/cmd/postgres_exporter/postgres_exporter.go b/cmd/postgres_exporter/postgres_exporter.go index a73dcc84..958fdc71 100644 --- a/cmd/postgres_exporter/postgres_exporter.go +++ b/cmd/postgres_exporter/postgres_exporter.go @@ -601,6 +601,8 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri for namespace, intermediateMappings := range metricMaps { thisMap := make(map[string]MetricMap) + namespace = strings.Replace(namespace, "pg", *metricPrefix, 1) + // Get the constant labels var variableLabels []string for columnName, columnMapping := range intermediateMappings.columnMappings { @@ -627,8 +629,6 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri } } - namespace := strings.Replace(namespace, "pg", *metricPrefix, 1) - // Determine how to convert the column based on its usage. // nolint: dupl switch columnMapping.usage { @@ -1616,11 +1616,27 @@ func (e *Exporter) scrape(ch chan<- prometheus.Metric) { } func (e *Exporter) discoverDatabaseDSNs() []string { + // connstring syntax is complex (and not sure if even regular). 
+ // we don't need to parse it, so just superficially validate that it starts + // with a valid-ish keyword pair + connstringRe := regexp.MustCompile(`^ *[a-zA-Z0-9]+ *= *[^= ]+`) + dsns := make(map[string]struct{}) for _, dsn := range e.dsn { - parsedDSN, err := url.Parse(dsn) - if err != nil { - log.Errorf("Unable to parse DSN (%s): %v", loggableDSN(dsn), err) + var dsnURI *url.URL + var dsnConnstring string + + if strings.HasPrefix(dsn, "postgresql://") { + var err error + dsnURI, err = url.Parse(dsn) + if err != nil { + log.Errorf("Unable to parse DSN as URI (%s): %v", loggableDSN(dsn), err) + continue + } + } else if connstringRe.MatchString(dsn) { + dsnConnstring = dsn + } else { + log.Errorf("Unable to parse DSN as either URI or connstring (%s)", loggableDSN(dsn)) continue } @@ -1643,8 +1659,16 @@ func (e *Exporter) discoverDatabaseDSNs() []string { if contains(e.excludeDatabases, databaseName) { continue } - parsedDSN.Path = databaseName - dsns[parsedDSN.String()] = struct{}{} + + if dsnURI != nil { + dsnURI.Path = databaseName + dsn = dsnURI.String() + } else { + // replacing one dbname with another is complicated. + // just append new dbname to override. + dsn = fmt.Sprintf("%s dbname=%s", dsnConnstring, databaseName) + } + dsns[dsn] = struct{}{} } } From 301976c2180fbd8848193a09df65bb898d0acf50 Mon Sep 17 00:00:00 2001 From: Ollie Charles Date: Sun, 31 Jan 2021 11:22:18 +0000 Subject: [PATCH 9/9] Detect SIReadLock locks in the pg_locks metric (#421) Co-authored-by: Will Rouesnel --- cmd/postgres_exporter/postgres_exporter.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/postgres_exporter/postgres_exporter.go b/cmd/postgres_exporter/postgres_exporter.go index 958fdc71..8a3e1617 100644 --- a/cmd/postgres_exporter/postgres_exporter.go +++ b/cmd/postgres_exporter/postgres_exporter.go @@ -379,7 +379,8 @@ var queryOverrides = map[string][]OverrideQuery{ ('sharelock'), ('sharerowexclusivelock'), ('exclusivelock'), - ('accessexclusivelock') + ('accessexclusivelock'), + ('sireadlock') ) AS tmp(mode) CROSS JOIN pg_database LEFT JOIN (SELECT database, lower(mode) AS mode,count(*) AS count