You've already forked postgres_exporter
mirror of
https://github.com/prometheus-community/postgres_exporter.git
synced 2025-10-31 09:10:25 +03:00
Fix swapped flushedLsn and receiveStartTli for wal_receiver collector (#1198)
In `pgStatWalReceiverQueryTemplate`, the order of the columns (when `hasFlushedLSN == true`) is:
- ...
- `receive_start_lsn`
- `flushed_lsn`
- `receive_start_tli`
- ...
However, columns were scanned in this order:
- ...
- `receive_start_lsn` -> `receiveStartLsn`
- `receive_start_tli` -> `flushedLsn` (!)
- `flushed_lsn` -> `receiveStartTli` (!)
- ...
This incorrect hydration of variables also manifests as swapped values for the
`pg_stat_wal_receiver_flushed_lsn` and `pg_stat_wal_receiver_receive_start_tli` metrics.
This seems to be a bug that has existed since the initial implementation:
- 2d7e152751
- https://github.com/prometheus-community/postgres_exporter/pull/844
In this patch, I'm:
- fixing the `.Scan()`, so that it hydrates variables in the correct order
- adjusting the order in which metrics are pushed out to the channel,
to follow the order we consume them in
(.., `receive_start_lsn`, `flushed_lsn`, `receive_start_tli`, ..)
- adjusting the walreceiver tests, to follow the new order (which matches `.Scan()`)
- fixing a small indentation issue in `pgStatWalReceiverQueryTemplate`
Signed-off-by: Slavi Pantaleev <slavi@devture.com>
This commit is contained in:
@@ -108,7 +108,7 @@ var (
|
||||
status,
|
||||
(receive_start_lsn- '0/0') %% (2^52)::bigint as receive_start_lsn,
|
||||
%s
|
||||
receive_start_tli,
|
||||
receive_start_tli,
|
||||
received_tli,
|
||||
extract(epoch from last_msg_send_time) as last_msg_send_time,
|
||||
extract(epoch from last_msg_receipt_time) as last_msg_receipt_time,
|
||||
@@ -147,7 +147,7 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta
|
||||
var lastMsgSendTime, lastMsgReceiptTime, latestEndTime sql.NullFloat64
|
||||
|
||||
if hasFlushedLSN {
|
||||
if err := rows.Scan(&upstreamHost, &slotName, &status, &receiveStartLsn, &receiveStartTli, &flushedLsn, &receivedTli, &lastMsgSendTime, &lastMsgReceiptTime, &latestEndLsn, &latestEndTime, &upstreamNode); err != nil {
|
||||
if err := rows.Scan(&upstreamHost, &slotName, &status, &receiveStartLsn, &flushedLsn, &receiveStartTli, &receivedTli, &lastMsgSendTime, &lastMsgReceiptTime, &latestEndLsn, &latestEndTime, &upstreamNode); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
@@ -209,12 +209,6 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta
|
||||
float64(receiveStartLsn.Int64),
|
||||
labels...)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
statWalReceiverReceiveStartTli,
|
||||
prometheus.GaugeValue,
|
||||
float64(receiveStartTli.Int64),
|
||||
labels...)
|
||||
|
||||
if hasFlushedLSN {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
statWalReceiverFlushedLSN,
|
||||
@@ -223,6 +217,12 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta
|
||||
labels...)
|
||||
}
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
statWalReceiverReceiveStartTli,
|
||||
prometheus.GaugeValue,
|
||||
float64(receiveStartTli.Int64),
|
||||
labels...)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
statWalReceiverReceivedTli,
|
||||
prometheus.GaugeValue,
|
||||
|
||||
@@ -50,8 +50,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) {
|
||||
"slot_name",
|
||||
"status",
|
||||
"receive_start_lsn",
|
||||
"receive_start_tli",
|
||||
"flushed_lsn",
|
||||
"receive_start_tli",
|
||||
"received_tli",
|
||||
"last_msg_send_time",
|
||||
"last_msg_receipt_time",
|
||||
@@ -65,8 +65,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) {
|
||||
"bar",
|
||||
"stopping",
|
||||
int64(1200668684563608),
|
||||
1687321285,
|
||||
int64(1200668684563609),
|
||||
1687321285,
|
||||
1687321280,
|
||||
1687321275,
|
||||
1687321276,
|
||||
@@ -88,8 +88,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) {
|
||||
}()
|
||||
expected := []MetricResult{
|
||||
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563608, metricType: dto.MetricType_COUNTER},
|
||||
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321285, metricType: dto.MetricType_GAUGE},
|
||||
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563609, metricType: dto.MetricType_COUNTER},
|
||||
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321285, metricType: dto.MetricType_GAUGE},
|
||||
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321280, metricType: dto.MetricType_GAUGE},
|
||||
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321275, metricType: dto.MetricType_COUNTER},
|
||||
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321276, metricType: dto.MetricType_COUNTER},
|
||||
|
||||
Reference in New Issue
Block a user