
Fix swapped flushedLsn and receiveStartTli for wal_receiver collector (#1198)

In `pgStatWalReceiverQueryTemplate`, the order of the columns (when `hasFlushedLSN == true`) is:

- ...
- `receive_start_lsn`
- `flushed_lsn`
- `receive_start_tli`
- ...

However, columns were scanned in this order:

- ...
- `receive_start_lsn` -> `receiveStartLsn`
- `receive_start_tli` -> `flushedLsn` (!)
- `flushed_lsn` -> `receiveStartTli` (!)
- ...

This incorrect hydration of variables also manifests as swapped values for the
`pg_stat_wal_receiver_flushed_lsn` and `pg_stat_wal_receiver_receive_start_tli` metrics.
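
For background, `database/sql`'s `Rows.Scan` assigns columns to destination pointers strictly by position; column names play no role, and as long as the types are compatible no error is raised. A minimal, standalone sketch of that failure mode (the SQLite driver and the literal values are only there to keep the example runnable, they are not taken from the exporter):

```go
package main

import (
	"database/sql"
	"fmt"

	_ "modernc.org/sqlite" // any driver works; an in-memory SQLite DB keeps this self-contained
)

func main() {
	db, err := sql.Open("sqlite", ":memory:")
	if err != nil {
		panic(err)
	}
	defer db.Close()

	// SELECT order: flushed_lsn first, receive_start_tli second.
	row := db.QueryRow(`SELECT 1200668684563609 AS flushed_lsn, 1687321285 AS receive_start_tli`)

	var flushedLsn, receiveStartTli sql.NullInt64
	// Destinations passed in the wrong order: Scan matches purely by position,
	// so the two values are silently swapped without any error.
	if err := row.Scan(&receiveStartTli, &flushedLsn); err != nil {
		panic(err)
	}
	fmt.Println(flushedLsn.Int64, receiveStartTli.Int64) // 1687321285 1200668684563609
}
```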

This seems to be a bug that has existed since the initial implementation:

- 2d7e152751
- https://github.com/prometheus-community/postgres_exporter/pull/844

In this patch, I'm:

- fixing the `.Scan()` call, so that it hydrates the variables in the correct order (see the sketch after this list)

- adjusting the order in which metrics are pushed out to the channel,
  to follow the order we consume them in
  (.., `receive_start_lsn`, `flushed_lsn`, `receive_start_tli`, ..)

- adjusting the walreceiver tests to follow the new order (which matches `.Scan()`)

- fixing a small indentation issue in `pgStatWalReceiverQueryTemplate`
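
Put together, the corrected consume-then-emit flow looks roughly like this. This is a condensed sketch, not the literal collector code: most columns, labels and error handling are trimmed, and the metric descriptors are hypothetical stand-ins (only the identifiers visible in the hunks below come from the real code).

```go
package main

import (
	"database/sql"

	"github.com/prometheus/client_golang/prometheus"
)

// Hypothetical stand-ins for the collector's metric descriptors.
var (
	walLabels                      = []string{"upstream_host", "slot_name", "status"}
	statWalReceiverReceiveStartLsn = prometheus.NewDesc("pg_stat_wal_receiver_receive_start_lsn", "sketch", walLabels, nil)
	statWalReceiverFlushedLSN      = prometheus.NewDesc("pg_stat_wal_receiver_flushed_lsn", "sketch", walLabels, nil)
	statWalReceiverReceiveStartTli = prometheus.NewDesc("pg_stat_wal_receiver_receive_start_tli", "sketch", walLabels, nil)
)

// emitReordered shows the corrected order for the three affected columns;
// the real Update() scans and emits many more columns.
func emitReordered(ch chan<- prometheus.Metric, rows *sql.Rows, hasFlushedLSN bool, labels []string) error {
	var receiveStartLsn, flushedLsn, receiveStartTli sql.NullInt64

	if hasFlushedLSN {
		// Destinations follow the SELECT order: receive_start_lsn, flushed_lsn, receive_start_tli.
		if err := rows.Scan(&receiveStartLsn, &flushedLsn, &receiveStartTli); err != nil {
			return err
		}
	} else {
		if err := rows.Scan(&receiveStartLsn, &receiveStartTli); err != nil {
			return err
		}
	}

	// Metrics go out to the channel in the same order the columns were consumed.
	ch <- prometheus.MustNewConstMetric(statWalReceiverReceiveStartLsn, prometheus.CounterValue, float64(receiveStartLsn.Int64), labels...)
	if hasFlushedLSN {
		ch <- prometheus.MustNewConstMetric(statWalReceiverFlushedLSN, prometheus.CounterValue, float64(flushedLsn.Int64), labels...)
	}
	ch <- prometheus.MustNewConstMetric(statWalReceiverReceiveStartTli, prometheus.GaugeValue, float64(receiveStartTli.Int64), labels...)
	return nil
}

func main() {}
```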

Signed-off-by: Slavi Pantaleev <slavi@devture.com>
Author: Slavi Pantaleev
Date: 2025-09-29 19:15:17 +03:00
Committed by: GitHub
Parent: e62fe086f0
Commit: ef2736e7a6
2 changed files with 11 additions and 11 deletions


@@ -108,7 +108,7 @@ var (
status,
(receive_start_lsn- '0/0') %% (2^52)::bigint as receive_start_lsn,
%s
-receive_start_tli,
+receive_start_tli,
received_tli,
extract(epoch from last_msg_send_time) as last_msg_send_time,
extract(epoch from last_msg_receipt_time) as last_msg_receipt_time,
@@ -147,7 +147,7 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta
var lastMsgSendTime, lastMsgReceiptTime, latestEndTime sql.NullFloat64
if hasFlushedLSN {
-if err := rows.Scan(&upstreamHost, &slotName, &status, &receiveStartLsn, &receiveStartTli, &flushedLsn, &receivedTli, &lastMsgSendTime, &lastMsgReceiptTime, &latestEndLsn, &latestEndTime, &upstreamNode); err != nil {
+if err := rows.Scan(&upstreamHost, &slotName, &status, &receiveStartLsn, &flushedLsn, &receiveStartTli, &receivedTli, &lastMsgSendTime, &lastMsgReceiptTime, &latestEndLsn, &latestEndTime, &upstreamNode); err != nil {
return err
}
} else {
@@ -209,12 +209,6 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta
float64(receiveStartLsn.Int64),
labels...)
-ch <- prometheus.MustNewConstMetric(
-statWalReceiverReceiveStartTli,
-prometheus.GaugeValue,
-float64(receiveStartTli.Int64),
-labels...)
if hasFlushedLSN {
ch <- prometheus.MustNewConstMetric(
statWalReceiverFlushedLSN,
@@ -223,6 +217,12 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta
labels...)
}
+ch <- prometheus.MustNewConstMetric(
+statWalReceiverReceiveStartTli,
+prometheus.GaugeValue,
+float64(receiveStartTli.Int64),
+labels...)
ch <- prometheus.MustNewConstMetric(
statWalReceiverReceivedTli,
prometheus.GaugeValue,


@@ -50,8 +50,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) {
"slot_name",
"status",
"receive_start_lsn",
"receive_start_tli",
"flushed_lsn",
"receive_start_tli",
"received_tli",
"last_msg_send_time",
"last_msg_receipt_time",
@@ -65,8 +65,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) {
"bar",
"stopping",
int64(1200668684563608),
-1687321285,
int64(1200668684563609),
+1687321285,
1687321280,
1687321275,
1687321276,
@@ -88,8 +88,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) {
}()
expected := []MetricResult{
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563608, metricType: dto.MetricType_COUNTER},
-{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321285, metricType: dto.MetricType_GAUGE},
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563609, metricType: dto.MetricType_COUNTER},
+{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321285, metricType: dto.MetricType_GAUGE},
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321280, metricType: dto.MetricType_GAUGE},
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321275, metricType: dto.MetricType_COUNTER},
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321276, metricType: dto.MetricType_COUNTER},