You've already forked postgres_exporter
mirror of
https://github.com/prometheus-community/postgres_exporter.git
synced 2025-11-03 07:53:12 +03:00
Fix swapped flushedLsn and receiveStartTli for wal_receiver collector (#1198)
In `pgStatWalReceiverQueryTemplate`, the order of the columns (when `hasFlushedLSN == true`) is:
- ...
- `receive_start_lsn`
- `flushed_lsn`
- `receive_start_tli`
- ...
However, columns were scanned in this order:
- ...
- `receive_start_lsn` -> `receiveStartLsn`
- `receive_start_tli` -> `flushedLsn` (!)
- `flushed_lsn` -> `receiveStartTli` (!)
- ...
This incorrect hydration of variables also manifests as swapped values for the
`pg_stat_wal_receiver_flushed_lsn` and `pg_stat_wal_receiver_receive_start_tli` metrics.
This seems to be a bug that has existed since the initial implementation:
- 2d7e152751
- https://github.com/prometheus-community/postgres_exporter/pull/844
In this patch, I'm:
- fixing the `.Scan()`, so that it hydrates variables in the correct order
- adjusting the order in which metrics are pushed out to the channel,
to follow the order we consume them in
(.., `receive_start_lsn`, `flushed_lsn`, `receive_start_tli`, ..)
- adjusting the walreceiver tests, to follow the new order (which matches `.Scan()`)
- fixing a small indentation issue in `pgStatWalReceiverQueryTemplate`
Signed-off-by: Slavi Pantaleev <slavi@devture.com>
This commit is contained in:
@@ -108,7 +108,7 @@ var (
|
|||||||
status,
|
status,
|
||||||
(receive_start_lsn- '0/0') %% (2^52)::bigint as receive_start_lsn,
|
(receive_start_lsn- '0/0') %% (2^52)::bigint as receive_start_lsn,
|
||||||
%s
|
%s
|
||||||
receive_start_tli,
|
receive_start_tli,
|
||||||
received_tli,
|
received_tli,
|
||||||
extract(epoch from last_msg_send_time) as last_msg_send_time,
|
extract(epoch from last_msg_send_time) as last_msg_send_time,
|
||||||
extract(epoch from last_msg_receipt_time) as last_msg_receipt_time,
|
extract(epoch from last_msg_receipt_time) as last_msg_receipt_time,
|
||||||
@@ -147,7 +147,7 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta
|
|||||||
var lastMsgSendTime, lastMsgReceiptTime, latestEndTime sql.NullFloat64
|
var lastMsgSendTime, lastMsgReceiptTime, latestEndTime sql.NullFloat64
|
||||||
|
|
||||||
if hasFlushedLSN {
|
if hasFlushedLSN {
|
||||||
if err := rows.Scan(&upstreamHost, &slotName, &status, &receiveStartLsn, &receiveStartTli, &flushedLsn, &receivedTli, &lastMsgSendTime, &lastMsgReceiptTime, &latestEndLsn, &latestEndTime, &upstreamNode); err != nil {
|
if err := rows.Scan(&upstreamHost, &slotName, &status, &receiveStartLsn, &flushedLsn, &receiveStartTli, &receivedTli, &lastMsgSendTime, &lastMsgReceiptTime, &latestEndLsn, &latestEndTime, &upstreamNode); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -209,12 +209,6 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta
|
|||||||
float64(receiveStartLsn.Int64),
|
float64(receiveStartLsn.Int64),
|
||||||
labels...)
|
labels...)
|
||||||
|
|
||||||
ch <- prometheus.MustNewConstMetric(
|
|
||||||
statWalReceiverReceiveStartTli,
|
|
||||||
prometheus.GaugeValue,
|
|
||||||
float64(receiveStartTli.Int64),
|
|
||||||
labels...)
|
|
||||||
|
|
||||||
if hasFlushedLSN {
|
if hasFlushedLSN {
|
||||||
ch <- prometheus.MustNewConstMetric(
|
ch <- prometheus.MustNewConstMetric(
|
||||||
statWalReceiverFlushedLSN,
|
statWalReceiverFlushedLSN,
|
||||||
@@ -223,6 +217,12 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta
|
|||||||
labels...)
|
labels...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ch <- prometheus.MustNewConstMetric(
|
||||||
|
statWalReceiverReceiveStartTli,
|
||||||
|
prometheus.GaugeValue,
|
||||||
|
float64(receiveStartTli.Int64),
|
||||||
|
labels...)
|
||||||
|
|
||||||
ch <- prometheus.MustNewConstMetric(
|
ch <- prometheus.MustNewConstMetric(
|
||||||
statWalReceiverReceivedTli,
|
statWalReceiverReceivedTli,
|
||||||
prometheus.GaugeValue,
|
prometheus.GaugeValue,
|
||||||
|
|||||||
@@ -50,8 +50,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) {
|
|||||||
"slot_name",
|
"slot_name",
|
||||||
"status",
|
"status",
|
||||||
"receive_start_lsn",
|
"receive_start_lsn",
|
||||||
"receive_start_tli",
|
|
||||||
"flushed_lsn",
|
"flushed_lsn",
|
||||||
|
"receive_start_tli",
|
||||||
"received_tli",
|
"received_tli",
|
||||||
"last_msg_send_time",
|
"last_msg_send_time",
|
||||||
"last_msg_receipt_time",
|
"last_msg_receipt_time",
|
||||||
@@ -65,8 +65,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) {
|
|||||||
"bar",
|
"bar",
|
||||||
"stopping",
|
"stopping",
|
||||||
int64(1200668684563608),
|
int64(1200668684563608),
|
||||||
1687321285,
|
|
||||||
int64(1200668684563609),
|
int64(1200668684563609),
|
||||||
|
1687321285,
|
||||||
1687321280,
|
1687321280,
|
||||||
1687321275,
|
1687321275,
|
||||||
1687321276,
|
1687321276,
|
||||||
@@ -88,8 +88,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) {
|
|||||||
}()
|
}()
|
||||||
expected := []MetricResult{
|
expected := []MetricResult{
|
||||||
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563608, metricType: dto.MetricType_COUNTER},
|
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563608, metricType: dto.MetricType_COUNTER},
|
||||||
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321285, metricType: dto.MetricType_GAUGE},
|
|
||||||
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563609, metricType: dto.MetricType_COUNTER},
|
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563609, metricType: dto.MetricType_COUNTER},
|
||||||
|
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321285, metricType: dto.MetricType_GAUGE},
|
||||||
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321280, metricType: dto.MetricType_GAUGE},
|
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321280, metricType: dto.MetricType_GAUGE},
|
||||||
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321275, metricType: dto.MetricType_COUNTER},
|
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321275, metricType: dto.MetricType_COUNTER},
|
||||||
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321276, metricType: dto.MetricType_COUNTER},
|
{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321276, metricType: dto.MetricType_COUNTER},
|
||||||
|
|||||||
Reference in New Issue
Block a user