You've already forked postgres_exporter
							
							
				mirror of
				https://github.com/prometheus-community/postgres_exporter.git
				synced 2025-11-03 07:53:12 +03:00 
			
		
		
		
	Fix swapped flushedLsn and receiveStartTli for wal_receiver collector (#1198)
				
					
				
			In `pgStatWalReceiverQueryTemplate`, the order of the columns (when `hasFlushedLSN == true`) is:
- ...
- `receive_start_lsn`
- `flushed_lsn`
- `receive_start_tli`
- ...
However, columns were scanned in this order:
- ...
- `receive_start_lsn` -> `receiveStartLsn`
- `receive_start_tli` -> `flushedLsn` (!)
- `flushed_lsn` -> `receiveStartTli` (!)
- ...
This incorrect hydration of variables also manifests as swapped values for the
`pg_stat_wal_receiver_flushed_lsn` and `pg_stat_wal_receiver_receive_start_tli` metrics.
This seems to be a bug that has existed since the initial implementation:
- 2d7e152751
- https://github.com/prometheus-community/postgres_exporter/pull/844
In this patch, I'm:
- fixing the `.Scan()`, so that it hydrates variables in the correct order
- adjusting the order in which metrics are pushed out to the channel,
  to follow the order we consume them in
  (.., `receive_start_lsn`, `flushed_lsn`, `receive_start_tli`, ..)
- adjusting the walreceiver tests, to follow the new order (which matches `.Scan()`)
- fixing a small indentation issue in `pgStatWalReceiverQueryTemplate`
Signed-off-by: Slavi Pantaleev <slavi@devture.com>
			
			
This commit is contained in:
		@@ -108,7 +108,7 @@ var (
 | 
				
			|||||||
		status,
 | 
							status,
 | 
				
			||||||
		(receive_start_lsn- '0/0') %% (2^52)::bigint as receive_start_lsn,
 | 
							(receive_start_lsn- '0/0') %% (2^52)::bigint as receive_start_lsn,
 | 
				
			||||||
		%s
 | 
							%s
 | 
				
			||||||
receive_start_tli,
 | 
							receive_start_tli,
 | 
				
			||||||
		received_tli,
 | 
							received_tli,
 | 
				
			||||||
		extract(epoch from last_msg_send_time) as last_msg_send_time,
 | 
							extract(epoch from last_msg_send_time) as last_msg_send_time,
 | 
				
			||||||
		extract(epoch from last_msg_receipt_time) as last_msg_receipt_time,
 | 
							extract(epoch from last_msg_receipt_time) as last_msg_receipt_time,
 | 
				
			||||||
@@ -147,7 +147,7 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta
 | 
				
			|||||||
		var lastMsgSendTime, lastMsgReceiptTime, latestEndTime sql.NullFloat64
 | 
							var lastMsgSendTime, lastMsgReceiptTime, latestEndTime sql.NullFloat64
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if hasFlushedLSN {
 | 
							if hasFlushedLSN {
 | 
				
			||||||
			if err := rows.Scan(&upstreamHost, &slotName, &status, &receiveStartLsn, &receiveStartTli, &flushedLsn, &receivedTli, &lastMsgSendTime, &lastMsgReceiptTime, &latestEndLsn, &latestEndTime, &upstreamNode); err != nil {
 | 
								if err := rows.Scan(&upstreamHost, &slotName, &status, &receiveStartLsn, &flushedLsn, &receiveStartTli, &receivedTli, &lastMsgSendTime, &lastMsgReceiptTime, &latestEndLsn, &latestEndTime, &upstreamNode); err != nil {
 | 
				
			||||||
				return err
 | 
									return err
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
@@ -209,12 +209,6 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta
 | 
				
			|||||||
			float64(receiveStartLsn.Int64),
 | 
								float64(receiveStartLsn.Int64),
 | 
				
			||||||
			labels...)
 | 
								labels...)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		ch <- prometheus.MustNewConstMetric(
 | 
					 | 
				
			||||||
			statWalReceiverReceiveStartTli,
 | 
					 | 
				
			||||||
			prometheus.GaugeValue,
 | 
					 | 
				
			||||||
			float64(receiveStartTli.Int64),
 | 
					 | 
				
			||||||
			labels...)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		if hasFlushedLSN {
 | 
							if hasFlushedLSN {
 | 
				
			||||||
			ch <- prometheus.MustNewConstMetric(
 | 
								ch <- prometheus.MustNewConstMetric(
 | 
				
			||||||
				statWalReceiverFlushedLSN,
 | 
									statWalReceiverFlushedLSN,
 | 
				
			||||||
@@ -223,6 +217,12 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta
 | 
				
			|||||||
				labels...)
 | 
									labels...)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							ch <- prometheus.MustNewConstMetric(
 | 
				
			||||||
 | 
								statWalReceiverReceiveStartTli,
 | 
				
			||||||
 | 
								prometheus.GaugeValue,
 | 
				
			||||||
 | 
								float64(receiveStartTli.Int64),
 | 
				
			||||||
 | 
								labels...)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		ch <- prometheus.MustNewConstMetric(
 | 
							ch <- prometheus.MustNewConstMetric(
 | 
				
			||||||
			statWalReceiverReceivedTli,
 | 
								statWalReceiverReceivedTli,
 | 
				
			||||||
			prometheus.GaugeValue,
 | 
								prometheus.GaugeValue,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -50,8 +50,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) {
 | 
				
			|||||||
		"slot_name",
 | 
							"slot_name",
 | 
				
			||||||
		"status",
 | 
							"status",
 | 
				
			||||||
		"receive_start_lsn",
 | 
							"receive_start_lsn",
 | 
				
			||||||
		"receive_start_tli",
 | 
					 | 
				
			||||||
		"flushed_lsn",
 | 
							"flushed_lsn",
 | 
				
			||||||
 | 
							"receive_start_tli",
 | 
				
			||||||
		"received_tli",
 | 
							"received_tli",
 | 
				
			||||||
		"last_msg_send_time",
 | 
							"last_msg_send_time",
 | 
				
			||||||
		"last_msg_receipt_time",
 | 
							"last_msg_receipt_time",
 | 
				
			||||||
@@ -65,8 +65,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) {
 | 
				
			|||||||
			"bar",
 | 
								"bar",
 | 
				
			||||||
			"stopping",
 | 
								"stopping",
 | 
				
			||||||
			int64(1200668684563608),
 | 
								int64(1200668684563608),
 | 
				
			||||||
			1687321285,
 | 
					 | 
				
			||||||
			int64(1200668684563609),
 | 
								int64(1200668684563609),
 | 
				
			||||||
 | 
								1687321285,
 | 
				
			||||||
			1687321280,
 | 
								1687321280,
 | 
				
			||||||
			1687321275,
 | 
								1687321275,
 | 
				
			||||||
			1687321276,
 | 
								1687321276,
 | 
				
			||||||
@@ -88,8 +88,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) {
 | 
				
			|||||||
	}()
 | 
						}()
 | 
				
			||||||
	expected := []MetricResult{
 | 
						expected := []MetricResult{
 | 
				
			||||||
		{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563608, metricType: dto.MetricType_COUNTER},
 | 
							{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563608, metricType: dto.MetricType_COUNTER},
 | 
				
			||||||
		{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321285, metricType: dto.MetricType_GAUGE},
 | 
					 | 
				
			||||||
		{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563609, metricType: dto.MetricType_COUNTER},
 | 
							{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563609, metricType: dto.MetricType_COUNTER},
 | 
				
			||||||
 | 
							{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321285, metricType: dto.MetricType_GAUGE},
 | 
				
			||||||
		{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321280, metricType: dto.MetricType_GAUGE},
 | 
							{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321280, metricType: dto.MetricType_GAUGE},
 | 
				
			||||||
		{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321275, metricType: dto.MetricType_COUNTER},
 | 
							{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321275, metricType: dto.MetricType_COUNTER},
 | 
				
			||||||
		{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321276, metricType: dto.MetricType_COUNTER},
 | 
							{labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321276, metricType: dto.MetricType_COUNTER},
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user