Generalize relation analyze in table AM interface

Currently, there is just one algorithm for sampling tuples from a table, implemented in acquire_sample_rows(). A custom table AM can only redefine the way to get the next block/tuple by implementing the scan_analyze_next_block() and scan_analyze_next_tuple() API functions. This approach doesn't seem general enough. For instance, it's unclear how to sample index-organized tables this way. This commit allows a table AM to encapsulate the whole sampling algorithm (currently implemented in acquire_sample_rows()) into the relation_analyze() API function. Discussion: https://postgr.es/m/CAPpHfdurb9ycV8udYqM%3Do0sPS66PJ4RCBM1g-bBpvzUfogY0EA%40mail.gmail.com Reviewed-by: Pavel Borisov, Matthias van de Meent
2025-07-05 07:21:24 +03:00 · 2024-03-30 22:34:04 +02:00
parent b154d8a6d0
commit 27bc1772fc
7 changed files with 100 additions and 125 deletions
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@ -369,6 +369,15 @@ extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
 extern bool HeapTupleIsSurelyDead(HeapTuple htup,
 								  struct GlobalVisState *vistest);

+/* in heap/heapam_handler.c */
+extern void heapam_scan_analyze_next_block(TableScanDesc scan,
+										   BlockNumber blockno,
+										   BufferAccessStrategy bstrategy);
+extern bool heapam_scan_analyze_next_tuple(TableScanDesc scan,
+										   TransactionId OldestXmin,
+										   double *liverows, double *deadrows,
+										   TupleTableSlot *slot);
+
 /*
 * To avoid leaking too much knowledge about reorderbuffer implementation
 * details this is implemented in reorderbuffer.c not heapam_visibility.c
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@ -20,6 +20,7 @@
 #include "access/relscan.h"
 #include "access/sdir.h"
 #include "access/xact.h"
+#include "commands/vacuum.h"
 #include "executor/tuptable.h"
 #include "utils/rel.h"
 #include "utils/snapshot.h"
@ -658,41 +659,6 @@ typedef struct TableAmRoutine
 									struct VacuumParams *params,
 									BufferAccessStrategy bstrategy);

-	/*
-	 * Prepare to analyze block `blockno` of `scan`. The scan has been started
-	 * with table_beginscan_analyze().  See also
-	 * table_scan_analyze_next_block().
-	 *
-	 * The callback may acquire resources like locks that are held until
-	 * table_scan_analyze_next_tuple() returns false. It e.g. can make sense
-	 * to hold a lock until all tuples on a block have been analyzed by
-	 * scan_analyze_next_tuple.
-	 *
-	 * The callback can return false if the block is not suitable for
-	 * sampling, e.g. because it's a metapage that could never contain tuples.
-	 *
-	 * XXX: This obviously is primarily suited for block-based AMs. It's not
-	 * clear what a good interface for non block based AMs would be, so there
-	 * isn't one yet.
-	 */
-	bool		(*scan_analyze_next_block) (TableScanDesc scan,
-											BlockNumber blockno,
-											BufferAccessStrategy bstrategy);
-
-	/*
-	 * See table_scan_analyze_next_tuple().
-	 *
-	 * Not every AM might have a meaningful concept of dead rows, in which
-	 * case it's OK to not increment *deadrows - but note that that may
-	 * influence autovacuum scheduling (see comment for relation_vacuum
-	 * callback).
-	 */
-	bool		(*scan_analyze_next_tuple) (TableScanDesc scan,
-											TransactionId OldestXmin,
-											double *liverows,
-											double *deadrows,
-											TupleTableSlot *slot);
-
 	/* see table_index_build_range_scan for reference about parameters */
 	double		(*index_build_range_scan) (Relation table_rel,
 										   Relation index_rel,
@ -713,6 +679,12 @@ typedef struct TableAmRoutine
 										Snapshot snapshot,
 										struct ValidateIndexState *state);

+	/* See table_relation_analyze() */
+	void		(*relation_analyze) (Relation relation,
+									 AcquireSampleRowsFunc *func,
+									 BlockNumber *totalpages,
+									 BufferAccessStrategy bstrategy);
+

 	/* ------------------------------------------------------------------------
 	 * Miscellaneous functions.
@ -1008,19 +980,6 @@ table_beginscan_tid(Relation rel, Snapshot snapshot)
 	return rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
 }

-/*
- * table_beginscan_analyze is an alternative entry point for setting up a
- * TableScanDesc for an ANALYZE scan.  As with bitmap scans, it's worth using
- * the same data structure although the behavior is rather different.
- */
-static inline TableScanDesc
-table_beginscan_analyze(Relation rel)
-{
-	uint32		flags = SO_TYPE_ANALYZE;
-
-	return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags);
-}
-
 /*
 * End relation scan.
 */
@ -1746,42 +1705,6 @@ table_relation_vacuum(Relation rel, struct VacuumParams *params,
 	rel->rd_tableam->relation_vacuum(rel, params, bstrategy);
 }

-/*
- * Prepare to analyze block `blockno` of `scan`. The scan needs to have been
- * started with table_beginscan_analyze().  Note that this routine might
- * acquire resources like locks that are held until
- * table_scan_analyze_next_tuple() returns false.
- *
- * Returns false if block is unsuitable for sampling, true otherwise.
- */
-static inline bool
-table_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
-							  BufferAccessStrategy bstrategy)
-{
-	return scan->rs_rd->rd_tableam->scan_analyze_next_block(scan, blockno,
-															bstrategy);
-}
-
-/*
- * Iterate over tuples in the block selected with
- * table_scan_analyze_next_block() (which needs to have returned true, and
- * this routine may not have returned false for the same block before). If a
- * tuple that's suitable for sampling is found, true is returned and a tuple
- * is stored in `slot`.
- *
- * *liverows and *deadrows are incremented according to the encountered
- * tuples.
- */
-static inline bool
-table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
-							  double *liverows, double *deadrows,
-							  TupleTableSlot *slot)
-{
-	return scan->rs_rd->rd_tableam->scan_analyze_next_tuple(scan, OldestXmin,
-															liverows, deadrows,
-															slot);
-}
-
 /*
 * table_index_build_scan - scan the table to find tuples to be indexed
 *
@ -1887,6 +1810,21 @@ table_index_validate_scan(Relation table_rel,
 											   state);
 }

+/*
+ * table_relation_analyze - fill the information for a sampling statistics
+ *							acquisition
+ *
+ * The pointer to a function that will collect sample rows from the table
+ * should be stored to `*func`, plus the estimated size of the table in pages
+ * should be stored to `*totalpages`.
+ */
+static inline void
+table_relation_analyze(Relation relation, AcquireSampleRowsFunc *func,
+					   BlockNumber *totalpages, BufferAccessStrategy bstrategy)
+{
+	relation->rd_tableam->relation_analyze(relation, func,
+										   totalpages, bstrategy);
+}

 /* ----------------------------------------------------------------------------
 * Miscellaneous functionality