Move syncscan.c to src/backend/access/common.

Since the tableam.c code needs to make use of the syncscan.c routines itself, and since other block-oriented AMs might also want to use it one day, it didn't make sense for it to live under src/backend/access/heap.

Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CA%2BhUKGLCnG%3DNEAByg6bk%2BCT9JZD97Y%3DAxKhh27Su9FeGWOKvDg%40mail.gmail.com
2025-11-21 00:42:43 +03:00 · 2020-07-29 16:46:58 +12:00
parent c49c74d192
commit cb04ad4985
9 changed files with 33 additions and 11 deletions
--- a/src/backend/access/heap/Makefile
+++ b/src/backend/access/heap/Makefile
@@ -20,7 +20,6 @@ OBJS = \
 	hio.o \
 	pruneheap.o \
 	rewriteheap.o \
-	syncscan.o \
 	vacuumlazy.o \
 	visibilitymap.o

--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -41,6 +41,7 @@
 #include "access/parallel.h"
 #include "access/relscan.h"
 #include "access/subtrans.h"
+#include "access/syncscan.h"
 #include "access/sysattr.h"
 #include "access/tableam.h"
 #include "access/transam.h"
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -24,6 +24,7 @@
 #include "access/heaptoast.h"
 #include "access/multixact.h"
 #include "access/rewriteheap.h"
+#include "access/syncscan.h"
 #include "access/tableam.h"
 #include "access/tsmapi.h"
 #include "access/xact.h"
--- a/src/backend/access/heap/syncscan.c
+++ b/src/backend/access/heap/syncscan.c
@@ -1,322 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * syncscan.c
- *	  heap scan synchronization support
- *
- * When multiple backends run a sequential scan on the same table, we try
- * to keep them synchronized to reduce the overall I/O needed.  The goal is
- * to read each page into shared buffer cache only once, and let all backends
- * that take part in the shared scan process the page before it falls out of
- * the cache.
- *
- * Since the "leader" in a pack of backends doing a seqscan will have to wait
- * for I/O, while the "followers" don't, there is a strong self-synchronizing
- * effect once we can get the backends examining approximately the same part
- * of the table at the same time.  Hence all that is really needed is to get
- * a new backend beginning a seqscan to begin it close to where other backends
- * are reading.  We can scan the table circularly, from block X up to the
- * end and then from block 0 to X-1, to ensure we visit all rows while still
- * participating in the common scan.
- *
- * To accomplish that, we keep track of the scan position of each table, and
- * start new scans close to where the previous scan(s) are.  We don't try to
- * do any extra synchronization to keep the scans together afterwards; some
- * scans might progress much more slowly than others, for example if the
- * results need to be transferred to the client over a slow network, and we
- * don't want such queries to slow down others.
- *
- * There can realistically only be a few large sequential scans on different
- * tables in progress at any time.  Therefore we just keep the scan positions
- * in a small LRU list which we scan every time we need to look up or update a
- * scan position.  The whole mechanism is only applied for tables exceeding
- * a threshold size (but that is not the concern of this module).
- *
- * INTERFACE ROUTINES
- *		ss_get_location		- return current scan location of a relation
- *		ss_report_location	- update current scan location
- *
- *
- * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * IDENTIFICATION
- *	  src/backend/access/heap/syncscan.c
- *
- *-------------------------------------------------------------------------
- */
-#include "postgres.h"
-
-#include "access/heapam.h"
-#include "miscadmin.h"
-#include "storage/lwlock.h"
-#include "storage/shmem.h"
-#include "utils/rel.h"
-
-
-/* GUC variables */
-#ifdef TRACE_SYNCSCAN
-bool		trace_syncscan = false;
-#endif
-
-
-/*
- * Size of the LRU list.
- *
- * Note: the code assumes that SYNC_SCAN_NELEM > 1.
- *
- * XXX: What's a good value? It should be large enough to hold the
- * maximum number of large tables scanned simultaneously.  But a larger value
- * means more traversing of the LRU list when starting a new scan.
- */
-#define SYNC_SCAN_NELEM 20
-
-/*
- * Interval between reports of the location of the current scan, in pages.
- *
- * Note: This should be smaller than the ring size (see buffer/freelist.c)
- * we use for bulk reads.  Otherwise a scan joining other scans might start
- * from a page that's no longer in the buffer cache.  This is a bit fuzzy;
- * there's no guarantee that the new scan will read the page before it leaves
- * the buffer cache anyway, and on the other hand the page is most likely
- * still in the OS cache.
- */
-#define SYNC_SCAN_REPORT_INTERVAL (128 * 1024 / BLCKSZ)
-
-
-/*
- * The scan locations structure is essentially a doubly-linked LRU with head
- * and tail pointer, but designed to hold a fixed maximum number of elements in
- * fixed-size shared memory.
- */
-typedef struct ss_scan_location_t
-{
-	RelFileNode relfilenode;	/* identity of a relation */
-	BlockNumber location;		/* last-reported location in the relation */
-} ss_scan_location_t;
-
-typedef struct ss_lru_item_t
-{
-	struct ss_lru_item_t *prev;
-	struct ss_lru_item_t *next;
-	ss_scan_location_t location;
-} ss_lru_item_t;
-
-typedef struct ss_scan_locations_t
-{
-	ss_lru_item_t *head;
-	ss_lru_item_t *tail;
-	ss_lru_item_t items[FLEXIBLE_ARRAY_MEMBER]; /* SYNC_SCAN_NELEM items */
-} ss_scan_locations_t;
-
-#define SizeOfScanLocations(N) \
-	(offsetof(ss_scan_locations_t, items) + (N) * sizeof(ss_lru_item_t))
-
-/* Pointer to struct in shared memory */
-static ss_scan_locations_t *scan_locations;
-
-/* prototypes for internal functions */
-static BlockNumber ss_search(RelFileNode relfilenode,
-							 BlockNumber location, bool set);
-
-
-/*
- * SyncScanShmemSize --- report amount of shared memory space needed
- */
-Size
-SyncScanShmemSize(void)
-{
-	return SizeOfScanLocations(SYNC_SCAN_NELEM);
-}
-
-/*
- * SyncScanShmemInit --- initialize this module's shared memory
- */
-void
-SyncScanShmemInit(void)
-{
-	int			i;
-	bool		found;
-
-	scan_locations = (ss_scan_locations_t *)
-		ShmemInitStruct("Sync Scan Locations List",
-						SizeOfScanLocations(SYNC_SCAN_NELEM),
-						&found);
-
-	if (!IsUnderPostmaster)
-	{
-		/* Initialize shared memory area */
-		Assert(!found);
-
-		scan_locations->head = &scan_locations->items[0];
-		scan_locations->tail = &scan_locations->items[SYNC_SCAN_NELEM - 1];
-
-		for (i = 0; i < SYNC_SCAN_NELEM; i++)
-		{
-			ss_lru_item_t *item = &scan_locations->items[i];
-
-			/*
-			 * Initialize all slots with invalid values. As scans are started,
-			 * these invalid entries will fall off the LRU list and get
-			 * replaced with real entries.
-			 */
-			item->location.relfilenode.spcNode = InvalidOid;
-			item->location.relfilenode.dbNode = InvalidOid;
-			item->location.relfilenode.relNode = InvalidOid;
-			item->location.location = InvalidBlockNumber;
-
-			item->prev = (i > 0) ?
-				(&scan_locations->items[i - 1]) : NULL;
-			item->next = (i < SYNC_SCAN_NELEM - 1) ?
-				(&scan_locations->items[i + 1]) : NULL;
-		}
-	}
-	else
-		Assert(found);
-}
-
-/*
- * ss_search --- search the scan_locations structure for an entry with the
- *		given relfilenode.
- *
- * If "set" is true, the location is updated to the given location.  If no
- * entry for the given relfilenode is found, it will be created at the head
- * of the list with the given location, even if "set" is false.
- *
- * In any case, the location after possible update is returned.
- *
- * Caller is responsible for having acquired suitable lock on the shared
- * data structure.
- */
-static BlockNumber
-ss_search(RelFileNode relfilenode, BlockNumber location, bool set)
-{
-	ss_lru_item_t *item;
-
-	item = scan_locations->head;
-	for (;;)
-	{
-		bool		match;
-
-		match = RelFileNodeEquals(item->location.relfilenode, relfilenode);
-
-		if (match || item->next == NULL)
-		{
-			/*
-			 * If we reached the end of list and no match was found, take over
-			 * the last entry
-			 */
-			if (!match)
-			{
-				item->location.relfilenode = relfilenode;
-				item->location.location = location;
-			}
-			else if (set)
-				item->location.location = location;
-
-			/* Move the entry to the front of the LRU list */
-			if (item != scan_locations->head)
-			{
-				/* unlink */
-				if (item == scan_locations->tail)
-					scan_locations->tail = item->prev;
-				item->prev->next = item->next;
-				if (item->next)
-					item->next->prev = item->prev;
-
-				/* link */
-				item->prev = NULL;
-				item->next = scan_locations->head;
-				scan_locations->head->prev = item;
-				scan_locations->head = item;
-			}
-
-			return item->location.location;
-		}
-
-		item = item->next;
-	}
-
-	/* not reached */
-}
-
-/*
- * ss_get_location --- get the optimal starting location for scan
- *
- * Returns the last-reported location of a sequential scan on the
- * relation, or 0 if no valid location is found.
- *
- * We expect the caller has just done RelationGetNumberOfBlocks(), and
- * so that number is passed in rather than computing it again.  The result
- * is guaranteed less than relnblocks (assuming that's > 0).
- */
-BlockNumber
-ss_get_location(Relation rel, BlockNumber relnblocks)
-{
-	BlockNumber startloc;
-
-	LWLockAcquire(SyncScanLock, LW_EXCLUSIVE);
-	startloc = ss_search(rel->rd_node, 0, false);
-	LWLockRelease(SyncScanLock);
-
-	/*
-	 * If the location is not a valid block number for this scan, start at 0.
-	 *
-	 * This can happen if for instance a VACUUM truncated the table since the
-	 * location was saved.
-	 */
-	if (startloc >= relnblocks)
-		startloc = 0;
-
-#ifdef TRACE_SYNCSCAN
-	if (trace_syncscan)
-		elog(LOG,
-			 "SYNC_SCAN: start \"%s\" (size %u) at %u",
-			 RelationGetRelationName(rel), relnblocks, startloc);
-#endif
-
-	return startloc;
-}
-
-/*
- * ss_report_location --- update the current scan location
- *
- * Writes an entry into the shared Sync Scan state of the form
- * (relfilenode, blocknumber), overwriting any existing entry for the
- * same relfilenode.
- */
-void
-ss_report_location(Relation rel, BlockNumber location)
-{
-#ifdef TRACE_SYNCSCAN
-	if (trace_syncscan)
-	{
-		if ((location % 1024) == 0)
-			elog(LOG,
-				 "SYNC_SCAN: scanning \"%s\" at %u",
-				 RelationGetRelationName(rel), location);
-	}
-#endif
-
-	/*
-	 * To reduce lock contention, only report scan progress every N pages. For
-	 * the same reason, don't block if the lock isn't immediately available.
-	 * Missing a few updates isn't critical, it just means that a new scan
-	 * that wants to join the pack will start a little bit behind the head of
-	 * the scan.  Hopefully the pages are still in OS cache and the scan
-	 * catches up quickly.
-	 */
-	if ((location % SYNC_SCAN_REPORT_INTERVAL) == 0)
-	{
-		if (LWLockConditionalAcquire(SyncScanLock, LW_EXCLUSIVE))
-		{
-			(void) ss_search(rel->rd_node, location, true);
-			LWLockRelease(SyncScanLock);
-		}
-#ifdef TRACE_SYNCSCAN
-		else if (trace_syncscan)
-			elog(LOG,
-				 "SYNC_SCAN: missed update for \"%s\" at %u",
-				 RelationGetRelationName(rel), location);
-#endif
-	}
-}