Restructure index access method API to hide most of it at the C level.

This patch reduces pg_am to just two columns, a name and a handler function. All the data formerly obtained from pg_am is now provided in a C struct returned by the handler function. This is similar to the designs we've adopted for FDWs and tablesample methods. There are multiple advantages. For one, the index AM's support functions are now simple C functions, making them faster to call and much less error-prone, since the C compiler can now check function signatures. For another, this will make it far more practical to define index access methods in installable extensions.

A disadvantage is that SQL-level code can no longer see attributes of index AMs; in particular, some of the crosschecks in the opr_sanity regression test are no longer possible from SQL. We've addressed that by adding a facility for the index AM to perform such checks instead. (Much more could be done in that line, but for now we're content if the amvalidate functions more or less replace what opr_sanity used to do.) We might also want to expose some sort of reporting functionality, but this patch doesn't do that.

Alexander Korotkov, reviewed by Petr Jelínek, and rather heavily editorialized on by me.
2025-11-13 16:22:44 +03:00 · 2016-01-17 19:36:59 -05:00
parent 8d290c8ec6
commit 65c5fcd353
93 changed files with 2493 additions and 1924 deletions
--- a/src/backend/access/hash/Makefile
+++ b/src/backend/access/hash/Makefile
@@ -13,6 +13,6 @@ top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global

 OBJS = hash.o hashfunc.o hashinsert.o hashovfl.o hashpage.o hashscan.o \
-       hashsearch.o hashsort.o hashutil.o
+       hashsearch.o hashsort.o hashutil.o hashvalidate.o

 include $(top_srcdir)/src/backend/common.mk
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -22,9 +22,8 @@
 #include "access/relscan.h"
 #include "catalog/index.h"
 #include "commands/vacuum.h"
-#include "optimizer/cost.h"
 #include "optimizer/plancat.h"
-#include "storage/bufmgr.h"
+#include "utils/index_selfuncs.h"
 #include "utils/rel.h"


@@ -44,14 +43,55 @@ static void hashbuildCallback(Relation index,


 /*
- *	hashbuild() -- build a new hash index.
+ * Hash handler function: return IndexAmRoutine with access method parameters
+ * and callbacks.
 */
 Datum
-hashbuild(PG_FUNCTION_ARGS)
+hashhandler(PG_FUNCTION_ARGS)
+{
+	IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
+
+	amroutine->amstrategies = 1;	/* hash has a single operator strategy */
+	amroutine->amsupport = 1;		/* ... and a single support function */
+	amroutine->amcanorder = false;
+	amroutine->amcanorderbyop = false;
+	amroutine->amcanbackward = true;
+	amroutine->amcanunique = false;
+	amroutine->amcanmulticol = false;
+	amroutine->amoptionalkey = false;
+	amroutine->amsearcharray = false;
+	amroutine->amsearchnulls = false;
+	amroutine->amstorage = false;
+	amroutine->amclusterable = false;
+	amroutine->ampredlocks = false;
+	amroutine->amkeytype = INT4OID;
+
+	amroutine->ambuild = hashbuild;
+	amroutine->ambuildempty = hashbuildempty;
+	amroutine->aminsert = hashinsert;
+	amroutine->ambulkdelete = hashbulkdelete;
+	amroutine->amvacuumcleanup = hashvacuumcleanup;
+	amroutine->amcanreturn = NULL;
+	amroutine->amcostestimate = hashcostestimate;
+	amroutine->amoptions = hashoptions;
+	amroutine->amvalidate = hashvalidate;
+	amroutine->ambeginscan = hashbeginscan;
+	amroutine->amrescan = hashrescan;
+	amroutine->amgettuple = hashgettuple;
+	amroutine->amgetbitmap = hashgetbitmap;
+	amroutine->amendscan = hashendscan;
+	amroutine->ammarkpos = NULL;	/* hash does not support mark/restore */
+	amroutine->amrestrpos = NULL;	/* hash does not support mark/restore */
+
+	PG_RETURN_POINTER(amroutine);
+}
+
+/*
+ *	hashbuild() -- build a new hash index.
+ */
+IndexBuildResult *
+hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 {
-	Relation	heap = (Relation) PG_GETARG_POINTER(0);
-	Relation	index = (Relation) PG_GETARG_POINTER(1);
-	IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
 	IndexBuildResult *result;
 	BlockNumber relpages;
 	double		reltuples;
@@ -112,20 +152,16 @@ hashbuild(PG_FUNCTION_ARGS)
 	result->heap_tuples = reltuples;
 	result->index_tuples = buildstate.indtuples;

-	PG_RETURN_POINTER(result);
+	return result;
 }

 /*
 *	hashbuildempty() -- build an empty hash index in the initialization fork
 */
-Datum
-hashbuildempty(PG_FUNCTION_ARGS)
+void
+hashbuildempty(Relation index)
 {
-	Relation	index = (Relation) PG_GETARG_POINTER(0);
-
 	_hash_metapinit(index, 0, INIT_FORKNUM);
-
-	PG_RETURN_VOID();
 }

 /*
@@ -167,18 +203,11 @@ hashbuildCallback(Relation index,
 *	Hash on the heap tuple's key, form an index tuple with hash code.
 *	Find the appropriate location for the new tuple, and put it there.
 */
-Datum
-hashinsert(PG_FUNCTION_ARGS)
+bool
+hashinsert(Relation rel, Datum *values, bool *isnull,
+		   ItemPointer ht_ctid, Relation heapRel,
+		   IndexUniqueCheck checkUnique)
 {
-	Relation	rel = (Relation) PG_GETARG_POINTER(0);
-	Datum	   *values = (Datum *) PG_GETARG_POINTER(1);
-	bool	   *isnull = (bool *) PG_GETARG_POINTER(2);
-	ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
-
-#ifdef NOT_USED
-	Relation	heapRel = (Relation) PG_GETARG_POINTER(4);
-	IndexUniqueCheck checkUnique = (IndexUniqueCheck) PG_GETARG_INT32(5);
-#endif
 	IndexTuple	itup;

 	/*
@@ -191,7 +220,7 @@ hashinsert(PG_FUNCTION_ARGS)
 	 * chosen in 1986, not of the way nulls are handled here.
 	 */
 	if (isnull[0])
-		PG_RETURN_BOOL(false);
+		return false;

 	/* generate an index tuple */
 	itup = _hash_form_tuple(rel, values, isnull);
@@ -201,18 +230,16 @@ hashinsert(PG_FUNCTION_ARGS)

 	pfree(itup);

-	PG_RETURN_BOOL(false);
+	return false;
 }


 /*
 *	hashgettuple() -- Get the next tuple in the scan.
 */
-Datum
-hashgettuple(PG_FUNCTION_ARGS)
+bool
+hashgettuple(IndexScanDesc scan, ScanDirection dir)
 {
-	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
-	ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
 	HashScanOpaque so = (HashScanOpaque) scan->opaque;
 	Relation	rel = scan->indexRelation;
 	Buffer		buf;
@@ -314,18 +341,16 @@ hashgettuple(PG_FUNCTION_ARGS)
 	/* Return current heap TID on success */
 	scan->xs_ctup.t_self = so->hashso_heappos;

-	PG_RETURN_BOOL(res);
+	return res;
 }


 /*
 *	hashgetbitmap() -- get all tuples at once
 */
-Datum
-hashgetbitmap(PG_FUNCTION_ARGS)
+int64
+hashgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 {
-	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
-	TIDBitmap  *tbm = (TIDBitmap *) PG_GETARG_POINTER(1);
 	HashScanOpaque so = (HashScanOpaque) scan->opaque;
 	bool		res;
 	int64		ntids = 0;
@@ -362,19 +387,16 @@ hashgetbitmap(PG_FUNCTION_ARGS)
 		res = _hash_next(scan, ForwardScanDirection);
 	}

-	PG_RETURN_INT64(ntids);
+	return ntids;
 }


 /*
 *	hashbeginscan() -- start a scan on a hash index
 */
-Datum
-hashbeginscan(PG_FUNCTION_ARGS)
+IndexScanDesc
+hashbeginscan(Relation rel, int nkeys, int norderbys)
 {
-	Relation	rel = (Relation) PG_GETARG_POINTER(0);
-	int			nkeys = PG_GETARG_INT32(1);
-	int			norderbys = PG_GETARG_INT32(2);
 	IndexScanDesc scan;
 	HashScanOpaque so;

@@ -396,19 +418,16 @@ hashbeginscan(PG_FUNCTION_ARGS)
 	/* register scan in case we change pages it's using */
 	_hash_regscan(scan);

-	PG_RETURN_POINTER(scan);
+	return scan;
 }

 /*
 *	hashrescan() -- rescan an index relation
 */
-Datum
-hashrescan(PG_FUNCTION_ARGS)
+void
+hashrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
+		   ScanKey orderbys, int norderbys)
 {
-	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
-	ScanKey		scankey = (ScanKey) PG_GETARG_POINTER(1);
-
-	/* remaining arguments are ignored */
 	HashScanOpaque so = (HashScanOpaque) scan->opaque;
 	Relation	rel = scan->indexRelation;

@@ -434,17 +453,14 @@ hashrescan(PG_FUNCTION_ARGS)
 				scan->numberOfKeys * sizeof(ScanKeyData));
 		so->hashso_bucket_valid = false;
 	}
-
-	PG_RETURN_VOID();
 }

 /*
 *	hashendscan() -- close down a scan
 */
-Datum
-hashendscan(PG_FUNCTION_ARGS)
+void
+hashendscan(IndexScanDesc scan)
 {
-	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
 	HashScanOpaque so = (HashScanOpaque) scan->opaque;
 	Relation	rel = scan->indexRelation;

@@ -463,28 +479,6 @@ hashendscan(PG_FUNCTION_ARGS)

 	pfree(so);
 	scan->opaque = NULL;
-
-	PG_RETURN_VOID();
-}
-
-/*
- *	hashmarkpos() -- save current scan position
- */
-Datum
-hashmarkpos(PG_FUNCTION_ARGS)
-{
-	elog(ERROR, "hash does not support mark/restore");
-	PG_RETURN_VOID();
-}
-
-/*
- *	hashrestrpos() -- restore scan to last saved position
- */
-Datum
-hashrestrpos(PG_FUNCTION_ARGS)
-{
-	elog(ERROR, "hash does not support mark/restore");
-	PG_RETURN_VOID();
 }

 /*
@@ -494,13 +488,10 @@ hashrestrpos(PG_FUNCTION_ARGS)
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
-Datum
-hashbulkdelete(PG_FUNCTION_ARGS)
+IndexBulkDeleteResult *
+hashbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
+			   IndexBulkDeleteCallback callback, void *callback_state)
 {
-	IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
-	IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
-	IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2);
-	void	   *callback_state = (void *) PG_GETARG_POINTER(3);
 	Relation	rel = info->index;
 	double		tuples_removed;
 	double		num_index_tuples;
@@ -670,7 +661,7 @@ loop_top:
 	stats->tuples_removed += tuples_removed;
 	/* hashvacuumcleanup will fill in num_pages */

-	PG_RETURN_POINTER(stats);
+	return stats;
 }

 /*
@@ -678,24 +669,22 @@ loop_top:
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
-Datum
-hashvacuumcleanup(PG_FUNCTION_ARGS)
+IndexBulkDeleteResult *
+hashvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 {
-	IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
-	IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
 	Relation	rel = info->index;
 	BlockNumber num_pages;

 	/* If hashbulkdelete wasn't called, return NULL signifying no change */
 	/* Note: this covers the analyze_only case too */
 	if (stats == NULL)
-		PG_RETURN_POINTER(NULL);
+		return NULL;

 	/* update statistics */
 	num_pages = RelationGetNumberOfBlocks(rel);
 	stats->num_pages = num_pages;

-	PG_RETURN_POINTER(stats);
+	return stats;
 }


--- a/src/backend/access/hash/hashutil.c
+++ b/src/backend/access/hash/hashutil.c
@@ -217,18 +217,10 @@ _hash_checkpage(Relation rel, Buffer buf, int flags)
 	}
 }

-Datum
-hashoptions(PG_FUNCTION_ARGS)
+bytea *
+hashoptions(Datum reloptions, bool validate)
 {
-	Datum		reloptions = PG_GETARG_DATUM(0);
-	bool		validate = PG_GETARG_BOOL(1);
-	bytea	   *result;
-
-	result = default_reloptions(reloptions, validate, RELOPT_KIND_HASH);
-
-	if (result)
-		PG_RETURN_BYTEA_P(result);
-	PG_RETURN_NULL();
+	return default_reloptions(reloptions, validate, RELOPT_KIND_HASH);
 }

 /*
--- a/src/backend/access/hash/hashvalidate.c
+++ b/src/backend/access/hash/hashvalidate.c
@@ -0,0 +1,157 @@
+/*-------------------------------------------------------------------------
+ *
+ * hashvalidate.c
+ *	  Opclass validator for hash.
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/access/hash/hashvalidate.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/hash.h"
+#include "access/htup_details.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_amproc.h"
+#include "catalog/pg_opclass.h"
+#include "utils/builtins.h"
+#include "utils/catcache.h"
+#include "utils/syscache.h"
+
+
+/*
+ * Validator for a hash opclass: given the opclass OID, cross-check the
+ * operators and support functions of its opfamily.  Any inconsistency is
+ * reported with ereport(ERROR); if we get to the end, return true.
+ *
+ * Some of the checks cover the whole opfamily, and therefore are redundant
+ * when checking each opclass in a family.  But they don't run long enough to
+ * be much of a problem, so we accept the duplication rather than complicate
+ * the amvalidate API.  The code also relies on the fact that hash has only
+ * one operator strategy and support function, so no incomplete-set checks.
+ */
+bool
+hashvalidate(Oid opclassoid)
+{
+	HeapTuple	classtup;
+	Form_pg_opclass classform;
+	Oid			opfamilyoid;
+	Oid			opcintype;
+	int			numclassops;
+	int32		classfuncbits;
+	CatCList   *proclist,
+			   *oprlist;
+	int			i,
+				j;
+
+	/* Fetch opclass information */
+	classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid));
+	if (!HeapTupleIsValid(classtup))
+		elog(ERROR, "cache lookup failed for operator class %u", opclassoid);
+	classform = (Form_pg_opclass) GETSTRUCT(classtup);
+
+	opfamilyoid = classform->opcfamily;
+	opcintype = classform->opcintype;
+
+	ReleaseSysCache(classtup);
+
+	/* Fetch all operators and support functions of the opfamily */
+	oprlist = SearchSysCacheList1(AMOPSTRATEGY, ObjectIdGetDatum(opfamilyoid));
+	proclist = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opfamilyoid));
+
+	/* We'll track the ops and functions belonging to the named opclass */
+	numclassops = 0;
+	classfuncbits = 0;
+
+	/* Check support functions */
+	for (i = 0; i < proclist->n_members; i++)
+	{
+		HeapTuple	proctup = &proclist->members[i]->tuple;
+		Form_pg_amproc procform = (Form_pg_amproc) GETSTRUCT(proctup);
+
+		/* Check that only allowed procedure numbers exist */
+		if (procform->amprocnum != HASHPROC)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("hash opfamily %u contains invalid support number %d for procedure %u",
+							opfamilyoid,
+							procform->amprocnum, procform->amproc)));
+
+		/* Note (one bit per support number) functions for the named opclass */
+		if (procform->amproclefttype == opcintype &&
+			procform->amprocrighttype == opcintype)
+			classfuncbits |= (1 << procform->amprocnum);
+	}
+
+	/* Check operators */
+	for (i = 0; i < oprlist->n_members; i++)
+	{
+		HeapTuple	oprtup = &oprlist->members[i]->tuple;
+		Form_pg_amop oprform = (Form_pg_amop) GETSTRUCT(oprtup);
+		bool		leftFound = false,
+					rightFound = false;
+
+		/* Check that only allowed strategy numbers exist */
+		if (oprform->amopstrategy < 1 ||
+			oprform->amopstrategy > HTMaxStrategyNumber)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("hash opfamily %u contains invalid strategy number %d for operator %u",
+							opfamilyoid,
+							oprform->amopstrategy, oprform->amopopr)));
+
+		/*
+		 * Need a hash proc for each operand type (matched on amproclefttype)
+		 */
+		for (j = 0; j < proclist->n_members; j++)
+		{
+			HeapTuple	proctup = &proclist->members[j]->tuple;
+			Form_pg_amproc procform = (Form_pg_amproc) GETSTRUCT(proctup);
+
+			if (procform->amproclefttype == oprform->amoplefttype)
+				leftFound = true;
+			if (procform->amproclefttype == oprform->amoprighttype)
+				rightFound = true;
+		}
+
+		if (!leftFound || !rightFound)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+			errmsg("hash opfamily %u lacks support function for operator %u",
+				   opfamilyoid, oprform->amopopr)));
+
+		/* hash doesn't support ORDER BY operators */
+		if (oprform->amoppurpose != AMOP_SEARCH ||
+			OidIsValid(oprform->amopsortfamily))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("hash opfamily %u contains invalid ORDER BY specification for operator %u",
+							opfamilyoid, oprform->amopopr)));
+
+		/* Count operators that are specifically for the named opclass */
+		if (oprform->amoplefttype == opcintype &&
+			oprform->amoprighttype == opcintype)
+			numclassops++;
+	}
+
+	/* Check that the named opclass is complete */
+	if (numclassops != HTMaxStrategyNumber)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("hash opclass %u is missing operator(s)",
+						opclassoid)));
+	if ((classfuncbits & (1 << HASHPROC)) == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+			   errmsg("hash opclass %u is missing required support function",
+					  opclassoid)));
+
+	ReleaseCatCacheList(proclist);
+	ReleaseCatCacheList(oprlist);
+
+	return true;
+}