diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 9c1d590dc71..3b435c107d0 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -1391,6 +1391,153 @@ heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *s return true; } +void +heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid, + ItemPointer maxtid) +{ + HeapScanDesc scan = (HeapScanDesc) sscan; + BlockNumber startBlk; + BlockNumber numBlks; + ItemPointerData highestItem; + ItemPointerData lowestItem; + + /* + * For relations without any pages, we can simply leave the TID range + * unset. There will be no tuples to scan, therefore no tuples outside + * the given TID range. + */ + if (scan->rs_nblocks == 0) + return; + + /* + * Set up some ItemPointers which point to the first and last possible + * tuples in the heap. + */ + ItemPointerSet(&highestItem, scan->rs_nblocks - 1, MaxOffsetNumber); + ItemPointerSet(&lowestItem, 0, FirstOffsetNumber); + + /* + * If the given maximum TID is below the highest possible TID in the + * relation, then restrict the range to that, otherwise we scan to the end + * of the relation. + */ + if (ItemPointerCompare(maxtid, &highestItem) < 0) + ItemPointerCopy(maxtid, &highestItem); + + /* + * If the given minimum TID is above the lowest possible TID in the + * relation, then restrict the range to only scan for TIDs above that. + */ + if (ItemPointerCompare(mintid, &lowestItem) > 0) + ItemPointerCopy(mintid, &lowestItem); + + /* + * Check for an empty range and protect from would be negative results + * from the numBlks calculation below. + */ + if (ItemPointerCompare(&highestItem, &lowestItem) < 0) + { + /* Set an empty range of blocks to scan */ + heap_setscanlimits(sscan, 0, 0); + return; + } + + /* + * Calculate the first block and the number of blocks we must scan. We + * could be more aggressive here and perform some more validation to try + * and further narrow the scope of blocks to scan by checking if the + * lowerItem has an offset above MaxOffsetNumber. In this case, we could + * advance startBlk by one. Likewise, if highestItem has an offset of 0 + * we could scan one fewer blocks. However, such an optimization does not + * seem worth troubling over, currently. + */ + startBlk = ItemPointerGetBlockNumberNoCheck(&lowestItem); + + numBlks = ItemPointerGetBlockNumberNoCheck(&highestItem) - + ItemPointerGetBlockNumberNoCheck(&lowestItem) + 1; + + /* Set the start block and number of blocks to scan */ + heap_setscanlimits(sscan, startBlk, numBlks); + + /* Finally, set the TID range in sscan */ + ItemPointerCopy(&lowestItem, &sscan->rs_mintid); + ItemPointerCopy(&highestItem, &sscan->rs_maxtid); +} + +bool +heap_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction, + TupleTableSlot *slot) +{ + HeapScanDesc scan = (HeapScanDesc) sscan; + ItemPointer mintid = &sscan->rs_mintid; + ItemPointer maxtid = &sscan->rs_maxtid; + + /* Note: no locking manipulations needed */ + for (;;) + { + if (sscan->rs_flags & SO_ALLOW_PAGEMODE) + heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key); + else + heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key); + + if (scan->rs_ctup.t_data == NULL) + { + ExecClearTuple(slot); + return false; + } + + /* + * heap_set_tidrange will have used heap_setscanlimits to limit the + * range of pages we scan to only ones that can contain the TID range + * we're scanning for. Here we must filter out any tuples from these + * pages that are outwith that range. + */ + if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0) + { + ExecClearTuple(slot); + + /* + * When scanning backwards, the TIDs will be in descending order. + * Future tuples in this direction will be lower still, so we can + * just return false to indicate there will be no more tuples. + */ + if (ScanDirectionIsBackward(direction)) + return false; + + continue; + } + + /* + * Likewise for the final page, we must filter out TIDs greater than + * maxtid. + */ + if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0) + { + ExecClearTuple(slot); + + /* + * When scanning forward, the TIDs will be in ascending order. + * Future tuples in this direction will be higher still, so we can + * just return false to indicate there will be no more tuples. + */ + if (ScanDirectionIsForward(direction)) + return false; + continue; + } + + break; + } + + /* + * if we get here it means we have a new current scan tuple, so point to + * the proper return buffer and return the tuple. + */ + pgstat_count_heap_getnext(scan->rs_base.rs_rd); + + ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf); + return true; +} + /* * heap_fetch - retrieve tuple with given tid * diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 4a70e20a143..bd5faf0c1fb 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -2542,6 +2542,9 @@ static const TableAmRoutine heapam_methods = { .scan_rescan = heap_rescan, .scan_getnextslot = heap_getnextslot, + .scan_set_tidrange = heap_set_tidrange, + .scan_getnextslot_tidrange = heap_getnextslot_tidrange, + .parallelscan_estimate = table_block_parallelscan_estimate, .parallelscan_initialize = table_block_parallelscan_initialize, .parallelscan_reinitialize = table_block_parallelscan_reinitialize, diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index f80e379973a..afc45429ba4 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1057,6 +1057,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used) case T_IndexOnlyScan: case T_BitmapHeapScan: case T_TidScan: + case T_TidRangeScan: case T_SubqueryScan: case T_FunctionScan: case T_TableFuncScan: @@ -1223,6 +1224,9 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_TidScan: pname = sname = "Tid Scan"; break; + case T_TidRangeScan: + pname = sname = "Tid Range Scan"; + break; case T_SubqueryScan: pname = sname = "Subquery Scan"; break; @@ -1417,6 +1421,7 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_SampleScan: case T_BitmapHeapScan: case T_TidScan: + case T_TidRangeScan: case T_SubqueryScan: case T_FunctionScan: case T_TableFuncScan: @@ -1871,6 +1876,23 @@ ExplainNode(PlanState *planstate, List *ancestors, planstate, es); } break; + case T_TidRangeScan: + { + /* + * The tidrangequals list has AND semantics, so be sure to + * show it as an AND condition. + */ + List *tidquals = ((TidRangeScan *) plan)->tidrangequals; + + if (list_length(tidquals) > 1) + tidquals = list_make1(make_andclause(tidquals)); + show_scan_qual(tidquals, "TID Cond", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + } + break; case T_ForeignScan: show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) @@ -3558,6 +3580,7 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es) case T_IndexOnlyScan: case T_BitmapHeapScan: case T_TidScan: + case T_TidRangeScan: case T_ForeignScan: case T_CustomScan: case T_ModifyTable: diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile index f990c6473a3..74ac59faa13 100644 --- a/src/backend/executor/Makefile +++ b/src/backend/executor/Makefile @@ -67,6 +67,7 @@ OBJS = \ nodeSubplan.o \ nodeSubqueryscan.o \ nodeTableFuncscan.o \ + nodeTidrangescan.o \ nodeTidscan.o \ nodeUnique.o \ nodeValuesscan.o \ diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 23bdb53cd10..4543ac79edf 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -51,6 +51,7 @@ #include "executor/nodeSubplan.h" #include "executor/nodeSubqueryscan.h" #include "executor/nodeTableFuncscan.h" +#include "executor/nodeTidrangescan.h" #include "executor/nodeTidscan.h" #include "executor/nodeUnique.h" #include "executor/nodeValuesscan.h" @@ -197,6 +198,10 @@ ExecReScan(PlanState *node) ExecReScanTidScan((TidScanState *) node); break; + case T_TidRangeScanState: + ExecReScanTidRangeScan((TidRangeScanState *) node); + break; + case T_SubqueryScanState: ExecReScanSubqueryScan((SubqueryScanState *) node); break; @@ -562,6 +567,7 @@ ExecSupportsBackwardScan(Plan *node) case T_SeqScan: case T_TidScan: + case T_TidRangeScan: case T_FunctionScan: case T_ValuesScan: case T_CteScan: diff --git a/src/backend/executor/execCurrent.c b/src/backend/executor/execCurrent.c index 33221a4d6ce..4f430fb1603 100644 --- a/src/backend/executor/execCurrent.c +++ b/src/backend/executor/execCurrent.c @@ -336,6 +336,7 @@ search_plan_tree(PlanState *node, Oid table_oid, case T_IndexOnlyScanState: case T_BitmapHeapScanState: case T_TidScanState: + case T_TidRangeScanState: case T_ForeignScanState: case T_CustomScanState: { diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 414df50a054..29766d8196f 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -109,6 +109,7 @@ #include "executor/nodeSubplan.h" #include "executor/nodeSubqueryscan.h" #include "executor/nodeTableFuncscan.h" +#include "executor/nodeTidrangescan.h" #include "executor/nodeTidscan.h" #include "executor/nodeUnique.h" #include "executor/nodeValuesscan.h" @@ -238,6 +239,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags) estate, eflags); break; + case T_TidRangeScan: + result = (PlanState *) ExecInitTidRangeScan((TidRangeScan *) node, + estate, eflags); + break; + case T_SubqueryScan: result = (PlanState *) ExecInitSubqueryScan((SubqueryScan *) node, estate, eflags); @@ -637,6 +643,10 @@ ExecEndNode(PlanState *node) ExecEndTidScan((TidScanState *) node); break; + case T_TidRangeScanState: + ExecEndTidRangeScan((TidRangeScanState *) node); + break; + case T_SubqueryScanState: ExecEndSubqueryScan((SubqueryScanState *) node); break; diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c new file mode 100644 index 00000000000..2b0d205d7dd --- /dev/null +++ b/src/backend/executor/nodeTidrangescan.c @@ -0,0 +1,413 @@ +/*------------------------------------------------------------------------- + * + * nodeTidrangescan.c + * Routines to support TID range scans of relations + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/executor/nodeTidrangescan.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/relscan.h" +#include "access/sysattr.h" +#include "access/tableam.h" +#include "catalog/pg_operator.h" +#include "executor/execdebug.h" +#include "executor/nodeTidrangescan.h" +#include "nodes/nodeFuncs.h" +#include "storage/bufmgr.h" +#include "utils/rel.h" + + +#define IsCTIDVar(node) \ + ((node) != NULL && \ + IsA((node), Var) && \ + ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \ + ((Var *) (node))->varlevelsup == 0) + +typedef enum +{ + TIDEXPR_UPPER_BOUND, + TIDEXPR_LOWER_BOUND +} TidExprType; + +/* Upper or lower range bound for scan */ +typedef struct TidOpExpr +{ + TidExprType exprtype; /* type of op; lower or upper */ + ExprState *exprstate; /* ExprState for a TID-yielding subexpr */ + bool inclusive; /* whether op is inclusive */ +} TidOpExpr; + +/* + * For the given 'expr', build and return an appropriate TidOpExpr taking into + * account the expr's operator and operand order. + */ +static TidOpExpr * +MakeTidOpExpr(OpExpr *expr, TidRangeScanState *tidstate) +{ + Node *arg1 = get_leftop((Expr *) expr); + Node *arg2 = get_rightop((Expr *) expr); + ExprState *exprstate = NULL; + bool invert = false; + TidOpExpr *tidopexpr; + + if (IsCTIDVar(arg1)) + exprstate = ExecInitExpr((Expr *) arg2, &tidstate->ss.ps); + else if (IsCTIDVar(arg2)) + { + exprstate = ExecInitExpr((Expr *) arg1, &tidstate->ss.ps); + invert = true; + } + else + elog(ERROR, "could not identify CTID variable"); + + tidopexpr = (TidOpExpr *) palloc(sizeof(TidOpExpr)); + tidopexpr->inclusive = false; /* for now */ + + switch (expr->opno) + { + case TIDLessEqOperator: + tidopexpr->inclusive = true; + /* fall through */ + case TIDLessOperator: + tidopexpr->exprtype = invert ? TIDEXPR_LOWER_BOUND : TIDEXPR_UPPER_BOUND; + break; + case TIDGreaterEqOperator: + tidopexpr->inclusive = true; + /* fall through */ + case TIDGreaterOperator: + tidopexpr->exprtype = invert ? TIDEXPR_UPPER_BOUND : TIDEXPR_LOWER_BOUND; + break; + default: + elog(ERROR, "could not identify CTID operator"); + } + + tidopexpr->exprstate = exprstate; + + return tidopexpr; +} + +/* + * Extract the qual subexpressions that yield TIDs to search for, + * and compile them into ExprStates if they're ordinary expressions. + */ +static void +TidExprListCreate(TidRangeScanState *tidrangestate) +{ + TidRangeScan *node = (TidRangeScan *) tidrangestate->ss.ps.plan; + List *tidexprs = NIL; + ListCell *l; + + foreach(l, node->tidrangequals) + { + OpExpr *opexpr = lfirst(l); + TidOpExpr *tidopexpr; + + if (!IsA(opexpr, OpExpr)) + elog(ERROR, "could not identify CTID expression"); + + tidopexpr = MakeTidOpExpr(opexpr, tidrangestate); + tidexprs = lappend(tidexprs, tidopexpr); + } + + tidrangestate->trss_tidexprs = tidexprs; +} + +/* ---------------------------------------------------------------- + * TidRangeEval + * + * Compute and set node's block and offset range to scan by evaluating + * the trss_tidexprs. Returns false if we detect the range cannot + * contain any tuples. Returns true if it's possible for the range to + * contain tuples. + * ---------------------------------------------------------------- + */ +static bool +TidRangeEval(TidRangeScanState *node) +{ + ExprContext *econtext = node->ss.ps.ps_ExprContext; + ItemPointerData lowerBound; + ItemPointerData upperBound; + ListCell *l; + + /* + * Set the upper and lower bounds to the absolute limits of the range of + * the ItemPointer type. Below we'll try to narrow this range on either + * side by looking at the TidOpExprs. + */ + ItemPointerSet(&lowerBound, 0, 0); + ItemPointerSet(&upperBound, InvalidBlockNumber, PG_UINT16_MAX); + + foreach(l, node->trss_tidexprs) + { + TidOpExpr *tidopexpr = (TidOpExpr *) lfirst(l); + ItemPointer itemptr; + bool isNull; + + /* Evaluate this bound. */ + itemptr = (ItemPointer) + DatumGetPointer(ExecEvalExprSwitchContext(tidopexpr->exprstate, + econtext, + &isNull)); + + /* If the bound is NULL, *nothing* matches the qual. */ + if (isNull) + return false; + + if (tidopexpr->exprtype == TIDEXPR_LOWER_BOUND) + { + ItemPointerData lb; + + ItemPointerCopy(itemptr, &lb); + + /* + * Normalize non-inclusive ranges to become inclusive. The + * resulting ItemPointer here may not be a valid item pointer. + */ + if (!tidopexpr->inclusive) + ItemPointerInc(&lb); + + /* Check if we can narrow the range using this qual */ + if (ItemPointerCompare(&lb, &lowerBound) > 0) + ItemPointerCopy(&lb, &lowerBound); + } + + else if (tidopexpr->exprtype == TIDEXPR_UPPER_BOUND) + { + ItemPointerData ub; + + ItemPointerCopy(itemptr, &ub); + + /* + * Normalize non-inclusive ranges to become inclusive. The + * resulting ItemPointer here may not be a valid item pointer. + */ + if (!tidopexpr->inclusive) + ItemPointerDec(&ub); + + /* Check if we can narrow the range using this qual */ + if (ItemPointerCompare(&ub, &upperBound) < 0) + ItemPointerCopy(&ub, &upperBound); + } + } + + ItemPointerCopy(&lowerBound, &node->trss_mintid); + ItemPointerCopy(&upperBound, &node->trss_maxtid); + + return true; +} + +/* ---------------------------------------------------------------- + * TidRangeNext + * + * Retrieve a tuple from the TidRangeScan node's currentRelation + * using the TIDs in the TidRangeScanState information. + * + * ---------------------------------------------------------------- + */ +static TupleTableSlot * +TidRangeNext(TidRangeScanState *node) +{ + TableScanDesc scandesc; + EState *estate; + ScanDirection direction; + TupleTableSlot *slot; + + /* + * extract necessary information from TID scan node + */ + scandesc = node->ss.ss_currentScanDesc; + estate = node->ss.ps.state; + slot = node->ss.ss_ScanTupleSlot; + direction = estate->es_direction; + + if (!node->trss_inScan) + { + /* First time through, compute TID range to scan */ + if (!TidRangeEval(node)) + return NULL; + + if (scandesc == NULL) + { + scandesc = table_beginscan_tidrange(node->ss.ss_currentRelation, + estate->es_snapshot, + &node->trss_mintid, + &node->trss_maxtid); + node->ss.ss_currentScanDesc = scandesc; + } + else + { + /* rescan with the updated TID range */ + table_rescan_tidrange(scandesc, &node->trss_mintid, + &node->trss_maxtid); + } + + node->trss_inScan = true; + } + + /* Fetch the next tuple. */ + if (!table_scan_getnextslot_tidrange(scandesc, direction, slot)) + { + node->trss_inScan = false; + ExecClearTuple(slot); + } + + return slot; +} + +/* + * TidRangeRecheck -- access method routine to recheck a tuple in EvalPlanQual + */ +static bool +TidRangeRecheck(TidRangeScanState *node, TupleTableSlot *slot) +{ + return true; +} + +/* ---------------------------------------------------------------- + * ExecTidRangeScan(node) + * + * Scans the relation using tids and returns the next qualifying tuple. + * We call the ExecScan() routine and pass it the appropriate + * access method functions. + * + * Conditions: + * -- the "cursor" maintained by the AMI is positioned at the tuple + * returned previously. + * + * Initial States: + * -- the relation indicated is opened for TID range scanning. + * ---------------------------------------------------------------- + */ +static TupleTableSlot * +ExecTidRangeScan(PlanState *pstate) +{ + TidRangeScanState *node = castNode(TidRangeScanState, pstate); + + return ExecScan(&node->ss, + (ExecScanAccessMtd) TidRangeNext, + (ExecScanRecheckMtd) TidRangeRecheck); +} + +/* ---------------------------------------------------------------- + * ExecReScanTidRangeScan(node) + * ---------------------------------------------------------------- + */ +void +ExecReScanTidRangeScan(TidRangeScanState *node) +{ + /* mark scan as not in progress, and tid range list as not computed yet */ + node->trss_inScan = false; + + /* + * We must wait until TidRangeNext before calling table_rescan_tidrange. + */ + ExecScanReScan(&node->ss); +} + +/* ---------------------------------------------------------------- + * ExecEndTidRangeScan + * + * Releases any storage allocated through C routines. + * Returns nothing. + * ---------------------------------------------------------------- + */ +void +ExecEndTidRangeScan(TidRangeScanState *node) +{ + TableScanDesc scan = node->ss.ss_currentScanDesc; + + if (scan != NULL) + table_endscan(scan); + + /* + * Free the exprcontext + */ + ExecFreeExprContext(&node->ss.ps); + + /* + * clear out tuple table slots + */ + if (node->ss.ps.ps_ResultTupleSlot) + ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); + ExecClearTuple(node->ss.ss_ScanTupleSlot); +} + +/* ---------------------------------------------------------------- + * ExecInitTidRangeScan + * + * Initializes the tid range scan's state information, creates + * scan keys, and opens the scan relation. + * + * Parameters: + * node: TidRangeScan node produced by the planner. + * estate: the execution state initialized in InitPlan. + * ---------------------------------------------------------------- + */ +TidRangeScanState * +ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags) +{ + TidRangeScanState *tidrangestate; + Relation currentRelation; + + /* + * create state structure + */ + tidrangestate = makeNode(TidRangeScanState); + tidrangestate->ss.ps.plan = (Plan *) node; + tidrangestate->ss.ps.state = estate; + tidrangestate->ss.ps.ExecProcNode = ExecTidRangeScan; + + /* + * Miscellaneous initialization + * + * create expression context for node + */ + ExecAssignExprContext(estate, &tidrangestate->ss.ps); + + /* + * mark scan as not in progress, and TID range as not computed yet + */ + tidrangestate->trss_inScan = false; + + /* + * open the scan relation + */ + currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags); + + tidrangestate->ss.ss_currentRelation = currentRelation; + tidrangestate->ss.ss_currentScanDesc = NULL; /* no table scan here */ + + /* + * get the scan type from the relation descriptor. + */ + ExecInitScanTupleSlot(estate, &tidrangestate->ss, + RelationGetDescr(currentRelation), + table_slot_callbacks(currentRelation)); + + /* + * Initialize result type and projection. + */ + ExecInitResultTypeTL(&tidrangestate->ss.ps); + ExecAssignScanProjectionInfo(&tidrangestate->ss); + + /* + * initialize child expressions + */ + tidrangestate->ss.ps.qual = + ExecInitQual(node->scan.plan.qual, (PlanState *) tidrangestate); + + TidExprListCreate(tidrangestate); + + /* + * all done. + */ + return tidrangestate; +} diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 65bbc18ecba..aaba1ec2c4a 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -585,6 +585,27 @@ _copyTidScan(const TidScan *from) return newnode; } +/* + * _copyTidRangeScan + */ +static TidRangeScan * +_copyTidRangeScan(const TidRangeScan *from) +{ + TidRangeScan *newnode = makeNode(TidRangeScan); + + /* + * copy node superclass fields + */ + CopyScanFields((const Scan *) from, (Scan *) newnode); + + /* + * copy remainder of node + */ + COPY_NODE_FIELD(tidrangequals); + + return newnode; +} + /* * _copySubqueryScan */ @@ -4938,6 +4959,9 @@ copyObjectImpl(const void *from) case T_TidScan: retval = _copyTidScan(from); break; + case T_TidRangeScan: + retval = _copyTidRangeScan(from); + break; case T_SubqueryScan: retval = _copySubqueryScan(from); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index f5dcedf6e89..8fc432bfe17 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -608,6 +608,16 @@ _outTidScan(StringInfo str, const TidScan *node) WRITE_NODE_FIELD(tidquals); } +static void +_outTidRangeScan(StringInfo str, const TidRangeScan *node) +{ + WRITE_NODE_TYPE("TIDRANGESCAN"); + + _outScanInfo(str, (const Scan *) node); + + WRITE_NODE_FIELD(tidrangequals); +} + static void _outSubqueryScan(StringInfo str, const SubqueryScan *node) { @@ -2314,6 +2324,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node) WRITE_NODE_FIELD(subroot); WRITE_NODE_FIELD(subplan_params); WRITE_INT_FIELD(rel_parallel_workers); + WRITE_UINT_FIELD(amflags); WRITE_OID_FIELD(serverid); WRITE_OID_FIELD(userid); WRITE_BOOL_FIELD(useridiscurrent); @@ -3810,6 +3821,9 @@ outNode(StringInfo str, const void *obj) case T_TidScan: _outTidScan(str, obj); break; + case T_TidRangeScan: + _outTidRangeScan(str, obj); + break; case T_SubqueryScan: _outSubqueryScan(str, obj); break; diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index efb52858c88..4a6c3481623 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -374,6 +374,7 @@ RelOptInfo - a relation or joined relations IndexPath - index scan BitmapHeapPath - top of a bitmapped index scan TidPath - scan by CTID + TidRangePath - scan a contiguous range of CTIDs SubqueryScanPath - scan a subquery-in-FROM ForeignPath - scan a foreign table, foreign join or foreign upper-relation CustomPath - for custom scan providers diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index aab06c7d213..a25b674a192 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1283,6 +1283,101 @@ cost_tidscan(Path *path, PlannerInfo *root, path->total_cost = startup_cost + run_cost; } +/* + * cost_tidrangescan + * Determines and sets the costs of scanning a relation using a range of + * TIDs for 'path' + * + * 'baserel' is the relation to be scanned + * 'tidrangequals' is the list of TID-checkable range quals + * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL + */ +void +cost_tidrangescan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, List *tidrangequals, + ParamPathInfo *param_info) +{ + Selectivity selectivity; + double pages; + Cost startup_cost = 0; + Cost run_cost = 0; + QualCost qpqual_cost; + Cost cpu_per_tuple; + QualCost tid_qual_cost; + double ntuples; + double nseqpages; + double spc_random_page_cost; + double spc_seq_page_cost; + + /* Should only be applied to base relations */ + Assert(baserel->relid > 0); + Assert(baserel->rtekind == RTE_RELATION); + + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; + + /* Count how many tuples and pages we expect to scan */ + selectivity = clauselist_selectivity(root, tidrangequals, baserel->relid, + JOIN_INNER, NULL); + pages = ceil(selectivity * baserel->pages); + + if (pages <= 0.0) + pages = 1.0; + + /* + * The first page in a range requires a random seek, but each subsequent + * page is just a normal sequential page read. NOTE: it's desirable for + * TID Range Scans to cost more than the equivalent Sequential Scans, + * because Seq Scans have some performance advantages such as scan + * synchronization and parallelizability, and we'd prefer one of them to + * be picked unless a TID Range Scan really is better. + */ + ntuples = selectivity * baserel->tuples; + nseqpages = pages - 1.0; + + if (!enable_tidscan) + startup_cost += disable_cost; + + /* + * The TID qual expressions will be computed once, any other baserestrict + * quals once per retrieved tuple. + */ + cost_qual_eval(&tid_qual_cost, tidrangequals, root); + + /* fetch estimated page cost for tablespace containing table */ + get_tablespace_page_costs(baserel->reltablespace, + &spc_random_page_cost, + &spc_seq_page_cost); + + /* disk costs; 1 random page and the remainder as seq pages */ + run_cost += spc_random_page_cost + spc_seq_page_cost * nseqpages; + + /* Add scanning CPU costs */ + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + /* + * XXX currently we assume TID quals are a subset of qpquals at this + * point; they will be removed (if possible) when we create the plan, so + * we subtract their cost from the total qpqual cost. (If the TID quals + * can't be removed, this is a mistake and we're going to underestimate + * the CPU cost a bit.) + */ + startup_cost += qpqual_cost.startup + tid_qual_cost.per_tuple; + cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple - + tid_qual_cost.per_tuple; + run_cost += cpu_per_tuple * ntuples; + + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->pathtarget->cost.startup; + run_cost += path->pathtarget->cost.per_tuple * path->rows; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + /* * cost_subqueryscan * Determines and returns the cost of scanning a subquery RTE. diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c index 0845b460e2c..0725d950c54 100644 --- a/src/backend/optimizer/path/tidpath.c +++ b/src/backend/optimizer/path/tidpath.c @@ -2,9 +2,9 @@ * * tidpath.c * Routines to determine which TID conditions are usable for scanning - * a given relation, and create TidPaths accordingly. + * a given relation, and create TidPaths and TidRangePaths accordingly. * - * What we are looking for here is WHERE conditions of the form + * For TidPaths, we look for WHERE conditions of the form * "CTID = pseudoconstant", which can be implemented by just fetching * the tuple directly via heap_fetch(). We can also handle OR'd conditions * such as (CTID = const1) OR (CTID = const2), as well as ScalarArrayOpExpr @@ -23,6 +23,9 @@ * a function, but in practice it works better to keep the special node * representation all the way through to execution. * + * Additionally, TidRangePaths may be created for conditions of the form + * "CTID relop pseudoconstant", where relop is one of >,>=,<,<=, and + * AND-clauses composed of such conditions. * * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -63,14 +66,14 @@ IsCTIDVar(Var *var, RelOptInfo *rel) /* * Check to see if a RestrictInfo is of the form - * CTID = pseudoconstant + * CTID OP pseudoconstant * or - * pseudoconstant = CTID - * where the CTID Var belongs to relation "rel", and nothing on the - * other side of the clause does. + * pseudoconstant OP CTID + * where OP is a binary operation, the CTID Var belongs to relation "rel", + * and nothing on the other side of the clause does. */ static bool -IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel) +IsBinaryTidClause(RestrictInfo *rinfo, RelOptInfo *rel) { OpExpr *node; Node *arg1, @@ -83,10 +86,9 @@ IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel) return false; node = (OpExpr *) rinfo->clause; - /* Operator must be tideq */ - if (node->opno != TIDEqualOperator) + /* OpExpr must have two arguments */ + if (list_length(node->args) != 2) return false; - Assert(list_length(node->args) == 2); arg1 = linitial(node->args); arg2 = lsecond(node->args); @@ -116,6 +118,50 @@ IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel) return true; /* success */ } +/* + * Check to see if a RestrictInfo is of the form + * CTID = pseudoconstant + * or + * pseudoconstant = CTID + * where the CTID Var belongs to relation "rel", and nothing on the + * other side of the clause does. + */ +static bool +IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel) +{ + if (!IsBinaryTidClause(rinfo, rel)) + return false; + + if (((OpExpr *) rinfo->clause)->opno == TIDEqualOperator) + return true; + + return false; +} + +/* + * Check to see if a RestrictInfo is of the form + * CTID OP pseudoconstant + * or + * pseudoconstant OP CTID + * where OP is a range operator such as <, <=, >, or >=, the CTID Var belongs + * to relation "rel", and nothing on the other side of the clause does. + */ +static bool +IsTidRangeClause(RestrictInfo *rinfo, RelOptInfo *rel) +{ + Oid opno; + + if (!IsBinaryTidClause(rinfo, rel)) + return false; + opno = ((OpExpr *) rinfo->clause)->opno; + + if (opno == TIDLessOperator || opno == TIDLessEqOperator || + opno == TIDGreaterOperator || opno == TIDGreaterEqOperator) + return true; + + return false; +} + /* * Check to see if a RestrictInfo is of the form * CTID = ANY (pseudoconstant_array) @@ -222,7 +268,7 @@ TidQualFromRestrictInfo(PlannerInfo *root, RestrictInfo *rinfo, RelOptInfo *rel) * * Returns a List of CTID qual RestrictInfos for the specified rel (with * implicit OR semantics across the list), or NIL if there are no usable - * conditions. + * equality conditions. * * This function is just concerned with handling AND/OR recursion. */ @@ -301,6 +347,34 @@ TidQualFromRestrictInfoList(PlannerInfo *root, List *rlist, RelOptInfo *rel) return rlst; } +/* + * Extract a set of CTID range conditions from implicit-AND List of RestrictInfos + * + * Returns a List of CTID range qual RestrictInfos for the specified rel + * (with implicit AND semantics across the list), or NIL if there are no + * usable range conditions or if the rel's table AM does not support TID range + * scans. + */ +static List * +TidRangeQualFromRestrictInfoList(List *rlist, RelOptInfo *rel) +{ + List *rlst = NIL; + ListCell *l; + + if ((rel->amflags & AMFLAG_HAS_TID_RANGE) == 0) + return NIL; + + foreach(l, rlist) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + + if (IsTidRangeClause(rinfo, rel)) + rlst = lappend(rlst, rinfo); + } + + return rlst; +} + /* * Given a list of join clauses involving our rel, create a parameterized * TidPath for each one that is a suitable TidEqual clause. @@ -385,6 +459,7 @@ void create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel) { List *tidquals; + List *tidrangequals; /* * If any suitable quals exist in the rel's baserestrict list, generate a @@ -392,7 +467,7 @@ create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel) */ tidquals = TidQualFromRestrictInfoList(root, rel->baserestrictinfo, rel); - if (tidquals) + if (tidquals != NIL) { /* * This path uses no join clauses, but it could still have required @@ -404,6 +479,26 @@ create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel) required_outer)); } + /* + * If there are range quals in the baserestrict list, generate a + * TidRangePath. + */ + tidrangequals = TidRangeQualFromRestrictInfoList(rel->baserestrictinfo, + rel); + + if (tidrangequals != NIL) + { + /* + * This path uses no join clauses, but it could still have required + * parameterization due to LATERAL refs in its tlist. + */ + Relids required_outer = rel->lateral_relids; + + add_path(rel, (Path *) create_tidrangescan_path(root, rel, + tidrangequals, + required_outer)); + } + /* * Try to generate parameterized TidPaths using equality clauses extracted * from EquivalenceClasses. (This is important since simple "t1.ctid = diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 6c8305c977e..906cab70532 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -129,6 +129,10 @@ static Plan *create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual, static void bitmap_subplan_mark_shared(Plan *plan); static TidScan *create_tidscan_plan(PlannerInfo *root, TidPath *best_path, List *tlist, List *scan_clauses); +static TidRangeScan *create_tidrangescan_plan(PlannerInfo *root, + TidRangePath *best_path, + List *tlist, + List *scan_clauses); static SubqueryScan *create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path, List *tlist, List *scan_clauses); @@ -193,6 +197,8 @@ static BitmapHeapScan *make_bitmap_heapscan(List *qptlist, Index scanrelid); static TidScan *make_tidscan(List *qptlist, List *qpqual, Index scanrelid, List *tidquals); +static TidRangeScan *make_tidrangescan(List *qptlist, List *qpqual, + Index scanrelid, List *tidrangequals); static SubqueryScan *make_subqueryscan(List *qptlist, List *qpqual, Index scanrelid, @@ -384,6 +390,7 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) case T_IndexOnlyScan: case T_BitmapHeapScan: case T_TidScan: + case T_TidRangeScan: case T_SubqueryScan: case T_FunctionScan: case T_TableFuncScan: @@ -679,6 +686,13 @@ create_scan_plan(PlannerInfo *root, Path *best_path, int flags) scan_clauses); break; + case T_TidRangeScan: + plan = (Plan *) create_tidrangescan_plan(root, + (TidRangePath *) best_path, + tlist, + scan_clauses); + break; + case T_SubqueryScan: plan = (Plan *) create_subqueryscan_plan(root, (SubqueryScanPath *) best_path, @@ -3436,6 +3450,71 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path, return scan_plan; } +/* + * create_tidrangescan_plan + * Returns a tidrangescan plan for the base relation scanned by 'best_path' + * with restriction clauses 'scan_clauses' and targetlist 'tlist'. + */ +static TidRangeScan * +create_tidrangescan_plan(PlannerInfo *root, TidRangePath *best_path, + List *tlist, List *scan_clauses) +{ + TidRangeScan *scan_plan; + Index scan_relid = best_path->path.parent->relid; + List *tidrangequals = best_path->tidrangequals; + + /* it should be a base rel... */ + Assert(scan_relid > 0); + Assert(best_path->path.parent->rtekind == RTE_RELATION); + + /* + * The qpqual list must contain all restrictions not enforced by the + * tidrangequals list. tidrangequals has AND semantics, so we can simply + * remove any qual that appears in it. + */ + { + List *qpqual = NIL; + ListCell *l; + + foreach(l, scan_clauses) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); + + if (rinfo->pseudoconstant) + continue; /* we may drop pseudoconstants here */ + if (list_member_ptr(tidrangequals, rinfo)) + continue; /* simple duplicate */ + qpqual = lappend(qpqual, rinfo); + } + scan_clauses = qpqual; + } + + /* Sort clauses into best execution order */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* Reduce RestrictInfo lists to bare expressions; ignore pseudoconstants */ + tidrangequals = extract_actual_clauses(tidrangequals, false); + scan_clauses = extract_actual_clauses(scan_clauses, false); + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->path.param_info) + { + tidrangequals = (List *) + replace_nestloop_params(root, (Node *) tidrangequals); + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + } + + scan_plan = make_tidrangescan(tlist, + scan_clauses, + scan_relid, + tidrangequals); + + copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); + + return scan_plan; +} + /* * create_subqueryscan_plan * Returns a subqueryscan plan for the base relation scanned by 'best_path' @@ -5369,6 +5448,25 @@ make_tidscan(List *qptlist, return node; } +static TidRangeScan * +make_tidrangescan(List *qptlist, + List *qpqual, + Index scanrelid, + List *tidrangequals) +{ + TidRangeScan *node = makeNode(TidRangeScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->tidrangequals = tidrangequals; + + return node; +} + static SubqueryScan * make_subqueryscan(List *qptlist, List *qpqual, diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index c3c36be13e1..42f088ad714 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -619,6 +619,22 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) rtoffset, 1); } break; + case T_TidRangeScan: + { + TidRangeScan *splan = (TidRangeScan *) plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, + rtoffset, NUM_EXEC_TLIST(plan)); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, + rtoffset, NUM_EXEC_QUAL(plan)); + splan->tidrangequals = + fix_scan_list(root, splan->tidrangequals, + rtoffset, 1); + } + break; case T_SubqueryScan: /* Needs special treatment, see comments below */ return set_subqueryscan_references(root, diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 54ef61bfb35..f3e46e0959e 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -2367,6 +2367,12 @@ finalize_plan(PlannerInfo *root, Plan *plan, context.paramids = bms_add_members(context.paramids, scan_params); break; + case T_TidRangeScan: + finalize_primnode((Node *) ((TidRangeScan *) plan)->tidrangequals, + &context); + context.paramids = bms_add_members(context.paramids, scan_params); + break; + case T_SubqueryScan: { SubqueryScan *sscan = (SubqueryScan *) plan; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 9be0c4a6af5..69b83071cf2 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1203,6 +1203,35 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals, return pathnode; } +/* + * create_tidrangescan_path + * Creates a path corresponding to a scan by a range of TIDs, returning + * the pathnode. + */ +TidRangePath * +create_tidrangescan_path(PlannerInfo *root, RelOptInfo *rel, + List *tidrangequals, Relids required_outer) +{ + TidRangePath *pathnode = makeNode(TidRangePath); + + pathnode->path.pathtype = T_TidRangeScan; + pathnode->path.parent = rel; + pathnode->path.pathtarget = rel->reltarget; + pathnode->path.param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel; + pathnode->path.parallel_workers = 0; + pathnode->path.pathkeys = NIL; /* always unordered */ + + pathnode->tidrangequals = tidrangequals; + + cost_tidrangescan(&pathnode->path, root, rel, tidrangequals, + pathnode->path.param_info); + + return pathnode; +} + /* * create_append_path * Creates a path corresponding to an Append plan, returning the diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 177e6e336ab..c5947fa4185 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -467,6 +467,12 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, /* Collect info about relation's foreign keys, if relevant */ get_relation_foreign_keys(root, rel, relation, inhparent); + /* Collect info about functions implemented by the rel's table AM. */ + if (relation->rd_tableam && + relation->rd_tableam->scan_set_tidrange != NULL && + relation->rd_tableam->scan_getnextslot_tidrange != NULL) + rel->amflags |= AMFLAG_HAS_TID_RANGE; + /* * Collect info about relation's partitioning scheme, if any. Only * inheritance parents may be partitioned. diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 731ff708b90..345c877aeb3 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -234,6 +234,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->subroot = NULL; rel->subplan_params = NIL; rel->rel_parallel_workers = -1; /* set up in get_relation_info */ + rel->amflags = 0; rel->serverid = InvalidOid; rel->userid = rte->checkAsUser; rel->useridiscurrent = false; @@ -646,6 +647,7 @@ build_join_rel(PlannerInfo *root, joinrel->subroot = NULL; joinrel->subplan_params = NIL; joinrel->rel_parallel_workers = -1; + joinrel->amflags = 0; joinrel->serverid = InvalidOid; joinrel->userid = InvalidOid; joinrel->useridiscurrent = false; @@ -826,6 +828,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->eclass_indexes = NULL; joinrel->subroot = NULL; joinrel->subplan_params = NIL; + joinrel->amflags = 0; joinrel->serverid = InvalidOid; joinrel->userid = InvalidOid; joinrel->useridiscurrent = false; diff --git a/src/backend/storage/page/itemptr.c b/src/backend/storage/page/itemptr.c index 55759c383b6..f40d6c22a02 100644 --- a/src/backend/storage/page/itemptr.c +++ b/src/backend/storage/page/itemptr.c @@ -71,3 +71,62 @@ ItemPointerCompare(ItemPointer arg1, ItemPointer arg2) else return 0; } + +/* + * ItemPointerInc + * Increment 'pointer' by 1 only paying attention to the ItemPointer's + * type's range limits and not MaxOffsetNumber and FirstOffsetNumber. + * This may result in 'pointer' becoming !OffsetNumberIsValid. + * + * If the pointer is already the maximum possible values permitted by the + * range of the ItemPointer's types, then do nothing. + */ +void +ItemPointerInc(ItemPointer pointer) +{ + BlockNumber blk = ItemPointerGetBlockNumberNoCheck(pointer); + OffsetNumber off = ItemPointerGetOffsetNumberNoCheck(pointer); + + if (off == PG_UINT16_MAX) + { + if (blk != InvalidBlockNumber) + { + off = 0; + blk++; + } + } + else + off++; + + ItemPointerSet(pointer, blk, off); +} + +/* + * ItemPointerDec + * Decrement 'pointer' by 1 only paying attention to the ItemPointer's + * type's range limits and not MaxOffsetNumber and FirstOffsetNumber. + * This may result in 'pointer' becoming !OffsetNumberIsValid. + * + * If the pointer is already the minimum possible values permitted by the + * range of the ItemPointer's types, then do nothing. This does rely on + * FirstOffsetNumber being 1 rather than 0. + */ +void +ItemPointerDec(ItemPointer pointer) +{ + BlockNumber blk = ItemPointerGetBlockNumberNoCheck(pointer); + OffsetNumber off = ItemPointerGetOffsetNumberNoCheck(pointer); + + if (off == 0) + { + if (blk != 0) + { + off = PG_UINT16_MAX; + blk--; + } + } + else + off--; + + ItemPointerSet(pointer, blk, off); +} diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 60e5cd3109b..bc0936bc2de 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -121,7 +121,11 @@ extern void heap_endscan(TableScanDesc scan); extern HeapTuple heap_getnext(TableScanDesc scan, ScanDirection direction); extern bool heap_getnextslot(TableScanDesc sscan, ScanDirection direction, struct TupleTableSlot *slot); - +extern void heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid, + ItemPointer maxtid); +extern bool heap_getnextslot_tidrange(TableScanDesc sscan, + ScanDirection direction, + TupleTableSlot *slot); extern bool heap_fetch(Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf); extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation, diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 005f3fdd2b8..0ef6d8edf7f 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -36,6 +36,10 @@ typedef struct TableScanDescData int rs_nkeys; /* number of scan keys */ struct ScanKeyData *rs_key; /* array of scan key descriptors */ + /* Range of ItemPointers for table_scan_getnextslot_tidrange() to scan. */ + ItemPointerData rs_mintid; + ItemPointerData rs_maxtid; + /* * Information about type and behaviour of the scan, a bitmask of members * of the ScanOptions enum (see tableam.h). diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 33bffb6815b..414b6b4d578 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -49,18 +49,19 @@ typedef enum ScanOptions SO_TYPE_BITMAPSCAN = 1 << 1, SO_TYPE_SAMPLESCAN = 1 << 2, SO_TYPE_TIDSCAN = 1 << 3, - SO_TYPE_ANALYZE = 1 << 4, + SO_TYPE_TIDRANGESCAN = 1 << 4, + SO_TYPE_ANALYZE = 1 << 5, /* several of SO_ALLOW_* may be specified */ /* allow or disallow use of access strategy */ - SO_ALLOW_STRAT = 1 << 5, + SO_ALLOW_STRAT = 1 << 6, /* report location to syncscan logic? */ - SO_ALLOW_SYNC = 1 << 6, + SO_ALLOW_SYNC = 1 << 7, /* verify visibility page-at-a-time? */ - SO_ALLOW_PAGEMODE = 1 << 7, + SO_ALLOW_PAGEMODE = 1 << 8, /* unregister snapshot at scan end? */ - SO_TEMP_SNAPSHOT = 1 << 8 + SO_TEMP_SNAPSHOT = 1 << 9 } ScanOptions; /* @@ -325,6 +326,34 @@ typedef struct TableAmRoutine ScanDirection direction, TupleTableSlot *slot); + /*----------- + * Optional functions to provide scanning for ranges of ItemPointers. + * Implementations must either provide both of these functions, or neither + * of them. + * + * Implementations of scan_set_tidrange must themselves handle + * ItemPointers of any value. i.e, they must handle each of the following: + * + * 1) mintid or maxtid is beyond the end of the table; and + * 2) mintid is above maxtid; and + * 3) item offset for mintid or maxtid is beyond the maximum offset + * allowed by the AM. + * + * Implementations can assume that scan_set_tidrange is always called + * before can_getnextslot_tidrange or after scan_rescan and before any + * further calls to scan_getnextslot_tidrange. + */ + void (*scan_set_tidrange) (TableScanDesc scan, + ItemPointer mintid, + ItemPointer maxtid); + + /* + * Return next tuple from `scan` that's in the range of TIDs defined by + * scan_set_tidrange. + */ + bool (*scan_getnextslot_tidrange) (TableScanDesc scan, + ScanDirection direction, + TupleTableSlot *slot); /* ------------------------------------------------------------------------ * Parallel table scan related functions. @@ -1015,6 +1044,64 @@ table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableS return sscan->rs_rd->rd_tableam->scan_getnextslot(sscan, direction, slot); } +/* ---------------------------------------------------------------------------- + * TID Range scanning related functions. + * ---------------------------------------------------------------------------- + */ + +/* + * table_beginscan_tidrange is the entry point for setting up a TableScanDesc + * for a TID range scan. + */ +static inline TableScanDesc +table_beginscan_tidrange(Relation rel, Snapshot snapshot, + ItemPointer mintid, + ItemPointer maxtid) +{ + TableScanDesc sscan; + uint32 flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE; + + sscan = rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags); + + /* Set the range of TIDs to scan */ + sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid); + + return sscan; +} + +/* + * table_rescan_tidrange resets the scan position and sets the minimum and + * maximum TID range to scan for a TableScanDesc created by + * table_beginscan_tidrange. + */ +static inline void +table_rescan_tidrange(TableScanDesc sscan, ItemPointer mintid, + ItemPointer maxtid) +{ + /* Ensure table_beginscan_tidrange() was used. */ + Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0); + + sscan->rs_rd->rd_tableam->scan_rescan(sscan, NULL, false, false, false, false); + sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid); +} + +/* + * Fetch the next tuple from `sscan` for a TID range scan created by + * table_beginscan_tidrange(). Stores the tuple in `slot` and returns true, + * or returns false if no more tuples exist in the range. + */ +static inline bool +table_scan_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction, + TupleTableSlot *slot) +{ + /* Ensure table_beginscan_tidrange() was used. */ + Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0); + + return sscan->rs_rd->rd_tableam->scan_getnextslot_tidrange(sscan, + direction, + slot); +} + /* ---------------------------------------------------------------------------- * Parallel table scan related functions. diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat index 0d4eac8f963..85395a81eec 100644 --- a/src/include/catalog/pg_operator.dat +++ b/src/include/catalog/pg_operator.dat @@ -237,15 +237,15 @@ oprname => '<', oprleft => 'tid', oprright => 'tid', oprresult => 'bool', oprcom => '>(tid,tid)', oprnegate => '>=(tid,tid)', oprcode => 'tidlt', oprrest => 'scalarltsel', oprjoin => 'scalarltjoinsel' }, -{ oid => '2800', descr => 'greater than', +{ oid => '2800', oid_symbol => 'TIDGreaterOperator', descr => 'greater than', oprname => '>', oprleft => 'tid', oprright => 'tid', oprresult => 'bool', oprcom => '<(tid,tid)', oprnegate => '<=(tid,tid)', oprcode => 'tidgt', oprrest => 'scalargtsel', oprjoin => 'scalargtjoinsel' }, -{ oid => '2801', descr => 'less than or equal', +{ oid => '2801', oid_symbol => 'TIDLessEqOperator', descr => 'less than or equal', oprname => '<=', oprleft => 'tid', oprright => 'tid', oprresult => 'bool', oprcom => '>=(tid,tid)', oprnegate => '>(tid,tid)', oprcode => 'tidle', oprrest => 'scalarlesel', oprjoin => 'scalarlejoinsel' }, -{ oid => '2802', descr => 'greater than or equal', +{ oid => '2802', oid_symbol => 'TIDGreaterEqOperator', descr => 'greater than or equal', oprname => '>=', oprleft => 'tid', oprright => 'tid', oprresult => 'bool', oprcom => '<=(tid,tid)', oprnegate => '<(tid,tid)', oprcode => 'tidge', oprrest => 'scalargesel', oprjoin => 'scalargejoinsel' }, diff --git a/src/include/executor/nodeTidrangescan.h b/src/include/executor/nodeTidrangescan.h new file mode 100644 index 00000000000..a57a47ed37f --- /dev/null +++ b/src/include/executor/nodeTidrangescan.h @@ -0,0 +1,24 @@ +/*------------------------------------------------------------------------- + * + * nodeTidrangescan.h + * + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/executor/nodeTidrangescan.h + * + *------------------------------------------------------------------------- + */ +#ifndef NODETIDRANGESCAN_H +#define NODETIDRANGESCAN_H + +#include "nodes/execnodes.h" + +extern TidRangeScanState *ExecInitTidRangeScan(TidRangeScan *node, + EState *estate, int eflags); +extern void ExecEndTidRangeScan(TidRangeScanState *node); +extern void ExecReScanTidRangeScan(TidRangeScanState *node); + +#endif /* NODETIDRANGESCAN_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 943931f65d0..e31ad6204e6 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1624,6 +1624,24 @@ typedef struct TidScanState HeapTupleData tss_htup; } TidScanState; +/* ---------------- + * TidRangeScanState information + * + * trss_tidexprs list of TidOpExpr structs (see nodeTidrangescan.c) + * trss_mintid the lowest TID in the scan range + * trss_maxtid the highest TID in the scan range + * trss_inScan is a scan currently in progress? + * ---------------- + */ +typedef struct TidRangeScanState +{ + ScanState ss; /* its first field is NodeTag */ + List *trss_tidexprs; + ItemPointerData trss_mintid; + ItemPointerData trss_maxtid; + bool trss_inScan; +} TidRangeScanState; + /* ---------------- * SubqueryScanState information * diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 40ae489c235..e22df890ef4 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -59,6 +59,7 @@ typedef enum NodeTag T_BitmapIndexScan, T_BitmapHeapScan, T_TidScan, + T_TidRangeScan, T_SubqueryScan, T_FunctionScan, T_ValuesScan, @@ -116,6 +117,7 @@ typedef enum NodeTag T_BitmapIndexScanState, T_BitmapHeapScanState, T_TidScanState, + T_TidRangeScanState, T_SubqueryScanState, T_FunctionScanState, T_TableFuncScanState, @@ -229,6 +231,7 @@ typedef enum NodeTag T_BitmapAndPath, T_BitmapOrPath, T_TidPath, + T_TidRangePath, T_SubqueryScanPath, T_ForeignPath, T_CustomPath, diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 0ec93e648c4..b8a6e0fc9f4 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -621,6 +621,10 @@ typedef struct PartitionSchemeData *PartitionScheme; * to simplify matching join clauses to those lists. *---------- */ + +/* Bitmask of flags supported by table AMs */ +#define AMFLAG_HAS_TID_RANGE (1 << 0) + typedef enum RelOptKind { RELOPT_BASEREL, @@ -710,6 +714,8 @@ typedef struct RelOptInfo PlannerInfo *subroot; /* if subquery */ List *subplan_params; /* if subquery */ int rel_parallel_workers; /* wanted number of parallel workers */ + uint32 amflags; /* Bitmask of optional features supported by + * the table AM */ /* Information about foreign tables and foreign joins */ Oid serverid; /* identifies server for the table or join */ @@ -1323,6 +1329,18 @@ typedef struct TidPath List *tidquals; /* qual(s) involving CTID = something */ } TidPath; +/* + * TidRangePath represents a scan by a continguous range of TIDs + * + * tidrangequals is an implicitly AND'ed list of qual expressions of the form + * "CTID relop pseudoconstant", where relop is one of >,>=,<,<=. + */ +typedef struct TidRangePath +{ + Path path; + List *tidrangequals; +} TidRangePath; + /* * SubqueryScanPath represents a scan of an unflattened subquery-in-FROM * diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 43160439f05..6e62104d0b7 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -485,6 +485,19 @@ typedef struct TidScan List *tidquals; /* qual(s) involving CTID = something */ } TidScan; +/* ---------------- + * tid range scan node + * + * tidrangequals is an implicitly AND'ed list of qual expressions of the form + * "CTID relop pseudoconstant", where relop is one of >,>=,<,<=. + * ---------------- + */ +typedef struct TidRangeScan +{ + Scan scan; + List *tidrangequals; /* qual(s) involving CTID op something */ +} TidRangeScan; + /* ---------------- * subquery scan node * diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index ed2e4af4be7..1be93be0983 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -83,6 +83,9 @@ extern void cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root); extern void cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec); extern void cost_tidscan(Path *path, PlannerInfo *root, RelOptInfo *baserel, List *tidquals, ParamPathInfo *param_info); +extern void cost_tidrangescan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, List *tidrangequals, + ParamPathInfo *param_info); extern void cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root, RelOptInfo *baserel, ParamPathInfo *param_info); extern void cost_functionscan(Path *path, PlannerInfo *root, diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 8dfc36a4e15..54f4b782fc7 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -63,6 +63,10 @@ extern BitmapOrPath *create_bitmap_or_path(PlannerInfo *root, List *bitmapquals); extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals, Relids required_outer); +extern TidRangePath *create_tidrangescan_path(PlannerInfo *root, + RelOptInfo *rel, + List *tidrangequals, + Relids required_outer); extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *partial_subpaths, List *pathkeys, Relids required_outer, diff --git a/src/include/storage/itemptr.h b/src/include/storage/itemptr.h index 0e6990140b8..cd4b8fbacb2 100644 --- a/src/include/storage/itemptr.h +++ b/src/include/storage/itemptr.h @@ -202,5 +202,7 @@ typedef ItemPointerData *ItemPointer; extern bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2); extern int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2); +extern void ItemPointerInc(ItemPointer pointer); +extern void ItemPointerDec(ItemPointer pointer); #endif /* ITEMPTR_H */ diff --git a/src/test/regress/expected/tidrangescan.out b/src/test/regress/expected/tidrangescan.out new file mode 100644 index 00000000000..721f3b94e04 --- /dev/null +++ b/src/test/regress/expected/tidrangescan.out @@ -0,0 +1,300 @@ +-- tests for tidrangescans +SET enable_seqscan TO off; +CREATE TABLE tidrangescan(id integer, data text); +-- empty table +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid < '(1, 0)'; + QUERY PLAN +----------------------------------- + Tid Range Scan on tidrangescan + TID Cond: (ctid < '(1,0)'::tid) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE ctid < '(1, 0)'; + ctid +------ +(0 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid > '(9, 0)'; + QUERY PLAN +----------------------------------- + Tid Range Scan on tidrangescan + TID Cond: (ctid > '(9,0)'::tid) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE ctid > '(9, 0)'; + ctid +------ +(0 rows) + +-- insert enough tuples to fill at least two pages +INSERT INTO tidrangescan SELECT i,repeat('x', 100) FROM generate_series(1,200) AS s(i); +-- remove all tuples after the 10th tuple on each page. Trying to ensure +-- we get the same layout with all CPU architectures and smaller than standard +-- page sizes. +DELETE FROM tidrangescan +WHERE substring(ctid::text FROM ',(\d+)\)')::integer > 10 OR substring(ctid::text FROM '\((\d+),')::integer > 2; +VACUUM tidrangescan; +-- range scans with upper bound +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)'; + QUERY PLAN +----------------------------------- + Tid Range Scan on tidrangescan + TID Cond: (ctid < '(1,0)'::tid) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)'; + ctid +-------- + (0,1) + (0,2) + (0,3) + (0,4) + (0,5) + (0,6) + (0,7) + (0,8) + (0,9) + (0,10) +(10 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid <= '(1,5)'; + QUERY PLAN +------------------------------------ + Tid Range Scan on tidrangescan + TID Cond: (ctid <= '(1,5)'::tid) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE ctid <= '(1,5)'; + ctid +-------- + (0,1) + (0,2) + (0,3) + (0,4) + (0,5) + (0,6) + (0,7) + (0,8) + (0,9) + (0,10) + (1,1) + (1,2) + (1,3) + (1,4) + (1,5) +(15 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)'; + QUERY PLAN +----------------------------------- + Tid Range Scan on tidrangescan + TID Cond: (ctid < '(0,0)'::tid) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)'; + ctid +------ +(0 rows) + +-- range scans with lower bound +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid > '(2,8)'; + QUERY PLAN +----------------------------------- + Tid Range Scan on tidrangescan + TID Cond: (ctid > '(2,8)'::tid) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE ctid > '(2,8)'; + ctid +-------- + (2,9) + (2,10) +(2 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE '(2,8)' < ctid; + QUERY PLAN +----------------------------------- + Tid Range Scan on tidrangescan + TID Cond: ('(2,8)'::tid < ctid) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE '(2,8)' < ctid; + ctid +-------- + (2,9) + (2,10) +(2 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid >= '(2,8)'; + QUERY PLAN +------------------------------------ + Tid Range Scan on tidrangescan + TID Cond: (ctid >= '(2,8)'::tid) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE ctid >= '(2,8)'; + ctid +-------- + (2,8) + (2,9) + (2,10) +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid >= '(100,0)'; + QUERY PLAN +-------------------------------------- + Tid Range Scan on tidrangescan + TID Cond: (ctid >= '(100,0)'::tid) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE ctid >= '(100,0)'; + ctid +------ +(0 rows) + +-- range scans with both bounds +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid > '(1,4)' AND '(1,7)' >= ctid; + QUERY PLAN +---------------------------------------------------------------- + Tid Range Scan on tidrangescan + TID Cond: ((ctid > '(1,4)'::tid) AND ('(1,7)'::tid >= ctid)) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE ctid > '(1,4)' AND '(1,7)' >= ctid; + ctid +------- + (1,5) + (1,6) + (1,7) +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE '(1,7)' >= ctid AND ctid > '(1,4)'; + QUERY PLAN +---------------------------------------------------------------- + Tid Range Scan on tidrangescan + TID Cond: (('(1,7)'::tid >= ctid) AND (ctid > '(1,4)'::tid)) +(2 rows) + +SELECT ctid FROM tidrangescan WHERE '(1,7)' >= ctid AND ctid > '(1,4)'; + ctid +------- + (1,5) + (1,6) + (1,7) +(3 rows) + +-- extreme offsets +SELECT ctid FROM tidrangescan WHERE ctid > '(0,65535)' AND ctid < '(1,0)' LIMIT 1; + ctid +------ +(0 rows) + +SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)' LIMIT 1; + ctid +------ +(0 rows) + +SELECT ctid FROM tidrangescan WHERE ctid > '(4294967295,65535)'; + ctid +------ +(0 rows) + +SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)'; + ctid +------ +(0 rows) + +-- NULLs in the range cannot return tuples +SELECT ctid FROM tidrangescan WHERE ctid >= (SELECT NULL::tid); + ctid +------ +(0 rows) + +-- rescans +EXPLAIN (COSTS OFF) +SELECT t.ctid,t2.c FROM tidrangescan t, +LATERAL (SELECT count(*) c FROM tidrangescan t2 WHERE t2.ctid <= t.ctid) t2 +WHERE t.ctid < '(1,0)'; + QUERY PLAN +----------------------------------------------- + Nested Loop + -> Tid Range Scan on tidrangescan t + TID Cond: (ctid < '(1,0)'::tid) + -> Aggregate + -> Tid Range Scan on tidrangescan t2 + TID Cond: (ctid <= t.ctid) +(6 rows) + +SELECT t.ctid,t2.c FROM tidrangescan t, +LATERAL (SELECT count(*) c FROM tidrangescan t2 WHERE t2.ctid <= t.ctid) t2 +WHERE t.ctid < '(1,0)'; + ctid | c +--------+---- + (0,1) | 1 + (0,2) | 2 + (0,3) | 3 + (0,4) | 4 + (0,5) | 5 + (0,6) | 6 + (0,7) | 7 + (0,8) | 8 + (0,9) | 9 + (0,10) | 10 +(10 rows) + +-- cursors +-- Ensure we get a TID Range scan without a Materialize node. +EXPLAIN (COSTS OFF) +DECLARE c SCROLL CURSOR FOR SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)'; + QUERY PLAN +----------------------------------- + Tid Range Scan on tidrangescan + TID Cond: (ctid < '(1,0)'::tid) +(2 rows) + +BEGIN; +DECLARE c SCROLL CURSOR FOR SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)'; +FETCH NEXT c; + ctid +------- + (0,1) +(1 row) + +FETCH NEXT c; + ctid +------- + (0,2) +(1 row) + +FETCH PRIOR c; + ctid +------- + (0,1) +(1 row) + +FETCH FIRST c; + ctid +------- + (0,1) +(1 row) + +FETCH LAST c; + ctid +-------- + (0,10) +(1 row) + +COMMIT; +DROP TABLE tidrangescan; +RESET enable_seqscan; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 12bb67e4911..c77b0d7342f 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -80,7 +80,7 @@ test: brin gin gist spgist privileges init_privs security_label collate matview # ---------- # Another group of parallel tests # ---------- -test: create_table_like alter_generic alter_operator misc async dbsize misc_functions sysviews tsrf tid tidscan collate.icu.utf8 incremental_sort +test: create_table_like alter_generic alter_operator misc async dbsize misc_functions sysviews tsrf tid tidscan tidrangescan collate.icu.utf8 incremental_sort # rules cannot run concurrently with any test that creates # a view or rule in the public schema diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 59b416fd80c..0264a97324c 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -138,6 +138,7 @@ test: sysviews test: tsrf test: tid test: tidscan +test: tidrangescan test: collate.icu.utf8 test: rules test: psql diff --git a/src/test/regress/sql/tidrangescan.sql b/src/test/regress/sql/tidrangescan.sql new file mode 100644 index 00000000000..ac09ebb6262 --- /dev/null +++ b/src/test/regress/sql/tidrangescan.sql @@ -0,0 +1,101 @@ +-- tests for tidrangescans + +SET enable_seqscan TO off; +CREATE TABLE tidrangescan(id integer, data text); + +-- empty table +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid < '(1, 0)'; +SELECT ctid FROM tidrangescan WHERE ctid < '(1, 0)'; + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid > '(9, 0)'; +SELECT ctid FROM tidrangescan WHERE ctid > '(9, 0)'; + +-- insert enough tuples to fill at least two pages +INSERT INTO tidrangescan SELECT i,repeat('x', 100) FROM generate_series(1,200) AS s(i); + +-- remove all tuples after the 10th tuple on each page. Trying to ensure +-- we get the same layout with all CPU architectures and smaller than standard +-- page sizes. +DELETE FROM tidrangescan +WHERE substring(ctid::text FROM ',(\d+)\)')::integer > 10 OR substring(ctid::text FROM '\((\d+),')::integer > 2; +VACUUM tidrangescan; + +-- range scans with upper bound +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)'; +SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)'; + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid <= '(1,5)'; +SELECT ctid FROM tidrangescan WHERE ctid <= '(1,5)'; + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)'; +SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)'; + +-- range scans with lower bound +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid > '(2,8)'; +SELECT ctid FROM tidrangescan WHERE ctid > '(2,8)'; + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE '(2,8)' < ctid; +SELECT ctid FROM tidrangescan WHERE '(2,8)' < ctid; + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid >= '(2,8)'; +SELECT ctid FROM tidrangescan WHERE ctid >= '(2,8)'; + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid >= '(100,0)'; +SELECT ctid FROM tidrangescan WHERE ctid >= '(100,0)'; + +-- range scans with both bounds +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE ctid > '(1,4)' AND '(1,7)' >= ctid; +SELECT ctid FROM tidrangescan WHERE ctid > '(1,4)' AND '(1,7)' >= ctid; + +EXPLAIN (COSTS OFF) +SELECT ctid FROM tidrangescan WHERE '(1,7)' >= ctid AND ctid > '(1,4)'; +SELECT ctid FROM tidrangescan WHERE '(1,7)' >= ctid AND ctid > '(1,4)'; + +-- extreme offsets +SELECT ctid FROM tidrangescan WHERE ctid > '(0,65535)' AND ctid < '(1,0)' LIMIT 1; +SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)' LIMIT 1; + +SELECT ctid FROM tidrangescan WHERE ctid > '(4294967295,65535)'; +SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)'; + +-- NULLs in the range cannot return tuples +SELECT ctid FROM tidrangescan WHERE ctid >= (SELECT NULL::tid); + +-- rescans +EXPLAIN (COSTS OFF) +SELECT t.ctid,t2.c FROM tidrangescan t, +LATERAL (SELECT count(*) c FROM tidrangescan t2 WHERE t2.ctid <= t.ctid) t2 +WHERE t.ctid < '(1,0)'; + +SELECT t.ctid,t2.c FROM tidrangescan t, +LATERAL (SELECT count(*) c FROM tidrangescan t2 WHERE t2.ctid <= t.ctid) t2 +WHERE t.ctid < '(1,0)'; + +-- cursors + +-- Ensure we get a TID Range scan without a Materialize node. +EXPLAIN (COSTS OFF) +DECLARE c SCROLL CURSOR FOR SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)'; + +BEGIN; +DECLARE c SCROLL CURSOR FOR SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)'; +FETCH NEXT c; +FETCH NEXT c; +FETCH PRIOR c; +FETCH FIRST c; +FETCH LAST c; +COMMIT; + +DROP TABLE tidrangescan; + +RESET enable_seqscan;