mirror of
https://github.com/postgres/postgres.git
synced 2025-05-03 22:24:49 +03:00
tableam: Avoid relying on relation size to determine validity of tids.
Instead add a tableam callback to do so. To avoid adding per validation overhead, pass a scan to tuple_tid_valid. In heap's case we'd otherwise incurred a RelationGetNumberOfBlocks() call for each tid - which'd have added noticable overhead to nodeTidscan.c. Author: Andres Freund Reviewed-By: Ashwin Agrawal Discussion: https://postgr.es/m/20190515185447.gno2jtqxyktylyvs@alap3.anarazel.de
This commit is contained in:
parent
7f44ede594
commit
147e3722f7
@ -1654,8 +1654,8 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
|
|||||||
/*
|
/*
|
||||||
* heap_get_latest_tid - get the latest tid of a specified tuple
|
* heap_get_latest_tid - get the latest tid of a specified tuple
|
||||||
*
|
*
|
||||||
* Actually, this gets the latest version that is visible according to
|
* Actually, this gets the latest version that is visible according to the
|
||||||
* the passed snapshot. You can pass SnapshotDirty to get the very latest,
|
* scan's snapshot. Create a scan using SnapshotDirty to get the very latest,
|
||||||
* possibly uncommitted version.
|
* possibly uncommitted version.
|
||||||
*
|
*
|
||||||
* *tid is both an input and an output parameter: it is updated to
|
* *tid is both an input and an output parameter: it is updated to
|
||||||
@ -1663,28 +1663,20 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
|
|||||||
* if no version of the row passes the snapshot test.
|
* if no version of the row passes the snapshot test.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
heap_get_latest_tid(Relation relation,
|
heap_get_latest_tid(TableScanDesc sscan,
|
||||||
Snapshot snapshot,
|
|
||||||
ItemPointer tid)
|
ItemPointer tid)
|
||||||
{
|
{
|
||||||
BlockNumber blk;
|
Relation relation = sscan->rs_rd;
|
||||||
|
Snapshot snapshot = sscan->rs_snapshot;
|
||||||
ItemPointerData ctid;
|
ItemPointerData ctid;
|
||||||
TransactionId priorXmax;
|
TransactionId priorXmax;
|
||||||
|
|
||||||
/* this is to avoid Assert failures on bad input */
|
|
||||||
if (!ItemPointerIsValid(tid))
|
|
||||||
return;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Since this can be called with user-supplied TID, don't trust the input
|
* table_get_latest_tid verified that the passed in tid is valid. Assume
|
||||||
* too much. (RelationGetNumberOfBlocks is an expensive check, so we
|
* that t_ctid links are valid however - there shouldn't be invalid ones
|
||||||
* don't check t_ctid links again this way. Note that it would not do to
|
* in the table.
|
||||||
* call it just once and save the result, either.)
|
|
||||||
*/
|
*/
|
||||||
blk = ItemPointerGetBlockNumber(tid);
|
Assert(ItemPointerIsValid(tid));
|
||||||
if (blk >= RelationGetNumberOfBlocks(relation))
|
|
||||||
elog(ERROR, "block number %u is out of range for relation \"%s\"",
|
|
||||||
blk, RelationGetRelationName(relation));
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
|
* Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
|
||||||
|
@ -204,6 +204,15 @@ heapam_fetch_row_version(Relation relation,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
|
||||||
|
{
|
||||||
|
HeapScanDesc hscan = (HeapScanDesc) scan;
|
||||||
|
|
||||||
|
return ItemPointerIsValid(tid) &&
|
||||||
|
ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
|
heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
|
||||||
Snapshot snapshot)
|
Snapshot snapshot)
|
||||||
@ -2568,6 +2577,7 @@ static const TableAmRoutine heapam_methods = {
|
|||||||
|
|
||||||
.tuple_fetch_row_version = heapam_fetch_row_version,
|
.tuple_fetch_row_version = heapam_fetch_row_version,
|
||||||
.tuple_get_latest_tid = heap_get_latest_tid,
|
.tuple_get_latest_tid = heap_get_latest_tid,
|
||||||
|
.tuple_tid_valid = heapam_tuple_tid_valid,
|
||||||
.tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
|
.tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
|
||||||
.compute_xid_horizon_for_tuples = heap_compute_xid_horizon_for_tuples,
|
.compute_xid_horizon_for_tuples = heap_compute_xid_horizon_for_tuples,
|
||||||
|
|
||||||
|
@ -213,6 +213,33 @@ table_index_fetch_tuple_check(Relation rel,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------------------
|
||||||
|
* Functions for non-modifying operations on individual tuples
|
||||||
|
* ------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
void
|
||||||
|
table_get_latest_tid(TableScanDesc scan, ItemPointer tid)
|
||||||
|
{
|
||||||
|
Relation rel = scan->rs_rd;
|
||||||
|
const TableAmRoutine *tableam = rel->rd_tableam;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Since this can be called with user-supplied TID, don't trust the input
|
||||||
|
* too much.
|
||||||
|
*/
|
||||||
|
if (!tableam->tuple_tid_valid(scan, tid))
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
|
errmsg("tid (%u, %u) is not valid for relation for relation \"%s\"",
|
||||||
|
ItemPointerGetBlockNumberNoCheck(tid),
|
||||||
|
ItemPointerGetOffsetNumberNoCheck(tid),
|
||||||
|
RelationGetRelationName(rel))));
|
||||||
|
|
||||||
|
return tableam->tuple_get_latest_tid(scan, tid);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------------
|
/* ----------------------------------------------------------------------------
|
||||||
* Functions to make modifications a bit simpler.
|
* Functions to make modifications a bit simpler.
|
||||||
* ----------------------------------------------------------------------------
|
* ----------------------------------------------------------------------------
|
||||||
|
@ -129,19 +129,23 @@ static void
|
|||||||
TidListEval(TidScanState *tidstate)
|
TidListEval(TidScanState *tidstate)
|
||||||
{
|
{
|
||||||
ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
|
ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
|
||||||
BlockNumber nblocks;
|
TableScanDesc scan;
|
||||||
ItemPointerData *tidList;
|
ItemPointerData *tidList;
|
||||||
int numAllocTids;
|
int numAllocTids;
|
||||||
int numTids;
|
int numTids;
|
||||||
ListCell *l;
|
ListCell *l;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We silently discard any TIDs that are out of range at the time of scan
|
* Start scan on-demand - initializing a scan isn't free (e.g. heap stats
|
||||||
* start. (Since we hold at least AccessShareLock on the table, it won't
|
* the size of the table), so it makes sense to delay that until needed -
|
||||||
* be possible for someone to truncate away the blocks we intend to
|
* the node might never get executed.
|
||||||
* visit.)
|
|
||||||
*/
|
*/
|
||||||
nblocks = RelationGetNumberOfBlocks(tidstate->ss.ss_currentRelation);
|
if (tidstate->ss.ss_currentScanDesc == NULL)
|
||||||
|
tidstate->ss.ss_currentScanDesc =
|
||||||
|
table_beginscan(tidstate->ss.ss_currentRelation,
|
||||||
|
tidstate->ss.ps.state->es_snapshot,
|
||||||
|
0, NULL);
|
||||||
|
scan = tidstate->ss.ss_currentScanDesc;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We initialize the array with enough slots for the case that all quals
|
* We initialize the array with enough slots for the case that all quals
|
||||||
@ -165,19 +169,27 @@ TidListEval(TidScanState *tidstate)
|
|||||||
DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
|
DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
|
||||||
econtext,
|
econtext,
|
||||||
&isNull));
|
&isNull));
|
||||||
if (!isNull &&
|
if (isNull)
|
||||||
ItemPointerIsValid(itemptr) &&
|
continue;
|
||||||
ItemPointerGetBlockNumber(itemptr) < nblocks)
|
|
||||||
|
/*
|
||||||
|
* We silently discard any TIDs that the AM considers invalid
|
||||||
|
* (E.g. for heap, they could be out of range at the time of scan
|
||||||
|
* start. Since we hold at least AccessShareLock on the table, it
|
||||||
|
* won't be possible for someone to truncate away the blocks we
|
||||||
|
* intend to visit.).
|
||||||
|
*/
|
||||||
|
if (!table_tuple_tid_valid(scan, itemptr))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (numTids >= numAllocTids)
|
||||||
{
|
{
|
||||||
if (numTids >= numAllocTids)
|
numAllocTids *= 2;
|
||||||
{
|
tidList = (ItemPointerData *)
|
||||||
numAllocTids *= 2;
|
repalloc(tidList,
|
||||||
tidList = (ItemPointerData *)
|
numAllocTids * sizeof(ItemPointerData));
|
||||||
repalloc(tidList,
|
|
||||||
numAllocTids * sizeof(ItemPointerData));
|
|
||||||
}
|
|
||||||
tidList[numTids++] = *itemptr;
|
|
||||||
}
|
}
|
||||||
|
tidList[numTids++] = *itemptr;
|
||||||
}
|
}
|
||||||
else if (tidexpr->exprstate && tidexpr->isarray)
|
else if (tidexpr->exprstate && tidexpr->isarray)
|
||||||
{
|
{
|
||||||
@ -206,13 +218,15 @@ TidListEval(TidScanState *tidstate)
|
|||||||
}
|
}
|
||||||
for (i = 0; i < ndatums; i++)
|
for (i = 0; i < ndatums; i++)
|
||||||
{
|
{
|
||||||
if (!ipnulls[i])
|
if (ipnulls[i])
|
||||||
{
|
continue;
|
||||||
itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
|
|
||||||
if (ItemPointerIsValid(itemptr) &&
|
itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
|
||||||
ItemPointerGetBlockNumber(itemptr) < nblocks)
|
|
||||||
tidList[numTids++] = *itemptr;
|
if (!table_tuple_tid_valid(scan, itemptr))
|
||||||
}
|
continue;
|
||||||
|
|
||||||
|
tidList[numTids++] = *itemptr;
|
||||||
}
|
}
|
||||||
pfree(ipdatums);
|
pfree(ipdatums);
|
||||||
pfree(ipnulls);
|
pfree(ipnulls);
|
||||||
@ -306,6 +320,7 @@ TidNext(TidScanState *node)
|
|||||||
EState *estate;
|
EState *estate;
|
||||||
ScanDirection direction;
|
ScanDirection direction;
|
||||||
Snapshot snapshot;
|
Snapshot snapshot;
|
||||||
|
TableScanDesc scan;
|
||||||
Relation heapRelation;
|
Relation heapRelation;
|
||||||
TupleTableSlot *slot;
|
TupleTableSlot *slot;
|
||||||
ItemPointerData *tidList;
|
ItemPointerData *tidList;
|
||||||
@ -327,6 +342,7 @@ TidNext(TidScanState *node)
|
|||||||
if (node->tss_TidList == NULL)
|
if (node->tss_TidList == NULL)
|
||||||
TidListEval(node);
|
TidListEval(node);
|
||||||
|
|
||||||
|
scan = node->ss.ss_currentScanDesc;
|
||||||
tidList = node->tss_TidList;
|
tidList = node->tss_TidList;
|
||||||
numTids = node->tss_NumTids;
|
numTids = node->tss_NumTids;
|
||||||
|
|
||||||
@ -365,7 +381,7 @@ TidNext(TidScanState *node)
|
|||||||
* current according to our snapshot.
|
* current according to our snapshot.
|
||||||
*/
|
*/
|
||||||
if (node->tss_isCurrentOf)
|
if (node->tss_isCurrentOf)
|
||||||
table_get_latest_tid(heapRelation, snapshot, &tid);
|
table_get_latest_tid(scan, &tid);
|
||||||
|
|
||||||
if (table_fetch_row_version(heapRelation, &tid, snapshot, slot))
|
if (table_fetch_row_version(heapRelation, &tid, snapshot, slot))
|
||||||
return slot;
|
return slot;
|
||||||
@ -442,6 +458,10 @@ ExecReScanTidScan(TidScanState *node)
|
|||||||
node->tss_NumTids = 0;
|
node->tss_NumTids = 0;
|
||||||
node->tss_TidPtr = -1;
|
node->tss_TidPtr = -1;
|
||||||
|
|
||||||
|
/* not really necessary, but seems good form */
|
||||||
|
if (node->ss.ss_currentScanDesc)
|
||||||
|
table_rescan(node->ss.ss_currentScanDesc, NULL);
|
||||||
|
|
||||||
ExecScanReScan(&node->ss);
|
ExecScanReScan(&node->ss);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -455,6 +475,9 @@ ExecReScanTidScan(TidScanState *node)
|
|||||||
void
|
void
|
||||||
ExecEndTidScan(TidScanState *node)
|
ExecEndTidScan(TidScanState *node)
|
||||||
{
|
{
|
||||||
|
if (node->ss.ss_currentScanDesc)
|
||||||
|
table_endscan(node->ss.ss_currentScanDesc);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Free the exprcontext
|
* Free the exprcontext
|
||||||
*/
|
*/
|
||||||
|
@ -358,6 +358,7 @@ currtid_byreloid(PG_FUNCTION_ARGS)
|
|||||||
Relation rel;
|
Relation rel;
|
||||||
AclResult aclresult;
|
AclResult aclresult;
|
||||||
Snapshot snapshot;
|
Snapshot snapshot;
|
||||||
|
TableScanDesc scan;
|
||||||
|
|
||||||
result = (ItemPointer) palloc(sizeof(ItemPointerData));
|
result = (ItemPointer) palloc(sizeof(ItemPointerData));
|
||||||
if (!reloid)
|
if (!reloid)
|
||||||
@ -380,7 +381,9 @@ currtid_byreloid(PG_FUNCTION_ARGS)
|
|||||||
ItemPointerCopy(tid, result);
|
ItemPointerCopy(tid, result);
|
||||||
|
|
||||||
snapshot = RegisterSnapshot(GetLatestSnapshot());
|
snapshot = RegisterSnapshot(GetLatestSnapshot());
|
||||||
table_get_latest_tid(rel, snapshot, result);
|
scan = table_beginscan(rel, snapshot, 0, NULL);
|
||||||
|
table_get_latest_tid(scan, result);
|
||||||
|
table_endscan(scan);
|
||||||
UnregisterSnapshot(snapshot);
|
UnregisterSnapshot(snapshot);
|
||||||
|
|
||||||
table_close(rel, AccessShareLock);
|
table_close(rel, AccessShareLock);
|
||||||
@ -398,6 +401,7 @@ currtid_byrelname(PG_FUNCTION_ARGS)
|
|||||||
Relation rel;
|
Relation rel;
|
||||||
AclResult aclresult;
|
AclResult aclresult;
|
||||||
Snapshot snapshot;
|
Snapshot snapshot;
|
||||||
|
TableScanDesc scan;
|
||||||
|
|
||||||
relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
|
relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
|
||||||
rel = table_openrv(relrv, AccessShareLock);
|
rel = table_openrv(relrv, AccessShareLock);
|
||||||
@ -415,7 +419,9 @@ currtid_byrelname(PG_FUNCTION_ARGS)
|
|||||||
ItemPointerCopy(tid, result);
|
ItemPointerCopy(tid, result);
|
||||||
|
|
||||||
snapshot = RegisterSnapshot(GetLatestSnapshot());
|
snapshot = RegisterSnapshot(GetLatestSnapshot());
|
||||||
table_get_latest_tid(rel, snapshot, result);
|
scan = table_beginscan(rel, snapshot, 0, NULL);
|
||||||
|
table_get_latest_tid(scan, result);
|
||||||
|
table_endscan(scan);
|
||||||
UnregisterSnapshot(snapshot);
|
UnregisterSnapshot(snapshot);
|
||||||
|
|
||||||
table_close(rel, AccessShareLock);
|
table_close(rel, AccessShareLock);
|
||||||
|
@ -134,8 +134,7 @@ extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
|
|||||||
Buffer buffer, Snapshot snapshot, HeapTuple heapTuple,
|
Buffer buffer, Snapshot snapshot, HeapTuple heapTuple,
|
||||||
bool *all_dead, bool first_call);
|
bool *all_dead, bool first_call);
|
||||||
|
|
||||||
extern void heap_get_latest_tid(Relation relation, Snapshot snapshot,
|
extern void heap_get_latest_tid(TableScanDesc scan, ItemPointer tid);
|
||||||
ItemPointer tid);
|
|
||||||
extern void setLastTid(const ItemPointer tid);
|
extern void setLastTid(const ItemPointer tid);
|
||||||
|
|
||||||
extern BulkInsertState GetBulkInsertState(void);
|
extern BulkInsertState GetBulkInsertState(void);
|
||||||
|
@ -308,12 +308,17 @@ typedef struct TableAmRoutine
|
|||||||
Snapshot snapshot,
|
Snapshot snapshot,
|
||||||
TupleTableSlot *slot);
|
TupleTableSlot *slot);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Is tid valid for a scan of this relation.
|
||||||
|
*/
|
||||||
|
bool (*tuple_tid_valid) (TableScanDesc scan,
|
||||||
|
ItemPointer tid);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return the latest version of the tuple at `tid`, by updating `tid` to
|
* Return the latest version of the tuple at `tid`, by updating `tid` to
|
||||||
* point at the newest version.
|
* point at the newest version.
|
||||||
*/
|
*/
|
||||||
void (*tuple_get_latest_tid) (Relation rel,
|
void (*tuple_get_latest_tid) (TableScanDesc scan,
|
||||||
Snapshot snapshot,
|
|
||||||
ItemPointer tid);
|
ItemPointer tid);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -548,10 +553,10 @@ typedef struct TableAmRoutine
|
|||||||
/*
|
/*
|
||||||
* See table_relation_size().
|
* See table_relation_size().
|
||||||
*
|
*
|
||||||
* Note that currently a few callers use the MAIN_FORKNUM size to vet the
|
* Note that currently a few callers use the MAIN_FORKNUM size to figure
|
||||||
* validity of tids (e.g. nodeTidscans.c), and others use it to figure out
|
* out the range of potentially interesting blocks (brin, analyze). It's
|
||||||
* the range of potentially interesting blocks (brin, analyze). The
|
* probable that we'll need to revise the interface for those at some
|
||||||
* abstraction around this will need to be improved in the near future.
|
* point.
|
||||||
*/
|
*/
|
||||||
uint64 (*relation_size) (Relation rel, ForkNumber forkNumber);
|
uint64 (*relation_size) (Relation rel, ForkNumber forkNumber);
|
||||||
|
|
||||||
@ -985,15 +990,25 @@ table_fetch_row_version(Relation rel,
|
|||||||
return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
|
return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Verify that `tid` is a potentially valid tuple identifier. That doesn't
|
||||||
|
* mean that the pointed to row needs to exist or be visible, but that
|
||||||
|
* attempting to fetch the row (e.g. with table_get_latest_tid() or
|
||||||
|
* table_fetch_row_version()) should not error out if called with that tid.
|
||||||
|
*
|
||||||
|
* `scan` needs to have been started via table_beginscan().
|
||||||
|
*/
|
||||||
|
static inline bool
|
||||||
|
table_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
|
||||||
|
{
|
||||||
|
return scan->rs_rd->rd_tableam->tuple_tid_valid(scan, tid);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return the latest version of the tuple at `tid`, by updating `tid` to
|
* Return the latest version of the tuple at `tid`, by updating `tid` to
|
||||||
* point at the newest version.
|
* point at the newest version.
|
||||||
*/
|
*/
|
||||||
static inline void
|
extern void table_get_latest_tid(TableScanDesc scan, ItemPointer tid);
|
||||||
table_get_latest_tid(Relation rel, Snapshot snapshot, ItemPointer tid)
|
|
||||||
{
|
|
||||||
rel->rd_tableam->tuple_get_latest_tid(rel, snapshot, tid);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return true iff tuple in slot satisfies the snapshot.
|
* Return true iff tuple in slot satisfies the snapshot.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user