mirror of
https://github.com/postgres/postgres.git
synced 2025-06-23 14:01:44 +03:00
Reduce the number of pallocs() in BRIN
Instead of allocating memory in brin_deform_tuple and brin_copy_tuple over and over during a scan, allow reuse of previously allocated memory. This is said to make for a measurable performance improvement.

Author: Jinyu Zhang, Álvaro Herrera
Reviewed by: Tomas Vondra
Discussion: https://postgr.es/m/495deb78.4186.1500dacaa63.Coremail.beijing_pg@163.com
This commit is contained in:
@ -226,7 +226,8 @@ brin_page_items(PG_FUNCTION_ARGS)
|
|||||||
if (ItemIdIsUsed(itemId))
|
if (ItemIdIsUsed(itemId))
|
||||||
{
|
{
|
||||||
dtup = brin_deform_tuple(bdesc,
|
dtup = brin_deform_tuple(bdesc,
|
||||||
(BrinTuple *) PageGetItem(page, itemId));
|
(BrinTuple *) PageGetItem(page, itemId),
|
||||||
|
NULL);
|
||||||
attno = 1;
|
attno = 1;
|
||||||
unusedItem = false;
|
unusedItem = false;
|
||||||
}
|
}
|
||||||
|
@ -217,7 +217,7 @@ brininsert(Relation idxRel, Datum *values, bool *nulls,
|
|||||||
MemoryContextSwitchTo(tupcxt);
|
MemoryContextSwitchTo(tupcxt);
|
||||||
}
|
}
|
||||||
|
|
||||||
dtup = brin_deform_tuple(bdesc, brtup);
|
dtup = brin_deform_tuple(bdesc, brtup, NULL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compare the key values of the new tuple to the stored index values;
|
* Compare the key values of the new tuple to the stored index values;
|
||||||
@ -268,7 +268,7 @@ brininsert(Relation idxRel, Datum *values, bool *nulls,
|
|||||||
* re-acquiring the lock.
|
* re-acquiring the lock.
|
||||||
*/
|
*/
|
||||||
origsz = ItemIdGetLength(lp);
|
origsz = ItemIdGetLength(lp);
|
||||||
origtup = brin_copy_tuple(brtup, origsz);
|
origtup = brin_copy_tuple(brtup, origsz, NULL, NULL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Before releasing the lock, check if we can attempt a same-page
|
* Before releasing the lock, check if we can attempt a same-page
|
||||||
@ -363,6 +363,9 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
|
|||||||
FmgrInfo *consistentFn;
|
FmgrInfo *consistentFn;
|
||||||
MemoryContext oldcxt;
|
MemoryContext oldcxt;
|
||||||
MemoryContext perRangeCxt;
|
MemoryContext perRangeCxt;
|
||||||
|
BrinMemTuple *dtup;
|
||||||
|
BrinTuple *btup = NULL;
|
||||||
|
Size btupsz = 0;
|
||||||
|
|
||||||
opaque = (BrinOpaque *) scan->opaque;
|
opaque = (BrinOpaque *) scan->opaque;
|
||||||
bdesc = opaque->bo_bdesc;
|
bdesc = opaque->bo_bdesc;
|
||||||
@ -384,6 +387,9 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
|
|||||||
*/
|
*/
|
||||||
consistentFn = palloc0(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts);
|
consistentFn = palloc0(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts);
|
||||||
|
|
||||||
|
/* allocate an initial in-memory tuple, out of the per-range memcxt */
|
||||||
|
dtup = brin_new_memtuple(bdesc);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Setup and use a per-range memory context, which is reset every time we
|
* Setup and use a per-range memory context, which is reset every time we
|
||||||
* loop below. This avoids having to free the tuples within the loop.
|
* loop below. This avoids having to free the tuples within the loop.
|
||||||
@ -401,6 +407,7 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
|
|||||||
for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange)
|
for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange)
|
||||||
{
|
{
|
||||||
bool addrange;
|
bool addrange;
|
||||||
|
bool gottuple = false;
|
||||||
BrinTuple *tup;
|
BrinTuple *tup;
|
||||||
OffsetNumber off;
|
OffsetNumber off;
|
||||||
Size size;
|
Size size;
|
||||||
@ -414,7 +421,8 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
|
|||||||
scan->xs_snapshot);
|
scan->xs_snapshot);
|
||||||
if (tup)
|
if (tup)
|
||||||
{
|
{
|
||||||
tup = brin_copy_tuple(tup, size);
|
gottuple = true;
|
||||||
|
btup = brin_copy_tuple(tup, size, btup, &btupsz);
|
||||||
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
|
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -422,15 +430,13 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
|
|||||||
* For page ranges with no indexed tuple, we must return the whole
|
* For page ranges with no indexed tuple, we must return the whole
|
||||||
* range; otherwise, compare it to the scan keys.
|
* range; otherwise, compare it to the scan keys.
|
||||||
*/
|
*/
|
||||||
if (tup == NULL)
|
if (!gottuple)
|
||||||
{
|
{
|
||||||
addrange = true;
|
addrange = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
BrinMemTuple *dtup;
|
dtup = brin_deform_tuple(bdesc, btup, dtup);
|
||||||
|
|
||||||
dtup = brin_deform_tuple(bdesc, tup);
|
|
||||||
if (dtup->bt_placeholder)
|
if (dtup->bt_placeholder)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@ -1210,7 +1216,7 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
|
|||||||
/* the placeholder tuple must exist */
|
/* the placeholder tuple must exist */
|
||||||
if (phtup == NULL)
|
if (phtup == NULL)
|
||||||
elog(ERROR, "missing placeholder tuple");
|
elog(ERROR, "missing placeholder tuple");
|
||||||
phtup = brin_copy_tuple(phtup, phsz);
|
phtup = brin_copy_tuple(phtup, phsz, NULL, NULL);
|
||||||
LockBuffer(phbuf, BUFFER_LOCK_UNLOCK);
|
LockBuffer(phbuf, BUFFER_LOCK_UNLOCK);
|
||||||
|
|
||||||
/* merge it into the tuple from the heap scan */
|
/* merge it into the tuple from the heap scan */
|
||||||
@ -1358,7 +1364,7 @@ union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b)
|
|||||||
"brin union",
|
"brin union",
|
||||||
ALLOCSET_DEFAULT_SIZES);
|
ALLOCSET_DEFAULT_SIZES);
|
||||||
oldcxt = MemoryContextSwitchTo(cxt);
|
oldcxt = MemoryContextSwitchTo(cxt);
|
||||||
db = brin_deform_tuple(bdesc, b);
|
db = brin_deform_tuple(bdesc, b, NULL);
|
||||||
MemoryContextSwitchTo(oldcxt);
|
MemoryContextSwitchTo(oldcxt);
|
||||||
|
|
||||||
for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
|
for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
|
||||||
|
@ -548,6 +548,8 @@ brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange,
|
|||||||
OffsetNumber off;
|
OffsetNumber off;
|
||||||
OffsetNumber maxoff;
|
OffsetNumber maxoff;
|
||||||
Page page;
|
Page page;
|
||||||
|
BrinTuple *btup = NULL;
|
||||||
|
Size btupsz = 0;
|
||||||
|
|
||||||
page = BufferGetPage(buf);
|
page = BufferGetPage(buf);
|
||||||
|
|
||||||
@ -567,7 +569,7 @@ brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange,
|
|||||||
{
|
{
|
||||||
sz = ItemIdGetLength(lp);
|
sz = ItemIdGetLength(lp);
|
||||||
tup = (BrinTuple *) PageGetItem(page, lp);
|
tup = (BrinTuple *) PageGetItem(page, lp);
|
||||||
tup = brin_copy_tuple(tup, sz);
|
tup = brin_copy_tuple(tup, sz, btup, &btupsz);
|
||||||
|
|
||||||
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
|
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
|
||||||
|
|
||||||
|
@ -311,17 +311,26 @@ brin_free_tuple(BrinTuple *tuple)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create a palloc'd copy of a BrinTuple.
|
* Given a brin tuple of size len, create a copy of it. If 'dest' is not
|
||||||
|
* NULL, its size is destsz, and can be used as output buffer; if the tuple
|
||||||
|
* to be copied does not fit, it is enlarged by repalloc, and the size is
|
||||||
|
* updated to match. This avoids palloc/free cycles when many brin tuples
|
||||||
|
* are being processed in loops.
|
||||||
*/
|
*/
|
||||||
BrinTuple *
|
BrinTuple *
|
||||||
brin_copy_tuple(BrinTuple *tuple, Size len)
|
brin_copy_tuple(BrinTuple *tuple, Size len, BrinTuple *dest, Size *destsz)
|
||||||
{
|
{
|
||||||
BrinTuple *newtup;
|
if (!destsz || *destsz == 0)
|
||||||
|
dest = palloc(len);
|
||||||
|
else if (len > *destsz)
|
||||||
|
{
|
||||||
|
dest = repalloc(dest, len);
|
||||||
|
*destsz = len;
|
||||||
|
}
|
||||||
|
|
||||||
newtup = palloc(len);
|
memcpy(dest, tuple, len);
|
||||||
memcpy(newtup, tuple, len);
|
|
||||||
|
|
||||||
return newtup;
|
return dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -348,54 +357,69 @@ BrinMemTuple *
|
|||||||
brin_new_memtuple(BrinDesc *brdesc)
|
brin_new_memtuple(BrinDesc *brdesc)
|
||||||
{
|
{
|
||||||
BrinMemTuple *dtup;
|
BrinMemTuple *dtup;
|
||||||
char *currdatum;
|
|
||||||
long basesize;
|
long basesize;
|
||||||
int i;
|
|
||||||
|
|
||||||
basesize = MAXALIGN(sizeof(BrinMemTuple) +
|
basesize = MAXALIGN(sizeof(BrinMemTuple) +
|
||||||
sizeof(BrinValues) * brdesc->bd_tupdesc->natts);
|
sizeof(BrinValues) * brdesc->bd_tupdesc->natts);
|
||||||
dtup = palloc0(basesize + sizeof(Datum) * brdesc->bd_totalstored);
|
dtup = palloc0(basesize + sizeof(Datum) * brdesc->bd_totalstored);
|
||||||
currdatum = (char *) dtup + basesize;
|
|
||||||
for (i = 0; i < brdesc->bd_tupdesc->natts; i++)
|
dtup->bt_values = palloc(sizeof(Datum) * brdesc->bd_totalstored);
|
||||||
{
|
dtup->bt_allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
|
||||||
dtup->bt_columns[i].bv_attno = i + 1;
|
dtup->bt_hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
|
||||||
dtup->bt_columns[i].bv_allnulls = true;
|
|
||||||
dtup->bt_columns[i].bv_hasnulls = false;
|
|
||||||
dtup->bt_columns[i].bv_values = (Datum *) currdatum;
|
|
||||||
currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored;
|
|
||||||
}
|
|
||||||
|
|
||||||
dtup->bt_context = AllocSetContextCreate(CurrentMemoryContext,
|
dtup->bt_context = AllocSetContextCreate(CurrentMemoryContext,
|
||||||
"brin dtuple",
|
"brin dtuple",
|
||||||
ALLOCSET_DEFAULT_SIZES);
|
ALLOCSET_DEFAULT_SIZES);
|
||||||
|
|
||||||
|
brin_memtuple_initialize(dtup, brdesc);
|
||||||
|
|
||||||
return dtup;
|
return dtup;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Reset a BrinMemTuple to initial state
|
* Reset a BrinMemTuple to initial state. We return the same tuple, for
|
||||||
|
* notational convenience.
|
||||||
*/
|
*/
|
||||||
void
|
BrinMemTuple *
|
||||||
brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc)
|
brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
char *currdatum;
|
||||||
|
|
||||||
MemoryContextReset(dtuple->bt_context);
|
MemoryContextReset(dtuple->bt_context);
|
||||||
|
|
||||||
|
currdatum = (char *) dtuple +
|
||||||
|
MAXALIGN(sizeof(BrinMemTuple) +
|
||||||
|
sizeof(BrinValues) * brdesc->bd_tupdesc->natts);
|
||||||
for (i = 0; i < brdesc->bd_tupdesc->natts; i++)
|
for (i = 0; i < brdesc->bd_tupdesc->natts; i++)
|
||||||
{
|
{
|
||||||
dtuple->bt_columns[i].bv_allnulls = true;
|
dtuple->bt_columns[i].bv_allnulls = true;
|
||||||
dtuple->bt_columns[i].bv_hasnulls = false;
|
dtuple->bt_columns[i].bv_hasnulls = false;
|
||||||
|
|
||||||
|
dtuple->bt_columns[i].bv_attno = i + 1;
|
||||||
|
dtuple->bt_columns[i].bv_allnulls = true;
|
||||||
|
dtuple->bt_columns[i].bv_hasnulls = false;
|
||||||
|
dtuple->bt_columns[i].bv_values = (Datum *) currdatum;
|
||||||
|
currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return dtuple;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Convert a BrinTuple back to a BrinMemTuple. This is the reverse of
|
* Convert a BrinTuple back to a BrinMemTuple. This is the reverse of
|
||||||
* brin_form_tuple.
|
* brin_form_tuple.
|
||||||
*
|
*
|
||||||
|
* As an optimization, the caller can pass a previously allocated 'dMemtuple'.
|
||||||
|
* This avoids having to allocate it here, which can be useful when this
|
||||||
|
* function is called many times in a loop. It is caller's responsibility
|
||||||
|
* that the given BrinMemTuple matches what we need here.
|
||||||
|
*
|
||||||
* Note we don't need the "on disk tupdesc" here; we rely on our own routine to
|
* Note we don't need the "on disk tupdesc" here; we rely on our own routine to
|
||||||
* deconstruct the tuple from the on-disk format.
|
* deconstruct the tuple from the on-disk format.
|
||||||
*/
|
*/
|
||||||
BrinMemTuple *
|
BrinMemTuple *
|
||||||
brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple)
|
brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple)
|
||||||
{
|
{
|
||||||
BrinMemTuple *dtup;
|
BrinMemTuple *dtup;
|
||||||
Datum *values;
|
Datum *values;
|
||||||
@ -407,15 +431,16 @@ brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple)
|
|||||||
int valueno;
|
int valueno;
|
||||||
MemoryContext oldcxt;
|
MemoryContext oldcxt;
|
||||||
|
|
||||||
dtup = brin_new_memtuple(brdesc);
|
dtup = dMemtuple ? brin_memtuple_initialize(dMemtuple, brdesc) :
|
||||||
|
brin_new_memtuple(brdesc);
|
||||||
|
|
||||||
if (BrinTupleIsPlaceholder(tuple))
|
if (BrinTupleIsPlaceholder(tuple))
|
||||||
dtup->bt_placeholder = true;
|
dtup->bt_placeholder = true;
|
||||||
dtup->bt_blkno = tuple->bt_blkno;
|
dtup->bt_blkno = tuple->bt_blkno;
|
||||||
|
|
||||||
values = palloc(sizeof(Datum) * brdesc->bd_totalstored);
|
values = dtup->bt_values;
|
||||||
allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
|
allnulls = dtup->bt_allnulls;
|
||||||
hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
|
hasnulls = dtup->bt_hasnulls;
|
||||||
|
|
||||||
tp = (char *) tuple + BrinTupleDataOffset(tuple);
|
tp = (char *) tuple + BrinTupleDataOffset(tuple);
|
||||||
|
|
||||||
@ -458,10 +483,6 @@ brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple)
|
|||||||
|
|
||||||
MemoryContextSwitchTo(oldcxt);
|
MemoryContextSwitchTo(oldcxt);
|
||||||
|
|
||||||
pfree(values);
|
|
||||||
pfree(allnulls);
|
|
||||||
pfree(hasnulls);
|
|
||||||
|
|
||||||
return dtup;
|
return dtup;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -38,6 +38,11 @@ typedef struct BrinMemTuple
|
|||||||
bool bt_placeholder; /* this is a placeholder tuple */
|
bool bt_placeholder; /* this is a placeholder tuple */
|
||||||
BlockNumber bt_blkno; /* heap blkno that the tuple is for */
|
BlockNumber bt_blkno; /* heap blkno that the tuple is for */
|
||||||
MemoryContext bt_context; /* memcxt holding the bt_columns values */
|
MemoryContext bt_context; /* memcxt holding the bt_columns values */
|
||||||
|
/* output arrays for brin_deform_tuple: */
|
||||||
|
Datum *bt_values; /* values array */
|
||||||
|
bool *bt_allnulls; /* allnulls array */
|
||||||
|
bool *bt_hasnulls; /* hasnulls array */
|
||||||
|
/* not an output array, but must be last */
|
||||||
BrinValues bt_columns[FLEXIBLE_ARRAY_MEMBER];
|
BrinValues bt_columns[FLEXIBLE_ARRAY_MEMBER];
|
||||||
} BrinMemTuple;
|
} BrinMemTuple;
|
||||||
|
|
||||||
@ -83,14 +88,15 @@ extern BrinTuple *brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno,
|
|||||||
extern BrinTuple *brin_form_placeholder_tuple(BrinDesc *brdesc,
|
extern BrinTuple *brin_form_placeholder_tuple(BrinDesc *brdesc,
|
||||||
BlockNumber blkno, Size *size);
|
BlockNumber blkno, Size *size);
|
||||||
extern void brin_free_tuple(BrinTuple *tuple);
|
extern void brin_free_tuple(BrinTuple *tuple);
|
||||||
extern BrinTuple *brin_copy_tuple(BrinTuple *tuple, Size len);
|
extern BrinTuple *brin_copy_tuple(BrinTuple *tuple, Size len,
|
||||||
|
BrinTuple *dest, Size *destsz);
|
||||||
extern bool brin_tuples_equal(const BrinTuple *a, Size alen,
|
extern bool brin_tuples_equal(const BrinTuple *a, Size alen,
|
||||||
const BrinTuple *b, Size blen);
|
const BrinTuple *b, Size blen);
|
||||||
|
|
||||||
extern BrinMemTuple *brin_new_memtuple(BrinDesc *brdesc);
|
extern BrinMemTuple *brin_new_memtuple(BrinDesc *brdesc);
|
||||||
extern void brin_memtuple_initialize(BrinMemTuple *dtuple,
|
extern BrinMemTuple *brin_memtuple_initialize(BrinMemTuple *dtuple,
|
||||||
BrinDesc *brdesc);
|
BrinDesc *brdesc);
|
||||||
extern BrinMemTuple *brin_deform_tuple(BrinDesc *brdesc,
|
extern BrinMemTuple *brin_deform_tuple(BrinDesc *brdesc,
|
||||||
BrinTuple *tuple);
|
BrinTuple *tuple, BrinMemTuple *dMemtuple);
|
||||||
|
|
||||||
#endif /* BRIN_TUPLE_H */
|
#endif /* BRIN_TUPLE_H */
|
||||||
|
Reference in New Issue
Block a user