
pgindent run for 8.2.

Bruce Momjian
2006-10-04 00:30:14 +00:00
parent 451e419e98
commit f99a569a2e
522 changed files with 21297 additions and 17170 deletions
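
The hunks below are almost entirely mechanical layout changes: declarations aligned into columns, braces moved onto their own lines, casts written as "(type *) expr", spaces normalized around operators and keywords, and block comments reflowed to the standard width. As a rough, hypothetical sketch (not code taken from this commit), this is the kind of before/after transformation a pgindent run applies:

#include <stdio.h>

/* Hand-written layout, roughly as it might look before a pgindent run. */
static int
sum_before(int *v, int n) {
    int i, s=0;
    for(i=0;i<n;i++)
        s += *((int*)v+i);
    return s;
}

/* The same function after pgindent-style reformatting. */
static int
sum_after(int *v, int n)
{
    int         i,
                s = 0;

    for (i = 0; i < n; i++)
        s += *((int *) v + i);
    return s;
}

int
main(void)
{
    int         v[] = {1, 2, 3};

    printf("%d %d\n", sum_before(v, 3), sum_after(v, 3));
    return 0;
}

Multiplied across 522 files, changes of exactly this shape account for the roughly 38,000 added and removed lines in this commit.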

src/backend/access/common/heaptuple.c

@@ -16,7 +16,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/common/heaptuple.c,v 1.110 2006/07/14 14:52:16 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/common/heaptuple.c,v 1.111 2006/10/04 00:29:47 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1294,7 +1294,7 @@ slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull)
{
if (tuple == NULL) /* internal error */
elog(ERROR, "cannot extract system attribute from virtual tuple");
if (slot->tts_mintuple) /* internal error */
if (slot->tts_mintuple) /* internal error */
elog(ERROR, "cannot extract system attribute from minimal tuple");
return heap_getsysattr(tuple, attnum, tupleDesc, isnull);
}
@@ -1480,7 +1480,7 @@ slot_attisnull(TupleTableSlot *slot, int attnum)
{
if (tuple == NULL) /* internal error */
elog(ERROR, "cannot extract system attribute from virtual tuple");
if (slot->tts_mintuple) /* internal error */
if (slot->tts_mintuple) /* internal error */
elog(ERROR, "cannot extract system attribute from minimal tuple");
return heap_attisnull(tuple, attnum);
}

src/backend/access/common/printtup.c

@@ -9,7 +9,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/common/printtup.c,v 1.98 2006/08/12 02:52:03 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/common/printtup.c,v 1.99 2006/10/04 00:29:47 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -127,8 +127,8 @@ printtup_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
}
/*
* If we are supposed to emit row descriptions,
* then send the tuple descriptor of the tuples.
* If we are supposed to emit row descriptions, then send the tuple
* descriptor of the tuples.
*/
if (myState->sendDescrip)
SendRowDescriptionMessage(typeinfo,

src/backend/access/common/reloptions.c

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/common/reloptions.c,v 1.1 2006/07/03 22:45:36 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/common/reloptions.c,v 1.2 2006/10/04 00:29:47 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -59,7 +59,7 @@ transformRelOptions(Datum oldOptions, List *defList,
/* Copy any oldOptions that aren't to be replaced */
if (oldOptions != (Datum) 0)
{
ArrayType *array = DatumGetArrayTypeP(oldOptions);
ArrayType *array = DatumGetArrayTypeP(oldOptions);
Datum *oldoptions;
int noldoptions;
int i;
@@ -71,15 +71,15 @@ transformRelOptions(Datum oldOptions, List *defList,
for (i = 0; i < noldoptions; i++)
{
text *oldoption = DatumGetTextP(oldoptions[i]);
char *text_str = (char *) VARATT_DATA(oldoption);
int text_len = VARATT_SIZE(oldoption) - VARHDRSZ;
text *oldoption = DatumGetTextP(oldoptions[i]);
char *text_str = (char *) VARATT_DATA(oldoption);
int text_len = VARATT_SIZE(oldoption) - VARHDRSZ;
/* Search for a match in defList */
foreach(cell, defList)
{
DefElem *def = lfirst(cell);
int kw_len = strlen(def->defname);
DefElem *def = lfirst(cell);
int kw_len = strlen(def->defname);
if (text_len > kw_len && text_str[kw_len] == '=' &&
pg_strncasecmp(text_str, def->defname, kw_len) == 0)
@@ -96,33 +96,33 @@ transformRelOptions(Datum oldOptions, List *defList,
}
/*
* If CREATE/SET, add new options to array; if RESET, just check that
* the user didn't say RESET (option=val). (Must do this because the
* grammar doesn't enforce it.)
* If CREATE/SET, add new options to array; if RESET, just check that the
* user didn't say RESET (option=val). (Must do this because the grammar
* doesn't enforce it.)
*/
foreach(cell, defList)
{
DefElem *def = lfirst(cell);
DefElem *def = lfirst(cell);
if (isReset)
{
if (def->arg != NULL)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("RESET must not include values for parameters")));
errmsg("RESET must not include values for parameters")));
}
else
{
text *t;
text *t;
const char *value;
Size len;
Size len;
if (ignoreOids && pg_strcasecmp(def->defname, "oids") == 0)
continue;
/*
* Flatten the DefElem into a text string like "name=arg".
* If we have just "name", assume "name=true" is meant.
* Flatten the DefElem into a text string like "name=arg". If we
* have just "name", assume "name=true" is meant.
*/
if (def->arg != NULL)
value = defGetString(def);
@@ -163,10 +163,10 @@ transformRelOptions(Datum oldOptions, List *defList,
* containing the corresponding value, or NULL if the keyword does not appear.
*/
void
parseRelOptions(Datum options, int numkeywords, const char * const *keywords,
parseRelOptions(Datum options, int numkeywords, const char *const * keywords,
char **values, bool validate)
{
ArrayType *array;
ArrayType *array;
Datum *optiondatums;
int noptions;
int i;
@@ -187,21 +187,21 @@ parseRelOptions(Datum options, int numkeywords, const char * const *keywords,
for (i = 0; i < noptions; i++)
{
text *optiontext = DatumGetTextP(optiondatums[i]);
char *text_str = (char *) VARATT_DATA(optiontext);
int text_len = VARATT_SIZE(optiontext) - VARHDRSZ;
int j;
text *optiontext = DatumGetTextP(optiondatums[i]);
char *text_str = (char *) VARATT_DATA(optiontext);
int text_len = VARATT_SIZE(optiontext) - VARHDRSZ;
int j;
/* Search for a match in keywords */
for (j = 0; j < numkeywords; j++)
{
int kw_len = strlen(keywords[j]);
int kw_len = strlen(keywords[j]);
if (text_len > kw_len && text_str[kw_len] == '=' &&
pg_strncasecmp(text_str, keywords[j], kw_len) == 0)
{
char *value;
int value_len;
char *value;
int value_len;
if (values[j] && validate)
ereport(ERROR,
@@ -218,8 +218,8 @@ parseRelOptions(Datum options, int numkeywords, const char * const *keywords,
}
if (j >= numkeywords && validate)
{
char *s;
char *p;
char *s;
char *p;
s = DatumGetCString(DirectFunctionCall1(textout, optiondatums[i]));
p = strchr(s, '=');
@@ -240,17 +240,17 @@ bytea *
default_reloptions(Datum reloptions, bool validate,
int minFillfactor, int defaultFillfactor)
{
static const char * const default_keywords[1] = { "fillfactor" };
char *values[1];
int32 fillfactor;
static const char *const default_keywords[1] = {"fillfactor"};
char *values[1];
int32 fillfactor;
StdRdOptions *result;
parseRelOptions(reloptions, 1, default_keywords, values, validate);
/*
* If no options, we can just return NULL rather than doing anything.
* (defaultFillfactor is thus not used, but we require callers to pass
* it anyway since we would need it if more options were added.)
* (defaultFillfactor is thus not used, but we require callers to pass it
* anyway since we would need it if more options were added.)
*/
if (values[0] == NULL)
return NULL;

src/backend/access/gin/ginarrayproc.c

@@ -1,14 +1,14 @@
/*-------------------------------------------------------------------------
*
* ginarrayproc.c
* support functions for GIN's indexing of any array
* support functions for GIN's indexing of any array
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginarrayproc.c,v 1.5 2006/09/10 20:14:20 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/ginarrayproc.c,v 1.6 2006/10/04 00:29:47 momjian Exp $
*-------------------------------------------------------------------------
*/
#include "postgres.h"
@@ -23,64 +23,73 @@
#define GinContainedStrategy 3
#define GinEqualStrategy 4
#define ARRAYCHECK(x) do { \
#define ARRAYCHECK(x) do { \
if ( ARR_HASNULL(x) ) \
ereport(ERROR, \
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), \
errmsg("array must not contain nulls"))); \
} while(0)
ereport(ERROR, \
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), \
errmsg("array must not contain nulls"))); \
} while(0)
/*
* Function used as extractValue and extractQuery both
*/
Datum
ginarrayextract(PG_FUNCTION_ARGS) {
ArrayType *array;
uint32 *nentries = (uint32*)PG_GETARG_POINTER(1);
Datum *entries = NULL;
int16 elmlen;
bool elmbyval;
char elmalign;
ginarrayextract(PG_FUNCTION_ARGS)
{
ArrayType *array;
uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1);
Datum *entries = NULL;
int16 elmlen;
bool elmbyval;
char elmalign;
/* we should guarantee that array will not be destroyed during all operation */
/*
* we should guarantee that array will not be destroyed during all
* operation
*/
array = PG_GETARG_ARRAYTYPE_P_COPY(0);
ARRAYCHECK(array);
get_typlenbyvalalign(ARR_ELEMTYPE(array),
&elmlen, &elmbyval, &elmalign);
&elmlen, &elmbyval, &elmalign);
deconstruct_array(array,
ARR_ELEMTYPE(array),
elmlen, elmbyval, elmalign,
&entries, NULL, (int*)nentries);
ARR_ELEMTYPE(array),
elmlen, elmbyval, elmalign,
&entries, NULL, (int *) nentries);
/* we should not free array, entries[i] points into it */
PG_RETURN_POINTER(entries);
}
Datum
ginarrayconsistent(PG_FUNCTION_ARGS) {
bool *check = (bool*)PG_GETARG_POINTER(0);
StrategyNumber strategy = PG_GETARG_UINT16(1);
ArrayType *query = PG_GETARG_ARRAYTYPE_P(2);
int res, i, nentries;
ginarrayconsistent(PG_FUNCTION_ARGS)
{
bool *check = (bool *) PG_GETARG_POINTER(0);
StrategyNumber strategy = PG_GETARG_UINT16(1);
ArrayType *query = PG_GETARG_ARRAYTYPE_P(2);
int res,
i,
nentries;
/* ARRAYCHECK was already done by previous ginarrayextract call */
switch( strategy ) {
switch (strategy)
{
case GinOverlapStrategy:
case GinContainedStrategy:
/* at least one element in check[] is true, so result = true */
/* at least one element in check[] is true, so result = true */
res = TRUE;
break;
case GinContainsStrategy:
case GinEqualStrategy:
nentries=ArrayGetNItems(ARR_NDIM(query), ARR_DIMS(query));
nentries = ArrayGetNItems(ARR_NDIM(query), ARR_DIMS(query));
res = TRUE;
for(i=0;i<nentries;i++)
if ( !check[i] ) {
for (i = 0; i < nentries; i++)
if (!check[i])
{
res = FALSE;
break;
}

src/backend/access/gin/ginbtree.c

@@ -1,14 +1,14 @@
/*-------------------------------------------------------------------------
*
* ginbtree.c
* page utilities routines for the postgres inverted index access method.
* page utilities routines for the postgres inverted index access method.
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginbtree.c,v 1.4 2006/07/14 14:52:16 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/ginbtree.c,v 1.5 2006/10/04 00:29:47 momjian Exp $
*-------------------------------------------------------------------------
*/
@@ -20,24 +20,29 @@
* Locks buffer by needed method for search.
*/
static int
ginTraverseLock(Buffer buffer, bool searchMode) {
Page page;
int access=GIN_SHARE;
ginTraverseLock(Buffer buffer, bool searchMode)
{
Page page;
int access = GIN_SHARE;
LockBuffer(buffer, GIN_SHARE);
page = BufferGetPage( buffer );
if ( GinPageIsLeaf(page) ) {
if ( searchMode == FALSE ) {
page = BufferGetPage(buffer);
if (GinPageIsLeaf(page))
{
if (searchMode == FALSE)
{
/* we should relock our page */
LockBuffer(buffer, GIN_UNLOCK);
LockBuffer(buffer, GIN_EXCLUSIVE);
/* But root can become non-leaf during relock */
if ( !GinPageIsLeaf(page) ) {
/* resore old lock type (very rare) */
if (!GinPageIsLeaf(page))
{
/* resore old lock type (very rare) */
LockBuffer(buffer, GIN_UNLOCK);
LockBuffer(buffer, GIN_SHARE);
} else
}
else
access = GIN_EXCLUSIVE;
}
}
@@ -45,9 +50,10 @@ ginTraverseLock(Buffer buffer, bool searchMode) {
return access;
}
GinBtreeStack*
ginPrepareFindLeafPage(GinBtree btree, BlockNumber blkno) {
GinBtreeStack *stack = (GinBtreeStack*)palloc(sizeof(GinBtreeStack));
GinBtreeStack *
ginPrepareFindLeafPage(GinBtree btree, BlockNumber blkno)
{
GinBtreeStack *stack = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
stack->blkno = blkno;
stack->buffer = ReadBuffer(btree->index, stack->blkno);
@@ -62,63 +68,73 @@ ginPrepareFindLeafPage(GinBtree btree, BlockNumber blkno) {
/*
* Locates leaf page contained tuple
*/
GinBtreeStack*
ginFindLeafPage(GinBtree btree, GinBtreeStack *stack) {
bool isfirst=TRUE;
GinBtreeStack *
ginFindLeafPage(GinBtree btree, GinBtreeStack *stack)
{
bool isfirst = TRUE;
BlockNumber rootBlkno;
if ( !stack )
if (!stack)
stack = ginPrepareFindLeafPage(btree, GIN_ROOT_BLKNO);
rootBlkno = stack->blkno;
for(;;) {
Page page;
for (;;)
{
Page page;
BlockNumber child;
int access=GIN_SHARE;
int access = GIN_SHARE;
stack->off = InvalidOffsetNumber;
page = BufferGetPage( stack->buffer );
if ( isfirst ) {
if ( GinPageIsLeaf(page) && !btree->searchMode )
page = BufferGetPage(stack->buffer);
if (isfirst)
{
if (GinPageIsLeaf(page) && !btree->searchMode)
access = GIN_EXCLUSIVE;
isfirst = FALSE;
} else
}
else
access = ginTraverseLock(stack->buffer, btree->searchMode);
/* ok, page is correctly locked, we should check to move right ..,
root never has a right link, so small optimization */
while( btree->fullScan==FALSE && stack->blkno != rootBlkno && btree->isMoveRight(btree, page) ) {
/*
* ok, page is correctly locked, we should check to move right ..,
* root never has a right link, so small optimization
*/
while (btree->fullScan == FALSE && stack->blkno != rootBlkno && btree->isMoveRight(btree, page))
{
BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;
if ( rightlink==InvalidBlockNumber )
if (rightlink == InvalidBlockNumber)
/* rightmost page */
break;
stack->blkno = rightlink;
LockBuffer(stack->buffer, GIN_UNLOCK);
stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
LockBuffer(stack->buffer, access);
page = BufferGetPage( stack->buffer );
LockBuffer(stack->buffer, access);
page = BufferGetPage(stack->buffer);
}
if ( GinPageIsLeaf(page) ) /* we found, return locked page */
if (GinPageIsLeaf(page)) /* we found, return locked page */
return stack;
/* now we have correct buffer, try to find child */
child = btree->findChildPage(btree, stack);
LockBuffer(stack->buffer, GIN_UNLOCK);
Assert( child != InvalidBlockNumber );
Assert( stack->blkno != child );
Assert(child != InvalidBlockNumber);
Assert(stack->blkno != child);
if ( btree->searchMode ) {
if (btree->searchMode)
{
/* in search mode we may forget path to leaf */
stack->blkno = child;
stack->buffer = ReleaseAndReadBuffer( stack->buffer, btree->index, stack->blkno );
} else {
GinBtreeStack *ptr = (GinBtreeStack*)palloc(sizeof(GinBtreeStack));
stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
}
else
{
GinBtreeStack *ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
ptr->parent = stack;
stack = ptr;
@@ -133,93 +149,110 @@ ginFindLeafPage(GinBtree btree, GinBtreeStack *stack) {
}
void
freeGinBtreeStack( GinBtreeStack *stack ) {
while(stack) {
GinBtreeStack *tmp = stack->parent;
if ( stack->buffer != InvalidBuffer )
freeGinBtreeStack(GinBtreeStack *stack)
{
while (stack)
{
GinBtreeStack *tmp = stack->parent;
if (stack->buffer != InvalidBuffer)
ReleaseBuffer(stack->buffer);
pfree( stack );
pfree(stack);
stack = tmp;
}
}
/*
* Try to find parent for current stack position, returns correct
* Try to find parent for current stack position, returns correct
* parent and child's offset in stack->parent.
* Function should never release root page to prevent conflicts
* with vacuum process
*/
void
findParents( GinBtree btree, GinBtreeStack *stack,
BlockNumber rootBlkno) {
findParents(GinBtree btree, GinBtreeStack *stack,
BlockNumber rootBlkno)
{
Page page;
Buffer buffer;
BlockNumber blkno, leftmostBlkno;
Page page;
Buffer buffer;
BlockNumber blkno,
leftmostBlkno;
OffsetNumber offset;
GinBtreeStack *root = stack->parent;
GinBtreeStack *ptr;
GinBtreeStack *root = stack->parent;
GinBtreeStack *ptr;
if ( !root ) {
if (!root)
{
/* XLog mode... */
root = (GinBtreeStack*)palloc(sizeof(GinBtreeStack));
root = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
root->blkno = rootBlkno;
root->buffer = ReadBuffer(btree->index, rootBlkno);
LockBuffer(root->buffer, GIN_EXCLUSIVE);
root->parent = NULL;
} else {
/* find root, we should not release root page until update is finished!! */
while( root->parent ) {
ReleaseBuffer( root->buffer );
}
else
{
/*
* find root, we should not release root page until update is
* finished!!
*/
while (root->parent)
{
ReleaseBuffer(root->buffer);
root = root->parent;
}
Assert( root->blkno == rootBlkno );
Assert( BufferGetBlockNumber(root->buffer) == rootBlkno );
Assert(root->blkno == rootBlkno);
Assert(BufferGetBlockNumber(root->buffer) == rootBlkno);
LockBuffer(root->buffer, GIN_EXCLUSIVE);
}
root->off = InvalidOffsetNumber;
page = BufferGetPage(root->buffer);
Assert( !GinPageIsLeaf(page) );
Assert(!GinPageIsLeaf(page));
/* check trivial case */
if ( (root->off = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) != InvalidOffsetNumber ) {
if ((root->off = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) != InvalidOffsetNumber)
{
stack->parent = root;
return;
}
leftmostBlkno = blkno = btree->getLeftMostPage(btree, page);
LockBuffer(root->buffer, GIN_UNLOCK );
Assert( blkno!=InvalidBlockNumber );
LockBuffer(root->buffer, GIN_UNLOCK);
Assert(blkno != InvalidBlockNumber);
for(;;) {
for (;;)
{
buffer = ReadBuffer(btree->index, blkno);
LockBuffer(buffer, GIN_EXCLUSIVE);
page = BufferGetPage(buffer);
if ( GinPageIsLeaf(page) )
if (GinPageIsLeaf(page))
elog(ERROR, "Lost path");
leftmostBlkno = btree->getLeftMostPage(btree, page);
while( (offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber))==InvalidOffsetNumber ) {
while ((offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) == InvalidOffsetNumber)
{
blkno = GinPageGetOpaque(page)->rightlink;
LockBuffer(buffer,GIN_UNLOCK);
LockBuffer(buffer, GIN_UNLOCK);
ReleaseBuffer(buffer);
if ( blkno == InvalidBlockNumber )
if (blkno == InvalidBlockNumber)
break;
buffer = ReadBuffer(btree->index, blkno);
LockBuffer(buffer, GIN_EXCLUSIVE);
page = BufferGetPage(buffer);
}
if ( blkno != InvalidBlockNumber ) {
ptr = (GinBtreeStack*)palloc(sizeof(GinBtreeStack));
if (blkno != InvalidBlockNumber)
{
ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
ptr->blkno = blkno;
ptr->buffer = buffer;
ptr->parent = root; /* it's may be wrong, but in next call we will correct */
ptr->parent = root; /* it's may be wrong, but in next call we will
* correct */
ptr->off = offset;
stack->parent = ptr;
return;
@@ -233,79 +266,94 @@ findParents( GinBtree btree, GinBtreeStack *stack,
* Insert value (stored in GinBtree) to tree descibed by stack
*/
void
ginInsertValue(GinBtree btree, GinBtreeStack *stack) {
GinBtreeStack *parent = stack;
BlockNumber rootBlkno = InvalidBuffer;
Page page, rpage, lpage;
ginInsertValue(GinBtree btree, GinBtreeStack *stack)
{
GinBtreeStack *parent = stack;
BlockNumber rootBlkno = InvalidBuffer;
Page page,
rpage,
lpage;
/* remember root BlockNumber */
while( parent ) {
while (parent)
{
rootBlkno = parent->blkno;
parent = parent->parent;
}
while( stack ) {
while (stack)
{
XLogRecData *rdata;
BlockNumber savedRightLink;
BlockNumber savedRightLink;
page = BufferGetPage( stack->buffer );
page = BufferGetPage(stack->buffer);
savedRightLink = GinPageGetOpaque(page)->rightlink;
if ( btree->isEnoughSpace( btree, stack->buffer, stack->off ) ) {
if (btree->isEnoughSpace(btree, stack->buffer, stack->off))
{
START_CRIT_SECTION();
btree->placeToPage( btree, stack->buffer, stack->off, &rdata );
btree->placeToPage(btree, stack->buffer, stack->off, &rdata);
if (!btree->index->rd_istemp) {
XLogRecPtr recptr;
if (!btree->index->rd_istemp)
{
XLogRecPtr recptr;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
}
}
MarkBufferDirty( stack->buffer );
MarkBufferDirty(stack->buffer);
UnlockReleaseBuffer(stack->buffer);
END_CRIT_SECTION();
freeGinBtreeStack(stack->parent);
return;
} else {
Buffer rbuffer = GinNewBuffer(btree->index);
Page newlpage;
}
else
{
Buffer rbuffer = GinNewBuffer(btree->index);
Page newlpage;
/* newlpage is a pointer to memory page, it does'nt assosiates with buffer,
stack->buffer shoud be untouched */
newlpage = btree->splitPage( btree, stack->buffer, rbuffer, stack->off, &rdata );
/*
* newlpage is a pointer to memory page, it does'nt assosiates
* with buffer, stack->buffer shoud be untouched
*/
newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, &rdata);
((ginxlogSplit*)(rdata->data))->rootBlkno = rootBlkno;
((ginxlogSplit *) (rdata->data))->rootBlkno = rootBlkno;
parent = stack->parent;
if ( parent == NULL ) {
/* split root, so we need to allocate new left page and
place pointer on root to left and right page */
Buffer lbuffer = GinNewBuffer(btree->index);
if (parent == NULL)
{
/*
* split root, so we need to allocate new left page and place
* pointer on root to left and right page
*/
Buffer lbuffer = GinNewBuffer(btree->index);
((ginxlogSplit*)(rdata->data))->isRootSplit = TRUE;
((ginxlogSplit*)(rdata->data))->rrlink = InvalidBlockNumber;
((ginxlogSplit *) (rdata->data))->isRootSplit = TRUE;
((ginxlogSplit *) (rdata->data))->rrlink = InvalidBlockNumber;
page = BufferGetPage( stack->buffer );
lpage = BufferGetPage( lbuffer );
rpage = BufferGetPage( rbuffer );
page = BufferGetPage(stack->buffer);
lpage = BufferGetPage(lbuffer);
rpage = BufferGetPage(rbuffer);
GinPageGetOpaque(rpage)->rightlink = InvalidBlockNumber;
GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
((ginxlogSplit*)(rdata->data))->lblkno = BufferGetBlockNumber(lbuffer);
((ginxlogSplit *) (rdata->data))->lblkno = BufferGetBlockNumber(lbuffer);
START_CRIT_SECTION();
GinInitBuffer( stack->buffer, GinPageGetOpaque(newlpage)->flags & ~GIN_LEAF );
PageRestoreTempPage( newlpage, lpage );
btree->fillRoot( btree, stack->buffer, lbuffer, rbuffer );
if (!btree->index->rd_istemp) {
XLogRecPtr recptr;
GinInitBuffer(stack->buffer, GinPageGetOpaque(newlpage)->flags & ~GIN_LEAF);
PageRestoreTempPage(newlpage, lpage);
btree->fillRoot(btree, stack->buffer, lbuffer, rbuffer);
if (!btree->index->rd_istemp)
{
XLogRecPtr recptr;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
PageSetLSN(page, recptr);
@@ -324,23 +372,26 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack) {
UnlockReleaseBuffer(stack->buffer);
END_CRIT_SECTION();
return;
} else {
/* split non-root page */
((ginxlogSplit*)(rdata->data))->isRootSplit = FALSE;
((ginxlogSplit*)(rdata->data))->rrlink = savedRightLink;
lpage = BufferGetPage( stack->buffer );
rpage = BufferGetPage( rbuffer );
return;
}
else
{
/* split non-root page */
((ginxlogSplit *) (rdata->data))->isRootSplit = FALSE;
((ginxlogSplit *) (rdata->data))->rrlink = savedRightLink;
lpage = BufferGetPage(stack->buffer);
rpage = BufferGetPage(rbuffer);
GinPageGetOpaque(rpage)->rightlink = savedRightLink;
GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
START_CRIT_SECTION();
PageRestoreTempPage( newlpage, lpage );
if (!btree->index->rd_istemp) {
XLogRecPtr recptr;
PageRestoreTempPage(newlpage, lpage);
if (!btree->index->rd_istemp)
{
XLogRecPtr recptr;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
PageSetLSN(lpage, recptr);
@@ -350,7 +401,7 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack) {
}
MarkBufferDirty(rbuffer);
UnlockReleaseBuffer(rbuffer);
MarkBufferDirty( stack->buffer );
MarkBufferDirty(stack->buffer);
END_CRIT_SECTION();
}
}
@@ -361,31 +412,33 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack) {
LockBuffer(parent->buffer, GIN_EXCLUSIVE);
/* move right if it's needed */
page = BufferGetPage( parent->buffer );
while( (parent->off=btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber ) {
page = BufferGetPage(parent->buffer);
while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber)
{
BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;
LockBuffer(parent->buffer, GIN_UNLOCK);
if ( rightlink==InvalidBlockNumber ) {
/* rightmost page, but we don't find parent, we should
use plain search... */
if (rightlink == InvalidBlockNumber)
{
/*
* rightmost page, but we don't find parent, we should use
* plain search...
*/
findParents(btree, stack, rootBlkno);
parent=stack->parent;
page = BufferGetPage( parent->buffer );
parent = stack->parent;
page = BufferGetPage(parent->buffer);
break;
}
parent->blkno = rightlink;
parent->buffer = ReleaseAndReadBuffer(parent->buffer, btree->index, parent->blkno);
LockBuffer(parent->buffer, GIN_EXCLUSIVE);
page = BufferGetPage( parent->buffer );
LockBuffer(parent->buffer, GIN_EXCLUSIVE);
page = BufferGetPage(parent->buffer);
}
UnlockReleaseBuffer(stack->buffer);
pfree( stack );
pfree(stack);
stack = parent;
}
}

src/backend/access/gin/ginbulk.c

@@ -1,14 +1,14 @@
/*-------------------------------------------------------------------------
*
* ginbulk.c
* routines for fast build of inverted index
* routines for fast build of inverted index
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.5 2006/08/29 14:05:44 teodor Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.6 2006/10/04 00:29:47 momjian Exp $
*-------------------------------------------------------------------------
*/
@@ -22,7 +22,8 @@
#define DEF_NPTR 4
void
ginInitBA(BuildAccumulator *accum) {
ginInitBA(BuildAccumulator *accum)
{
accum->maxdepth = 1;
accum->stackpos = 0;
accum->entries = NULL;
@@ -31,11 +32,13 @@ ginInitBA(BuildAccumulator *accum) {
accum->entryallocator = NULL;
}
static EntryAccumulator*
EAAllocate( BuildAccumulator *accum ) {
if ( accum->entryallocator == NULL || accum->length>=DEF_NENTRY ) {
accum->entryallocator = palloc(sizeof(EntryAccumulator)*DEF_NENTRY);
accum->allocatedMemory += sizeof(EntryAccumulator)*DEF_NENTRY;
static EntryAccumulator *
EAAllocate(BuildAccumulator *accum)
{
if (accum->entryallocator == NULL || accum->length >= DEF_NENTRY)
{
accum->entryallocator = palloc(sizeof(EntryAccumulator) * DEF_NENTRY);
accum->allocatedMemory += sizeof(EntryAccumulator) * DEF_NENTRY;
accum->length = 0;
}
@@ -48,24 +51,27 @@ EAAllocate( BuildAccumulator *accum ) {
* item pointer are ordered
*/
static void
ginInsertData(BuildAccumulator *accum, EntryAccumulator *entry, ItemPointer heapptr) {
if ( entry->number >= entry->length ) {
ginInsertData(BuildAccumulator *accum, EntryAccumulator *entry, ItemPointer heapptr)
{
if (entry->number >= entry->length)
{
accum->allocatedMemory += sizeof(ItemPointerData) * entry->length;
entry->length *= 2;
entry->list = (ItemPointerData*)repalloc(entry->list,
sizeof(ItemPointerData)*entry->length);
entry->list = (ItemPointerData *) repalloc(entry->list,
sizeof(ItemPointerData) * entry->length);
}
if ( entry->shouldSort==FALSE ) {
int res = compareItemPointers( entry->list + entry->number - 1, heapptr );
if (entry->shouldSort == FALSE)
{
int res = compareItemPointers(entry->list + entry->number - 1, heapptr);
Assert( res != 0 );
Assert(res != 0);
if ( res > 0 )
entry->shouldSort=TRUE;
if (res > 0)
entry->shouldSort = TRUE;
}
entry->list[ entry->number ] = *heapptr;
entry->list[entry->number] = *heapptr;
entry->number++;
}
@@ -74,7 +80,8 @@ ginInsertData(BuildAccumulator *accum, EntryAccumulator *entry, ItemPointer heap
* to avoid computing the datum size twice.
*/
static Datum
getDatumCopy(BuildAccumulator *accum, Datum value) {
getDatumCopy(BuildAccumulator *accum, Datum value)
{
Form_pg_attribute *att = accum->ginstate->tupdesc->attrs;
Datum res;
@@ -100,51 +107,58 @@ getDatumCopy(BuildAccumulator *accum, Datum value) {
* Find/store one entry from indexed value.
*/
static void
ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, Datum entry) {
EntryAccumulator *ea = accum->entries, *pea = NULL;
int res = 0;
uint32 depth = 1;
ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, Datum entry)
{
EntryAccumulator *ea = accum->entries,
*pea = NULL;
int res = 0;
uint32 depth = 1;
while( ea ) {
while (ea)
{
res = compareEntries(accum->ginstate, entry, ea->value);
if ( res == 0 )
break; /* found */
else {
if (res == 0)
break; /* found */
else
{
pea = ea;
if ( res < 0 )
if (res < 0)
ea = ea->left;
else
ea = ea->right;
}
depth++;
}
if ( depth > accum->maxdepth )
if (depth > accum->maxdepth)
accum->maxdepth = depth;
if ( ea == NULL ) {
if (ea == NULL)
{
ea = EAAllocate(accum);
ea->left = ea->right = NULL;
ea->value = getDatumCopy(accum, entry);
ea->value = getDatumCopy(accum, entry);
ea->length = DEF_NPTR;
ea->number = 1;
ea->shouldSort = FALSE;
ea->list = (ItemPointerData*)palloc(sizeof(ItemPointerData)*DEF_NPTR);
ea->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR);
ea->list[0] = *heapptr;
accum->allocatedMemory += sizeof(ItemPointerData)*DEF_NPTR;
accum->allocatedMemory += sizeof(ItemPointerData) * DEF_NPTR;
if ( pea == NULL )
if (pea == NULL)
accum->entries = ea;
else {
Assert( res != 0 );
if ( res < 0 )
else
{
Assert(res != 0);
if (res < 0)
pea->left = ea;
else
pea->right = ea;
}
} else
ginInsertData( accum, ea, heapptr );
}
else
ginInsertData(accum, ea, heapptr);
}
/*
@@ -152,22 +166,23 @@ ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, Datum entry) {
* then calls itself for each parts
*/
static void
ginChooseElem(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry,
uint32 low, uint32 high, uint32 offset) {
uint32 pos;
uint32 middle = (low+high)>>1;
ginChooseElem(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry,
uint32 low, uint32 high, uint32 offset)
{
uint32 pos;
uint32 middle = (low + high) >> 1;
pos = (low+middle)>>1;
if ( low!=middle && pos>=offset && pos-offset < nentry )
ginInsertEntry( accum, heapptr, entries[ pos-offset ]);
pos = (high+middle+1)>>1;
if ( middle+1 != high && pos>=offset && pos-offset < nentry )
ginInsertEntry( accum, heapptr, entries[ pos-offset ]);
pos = (low + middle) >> 1;
if (low != middle && pos >= offset && pos - offset < nentry)
ginInsertEntry(accum, heapptr, entries[pos - offset]);
pos = (high + middle + 1) >> 1;
if (middle + 1 != high && pos >= offset && pos - offset < nentry)
ginInsertEntry(accum, heapptr, entries[pos - offset]);
if ( low!=middle )
ginChooseElem(accum, heapptr, entries, nentry, low, middle, offset );
if ( high!=middle+1 )
ginChooseElem(accum, heapptr, entries, nentry, middle+1, high, offset );
if (low != middle)
ginChooseElem(accum, heapptr, entries, nentry, low, middle, offset);
if (high != middle + 1)
ginChooseElem(accum, heapptr, entries, nentry, middle + 1, high, offset);
}
/*
@@ -176,56 +191,71 @@ ginChooseElem(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint
* next middle on left part and middle of right part.
*/
void
ginInsertRecordBA( BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry ) {
uint32 i, nbit=0, offset;
ginInsertRecordBA(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry)
{
uint32 i,
nbit = 0,
offset;
if (nentry==0)
if (nentry == 0)
return;
i=nentry-1;
for(;i>0;i>>=1) nbit++;
i = nentry - 1;
for (; i > 0; i >>= 1)
nbit++;
nbit = 1<<nbit;
offset = (nbit-nentry)/2;
nbit = 1 << nbit;
offset = (nbit - nentry) / 2;
ginInsertEntry( accum, heapptr, entries[ (nbit>>1)-offset ]);
ginInsertEntry(accum, heapptr, entries[(nbit >> 1) - offset]);
ginChooseElem(accum, heapptr, entries, nentry, 0, nbit, offset);
}
static int
qsortCompareItemPointers( const void *a, const void *b ) {
int res = compareItemPointers( (ItemPointer)a, (ItemPointer)b );
Assert( res!=0 );
static int
qsortCompareItemPointers(const void *a, const void *b)
{
int res = compareItemPointers((ItemPointer) a, (ItemPointer) b);
Assert(res != 0);
return res;
}
/*
* walk on binary tree and returns ordered nodes
*/
static EntryAccumulator*
walkTree( BuildAccumulator *accum ) {
EntryAccumulator *entry = accum->stack[ accum->stackpos ];
* walk on binary tree and returns ordered nodes
*/
static EntryAccumulator *
walkTree(BuildAccumulator *accum)
{
EntryAccumulator *entry = accum->stack[accum->stackpos];
if ( entry->list != NULL ) {
if (entry->list != NULL)
{
/* return entry itself: we already was at left sublink */
return entry;
} else if ( entry->right && entry->right != accum->stack[ accum->stackpos+1 ] ) {
}
else if (entry->right && entry->right != accum->stack[accum->stackpos + 1])
{
/* go on right sublink */
accum->stackpos++;
entry = entry->right;
/* find most-left value */
for(;;) {
accum->stack[ accum->stackpos ] = entry;
if ( entry->left ) {
for (;;)
{
accum->stack[accum->stackpos] = entry;
if (entry->left)
{
accum->stackpos++;
entry = entry->left;
} else
}
else
break;
}
} else {
}
else
{
/* we already return all left subtree, itself and right subtree */
if ( accum->stackpos == 0 )
if (accum->stackpos == 0)
return 0;
accum->stackpos--;
return walkTree(accum);
@@ -234,47 +264,53 @@ walkTree( BuildAccumulator *accum ) {
return entry;
}
ItemPointerData*
ginGetEntry(BuildAccumulator *accum, Datum *value, uint32 *n) {
EntryAccumulator *entry;
ItemPointerData *
ginGetEntry(BuildAccumulator *accum, Datum *value, uint32 *n)
{
EntryAccumulator *entry;
ItemPointerData *list;
if ( accum->stack == NULL ) {
if (accum->stack == NULL)
{
/* first call */
accum->stack = palloc0(sizeof(EntryAccumulator*)*(accum->maxdepth+1));
accum->stack = palloc0(sizeof(EntryAccumulator *) * (accum->maxdepth + 1));
entry = accum->entries;
if ( entry == NULL )
if (entry == NULL)
return NULL;
/* find most-left value */
for(;;) {
accum->stack[ accum->stackpos ] = entry;
if ( entry->left ) {
for (;;)
{
accum->stack[accum->stackpos] = entry;
if (entry->left)
{
accum->stackpos++;
entry = entry->left;
} else
}
else
break;
}
} else {
pfree( accum->stack[ accum->stackpos ]->list );
accum->stack[ accum->stackpos ]->list = NULL;
entry = walkTree( accum );
}
else
{
pfree(accum->stack[accum->stackpos]->list);
accum->stack[accum->stackpos]->list = NULL;
entry = walkTree(accum);
}
if ( entry == NULL )
if (entry == NULL)
return NULL;
*n = entry->number;
*value = entry->value;
list = entry->list;
*n = entry->number;
*value = entry->value;
list = entry->list;
Assert(list != NULL);
if ( entry->shouldSort && entry->number > 1 )
if (entry->shouldSort && entry->number > 1)
qsort(list, *n, sizeof(ItemPointerData), qsortCompareItemPointers);
return list;
}

src/backend/access/gin/gindatapage.c

@@ -1,14 +1,14 @@
/*-------------------------------------------------------------------------
*
* gindatapage.c
* page utilities routines for the postgres inverted index access method.
* page utilities routines for the postgres inverted index access method.
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.3 2006/07/16 00:52:05 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.4 2006/10/04 00:29:47 momjian Exp $
*-------------------------------------------------------------------------
*/
@@ -16,50 +16,56 @@
#include "access/gin.h"
int
compareItemPointers( ItemPointer a, ItemPointer b ) {
if ( GinItemPointerGetBlockNumber(a) == GinItemPointerGetBlockNumber(b) ) {
if ( GinItemPointerGetOffsetNumber(a) == GinItemPointerGetOffsetNumber(b) )
compareItemPointers(ItemPointer a, ItemPointer b)
{
if (GinItemPointerGetBlockNumber(a) == GinItemPointerGetBlockNumber(b))
{
if (GinItemPointerGetOffsetNumber(a) == GinItemPointerGetOffsetNumber(b))
return 0;
return ( GinItemPointerGetOffsetNumber(a) > GinItemPointerGetOffsetNumber(b) ) ? 1 : -1;
}
return (GinItemPointerGetOffsetNumber(a) > GinItemPointerGetOffsetNumber(b)) ? 1 : -1;
}
return ( GinItemPointerGetBlockNumber(a) > GinItemPointerGetBlockNumber(b) ) ? 1 : -1;
return (GinItemPointerGetBlockNumber(a) > GinItemPointerGetBlockNumber(b)) ? 1 : -1;
}
/*
* Merge two ordered array of itempointer
*/
void
MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb) {
void
MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb)
{
ItemPointerData *dptr = dst;
ItemPointerData *aptr = a, *bptr = b;
ItemPointerData *aptr = a,
*bptr = b;
while( aptr - a < na && bptr - b < nb ) {
if ( compareItemPointers(aptr, bptr) > 0 )
while (aptr - a < na && bptr - b < nb)
{
if (compareItemPointers(aptr, bptr) > 0)
*dptr++ = *bptr++;
else
*dptr++ = *aptr++;
}
while( aptr - a < na )
while (aptr - a < na)
*dptr++ = *aptr++;
while( bptr - b < nb )
while (bptr - b < nb)
*dptr++ = *bptr++;
}
/*
* Checks, should we move to right link...
* Checks, should we move to right link...
* Compares inserting itemp pointer with right bound of current page
*/
static bool
dataIsMoveRight(GinBtree btree, Page page) {
ItemPointer iptr = GinDataPageGetRightBound(page);
dataIsMoveRight(GinBtree btree, Page page)
{
ItemPointer iptr = GinDataPageGetRightBound(page);
if ( GinPageRightMost(page) )
return FALSE;
if (GinPageRightMost(page))
return FALSE;
return ( compareItemPointers( btree->items + btree->curitem, iptr ) > 0 ) ? TRUE : FALSE;
return (compareItemPointers(btree->items + btree->curitem, iptr) > 0) ? TRUE : FALSE;
}
/*
@@ -67,94 +73,113 @@ dataIsMoveRight(GinBtree btree, Page page) {
* page correctly choosen and searching value SHOULD be on page
*/
static BlockNumber
dataLocateItem(GinBtree btree, GinBtreeStack *stack) {
OffsetNumber low, high, maxoff;
PostingItem *pitem=NULL;
int result;
Page page = BufferGetPage( stack->buffer );
dataLocateItem(GinBtree btree, GinBtreeStack *stack)
{
OffsetNumber low,
high,
maxoff;
PostingItem *pitem = NULL;
int result;
Page page = BufferGetPage(stack->buffer);
Assert( !GinPageIsLeaf(page) );
Assert( GinPageIsData(page) );
Assert(!GinPageIsLeaf(page));
Assert(GinPageIsData(page));
if ( btree->fullScan ) {
if (btree->fullScan)
{
stack->off = FirstOffsetNumber;
stack->predictNumber *= GinPageGetOpaque(page)->maxoff;
return btree->getLeftMostPage(btree, page);
}
low = FirstOffsetNumber;
maxoff = high = GinPageGetOpaque(page)->maxoff;
Assert( high >= low );
maxoff = high = GinPageGetOpaque(page)->maxoff;
Assert(high >= low);
high++;
while (high > low) {
while (high > low)
{
OffsetNumber mid = low + ((high - low) / 2);
pitem = (PostingItem*)GinDataPageGetItem(page,mid);
if ( mid == maxoff )
/* Right infinity, page already correctly choosen
with a help of dataIsMoveRight */
pitem = (PostingItem *) GinDataPageGetItem(page, mid);
if (mid == maxoff)
/*
* Right infinity, page already correctly choosen with a help of
* dataIsMoveRight
*/
result = -1;
else {
pitem = (PostingItem*)GinDataPageGetItem(page,mid);
result = compareItemPointers( btree->items + btree->curitem, &( pitem->key ) );
else
{
pitem = (PostingItem *) GinDataPageGetItem(page, mid);
result = compareItemPointers(btree->items + btree->curitem, &(pitem->key));
}
if ( result == 0 ) {
if (result == 0)
{
stack->off = mid;
return PostingItemGetBlockNumber(pitem);
} else if ( result > 0 )
}
else if (result > 0)
low = mid + 1;
else
high = mid;
}
Assert( high>=FirstOffsetNumber && high <= maxoff );
Assert(high >= FirstOffsetNumber && high <= maxoff);
stack->off = high;
pitem = (PostingItem*)GinDataPageGetItem(page,high);
pitem = (PostingItem *) GinDataPageGetItem(page, high);
return PostingItemGetBlockNumber(pitem);
}
/*
/*
* Searches correct position for value on leaf page.
* Page should be corrrectly choosen.
* Page should be corrrectly choosen.
* Returns true if value found on page.
*/
static bool
dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack) {
Page page = BufferGetPage( stack->buffer );
OffsetNumber low, high;
int result;
dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack)
{
Page page = BufferGetPage(stack->buffer);
OffsetNumber low,
high;
int result;
Assert( GinPageIsLeaf(page) );
Assert( GinPageIsData(page) );
Assert(GinPageIsLeaf(page));
Assert(GinPageIsData(page));
if ( btree->fullScan ) {
if (btree->fullScan)
{
stack->off = FirstOffsetNumber;
return TRUE;
}
low=FirstOffsetNumber;
low = FirstOffsetNumber;
high = GinPageGetOpaque(page)->maxoff;
if ( high < low ) {
if (high < low)
{
stack->off = FirstOffsetNumber;
return false;
}
high++;
while (high > low) {
while (high > low)
{
OffsetNumber mid = low + ((high - low) / 2);
result = compareItemPointers( btree->items + btree->curitem, (ItemPointer)GinDataPageGetItem(page,mid) );
result = compareItemPointers(btree->items + btree->curitem, (ItemPointer) GinDataPageGetItem(page, mid));
if ( result == 0 ) {
if (result == 0)
{
stack->off = mid;
return true;
} else if ( result > 0 )
}
else if (result > 0)
low = mid + 1;
else
high = mid;
@@ -169,34 +194,41 @@ dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack) {
* offset of PostingItem
*/
static OffsetNumber
dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) {
OffsetNumber i, maxoff = GinPageGetOpaque(page)->maxoff;
dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
{
OffsetNumber i,
maxoff = GinPageGetOpaque(page)->maxoff;
PostingItem *pitem;
Assert( !GinPageIsLeaf(page) );
Assert( GinPageIsData(page) );
Assert(!GinPageIsLeaf(page));
Assert(GinPageIsData(page));
/* if page isn't changed, we returns storedOff */
if ( storedOff>= FirstOffsetNumber && storedOff<=maxoff) {
pitem = (PostingItem*)GinDataPageGetItem(page, storedOff);
if ( PostingItemGetBlockNumber(pitem) == blkno )
if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
{
pitem = (PostingItem *) GinDataPageGetItem(page, storedOff);
if (PostingItemGetBlockNumber(pitem) == blkno)
return storedOff;
/* we hope, that needed pointer goes to right. It's true
if there wasn't a deletion */
for( i=storedOff+1 ; i <= maxoff ; i++ ) {
pitem = (PostingItem*)GinDataPageGetItem(page, i);
if ( PostingItemGetBlockNumber(pitem) == blkno )
/*
* we hope, that needed pointer goes to right. It's true if there
* wasn't a deletion
*/
for (i = storedOff + 1; i <= maxoff; i++)
{
pitem = (PostingItem *) GinDataPageGetItem(page, i);
if (PostingItemGetBlockNumber(pitem) == blkno)
return i;
}
maxoff = storedOff-1;
maxoff = storedOff - 1;
}
/* last chance */
for( i=FirstOffsetNumber; i <= maxoff ; i++ ) {
pitem = (PostingItem*)GinDataPageGetItem(page, i);
if ( PostingItemGetBlockNumber(pitem) == blkno )
for (i = FirstOffsetNumber; i <= maxoff; i++)
{
pitem = (PostingItem *) GinDataPageGetItem(page, i);
if (PostingItemGetBlockNumber(pitem) == blkno)
return i;
}
@@ -207,14 +239,15 @@ dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber stor
* retunrs blkno of lefmost child
*/
static BlockNumber
dataGetLeftMostPage(GinBtree btree, Page page) {
dataGetLeftMostPage(GinBtree btree, Page page)
{
PostingItem *pitem;
Assert( !GinPageIsLeaf(page) );
Assert( GinPageIsData(page) );
Assert( GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber );
Assert(!GinPageIsLeaf(page));
Assert(GinPageIsData(page));
Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber);
pitem = (PostingItem*)GinDataPageGetItem(page, FirstOffsetNumber);
pitem = (PostingItem *) GinDataPageGetItem(page, FirstOffsetNumber);
return PostingItemGetBlockNumber(pitem);
}
@@ -223,18 +256,22 @@ dataGetLeftMostPage(GinBtree btree, Page page) {
* correct value! depending on leaf or non-leaf page
*/
void
GinDataPageAddItem( Page page, void *data, OffsetNumber offset ) {
GinDataPageAddItem(Page page, void *data, OffsetNumber offset)
{
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
char *ptr;
char *ptr;
if ( offset == InvalidOffsetNumber ) {
ptr = GinDataPageGetItem(page,maxoff+1);
} else {
ptr = GinDataPageGetItem(page,offset);
if ( maxoff+1-offset != 0 )
memmove( ptr+GinSizeOfItem(page), ptr, (maxoff-offset+1) * GinSizeOfItem(page) );
if (offset == InvalidOffsetNumber)
{
ptr = GinDataPageGetItem(page, maxoff + 1);
}
memcpy( ptr, data, GinSizeOfItem(page) );
else
{
ptr = GinDataPageGetItem(page, offset);
if (maxoff + 1 - offset != 0)
memmove(ptr + GinSizeOfItem(page), ptr, (maxoff - offset + 1) * GinSizeOfItem(page));
}
memcpy(ptr, data, GinSizeOfItem(page));
GinPageGetOpaque(page)->maxoff++;
}
@@ -243,15 +280,16 @@ GinDataPageAddItem( Page page, void *data, OffsetNumber offset ) {
* Deletes posting item from non-leaf page
*/
void
PageDeletePostingItem(Page page, OffsetNumber offset) {
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
PageDeletePostingItem(Page page, OffsetNumber offset)
{
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
Assert( !GinPageIsLeaf(page) );
Assert( offset>=FirstOffsetNumber && offset <= maxoff );
Assert(!GinPageIsLeaf(page));
Assert(offset >= FirstOffsetNumber && offset <= maxoff);
if ( offset != maxoff )
memmove( GinDataPageGetItem(page,offset), GinDataPageGetItem(page,offset+1),
sizeof(PostingItem) * (maxoff-offset) );
if (offset != maxoff)
memmove(GinDataPageGetItem(page, offset), GinDataPageGetItem(page, offset + 1),
sizeof(PostingItem) * (maxoff - offset));
GinPageGetOpaque(page)->maxoff--;
}
@@ -261,19 +299,24 @@ PageDeletePostingItem(Page page, OffsetNumber offset) {
* item pointer never deletes!
*/
static bool
dataIsEnoughSpace( GinBtree btree, Buffer buf, OffsetNumber off ) {
Page page = BufferGetPage(buf);
dataIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
{
Page page = BufferGetPage(buf);
Assert( GinPageIsData(page) );
Assert( !btree->isDelete );
Assert(GinPageIsData(page));
Assert(!btree->isDelete);
if ( GinPageIsLeaf(page) ) {
if ( GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff ) {
if ( (btree->nitem - btree->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page) )
if (GinPageIsLeaf(page))
{
if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
{
if ((btree->nitem - btree->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
return true;
} else if ( sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page) )
}
else if (sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
return true;
} else if ( sizeof(PostingItem) <= GinDataPageGetFreeSpace(page) )
}
else if (sizeof(PostingItem) <= GinDataPageGetFreeSpace(page))
return true;
return false;
@@ -285,14 +328,17 @@ dataIsEnoughSpace( GinBtree btree, Buffer buf, OffsetNumber off ) {
* item pointer never deletes!
*/
static BlockNumber
dataPrepareData( GinBtree btree, Page page, OffsetNumber off) {
dataPrepareData(GinBtree btree, Page page, OffsetNumber off)
{
BlockNumber ret = InvalidBlockNumber;
Assert( GinPageIsData(page) );
Assert(GinPageIsData(page));
if ( !GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber ) {
PostingItem *pitem = (PostingItem*)GinDataPageGetItem(page,off);
PostingItemSetBlockNumber( pitem, btree->rightblkno );
if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
{
PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, off);
PostingItemSetBlockNumber(pitem, btree->rightblkno);
ret = btree->rightblkno;
}
@@ -301,24 +347,25 @@ dataPrepareData( GinBtree btree, Page page, OffsetNumber off) {
return ret;
}
/*
/*
* Places keys to page and fills WAL record. In case leaf page and
* build mode puts all ItemPointers to page.
*/
static void
dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata) {
Page page = BufferGetPage(buf);
dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
{
Page page = BufferGetPage(buf);
static XLogRecData rdata[3];
int sizeofitem = GinSizeOfItem(page);
static ginxlogInsert data;
int sizeofitem = GinSizeOfItem(page);
static ginxlogInsert data;
*prdata = rdata;
Assert( GinPageIsData(page) );
Assert(GinPageIsData(page));
data.updateBlkno = dataPrepareData( btree, page, off );
data.updateBlkno = dataPrepareData(btree, page, off);
data.node = btree->index->rd_node;
data.blkno = BufferGetBlockNumber( buf );
data.blkno = BufferGetBlockNumber(buf);
data.offset = off;
data.nitem = 1;
data.isDelete = FALSE;
@@ -337,109 +384,124 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prda
rdata[1].next = &rdata[2];
rdata[2].buffer = InvalidBuffer;
rdata[2].data = (GinPageIsLeaf(page)) ? ((char*)(btree->items+btree->curitem)) : ((char*)&(btree->pitem));
rdata[2].data = (GinPageIsLeaf(page)) ? ((char *) (btree->items + btree->curitem)) : ((char *) &(btree->pitem));
rdata[2].len = sizeofitem;
rdata[2].next = NULL;
if ( GinPageIsLeaf(page) ) {
if ( GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff ) {
if (GinPageIsLeaf(page))
{
if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
{
/* usually, create index... */
uint32 savedPos = btree->curitem;
uint32 savedPos = btree->curitem;
while( btree->curitem < btree->nitem ) {
GinDataPageAddItem(page, btree->items+btree->curitem, off);
while (btree->curitem < btree->nitem)
{
GinDataPageAddItem(page, btree->items + btree->curitem, off);
off++;
btree->curitem++;
}
data.nitem = btree->curitem-savedPos;
data.nitem = btree->curitem - savedPos;
rdata[2].len = sizeofitem * data.nitem;
} else {
GinDataPageAddItem(page, btree->items+btree->curitem, off);
}
else
{
GinDataPageAddItem(page, btree->items + btree->curitem, off);
btree->curitem++;
}
} else
GinDataPageAddItem(page, &(btree->pitem), off);
}
else
GinDataPageAddItem(page, &(btree->pitem), off);
}
/*
* split page and fills WAL record. original buffer(lbuf) leaves untouched,
* returns shadow page of lbuf filled new data. In leaf page and build mode puts all
* returns shadow page of lbuf filled new data. In leaf page and build mode puts all
* ItemPointers to pages. Also, in build mode splits data by way to full fulled
* left page
*/
static Page
dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata) {
dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
{
static ginxlogSplit data;
static XLogRecData rdata[4];
static char vector[2*BLCKSZ];
char *ptr;
static char vector[2 * BLCKSZ];
char *ptr;
OffsetNumber separator;
ItemPointer bound;
Page lpage = GinPageGetCopyPage( BufferGetPage( lbuf ) );
ItemPointerData oldbound = *GinDataPageGetRightBound(lpage);
int sizeofitem = GinSizeOfItem(lpage);
ItemPointer bound;
Page lpage = GinPageGetCopyPage(BufferGetPage(lbuf));
ItemPointerData oldbound = *GinDataPageGetRightBound(lpage);
int sizeofitem = GinSizeOfItem(lpage);
OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff;
Page rpage = BufferGetPage( rbuf );
Size pageSize = PageGetPageSize( lpage );
Size freeSpace;
uint32 nCopied = 1;
Page rpage = BufferGetPage(rbuf);
Size pageSize = PageGetPageSize(lpage);
Size freeSpace;
uint32 nCopied = 1;
GinInitPage( rpage, GinPageGetOpaque(lpage)->flags, pageSize );
GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
freeSpace = GinDataPageGetFreeSpace(rpage);
*prdata = rdata;
data.leftChildBlkno = ( GinPageIsLeaf(lpage) ) ?
InvalidOffsetNumber : PostingItemGetBlockNumber( &(btree->pitem) );
data.updateBlkno = dataPrepareData( btree, lpage, off );
data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
InvalidOffsetNumber : PostingItemGetBlockNumber(&(btree->pitem));
data.updateBlkno = dataPrepareData(btree, lpage, off);
memcpy(vector, GinDataPageGetItem(lpage, FirstOffsetNumber),
maxoff*sizeofitem);
memcpy(vector, GinDataPageGetItem(lpage, FirstOffsetNumber),
maxoff * sizeofitem);
if ( GinPageIsLeaf(lpage) && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff ) {
if (GinPageIsLeaf(lpage) && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff)
{
nCopied = 0;
while( btree->curitem < btree->nitem && maxoff*sizeof(ItemPointerData) < 2*(freeSpace - sizeof(ItemPointerData)) ) {
memcpy( vector + maxoff*sizeof(ItemPointerData), btree->items+btree->curitem,
sizeof(ItemPointerData) );
while (btree->curitem < btree->nitem && maxoff * sizeof(ItemPointerData) < 2 * (freeSpace - sizeof(ItemPointerData)))
{
memcpy(vector + maxoff * sizeof(ItemPointerData), btree->items + btree->curitem,
sizeof(ItemPointerData));
maxoff++;
nCopied++;
btree->curitem++;
}
} else {
ptr = vector + (off-1)*sizeofitem;
if ( maxoff+1-off != 0 )
memmove( ptr+sizeofitem, ptr, (maxoff-off+1) * sizeofitem );
if ( GinPageIsLeaf(lpage) ) {
memcpy(ptr, btree->items+btree->curitem, sizeofitem );
}
else
{
ptr = vector + (off - 1) * sizeofitem;
if (maxoff + 1 - off != 0)
memmove(ptr + sizeofitem, ptr, (maxoff - off + 1) * sizeofitem);
if (GinPageIsLeaf(lpage))
{
memcpy(ptr, btree->items + btree->curitem, sizeofitem);
btree->curitem++;
} else
memcpy(ptr, &(btree->pitem), sizeofitem );
}
else
memcpy(ptr, &(btree->pitem), sizeofitem);
maxoff++;
}
/* we suppose that during index creation table scaned from
begin to end, so ItemPointers are monotonically increased.. */
if ( btree->isBuild && GinPageRightMost(lpage) )
separator=freeSpace/sizeofitem;
/*
* we suppose that during index creation table scaned from begin to end,
* so ItemPointers are monotonically increased..
*/
if (btree->isBuild && GinPageRightMost(lpage))
separator = freeSpace / sizeofitem;
else
separator=maxoff/2;
separator = maxoff / 2;
GinInitPage( rpage, GinPageGetOpaque(lpage)->flags, pageSize );
GinInitPage( lpage, GinPageGetOpaque(rpage)->flags, pageSize );
GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
memcpy( GinDataPageGetItem(lpage, FirstOffsetNumber), vector, separator * sizeofitem );
memcpy(GinDataPageGetItem(lpage, FirstOffsetNumber), vector, separator * sizeofitem);
GinPageGetOpaque(lpage)->maxoff = separator;
memcpy( GinDataPageGetItem(rpage, FirstOffsetNumber),
vector + separator * sizeofitem, (maxoff-separator) * sizeofitem );
GinPageGetOpaque(rpage)->maxoff = maxoff-separator;
memcpy(GinDataPageGetItem(rpage, FirstOffsetNumber),
vector + separator * sizeofitem, (maxoff - separator) * sizeofitem);
GinPageGetOpaque(rpage)->maxoff = maxoff - separator;
PostingItemSetBlockNumber( &(btree->pitem), BufferGetBlockNumber(lbuf) );
if ( GinPageIsLeaf(lpage) )
btree->pitem.key = *(ItemPointerData*)GinDataPageGetItem(lpage,
GinPageGetOpaque(lpage)->maxoff);
else
btree->pitem.key = ((PostingItem*)GinDataPageGetItem(lpage,
GinPageGetOpaque(lpage)->maxoff))->key;
PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf));
if (GinPageIsLeaf(lpage))
btree->pitem.key = *(ItemPointerData *) GinDataPageGetItem(lpage,
GinPageGetOpaque(lpage)->maxoff);
else
btree->pitem.key = ((PostingItem *) GinDataPageGetItem(lpage,
GinPageGetOpaque(lpage)->maxoff))->key;
btree->rightblkno = BufferGetBlockNumber(rbuf);
/* set up right bound for left page */
@@ -452,8 +514,8 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRe
data.node = btree->index->rd_node;
data.rootBlkno = InvalidBlockNumber;
data.lblkno = BufferGetBlockNumber( lbuf );
data.rblkno = BufferGetBlockNumber( rbuf );
data.lblkno = BufferGetBlockNumber(lbuf);
data.rblkno = BufferGetBlockNumber(rbuf);
data.separator = separator;
data.nitem = maxoff;
data.isData = TRUE;
@@ -468,34 +530,37 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRe
rdata[1].buffer = InvalidBuffer;
rdata[1].data = vector;
rdata[1].len = MAXALIGN( maxoff * sizeofitem );
rdata[1].len = MAXALIGN(maxoff * sizeofitem);
rdata[1].next = NULL;
return lpage;
}
/*
* Fills new root by right bound values from child.
* Fills new root by right bound values from child.
* Also called from ginxlog, should not use btree
*/
void
dataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf) {
Page page = BufferGetPage(root),
lpage = BufferGetPage(lbuf),
rpage = BufferGetPage(rbuf);
PostingItem li, ri;
dataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
{
Page page = BufferGetPage(root),
lpage = BufferGetPage(lbuf),
rpage = BufferGetPage(rbuf);
PostingItem li,
ri;
li.key = *GinDataPageGetRightBound(lpage);
PostingItemSetBlockNumber( &li, BufferGetBlockNumber(lbuf) );
GinDataPageAddItem(page, &li, InvalidOffsetNumber );
PostingItemSetBlockNumber(&li, BufferGetBlockNumber(lbuf));
GinDataPageAddItem(page, &li, InvalidOffsetNumber);
ri.key = *GinDataPageGetRightBound(rpage);
PostingItemSetBlockNumber( &ri, BufferGetBlockNumber(rbuf) );
GinDataPageAddItem(page, &ri, InvalidOffsetNumber );
PostingItemSetBlockNumber(&ri, BufferGetBlockNumber(rbuf));
GinDataPageAddItem(page, &ri, InvalidOffsetNumber);
}
void
prepareDataScan( GinBtree btree, Relation index) {
prepareDataScan(GinBtree btree, Relation index)
{
memset(btree, 0, sizeof(GinBtreeData));
btree->index = index;
btree->isMoveRight = dataIsMoveRight;
@@ -509,21 +574,22 @@ prepareDataScan( GinBtree btree, Relation index) {
btree->fillRoot = dataFillRoot;
btree->searchMode = FALSE;
btree->isDelete = FALSE;
btree->isDelete = FALSE;
btree->fullScan = FALSE;
btree->isBuild= FALSE;
btree->isBuild = FALSE;
}
GinPostingTreeScan*
prepareScanPostingTree( Relation index, BlockNumber rootBlkno, bool searchMode) {
GinPostingTreeScan *gdi = (GinPostingTreeScan*)palloc0( sizeof(GinPostingTreeScan) );
GinPostingTreeScan *
prepareScanPostingTree(Relation index, BlockNumber rootBlkno, bool searchMode)
{
GinPostingTreeScan *gdi = (GinPostingTreeScan *) palloc0(sizeof(GinPostingTreeScan));
prepareDataScan(&gdi->btree, index);
prepareDataScan( &gdi->btree, index );
gdi->btree.searchMode = searchMode;
gdi->btree.fullScan = searchMode;
gdi->stack = ginPrepareFindLeafPage( &gdi->btree, rootBlkno );
gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno);
return gdi;
}
@@ -532,33 +598,35 @@ prepareScanPostingTree( Relation index, BlockNumber rootBlkno, bool searchMode)
* Inserts an array of item pointers; may execute several tree scans (very rare)
*/
void
insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem) {
insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem)
{
BlockNumber rootBlkno = gdi->stack->blkno;
gdi->btree.items = items;
gdi->btree.nitem = nitem;
gdi->btree.curitem = 0;
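/*
* each ginInsertValue call finishes with the descent stack discarded
* (gdi->stack is reset to NULL below), so any remaining items trigger a
* fresh descent from the root
*/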
while( gdi->btree.curitem < gdi->btree.nitem ) {
while (gdi->btree.curitem < gdi->btree.nitem)
{
if (!gdi->stack)
gdi->stack = ginPrepareFindLeafPage( &gdi->btree, rootBlkno );
gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno);
gdi->stack = ginFindLeafPage( &gdi->btree, gdi->stack );
gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
if ( gdi->btree.findItem( &(gdi->btree), gdi->stack ) )
elog(ERROR,"item pointer (%u,%d) already exists",
ItemPointerGetBlockNumber(gdi->btree.items + gdi->btree.curitem),
ItemPointerGetOffsetNumber(gdi->btree.items + gdi->btree.curitem));
if (gdi->btree.findItem(&(gdi->btree), gdi->stack))
elog(ERROR, "item pointer (%u,%d) already exists",
ItemPointerGetBlockNumber(gdi->btree.items + gdi->btree.curitem),
ItemPointerGetOffsetNumber(gdi->btree.items + gdi->btree.curitem));
ginInsertValue(&(gdi->btree), gdi->stack);
gdi->stack=NULL;
gdi->stack = NULL;
}
}
Buffer
scanBeginPostingTree( GinPostingTreeScan *gdi ) {
gdi->stack = ginFindLeafPage( &gdi->btree, gdi->stack );
scanBeginPostingTree(GinPostingTreeScan *gdi)
{
gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
return gdi->stack->buffer;
}
View File
@@ -1,14 +1,14 @@
/*-------------------------------------------------------------------------
*
* ginentrypage.c
* page utility routines for the postgres inverted index access method.
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.3 2006/07/14 14:52:16 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.4 2006/10/04 00:29:47 momjian Exp $
*-------------------------------------------------------------------------
*/
@@ -23,48 +23,52 @@
* 1) Posting list
* - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual
* - ItemPointerGetBlockNumber(&itup->t_tid) contains original
* size of tuple (without posting list).
* Macros: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n)
* - ItemPointerGetOffsetNumber(&itup->t_tid) contains number
* of elements in posting list (number of heap item pointers)
* Macros: GinGetNPosting(itup) / GinSetNPosting(itup,n)
* - After the usual part of the tuple there is a posting list
* Macro: GinGetPosting(itup)
* 2) Posting tree
* - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual
* - ItemPointerGetBlockNumber(&itup->t_tid) contains block number of
* root of posting tree
* - ItemPointerGetOffsetNumber(&itup->t_tid) contains magic number GIN_TREE_POSTING
*/
IndexTuple
GinFormTuple(GinState *ginstate, Datum key, ItemPointerData *ipd, uint32 nipd) {
bool isnull=FALSE;
GinFormTuple(GinState *ginstate, Datum key, ItemPointerData *ipd, uint32 nipd)
{
bool isnull = FALSE;
IndexTuple itup;
itup = index_form_tuple(ginstate->tupdesc, &key, &isnull);
itup = index_form_tuple(ginstate->tupdesc, &key, &isnull);
GinSetOrigSizePosting( itup, IndexTupleSize(itup) );
GinSetOrigSizePosting(itup, IndexTupleSize(itup));
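/* the original tuple size was just saved in t_tid (see the header comment), so the posting list can simply be appended after the key value below */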
if ( nipd > 0 ) {
uint32 newsize = MAXALIGN(SHORTALIGN(IndexTupleSize(itup)) + sizeof(ItemPointerData)*nipd);
if (nipd > 0)
{
uint32 newsize = MAXALIGN(SHORTALIGN(IndexTupleSize(itup)) + sizeof(ItemPointerData) * nipd);
if ( newsize >= INDEX_SIZE_MASK )
if (newsize >= INDEX_SIZE_MASK)
return NULL;
if ( newsize > TOAST_INDEX_TARGET && nipd > 1 )
if (newsize > TOAST_INDEX_TARGET && nipd > 1)
return NULL;
itup = repalloc( itup, newsize );
itup = repalloc(itup, newsize);
/* set new size */
itup->t_info &= ~INDEX_SIZE_MASK;
itup->t_info &= ~INDEX_SIZE_MASK;
itup->t_info |= newsize;
if ( ipd )
memcpy( GinGetPosting(itup), ipd, sizeof(ItemPointerData)*nipd );
GinSetNPosting(itup, nipd);
} else {
GinSetNPosting(itup, 0);
if (ipd)
memcpy(GinGetPosting(itup), ipd, sizeof(ItemPointerData) * nipd);
GinSetNPosting(itup, nipd);
}
else
{
GinSetNPosting(itup, 0);
}
return itup;
}
@@ -74,31 +78,35 @@ GinFormTuple(GinState *ginstate, Datum key, ItemPointerData *ipd, uint32 nipd) {
* so we don't use right bound, we use rightest key instead.
*/
static IndexTuple
getRightMostTuple(Page page) {
getRightMostTuple(Page page)
{
OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
return (IndexTuple) PageGetItem(page, PageGetItemId(page, maxoff));
}
Datum
ginGetHighKey(GinState *ginstate, Page page) {
IndexTuple itup;
bool isnull;
ginGetHighKey(GinState *ginstate, Page page)
{
IndexTuple itup;
bool isnull;
itup = getRightMostTuple(page);
return index_getattr(itup, FirstOffsetNumber, ginstate->tupdesc, &isnull);
return index_getattr(itup, FirstOffsetNumber, ginstate->tupdesc, &isnull);
}
static bool
entryIsMoveRight(GinBtree btree, Page page) {
Datum highkey;
static bool
entryIsMoveRight(GinBtree btree, Page page)
{
Datum highkey;
if ( GinPageRightMost(page) )
if (GinPageRightMost(page))
return FALSE;
highkey = ginGetHighKey(btree->ginstate, page);
if ( compareEntries(btree->ginstate, btree->entryValue, highkey) > 0 )
if (compareEntries(btree->ginstate, btree->entryValue, highkey) > 0)
return TRUE;
return FALSE;
@@ -109,16 +117,20 @@ entryIsMoveRight(GinBtree btree, Page page) {
* page is correctly chosen and the value being searched for SHOULD be on the page
*/
static BlockNumber
entryLocateEntry(GinBtree btree, GinBtreeStack *stack) {
OffsetNumber low, high, maxoff;
IndexTuple itup = NULL;
int result;
Page page = BufferGetPage( stack->buffer );
entryLocateEntry(GinBtree btree, GinBtreeStack *stack)
{
OffsetNumber low,
high,
maxoff;
IndexTuple itup = NULL;
int result;
Page page = BufferGetPage(stack->buffer);
Assert( !GinPageIsLeaf(page) );
Assert( !GinPageIsData(page) );
Assert(!GinPageIsLeaf(page));
Assert(!GinPageIsData(page));
if ( btree->fullScan ) {
if (btree->fullScan)
{
stack->off = FirstOffsetNumber;
stack->predictNumber *= PageGetMaxOffsetNumber(page);
return btree->getLeftMostPage(btree, page);
@@ -126,39 +138,43 @@ entryLocateEntry(GinBtree btree, GinBtreeStack *stack) {
low = FirstOffsetNumber;
maxoff = high = PageGetMaxOffsetNumber(page);
Assert( high >= low );
Assert(high >= low);
high++;
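/* binary search over the downlinks; the last item on a rightmost page is treated as +infinity */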
while (high > low) {
while (high > low)
{
OffsetNumber mid = low + ((high - low) / 2);
if ( mid == maxoff && GinPageRightMost(page) )
if (mid == maxoff && GinPageRightMost(page))
/* Right infinity */
result = -1;
else {
bool isnull;
else
{
bool isnull;
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
result = compareEntries(btree->ginstate, btree->entryValue,
index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull) );
result = compareEntries(btree->ginstate, btree->entryValue,
index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull));
}
if ( result == 0 ) {
if (result == 0)
{
stack->off = mid;
Assert( GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO );
Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO);
return GinItemPointerGetBlockNumber(&(itup)->t_tid);
} else if ( result > 0 )
}
else if (result > 0)
low = mid + 1;
else
high = mid;
}
Assert( high>=FirstOffsetNumber && high <= maxoff );
Assert(high >= FirstOffsetNumber && high <= maxoff);
stack->off = high;
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, high));
Assert( GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO );
Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO);
return GinItemPointerGetBlockNumber(&(itup)->t_tid);
}
@@ -168,15 +184,18 @@ entryLocateEntry(GinBtree btree, GinBtreeStack *stack) {
* Returns true if value found on page.
*/
static bool
entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack) {
Page page = BufferGetPage( stack->buffer );
OffsetNumber low, high;
IndexTuple itup;
entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack)
{
Page page = BufferGetPage(stack->buffer);
OffsetNumber low,
high;
IndexTuple itup;
Assert( GinPageIsLeaf(page) );
Assert( !GinPageIsData(page) );
Assert(GinPageIsLeaf(page));
Assert(!GinPageIsData(page));
if ( btree->fullScan ) {
if (btree->fullScan)
{
stack->off = FirstOffsetNumber;
return TRUE;
}
@@ -184,26 +203,30 @@ entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack) {
low = FirstOffsetNumber;
high = PageGetMaxOffsetNumber(page);
if ( high < low ) {
if (high < low)
{
stack->off = FirstOffsetNumber;
return false;
}
high++;
while (high > low) {
while (high > low)
{
OffsetNumber mid = low + ((high - low) / 2);
bool isnull;
int result;
bool isnull;
int result;
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
result = compareEntries(btree->ginstate, btree->entryValue,
index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull) );
index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull));
if ( result == 0 ) {
if (result == 0)
{
stack->off = mid;
return true;
} else if ( result > 0 )
}
else if (result > 0)
low = mid + 1;
else
high = mid;
@@ -214,33 +237,40 @@ entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack) {
}
static OffsetNumber
entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) {
OffsetNumber i, maxoff = PageGetMaxOffsetNumber(page);
IndexTuple itup;
entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
{
OffsetNumber i,
maxoff = PageGetMaxOffsetNumber(page);
IndexTuple itup;
Assert( !GinPageIsLeaf(page) );
Assert( !GinPageIsData(page) );
Assert(!GinPageIsLeaf(page));
Assert(!GinPageIsData(page));
/* if the page hasn't changed, we return storedOff */
if ( storedOff>= FirstOffsetNumber && storedOff<=maxoff) {
if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
{
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff));
if ( GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno )
if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
return storedOff;
/* we hope, that needed pointer goes to right. It's true
if there wasn't a deletion */
for( i=storedOff+1 ; i <= maxoff ; i++ ) {
/*
* we hope that the needed pointer is to the right; that's true if there
* was no deletion
*/
for (i = storedOff + 1; i <= maxoff; i++)
{
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
if ( GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno )
if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
return i;
}
maxoff = storedOff-1;
maxoff = storedOff - 1;
}
/* last chance */
for( i=FirstOffsetNumber; i <= maxoff ; i++ ) {
for (i = FirstOffsetNumber; i <= maxoff; i++)
{
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
if ( GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno )
if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
return i;
}
@@ -248,31 +278,35 @@ entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber sto
}
static BlockNumber
entryGetLeftMostPage(GinBtree btree, Page page) {
IndexTuple itup;
entryGetLeftMostPage(GinBtree btree, Page page)
{
IndexTuple itup;
Assert( !GinPageIsLeaf(page) );
Assert( !GinPageIsData(page) );
Assert( PageGetMaxOffsetNumber(page) >= FirstOffsetNumber );
Assert(!GinPageIsLeaf(page));
Assert(!GinPageIsData(page));
Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
return GinItemPointerGetBlockNumber(&(itup)->t_tid);
return GinItemPointerGetBlockNumber(&(itup)->t_tid);
}
static bool
entryIsEnoughSpace( GinBtree btree, Buffer buf, OffsetNumber off ) {
Size itupsz = 0;
Page page = BufferGetPage(buf);
entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
{
Size itupsz = 0;
Page page = BufferGetPage(buf);
Assert( btree->entry );
Assert( !GinPageIsData(page) );
Assert(btree->entry);
Assert(!GinPageIsData(page));
if ( btree->isDelete ) {
IndexTuple itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, off));
itupsz = MAXALIGN( IndexTupleSize( itup ) ) + sizeof(ItemIdData);
if (btree->isDelete)
{
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
itupsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
}
if ( PageGetFreeSpace(page) + itupsz >= MAXALIGN(IndexTupleSize(btree->entry)) + sizeof(ItemIdData) )
if (PageGetFreeSpace(page) + itupsz >= MAXALIGN(IndexTupleSize(btree->entry)) + sizeof(ItemIdData))
return true;
return false;
@@ -284,19 +318,23 @@ entryIsEnoughSpace( GinBtree btree, Buffer buf, OffsetNumber off ) {
* if a child split has occurred
*/
static BlockNumber
entryPreparePage( GinBtree btree, Page page, OffsetNumber off) {
entryPreparePage(GinBtree btree, Page page, OffsetNumber off)
{
BlockNumber ret = InvalidBlockNumber;
Assert( btree->entry );
Assert( !GinPageIsData(page) );
Assert(btree->entry);
Assert(!GinPageIsData(page));
if ( btree->isDelete ) {
Assert( GinPageIsLeaf(page) );
if (btree->isDelete)
{
Assert(GinPageIsLeaf(page));
PageIndexTupleDelete(page, off);
}
if ( !GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber ) {
IndexTuple itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, off));
if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
{
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
ItemPointerSet(&itup->t_tid, btree->rightblkno, InvalidOffsetNumber);
ret = btree->rightblkno;
}
@@ -310,22 +348,23 @@ entryPreparePage( GinBtree btree, Page page, OffsetNumber off) {
* Place tuple on page and fills WAL record
*/
static void
entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata) {
Page page = BufferGetPage(buf);
entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
{
Page page = BufferGetPage(buf);
static XLogRecData rdata[3];
OffsetNumber placed;
static ginxlogInsert data;
OffsetNumber placed;
static ginxlogInsert data;
*prdata = rdata;
data.updateBlkno = entryPreparePage( btree, page, off );
data.updateBlkno = entryPreparePage(btree, page, off);
placed = PageAddItem( page, (Item)btree->entry, IndexTupleSize(btree->entry), off, LP_USED);
if ( placed != off )
placed = PageAddItem(page, (Item) btree->entry, IndexTupleSize(btree->entry), off, LP_USED);
if (placed != off)
elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index));
RelationGetRelationName(btree->index));
data.node = btree->index->rd_node;
data.blkno = BufferGetBlockNumber( buf );
data.blkno = BufferGetBlockNumber(buf);
data.offset = off;
data.nitem = 1;
data.isDelete = btree->isDelete;
@@ -358,87 +397,99 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prd
* an equal number!
*/
static Page
entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata) {
entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
{
static XLogRecData rdata[2];
OffsetNumber i, maxoff, separator=InvalidOffsetNumber;
Size totalsize=0;
Size lsize = 0, size;
static char tupstore[ 2*BLCKSZ ];
char *ptr;
IndexTuple itup, leftrightmost=NULL;
static ginxlogSplit data;
Datum value;
bool isnull;
Page page;
Page lpage = GinPageGetCopyPage( BufferGetPage( lbuf ) );
Page rpage = BufferGetPage( rbuf );
Size pageSize = PageGetPageSize( lpage );
OffsetNumber i,
maxoff,
separator = InvalidOffsetNumber;
Size totalsize = 0;
Size lsize = 0,
size;
static char tupstore[2 * BLCKSZ];
char *ptr;
IndexTuple itup,
leftrightmost = NULL;
static ginxlogSplit data;
Datum value;
bool isnull;
Page page;
Page lpage = GinPageGetCopyPage(BufferGetPage(lbuf));
Page rpage = BufferGetPage(rbuf);
Size pageSize = PageGetPageSize(lpage);
*prdata = rdata;
data.leftChildBlkno = ( GinPageIsLeaf(lpage) ) ?
InvalidOffsetNumber : GinItemPointerGetBlockNumber( &(btree->entry->t_tid) );
data.updateBlkno = entryPreparePage( btree, lpage, off );
data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
InvalidOffsetNumber : GinItemPointerGetBlockNumber(&(btree->entry->t_tid));
data.updateBlkno = entryPreparePage(btree, lpage, off);
maxoff = PageGetMaxOffsetNumber(lpage);
ptr = tupstore;
ptr = tupstore;
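/* first pass: copy every tuple, with the new entry spliced in at 'off', into the tupstore work area while summing the total size */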
for(i=FirstOffsetNumber; i<=maxoff; i++) {
if ( i==off ) {
size = MAXALIGN( IndexTupleSize(btree->entry) );
for (i = FirstOffsetNumber; i <= maxoff; i++)
{
if (i == off)
{
size = MAXALIGN(IndexTupleSize(btree->entry));
memcpy(ptr, btree->entry, size);
ptr+=size;
ptr += size;
totalsize += size + sizeof(ItemIdData);
}
itup = (IndexTuple)PageGetItem(lpage, PageGetItemId(lpage, i));
size = MAXALIGN( IndexTupleSize(itup) );
itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i));
size = MAXALIGN(IndexTupleSize(itup));
memcpy(ptr, itup, size);
ptr+=size;
ptr += size;
totalsize += size + sizeof(ItemIdData);
}
if ( off==maxoff+1 ) {
size = MAXALIGN( IndexTupleSize(btree->entry) );
if (off == maxoff + 1)
{
size = MAXALIGN(IndexTupleSize(btree->entry));
memcpy(ptr, btree->entry, size);
ptr+=size;
ptr += size;
totalsize += size + sizeof(ItemIdData);
}
GinInitPage( rpage, GinPageGetOpaque(lpage)->flags, pageSize );
GinInitPage( lpage, GinPageGetOpaque(rpage)->flags, pageSize );
GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
ptr = tupstore;
maxoff++;
maxoff++;
lsize = 0;
page = lpage;
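/*
* second pass: re-insert the tuples, filling the left page until it holds
* about half of the total size, then switching to the right page; the
* last tuple kept on the left supplies the new high key
*/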
for(i=FirstOffsetNumber; i<=maxoff; i++) {
itup = (IndexTuple)ptr;
for (i = FirstOffsetNumber; i <= maxoff; i++)
{
itup = (IndexTuple) ptr;
if ( lsize > totalsize/2 ) {
if ( separator==InvalidOffsetNumber )
separator = i-1;
if (lsize > totalsize / 2)
{
if (separator == InvalidOffsetNumber)
separator = i - 1;
page = rpage;
} else {
}
else
{
leftrightmost = itup;
lsize += MAXALIGN( IndexTupleSize(itup) ) + sizeof(ItemIdData);
lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
}
if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber )
if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index));
ptr += MAXALIGN( IndexTupleSize(itup) );
RelationGetRelationName(btree->index));
ptr += MAXALIGN(IndexTupleSize(itup));
}
value = index_getattr(leftrightmost, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull);
btree->entry = GinFormTuple( btree->ginstate, value, NULL, 0);
ItemPointerSet(&(btree->entry)->t_tid, BufferGetBlockNumber( lbuf ), InvalidOffsetNumber);
btree->rightblkno = BufferGetBlockNumber( rbuf );
btree->entry = GinFormTuple(btree->ginstate, value, NULL, 0);
ItemPointerSet(&(btree->entry)->t_tid, BufferGetBlockNumber(lbuf), InvalidOffsetNumber);
btree->rightblkno = BufferGetBlockNumber(rbuf);
data.node = btree->index->rd_node;
data.rootBlkno = InvalidBlockNumber;
data.lblkno = BufferGetBlockNumber( lbuf );
data.rblkno = BufferGetBlockNumber( rbuf );
data.lblkno = BufferGetBlockNumber(lbuf);
data.rblkno = BufferGetBlockNumber(rbuf);
data.separator = separator;
data.nitem = maxoff;
data.isData = FALSE;
@@ -458,23 +509,28 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR
return lpage;
}
/*
* returns a newly allocated copy of the rightmost tuple
*/
IndexTuple
ginPageGetLinkItup(Buffer buf) {
IndexTuple itup, nitup;
Page page = BufferGetPage(buf);
ginPageGetLinkItup(Buffer buf)
{
IndexTuple itup,
nitup;
Page page = BufferGetPage(buf);
itup = getRightMostTuple( page );
if ( GinPageIsLeaf(page) && !GinIsPostingTree(itup) ) {
nitup = (IndexTuple)palloc( MAXALIGN(GinGetOrigSizePosting(itup)) );
memcpy( nitup, itup, GinGetOrigSizePosting(itup) );
itup = getRightMostTuple(page);
if (GinPageIsLeaf(page) && !GinIsPostingTree(itup))
{
nitup = (IndexTuple) palloc(MAXALIGN(GinGetOrigSizePosting(itup)));
memcpy(nitup, itup, GinGetOrigSizePosting(itup));
nitup->t_info &= ~INDEX_SIZE_MASK;
nitup->t_info |= GinGetOrigSizePosting(itup);
} else {
nitup = (IndexTuple)palloc( MAXALIGN(IndexTupleSize(itup)) );
memcpy( nitup, itup, IndexTupleSize(itup) );
}
else
{
nitup = (IndexTuple) palloc(MAXALIGN(IndexTupleSize(itup)));
memcpy(nitup, itup, IndexTupleSize(itup));
}
ItemPointerSet(&nitup->t_tid, BufferGetBlockNumber(buf), InvalidOffsetNumber);
@@ -486,23 +542,25 @@ ginPageGetLinkItup(Buffer buf) {
* Also called from ginxlog, should not use btree
*/
void
entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf) {
Page page;
IndexTuple itup;
entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
{
Page page;
IndexTuple itup;
page = BufferGetPage(root);
itup = ginPageGetLinkItup( lbuf );
if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber )
itup = ginPageGetLinkItup(lbuf);
if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to index root page");
itup = ginPageGetLinkItup( rbuf );
if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber )
itup = ginPageGetLinkItup(rbuf);
if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to index root page");
}
void
prepareEntryScan( GinBtree btree, Relation index, Datum value, GinState *ginstate) {
prepareEntryScan(GinBtree btree, Relation index, Datum value, GinState *ginstate)
{
memset(btree, 0, sizeof(GinBtreeData));
btree->isMoveRight = entryIsMoveRight;
@@ -524,4 +582,3 @@ prepareEntryScan( GinBtree btree, Relation index, Datum value, GinState *ginstat
btree->fullScan = FALSE;
btree->isBuild = FALSE;
}
View File
@@ -1,14 +1,14 @@
/*-------------------------------------------------------------------------
*
* ginget.c
* fetch tuples from a GIN scan.
* fetch tuples from a GIN scan.
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.2 2006/07/14 14:52:16 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.3 2006/10/04 00:29:47 momjian Exp $
*-------------------------------------------------------------------------
*/
@@ -18,15 +18,17 @@
#include "utils/memutils.h"
static OffsetNumber
findItemInPage( Page page, ItemPointer item, OffsetNumber off ) {
findItemInPage(Page page, ItemPointer item, OffsetNumber off)
{
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
int res;
int res;
for(; off<=maxoff; off++) {
res = compareItemPointers( item, (ItemPointer)GinDataPageGetItem(page, off) );
Assert( res>= 0 );
for (; off <= maxoff; off++)
{
res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, off));
Assert(res >= 0);
if ( res == 0 )
if (res == 0)
return off;
}
@@ -38,24 +40,29 @@ findItemInPage( Page page, ItemPointer item, OffsetNumber off ) {
* Stop* functions unlock buffer (but don't release!)
*/
static void
startScanEntry( Relation index, GinState *ginstate, GinScanEntry entry, bool firstCall ) {
if ( entry->master != NULL ) {
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firstCall)
{
if (entry->master != NULL)
{
entry->isFinished = entry->master->isFinished;
return;
}
if ( firstCall ) {
/* at first call we should find entry, and
begin scan of posting tree or just store posting list in memory */
if (firstCall)
{
/*
* on the first call we should find the entry and begin a scan of the
* posting tree, or just store the posting list in memory
*/
GinBtreeData btreeEntry;
GinBtreeStack *stackEntry;
Page page;
bool needUnlock = TRUE;
GinBtreeStack *stackEntry;
Page page;
bool needUnlock = TRUE;
prepareEntryScan( &btreeEntry, index, entry->entry, ginstate );
prepareEntryScan(&btreeEntry, index, entry->entry, ginstate);
btreeEntry.searchMode = TRUE;
stackEntry = ginFindLeafPage(&btreeEntry, NULL);
page = BufferGetPage( stackEntry->buffer );
page = BufferGetPage(stackEntry->buffer);
entry->isFinished = TRUE;
entry->buffer = InvalidBuffer;
@@ -65,103 +72,115 @@ startScanEntry( Relation index, GinState *ginstate, GinScanEntry entry, bool fir
entry->reduceResult = FALSE;
entry->predictNumberResult = 0;
if ( btreeEntry.findItem( &btreeEntry, stackEntry ) ) {
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
if (btreeEntry.findItem(&btreeEntry, stackEntry))
{
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
if ( GinIsPostingTree(itup) ) {
if (GinIsPostingTree(itup))
{
BlockNumber rootPostingTree = GinGetPostingTree(itup);
GinPostingTreeScan *gdi;
Page page;
Page page;
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
needUnlock = FALSE;
gdi = prepareScanPostingTree( index, rootPostingTree, TRUE );
needUnlock = FALSE;
gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
entry->buffer = scanBeginPostingTree( gdi );
IncrBufferRefCount( entry->buffer );
entry->buffer = scanBeginPostingTree(gdi);
IncrBufferRefCount(entry->buffer);
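/* take an extra pin so the entry keeps this leaf buffer after the descent state (gdi and its stack) is freed just below */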
page = BufferGetPage( entry->buffer );
entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
page = BufferGetPage(entry->buffer);
entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
freeGinBtreeStack( gdi->stack );
pfree( gdi );
freeGinBtreeStack(gdi->stack);
pfree(gdi);
entry->isFinished = FALSE;
} else if ( GinGetNPosting(itup) > 0 ) {
}
else if (GinGetNPosting(itup) > 0)
{
entry->nlist = GinGetNPosting(itup);
entry->list = (ItemPointerData*)palloc( sizeof(ItemPointerData) * entry->nlist );
memcpy( entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist );
entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist);
memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist);
entry->isFinished = FALSE;
}
}
if ( needUnlock )
if (needUnlock)
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
freeGinBtreeStack( stackEntry );
} else if ( entry->buffer != InvalidBuffer ) {
freeGinBtreeStack(stackEntry);
}
else if (entry->buffer != InvalidBuffer)
{
/* we should find the place where we were stopped */
BlockNumber blkno;
Page page;
Page page;
LockBuffer( entry->buffer, GIN_SHARE );
LockBuffer(entry->buffer, GIN_SHARE);
if ( !ItemPointerIsValid( &entry->curItem ) )
if (!ItemPointerIsValid(&entry->curItem))
/* start position */
return;
Assert( entry->offset!=InvalidOffsetNumber );
Assert(entry->offset != InvalidOffsetNumber);
page = BufferGetPage( entry->buffer );
page = BufferGetPage(entry->buffer);
/* try to find curItem in current buffer */
if ( (entry->offset=findItemInPage(page , &entry->curItem, entry->offset))!=InvalidOffsetNumber )
if ((entry->offset = findItemInPage(page, &entry->curItem, entry->offset)) != InvalidOffsetNumber)
return;
/* walk to right */
while( (blkno = GinPageGetOpaque( page )->rightlink)!=InvalidBlockNumber ) {
LockBuffer( entry->buffer, GIN_UNLOCK );
entry->buffer = ReleaseAndReadBuffer( entry->buffer, index, blkno );
LockBuffer( entry->buffer, GIN_SHARE );
page = BufferGetPage( entry->buffer );
while ((blkno = GinPageGetOpaque(page)->rightlink) != InvalidBlockNumber)
{
LockBuffer(entry->buffer, GIN_UNLOCK);
entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno);
LockBuffer(entry->buffer, GIN_SHARE);
page = BufferGetPage(entry->buffer);
if ( (entry->offset=findItemInPage(page , &entry->curItem, FirstOffsetNumber))!=InvalidOffsetNumber )
if ((entry->offset = findItemInPage(page, &entry->curItem, FirstOffsetNumber)) != InvalidOffsetNumber)
return;
}
elog(ERROR,"Logic error: lost previously founded ItemId");
elog(ERROR, "Logic error: lost previously founded ItemId");
}
}
static void
stopScanEntry( GinScanEntry entry ) {
if ( entry->buffer != InvalidBuffer )
LockBuffer( entry->buffer, GIN_UNLOCK );
stopScanEntry(GinScanEntry entry)
{
if (entry->buffer != InvalidBuffer)
LockBuffer(entry->buffer, GIN_UNLOCK);
}
static void
startScanKey( Relation index, GinState *ginstate, GinScanKey key ) {
uint32 i;
startScanKey(Relation index, GinState *ginstate, GinScanKey key)
{
uint32 i;
for(i=0;i<key->nentries;i++)
startScanEntry( index, ginstate, key->scanEntry+i, key->firstCall );
if ( key->firstCall ) {
memset( key->entryRes, TRUE, sizeof(bool) * key->nentries );
for (i = 0; i < key->nentries; i++)
startScanEntry(index, ginstate, key->scanEntry + i, key->firstCall);
if (key->firstCall)
{
memset(key->entryRes, TRUE, sizeof(bool) * key->nentries);
key->isFinished = FALSE;
key->firstCall = FALSE;
if ( GinFuzzySearchLimit > 0 ) {
if (GinFuzzySearchLimit > 0)
{
/*
* If all of the keys are above the threshold we will try to reduce the
* result; we hope (and only hope, since for the intersection operation
* on arrays our supposition isn't true) that the total result will be
* no larger than the minimal predictNumberResult.
*/
for(i=0;i<key->nentries;i++)
if ( key->scanEntry[i].predictNumberResult <= key->nentries * GinFuzzySearchLimit )
return;
for(i=0;i<key->nentries;i++)
if ( key->scanEntry[i].predictNumberResult > key->nentries * GinFuzzySearchLimit ) {
for (i = 0; i < key->nentries; i++)
if (key->scanEntry[i].predictNumberResult <= key->nentries * GinFuzzySearchLimit)
return;
for (i = 0; i < key->nentries; i++)
if (key->scanEntry[i].predictNumberResult > key->nentries * GinFuzzySearchLimit)
{
key->scanEntry[i].predictNumberResult /= key->nentries;
key->scanEntry[i].reduceResult = TRUE;
}
@@ -170,50 +189,60 @@ startScanKey( Relation index, GinState *ginstate, GinScanKey key ) {
}
static void
stopScanKey( GinScanKey key ) {
uint32 i;
stopScanKey(GinScanKey key)
{
uint32 i;
for(i=0;i<key->nentries;i++)
stopScanEntry( key->scanEntry+i );
for (i = 0; i < key->nentries; i++)
stopScanEntry(key->scanEntry + i);
}
static void
startScan( IndexScanDesc scan ) {
uint32 i;
GinScanOpaque so = (GinScanOpaque) scan->opaque;
startScan(IndexScanDesc scan)
{
uint32 i;
GinScanOpaque so = (GinScanOpaque) scan->opaque;
for(i=0; i<so->nkeys; i++)
startScanKey( scan->indexRelation, &so->ginstate, so->keys + i );
for (i = 0; i < so->nkeys; i++)
startScanKey(scan->indexRelation, &so->ginstate, so->keys + i);
}
static void
stopScan( IndexScanDesc scan ) {
uint32 i;
GinScanOpaque so = (GinScanOpaque) scan->opaque;
stopScan(IndexScanDesc scan)
{
uint32 i;
GinScanOpaque so = (GinScanOpaque) scan->opaque;
for(i=0; i<so->nkeys; i++)
stopScanKey( so->keys + i );
for (i = 0; i < so->nkeys; i++)
stopScanKey(so->keys + i);
}
static void
entryGetNextItem( Relation index, GinScanEntry entry ) {
Page page = BufferGetPage( entry->buffer );
entryGetNextItem(Relation index, GinScanEntry entry)
{
Page page = BufferGetPage(entry->buffer);
entry->offset++;
if ( entry->offset <= GinPageGetOpaque( page )->maxoff && GinPageGetOpaque( page )->maxoff >= FirstOffsetNumber ) {
entry->curItem = *(ItemPointerData*)GinDataPageGetItem(page, entry->offset);
} else {
BlockNumber blkno = GinPageGetOpaque( page )->rightlink;
LockBuffer( entry->buffer, GIN_UNLOCK );
if ( blkno == InvalidBlockNumber ) {
ReleaseBuffer( entry->buffer );
if (entry->offset <= GinPageGetOpaque(page)->maxoff && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber)
{
entry->curItem = *(ItemPointerData *) GinDataPageGetItem(page, entry->offset);
}
else
{
BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
LockBuffer(entry->buffer, GIN_UNLOCK);
if (blkno == InvalidBlockNumber)
{
ReleaseBuffer(entry->buffer);
entry->buffer = InvalidBuffer;
entry->isFinished = TRUE;
} else {
entry->buffer = ReleaseAndReadBuffer( entry->buffer, index, blkno );
LockBuffer( entry->buffer, GIN_SHARE );
}
else
{
entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno);
LockBuffer(entry->buffer, GIN_SHARE);
entry->offset = InvalidOffsetNumber;
entryGetNextItem(index, entry);
}
@@ -221,29 +250,37 @@ entryGetNextItem( Relation index, GinScanEntry entry ) {
}
#define gin_rand() (((double) random()) / ((double) MAX_RANDOM_VALUE))
#define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
#define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
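/*
* dropItem() keeps an item with probability GinFuzzySearchLimit /
* predictNumberResult, so a reduced entry stream returns about
* GinFuzzySearchLimit items on average.
*/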
/*
* Sets entry->curItem to the newly found heap item pointer for one
* entry of one scan key
*/
static bool
entryGetItem( Relation index, GinScanEntry entry ) {
if ( entry->master ) {
entryGetItem(Relation index, GinScanEntry entry)
{
if (entry->master)
{
entry->isFinished = entry->master->isFinished;
entry->curItem = entry->master->curItem;
} else if ( entry->list ) {
}
else if (entry->list)
{
entry->offset++;
if ( entry->offset <= entry->nlist )
entry->curItem = entry->list[ entry->offset - 1 ];
else {
ItemPointerSet( &entry->curItem, InvalidBlockNumber, InvalidOffsetNumber );
if (entry->offset <= entry->nlist)
entry->curItem = entry->list[entry->offset - 1];
else
{
ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber);
entry->isFinished = TRUE;
}
} else {
do {
}
else
{
do
{
entryGetNextItem(index, entry);
} while ( entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry) );
} while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry));
}
return entry->isFinished;
@@ -254,155 +291,180 @@ entryGetItem( Relation index, GinScanEntry entry ) {
* returns isFinished!
*/
static bool
keyGetItem( Relation index, GinState *ginstate, MemoryContext tempCtx, GinScanKey key ) {
uint32 i;
GinScanEntry entry;
bool res;
MemoryContext oldCtx;
keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx, GinScanKey key)
{
uint32 i;
GinScanEntry entry;
bool res;
MemoryContext oldCtx;
if ( key->isFinished )
if (key->isFinished)
return TRUE;
do {
/* move forward from previously value and set new curItem,
which is minimal from entries->curItems */
ItemPointerSetMax( &key->curItem );
for(i=0;i<key->nentries;i++) {
entry = key->scanEntry+i;
if ( key->entryRes[i] ) {
if ( entry->isFinished == FALSE && entryGetItem(index, entry) == FALSE ) {
if (compareItemPointers( &entry->curItem, &key->curItem ) < 0)
do
{
/*
* move forward from the previous value and set the new curItem, which is
* the minimum of the entries' curItems
*/
ItemPointerSetMax(&key->curItem);
for (i = 0; i < key->nentries; i++)
{
entry = key->scanEntry + i;
if (key->entryRes[i])
{
if (entry->isFinished == FALSE && entryGetItem(index, entry) == FALSE)
{
if (compareItemPointers(&entry->curItem, &key->curItem) < 0)
key->curItem = entry->curItem;
} else
}
else
key->entryRes[i] = FALSE;
} else if ( entry->isFinished == FALSE ) {
if (compareItemPointers( &entry->curItem, &key->curItem ) < 0)
}
else if (entry->isFinished == FALSE)
{
if (compareItemPointers(&entry->curItem, &key->curItem) < 0)
key->curItem = entry->curItem;
}
}
}
if ( ItemPointerIsMax( &key->curItem ) ) {
if (ItemPointerIsMax(&key->curItem))
{
/* all entries are finished */
key->isFinished = TRUE;
return TRUE;
}
if ( key->nentries == 1 ) {
if (key->nentries == 1)
{
/* with only one entry we don't need to call consistentFn at all */
key->entryRes[0] = TRUE;
return FALSE;
}
/* setting up array for consistentFn */
for(i=0;i<key->nentries;i++) {
entry = key->scanEntry+i;
if ( entry->isFinished == FALSE && compareItemPointers( &entry->curItem, &key->curItem )==0 )
for (i = 0; i < key->nentries; i++)
{
entry = key->scanEntry + i;
if (entry->isFinished == FALSE && compareItemPointers(&entry->curItem, &key->curItem) == 0)
key->entryRes[i] = TRUE;
else
key->entryRes[i] = FALSE;
}
oldCtx = MemoryContextSwitchTo(tempCtx);
res = DatumGetBool( FunctionCall3(
&ginstate->consistentFn,
PointerGetDatum( key->entryRes ),
UInt16GetDatum( key->strategy ),
key->query
));
res = DatumGetBool(FunctionCall3(
&ginstate->consistentFn,
PointerGetDatum(key->entryRes),
UInt16GetDatum(key->strategy),
key->query
));
MemoryContextSwitchTo(oldCtx);
MemoryContextReset(tempCtx);
} while( !res );
} while (!res);
return FALSE;
}
/*
* Get heap item pointer from scan
* returns true if found
* Get heap item pointer from scan
* returns true if found
*/
static bool
scanGetItem( IndexScanDesc scan, ItemPointerData *item ) {
uint32 i;
GinScanOpaque so = (GinScanOpaque) scan->opaque;
scanGetItem(IndexScanDesc scan, ItemPointerData *item)
{
uint32 i;
GinScanOpaque so = (GinScanOpaque) scan->opaque;
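/*
* First loop: advance every key once and remember the largest curItem as
* the candidate. Second loop: pull the remaining keys forward until they
* all land on that candidate; if one of them jumps past it, adopt its
* curItem as the new candidate and recheck from the first key.
*/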
ItemPointerSetMin( item );
for(i=0;i<so->nkeys;i++) {
GinScanKey key = so->keys+i;
ItemPointerSetMin(item);
for (i = 0; i < so->nkeys; i++)
{
GinScanKey key = so->keys + i;
if ( keyGetItem( scan->indexRelation, &so->ginstate, so->tempCtx, key )==FALSE ) {
if ( compareItemPointers( item, &key->curItem ) < 0 )
if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx, key) == FALSE)
{
if (compareItemPointers(item, &key->curItem) < 0)
*item = key->curItem;
} else
return FALSE; /* finshed one of keys */
}
else
return FALSE; /* finished one of the keys */
}
for(i=1;i<=so->nkeys;i++) {
GinScanKey key = so->keys+i-1;
for(;;) {
int cmp = compareItemPointers( item, &key->curItem );
for (i = 1; i <= so->nkeys; i++)
{
GinScanKey key = so->keys + i - 1;
if ( cmp == 0 )
for (;;)
{
int cmp = compareItemPointers(item, &key->curItem);
if (cmp == 0)
break;
else if ( cmp > 0 ) {
if ( keyGetItem( scan->indexRelation, &so->ginstate, so->tempCtx, key )==TRUE )
return FALSE; /* finshed one of keys */
} else { /* returns to begin */
else if (cmp > 0)
{
if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx, key) == TRUE)
return FALSE; /* finished one of the keys */
}
else
{ /* restart from the first key */
*item = key->curItem;
i=0;
i = 0;
break;
}
}
}
return TRUE;
return TRUE;
}
#define GinIsNewKey(s) ( ((GinScanOpaque) scan->opaque)->keys == NULL )
Datum
gingetmulti(PG_FUNCTION_ARGS) {
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
Datum
gingetmulti(PG_FUNCTION_ARGS)
{
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
int32 max_tids = PG_GETARG_INT32(2);
int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3);
int32 max_tids = PG_GETARG_INT32(2);
int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3);
if ( GinIsNewKey(scan) )
newScanKey( scan );
if (GinIsNewKey(scan))
newScanKey(scan);
startScan( scan );
startScan(scan);
*returned_tids = 0;
do {
if ( scanGetItem( scan, tids + *returned_tids ) )
do
{
if (scanGetItem(scan, tids + *returned_tids))
(*returned_tids)++;
else
break;
} while ( *returned_tids < max_tids );
} while (*returned_tids < max_tids);
stopScan( scan );
stopScan(scan);
PG_RETURN_BOOL(*returned_tids == max_tids);
}
Datum
gingettuple(PG_FUNCTION_ARGS) {
gingettuple(PG_FUNCTION_ARGS)
{
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
bool res;
bool res;
if ( dir != ForwardScanDirection )
if (dir != ForwardScanDirection)
elog(ERROR, "Gin doesn't support other scan directions than forward");
if ( GinIsNewKey(scan) )
newScanKey( scan );
startScan( scan );
if (GinIsNewKey(scan))
newScanKey(scan);
startScan(scan);
res = scanGetItem(scan, &scan->xs_ctup.t_self);
stopScan( scan );
stopScan(scan);
PG_RETURN_BOOL(res);
}
View File
@@ -1,14 +1,14 @@
/*-------------------------------------------------------------------------
*
* gininsert.c
* insert routines for the postgres inverted index access method.
* insert routines for the postgres inverted index access method.
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.4 2006/07/14 14:52:16 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.5 2006/10/04 00:29:47 momjian Exp $
*-------------------------------------------------------------------------
*/
@@ -19,12 +19,13 @@
#include "miscadmin.h"
#include "utils/memutils.h"
typedef struct {
GinState ginstate;
double indtuples;
MemoryContext tmpCtx;
MemoryContext funcCtx;
BuildAccumulator accum;
typedef struct
{
GinState ginstate;
double indtuples;
MemoryContext tmpCtx;
MemoryContext funcCtx;
BuildAccumulator accum;
} GinBuildState;
/*
@@ -32,24 +33,26 @@ typedef struct {
* assumes that items[] fits on the page
*/
static BlockNumber
createPostingTree( Relation index, ItemPointerData *items, uint32 nitems ) {
createPostingTree(Relation index, ItemPointerData *items, uint32 nitems)
{
BlockNumber blkno;
Buffer buffer = GinNewBuffer(index);
Page page;
Buffer buffer = GinNewBuffer(index);
Page page;
START_CRIT_SECTION();
GinInitBuffer( buffer, GIN_DATA|GIN_LEAF );
GinInitBuffer(buffer, GIN_DATA | GIN_LEAF);
page = BufferGetPage(buffer);
blkno = BufferGetBlockNumber(buffer);
memcpy( GinDataPageGetData(page), items, sizeof(ItemPointerData) * nitems );
memcpy(GinDataPageGetData(page), items, sizeof(ItemPointerData) * nitems);
GinPageGetOpaque(page)->maxoff = nitems;
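/* the caller guarantees items[] fits on one page, so the new posting tree starts as a single leaf page that is also its root */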
if (!index->rd_istemp) {
XLogRecPtr recptr;
if (!index->rd_istemp)
{
XLogRecPtr recptr;
XLogRecData rdata[2];
ginxlogCreatePostingTree data;
ginxlogCreatePostingTree data;
data.node = index->rd_node;
data.blkno = blkno;
@@ -71,7 +74,7 @@ createPostingTree( Relation index, ItemPointerData *items, uint32 nitems ) {
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
}
}
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
@@ -89,21 +92,25 @@ createPostingTree( Relation index, ItemPointerData *items, uint32 nitems ) {
* GinFormTuple().
*/
static IndexTuple
addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
IndexTuple old, ItemPointerData *items, uint32 nitem, bool isBuild) {
bool isnull;
Datum key = index_getattr(old, FirstOffsetNumber, ginstate->tupdesc, &isnull);
IndexTuple res = GinFormTuple(ginstate, key, NULL, nitem + GinGetNPosting(old));
addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
IndexTuple old, ItemPointerData *items, uint32 nitem, bool isBuild)
{
bool isnull;
Datum key = index_getattr(old, FirstOffsetNumber, ginstate->tupdesc, &isnull);
IndexTuple res = GinFormTuple(ginstate, key, NULL, nitem + GinGetNPosting(old));
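/* res is NULL when the combined posting list would not fit in a single index tuple; the else branch below then moves the list into a separate posting tree */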
if ( res ) {
if (res)
{
/* good, small enough */
MergeItemPointers( GinGetPosting(res),
GinGetPosting(old), GinGetNPosting(old),
items, nitem
);
MergeItemPointers(GinGetPosting(res),
GinGetPosting(old), GinGetNPosting(old),
items, nitem
);
GinSetNPosting(res, nitem + GinGetNPosting(old));
} else {
}
else
{
BlockNumber postingRoot;
GinPostingTreeScan *gdi;
@@ -112,7 +119,7 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
postingRoot = createPostingTree(index, GinGetPosting(old), GinGetNPosting(old));
GinSetPostingTree(res, postingRoot);
gdi = prepareScanPostingTree(index, postingRoot, FALSE);
gdi = prepareScanPostingTree(index, postingRoot, FALSE);
gdi->btree.isBuild = isBuild;
insertItemPointer(gdi, items, nitem);
@@ -124,36 +131,39 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
}
/*
* Inserts only one entry into the index, but it can add more than one
* ItemPointer.
*/
static void
ginEntryInsert( Relation index, GinState *ginstate, Datum value, ItemPointerData *items, uint32 nitem, bool isBuild) {
GinBtreeData btree;
ginEntryInsert(Relation index, GinState *ginstate, Datum value, ItemPointerData *items, uint32 nitem, bool isBuild)
{
GinBtreeData btree;
GinBtreeStack *stack;
IndexTuple itup;
Page page;
IndexTuple itup;
Page page;
prepareEntryScan( &btree, index, value, ginstate );
prepareEntryScan(&btree, index, value, ginstate);
stack = ginFindLeafPage(&btree, NULL);
page = BufferGetPage( stack->buffer );
page = BufferGetPage(stack->buffer);
if ( btree.findItem( &btree, stack ) ) {
if (btree.findItem(&btree, stack))
{
/* found entry */
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
if ( GinIsPostingTree(itup) ) {
if (GinIsPostingTree(itup))
{
/* lock root of posting tree */
GinPostingTreeScan *gdi;
BlockNumber rootPostingTree = GinGetPostingTree(itup);
BlockNumber rootPostingTree = GinGetPostingTree(itup);
/* release all stack */
LockBuffer(stack->buffer, GIN_UNLOCK);
freeGinBtreeStack( stack );
freeGinBtreeStack(stack);
/* insert into posting tree */
gdi = prepareScanPostingTree( index, rootPostingTree, FALSE );
gdi = prepareScanPostingTree(index, rootPostingTree, FALSE);
gdi->btree.isBuild = isBuild;
insertItemPointer(gdi, items, nitem);
@@ -163,23 +173,26 @@ ginEntryInsert( Relation index, GinState *ginstate, Datum value, ItemPointerData
itup = addItemPointersToTuple(index, ginstate, stack, itup, items, nitem, isBuild);
btree.isDelete = TRUE;
} else {
}
else
{
/* We assume that the tuple can store at least one item pointer */
itup = GinFormTuple( ginstate, value, items, 1);
if ( itup==NULL || IndexTupleSize(itup) >= GinMaxItemSize )
itup = GinFormTuple(ginstate, value, items, 1);
if (itup == NULL || IndexTupleSize(itup) >= GinMaxItemSize)
elog(ERROR, "huge tuple");
if ( nitem>1 ) {
if (nitem > 1)
{
IndexTuple previtup = itup;
itup = addItemPointersToTuple(index, ginstate, stack, previtup, items+1, nitem-1, isBuild);
itup = addItemPointersToTuple(index, ginstate, stack, previtup, items + 1, nitem - 1, isBuild);
pfree(previtup);
}
}
btree.entry = itup;
ginInsertValue(&btree, stack);
pfree( itup );
pfree(itup);
}
/*
@@ -187,48 +200,53 @@ ginEntryInsert( Relation index, GinState *ginstate, Datum value, ItemPointerData
* This function is not used during normal insert
*/
static uint32
ginHeapTupleBulkInsert(GinBuildState *buildstate, Datum value, ItemPointer heapptr) {
Datum *entries;
uint32 nentries;
ginHeapTupleBulkInsert(GinBuildState *buildstate, Datum value, ItemPointer heapptr)
{
Datum *entries;
uint32 nentries;
MemoryContext oldCtx;
oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
entries = extractEntriesSU( buildstate->accum.ginstate, value, &nentries);
entries = extractEntriesSU(buildstate->accum.ginstate, value, &nentries);
MemoryContextSwitchTo(oldCtx);
if ( nentries==0 )
if (nentries == 0)
/* nothing to insert */
return 0;
ginInsertRecordBA( &buildstate->accum, heapptr, entries, nentries);
ginInsertRecordBA(&buildstate->accum, heapptr, entries, nentries);
MemoryContextReset(buildstate->funcCtx);
return nentries;
}
static void
static void
ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
bool *isnull, bool tupleIsAlive, void *state) {
bool *isnull, bool tupleIsAlive, void *state)
{
GinBuildState *buildstate = (GinBuildState*)state;
GinBuildState *buildstate = (GinBuildState *) state;
MemoryContext oldCtx;
if ( *isnull )
if (*isnull)
return;
oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
buildstate->indtuples += ginHeapTupleBulkInsert(buildstate, *values, &htup->t_self);
/* we use only half maintenance_work_mem, because there is some leaks
during insertion and extract values */
if ( buildstate->accum.allocatedMemory >= maintenance_work_mem*1024L/2L ) {
ItemPointerData *list;
Datum entry;
uint32 nlist;
/*
* we use only half of maintenance_work_mem, because there are some leaks
* during insertion and value extraction
*/
if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L / 2L)
{
ItemPointerData *list;
Datum entry;
uint32 nlist;
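/* flush every pending entry into the index, then reclaim the batch memory */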
while( (list=ginGetEntry(&buildstate->accum, &entry, &nlist)) != NULL )
while ((list = ginGetEntry(&buildstate->accum, &entry, &nlist)) != NULL)
ginEntryInsert(index, &buildstate->ginstate, entry, list, nlist, TRUE);
MemoryContextReset(buildstate->tmpCtx);
@@ -239,22 +257,23 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
}
Datum
ginbuild(PG_FUNCTION_ARGS) {
Relation heap = (Relation) PG_GETARG_POINTER(0);
Relation index = (Relation) PG_GETARG_POINTER(1);
ginbuild(PG_FUNCTION_ARGS)
{
Relation heap = (Relation) PG_GETARG_POINTER(0);
Relation index = (Relation) PG_GETARG_POINTER(1);
IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
IndexBuildResult *result;
double reltuples;
GinBuildState buildstate;
double reltuples;
GinBuildState buildstate;
Buffer buffer;
ItemPointerData *list;
Datum entry;
uint32 nlist;
ItemPointerData *list;
Datum entry;
uint32 nlist;
MemoryContext oldCtx;
if (RelationGetNumberOfBlocks(index) != 0)
elog(ERROR, "index \"%s\" already contains data",
RelationGetRelationName(index));
RelationGetRelationName(index));
initGinState(&buildstate.ginstate, index);
@@ -262,10 +281,11 @@ ginbuild(PG_FUNCTION_ARGS) {
buffer = GinNewBuffer(index);
START_CRIT_SECTION();
GinInitBuffer(buffer, GIN_LEAF);
if (!index->rd_istemp) {
XLogRecPtr recptr;
if (!index->rd_istemp)
{
XLogRecPtr recptr;
XLogRecData rdata;
Page page;
Page page;
rdata.buffer = InvalidBuffer;
rdata.data = (char *) &(index->rd_node);
@@ -279,7 +299,7 @@ ginbuild(PG_FUNCTION_ARGS) {
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
}
}
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
@@ -293,26 +313,26 @@ ginbuild(PG_FUNCTION_ARGS) {
* inserted into the index
*/
buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
"Gin build temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
"Gin build temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
buildstate.funcCtx = AllocSetContextCreate(buildstate.tmpCtx,
"Gin build temporary context for user-defined function",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
"Gin build temporary context for user-defined function",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
buildstate.accum.ginstate = &buildstate.ginstate;
ginInitBA( &buildstate.accum );
ginInitBA(&buildstate.accum);
/* do the heap scan */
reltuples = IndexBuildHeapScan(heap, index, indexInfo,
ginBuildCallback, (void *) &buildstate);
ginBuildCallback, (void *) &buildstate);
oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
while( (list=ginGetEntry(&buildstate.accum, &entry, &nlist)) != NULL )
while ((list = ginGetEntry(&buildstate.accum, &entry, &nlist)) != NULL)
ginEntryInsert(index, &buildstate.ginstate, entry, list, nlist, TRUE);
MemoryContextSwitchTo(oldCtx);
@@ -333,55 +353,58 @@ ginbuild(PG_FUNCTION_ARGS) {
* Inserts value during normal insertion
*/
static uint32
ginHeapTupleInsert( Relation index, GinState *ginstate, Datum value, ItemPointer item) {
Datum *entries;
uint32 i,nentries;
ginHeapTupleInsert(Relation index, GinState *ginstate, Datum value, ItemPointer item)
{
Datum *entries;
uint32 i,
nentries;
entries = extractEntriesSU( ginstate, value, &nentries);
entries = extractEntriesSU(ginstate, value, &nentries);
if ( nentries==0 )
if (nentries == 0)
/* nothing to insert */
return 0;
for(i=0;i<nentries;i++)
for (i = 0; i < nentries; i++)
ginEntryInsert(index, ginstate, entries[i], item, 1, FALSE);
return nentries;
}
Datum
gininsert(PG_FUNCTION_ARGS) {
Relation index = (Relation) PG_GETARG_POINTER(0);
Datum *values = (Datum *) PG_GETARG_POINTER(1);
bool *isnull = (bool *) PG_GETARG_POINTER(2);
gininsert(PG_FUNCTION_ARGS)
{
Relation index = (Relation) PG_GETARG_POINTER(0);
Datum *values = (Datum *) PG_GETARG_POINTER(1);
bool *isnull = (bool *) PG_GETARG_POINTER(2);
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
#ifdef NOT_USED
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
bool checkUnique = PG_GETARG_BOOL(5);
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
bool checkUnique = PG_GETARG_BOOL(5);
#endif
GinState ginstate;
GinState ginstate;
MemoryContext oldCtx;
MemoryContext insertCtx;
uint32 res;
uint32 res;
if ( *isnull )
if (*isnull)
PG_RETURN_BOOL(false);
insertCtx = AllocSetContextCreate(CurrentMemoryContext,
"Gin insert temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
"Gin insert temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
oldCtx = MemoryContextSwitchTo(insertCtx);
initGinState(&ginstate, index);
res = ginHeapTupleInsert(index, &ginstate, *values, ht_ctid);
res = ginHeapTupleInsert(index, &ginstate, *values, ht_ctid);
MemoryContextSwitchTo(oldCtx);
MemoryContextDelete(insertCtx);
PG_RETURN_BOOL(res>0);
PG_RETURN_BOOL(res > 0);
}
View File
@@ -1,14 +1,14 @@
/*-------------------------------------------------------------------------
*
* ginscan.c
* routines to manage scans of inverted index relations
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.5 2006/09/14 11:26:49 teodor Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.6 2006/10/04 00:29:48 momjian Exp $
*-------------------------------------------------------------------------
*/
@@ -19,11 +19,12 @@
#include "utils/memutils.h"
Datum
ginbeginscan(PG_FUNCTION_ARGS) {
Relation rel = (Relation) PG_GETARG_POINTER(0);
int keysz = PG_GETARG_INT32(1);
ScanKey scankey = (ScanKey) PG_GETARG_POINTER(2);
Datum
ginbeginscan(PG_FUNCTION_ARGS)
{
Relation rel = (Relation) PG_GETARG_POINTER(0);
int keysz = PG_GETARG_INT32(1);
ScanKey scankey = (ScanKey) PG_GETARG_POINTER(2);
IndexScanDesc scan;
scan = RelationGetIndexScan(rel, keysz, scankey);
@@ -32,22 +33,25 @@ ginbeginscan(PG_FUNCTION_ARGS) {
}
static void
fillScanKey( GinState *ginstate, GinScanKey key, Datum query,
Datum *entryValues, uint32 nEntryValues, StrategyNumber strategy ) {
uint32 i,j;
fillScanKey(GinState *ginstate, GinScanKey key, Datum query,
Datum *entryValues, uint32 nEntryValues, StrategyNumber strategy)
{
uint32 i,
j;
key->nentries = nEntryValues;
key->entryRes = (bool*)palloc0( sizeof(bool) * nEntryValues );
key->scanEntry = (GinScanEntry) palloc( sizeof(GinScanEntryData) * nEntryValues );
key->entryRes = (bool *) palloc0(sizeof(bool) * nEntryValues);
key->scanEntry = (GinScanEntry) palloc(sizeof(GinScanEntryData) * nEntryValues);
key->strategy = strategy;
key->query = query;
key->firstCall= TRUE;
ItemPointerSet( &(key->curItem), InvalidBlockNumber, InvalidOffsetNumber );
key->firstCall = TRUE;
ItemPointerSet(&(key->curItem), InvalidBlockNumber, InvalidOffsetNumber);
for(i=0; i<nEntryValues; i++) {
for (i = 0; i < nEntryValues; i++)
{
key->scanEntry[i].pval = key->entryRes + i;
key->scanEntry[i].entry = entryValues[i];
ItemPointerSet( &(key->scanEntry[i].curItem), InvalidBlockNumber, InvalidOffsetNumber );
ItemPointerSet(&(key->scanEntry[i].curItem), InvalidBlockNumber, InvalidOffsetNumber);
key->scanEntry[i].offset = InvalidOffsetNumber;
key->scanEntry[i].buffer = InvalidBuffer;
key->scanEntry[i].list = NULL;
@@ -55,8 +59,9 @@ fillScanKey( GinState *ginstate, GinScanKey key, Datum query,
/* link to the equals entry in current scan key */
key->scanEntry[i].master = NULL;
for( j=0; j<i; j++)
if ( compareEntries( ginstate, entryValues[i], entryValues[j] ) == 0 ) {
for (j = 0; j < i; j++)
if (compareEntries(ginstate, entryValues[i], entryValues[j]) == 0)
{
key->scanEntry[i].master = key->scanEntry + j;
break;
}
@@ -66,23 +71,27 @@ fillScanKey( GinState *ginstate, GinScanKey key, Datum query,
#ifdef NOT_USED
static void
resetScanKeys(GinScanKey keys, uint32 nkeys) {
uint32 i, j;
resetScanKeys(GinScanKey keys, uint32 nkeys)
{
uint32 i,
j;
if ( keys == NULL )
if (keys == NULL)
return;
for(i=0;i<nkeys;i++) {
GinScanKey key = keys + i;
for (i = 0; i < nkeys; i++)
{
GinScanKey key = keys + i;
key->firstCall = TRUE;
ItemPointerSet( &(key->curItem), InvalidBlockNumber, InvalidOffsetNumber );
ItemPointerSet(&(key->curItem), InvalidBlockNumber, InvalidOffsetNumber);
for(j=0;j<key->nentries;j++) {
if ( key->scanEntry[j].buffer != InvalidBuffer )
ReleaseBuffer( key->scanEntry[i].buffer );
for (j = 0; j < key->nentries; j++)
{
if (key->scanEntry[j].buffer != InvalidBuffer)
ReleaseBuffer(key->scanEntry[i].buffer);
ItemPointerSet( &(key->scanEntry[j].curItem), InvalidBlockNumber, InvalidOffsetNumber );
ItemPointerSet(&(key->scanEntry[j].curItem), InvalidBlockNumber, InvalidOffsetNumber);
key->scanEntry[j].offset = InvalidOffsetNumber;
key->scanEntry[j].buffer = InvalidBuffer;
key->scanEntry[j].list = NULL;
@@ -90,111 +99,121 @@ resetScanKeys(GinScanKey keys, uint32 nkeys) {
}
}
}
#endif
static void
freeScanKeys(GinScanKey keys, uint32 nkeys, bool removeRes) {
uint32 i, j;
freeScanKeys(GinScanKey keys, uint32 nkeys, bool removeRes)
{
uint32 i,
j;
if ( keys == NULL )
if (keys == NULL)
return;
for(i=0;i<nkeys;i++) {
GinScanKey key = keys + i;
for (i = 0; i < nkeys; i++)
{
GinScanKey key = keys + i;
for(j=0;j<key->nentries;j++) {
if ( key->scanEntry[j].buffer != InvalidBuffer )
ReleaseBuffer( key->scanEntry[j].buffer );
if ( removeRes && key->scanEntry[j].list )
for (j = 0; j < key->nentries; j++)
{
if (key->scanEntry[j].buffer != InvalidBuffer)
ReleaseBuffer(key->scanEntry[j].buffer);
if (removeRes && key->scanEntry[j].list)
pfree(key->scanEntry[j].list);
}
if ( removeRes )
if (removeRes)
pfree(key->entryRes);
pfree(key->scanEntry);
}
pfree(keys);
}
void
newScanKey( IndexScanDesc scan ) {
ScanKey scankey = scan->keyData;
newScanKey(IndexScanDesc scan)
{
ScanKey scankey = scan->keyData;
GinScanOpaque so = (GinScanOpaque) scan->opaque;
int i;
uint32 nkeys = 0;
int i;
uint32 nkeys = 0;
so->keys = (GinScanKey) palloc( scan->numberOfKeys * sizeof(GinScanKeyData) );
so->keys = (GinScanKey) palloc(scan->numberOfKeys * sizeof(GinScanKeyData));
if (scan->numberOfKeys < 1)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("GIN indexes do not support whole-index scans")));
errmsg("GIN indexes do not support whole-index scans")));
for(i=0; i<scan->numberOfKeys; i++) {
Datum* entryValues;
uint32 nEntryValues;
for (i = 0; i < scan->numberOfKeys; i++)
{
Datum *entryValues;
uint32 nEntryValues;
if ( scankey[i].sk_flags & SK_ISNULL )
if (scankey[i].sk_flags & SK_ISNULL)
elog(ERROR, "Gin doesn't support NULL as scan key");
Assert( scankey[i].sk_attno == 1 );
Assert(scankey[i].sk_attno == 1);
entryValues = (Datum*)DatumGetPointer(
FunctionCall3(
&so->ginstate.extractQueryFn,
scankey[i].sk_argument,
PointerGetDatum( &nEntryValues ),
UInt16GetDatum(scankey[i].sk_strategy)
)
);
if ( entryValues==NULL || nEntryValues == 0 )
entryValues = (Datum *) DatumGetPointer(
FunctionCall3(
&so->ginstate.extractQueryFn,
scankey[i].sk_argument,
PointerGetDatum(&nEntryValues),
UInt16GetDatum(scankey[i].sk_strategy)
)
);
if (entryValues == NULL || nEntryValues == 0)
/* full scan... */
continue;
fillScanKey( &so->ginstate, &(so->keys[nkeys]), scankey[i].sk_argument,
entryValues, nEntryValues, scankey[i].sk_strategy );
fillScanKey(&so->ginstate, &(so->keys[nkeys]), scankey[i].sk_argument,
entryValues, nEntryValues, scankey[i].sk_strategy);
nkeys++;
}
so->nkeys = nkeys;
if ( so->nkeys == 0 )
if (so->nkeys == 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("GIN index doesn't support search with void query")));
errmsg("GIN index doesn't support search with void query")));
pgstat_count_index_scan(&scan->xs_pgstat_info);
}
Datum
ginrescan(PG_FUNCTION_ARGS) {
ginrescan(PG_FUNCTION_ARGS)
{
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1);
GinScanOpaque so;
ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1);
GinScanOpaque so;
so = (GinScanOpaque) scan->opaque;
if ( so == NULL ) {
if (so == NULL)
{
/* if called from ginbeginscan */
so = (GinScanOpaque)palloc( sizeof(GinScanOpaqueData) );
so = (GinScanOpaque) palloc(sizeof(GinScanOpaqueData));
so->tempCtx = AllocSetContextCreate(CurrentMemoryContext,
"Gin scan temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
"Gin scan temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
initGinState(&so->ginstate, scan->indexRelation);
scan->opaque = so;
} else {
}
else
{
freeScanKeys(so->keys, so->nkeys, TRUE);
freeScanKeys(so->markPos, so->nkeys, FALSE);
}
so->markPos=so->keys=NULL;
so->markPos = so->keys = NULL;
if ( scankey && scan->numberOfKeys > 0 ) {
if (scankey && scan->numberOfKeys > 0)
{
memmove(scan->keyData, scankey,
scan->numberOfKeys * sizeof(ScanKeyData));
scan->numberOfKeys * sizeof(ScanKeyData));
}
PG_RETURN_VOID();
@@ -202,13 +221,15 @@ ginrescan(PG_FUNCTION_ARGS) {
Datum
ginendscan(PG_FUNCTION_ARGS) {
ginendscan(PG_FUNCTION_ARGS)
{
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
GinScanOpaque so = (GinScanOpaque) scan->opaque;
GinScanOpaque so = (GinScanOpaque) scan->opaque;
if ( so != NULL ) {
freeScanKeys(so->keys, so->nkeys, TRUE);
freeScanKeys(so->markPos, so->nkeys, FALSE);
if (so != NULL)
{
freeScanKeys(so->keys, so->nkeys, TRUE);
freeScanKeys(so->markPos, so->nkeys, FALSE);
MemoryContextDelete(so->tempCtx);
@@ -219,22 +240,28 @@ ginendscan(PG_FUNCTION_ARGS) {
}
static GinScanKey
copyScanKeys( GinScanKey keys, uint32 nkeys ) {
copyScanKeys(GinScanKey keys, uint32 nkeys)
{
GinScanKey newkeys;
uint32 i, j;
uint32 i,
j;
newkeys = (GinScanKey)palloc( sizeof(GinScanKeyData) * nkeys );
memcpy( newkeys, keys, sizeof(GinScanKeyData) * nkeys );
newkeys = (GinScanKey) palloc(sizeof(GinScanKeyData) * nkeys);
memcpy(newkeys, keys, sizeof(GinScanKeyData) * nkeys);
for(i=0;i<nkeys;i++) {
newkeys[i].scanEntry = (GinScanEntry)palloc(sizeof(GinScanEntryData) * keys[i].nentries );
memcpy( newkeys[i].scanEntry, keys[i].scanEntry, sizeof(GinScanEntryData) * keys[i].nentries );
for (i = 0; i < nkeys; i++)
{
newkeys[i].scanEntry = (GinScanEntry) palloc(sizeof(GinScanEntryData) * keys[i].nentries);
memcpy(newkeys[i].scanEntry, keys[i].scanEntry, sizeof(GinScanEntryData) * keys[i].nentries);
for (j = 0; j < keys[i].nentries; j++)
{
if (keys[i].scanEntry[j].buffer != InvalidBuffer)
IncrBufferRefCount(keys[i].scanEntry[j].buffer);
if (keys[i].scanEntry[j].master)
{
int masterN = keys[i].scanEntry[j].master - keys[i].scanEntry;
for(j=0;j<keys[i].nentries; j++) {
if ( keys[i].scanEntry[j].buffer != InvalidBuffer )
IncrBufferRefCount( keys[i].scanEntry[j].buffer );
if ( keys[i].scanEntry[j].master ) {
int masterN = keys[i].scanEntry[j].master - keys[i].scanEntry;
newkeys[i].scanEntry[j].master = newkeys[i].scanEntry + masterN;
}
}
@@ -243,24 +270,26 @@ copyScanKeys( GinScanKey keys, uint32 nkeys ) {
return newkeys;
}
Datum
ginmarkpos(PG_FUNCTION_ARGS) {
Datum
ginmarkpos(PG_FUNCTION_ARGS)
{
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
GinScanOpaque so = (GinScanOpaque) scan->opaque;
GinScanOpaque so = (GinScanOpaque) scan->opaque;
freeScanKeys(so->markPos, so->nkeys, FALSE);
so->markPos = copyScanKeys( so->keys, so->nkeys );
so->markPos = copyScanKeys(so->keys, so->nkeys);
PG_RETURN_VOID();
}
Datum
ginrestrpos(PG_FUNCTION_ARGS) {
Datum
ginrestrpos(PG_FUNCTION_ARGS)
{
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
GinScanOpaque so = (GinScanOpaque) scan->opaque;
GinScanOpaque so = (GinScanOpaque) scan->opaque;
freeScanKeys(so->keys, so->nkeys, FALSE);
so->keys = copyScanKeys( so->markPos, so->nkeys );
so->keys = copyScanKeys(so->markPos, so->nkeys);
PG_RETURN_VOID();
}
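
[Editorial note, not part of this commit: the fillScanKey() hunks above link every duplicate entry value to its first occurrence through the scan entry's "master" pointer, so only one entry per distinct value is actually walked. Below is a minimal standalone sketch of that duplicate-linking idea, using plain integers in place of Datums and an array index in place of the pointer; all names here are hypothetical and nothing below comes from the PostgreSQL tree.]

#include <stdio.h>

#define NENTRIES 6

/*
 * Sketch only: link each duplicate value to the index of its first
 * occurrence ("master"); -1 marks a value that must be scanned itself.
 */
int
main(void)
{
    int entries[NENTRIES] = {10, 20, 10, 30, 20, 10};
    int master[NENTRIES];
    int i, j;

    for (i = 0; i < NENTRIES; i++)
    {
        master[i] = -1;              /* assume it is a new value */
        for (j = 0; j < i; j++)
            if (entries[i] == entries[j])
            {
                master[i] = j;       /* reuse the earlier entry's result */
                break;
            }
    }

    for (i = 0; i < NENTRIES; i++)
        printf("entry %d (value %d): %s\n", i, entries[i],
               master[i] < 0 ? "scanned" : "copies its master");
    return 0;
}
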

@@ -1,14 +1,14 @@
/*-------------------------------------------------------------------------
*
* ginutil.c
* utilities routines for the postgres inverted index access method.
* utilities routines for the postgres inverted index access method.
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.6 2006/09/05 18:25:10 teodor Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.7 2006/10/04 00:29:48 momjian Exp $
*-------------------------------------------------------------------------
*/
@@ -19,26 +19,27 @@
#include "access/reloptions.h"
#include "storage/freespace.h"
void
initGinState( GinState *state, Relation index ) {
if ( index->rd_att->natts != 1 )
elog(ERROR, "numberOfAttributes %d != 1",
index->rd_att->natts);
void
initGinState(GinState *state, Relation index)
{
if (index->rd_att->natts != 1)
elog(ERROR, "numberOfAttributes %d != 1",
index->rd_att->natts);
state->tupdesc = index->rd_att;
fmgr_info_copy(&(state->compareFn),
index_getprocinfo(index, 1, GIN_COMPARE_PROC),
CurrentMemoryContext);
index_getprocinfo(index, 1, GIN_COMPARE_PROC),
CurrentMemoryContext);
fmgr_info_copy(&(state->extractValueFn),
index_getprocinfo(index, 1, GIN_EXTRACTVALUE_PROC),
CurrentMemoryContext);
index_getprocinfo(index, 1, GIN_EXTRACTVALUE_PROC),
CurrentMemoryContext);
fmgr_info_copy(&(state->extractQueryFn),
index_getprocinfo(index, 1, GIN_EXTRACTQUERY_PROC),
CurrentMemoryContext);
index_getprocinfo(index, 1, GIN_EXTRACTQUERY_PROC),
CurrentMemoryContext);
fmgr_info_copy(&(state->consistentFn),
index_getprocinfo(index, 1, GIN_CONSISTENT_PROC),
CurrentMemoryContext);
index_getprocinfo(index, 1, GIN_CONSISTENT_PROC),
CurrentMemoryContext);
}
/*
@@ -48,13 +49,16 @@ initGinState( GinState *state, Relation index ) {
*/
Buffer
GinNewBuffer(Relation index) {
Buffer buffer;
bool needLock;
GinNewBuffer(Relation index)
{
Buffer buffer;
bool needLock;
/* First, try to get a page from FSM */
for(;;) {
for (;;)
{
BlockNumber blkno = GetFreeIndexPage(&index->rd_node);
if (blkno == InvalidBlockNumber)
break;
@@ -64,14 +68,15 @@ GinNewBuffer(Relation index) {
* We have to guard against the possibility that someone else already
* recycled this page; the buffer may be locked if so.
*/
if (ConditionalLockBuffer(buffer)) {
Page page = BufferGetPage(buffer);
if (ConditionalLockBuffer(buffer))
{
Page page = BufferGetPage(buffer);
if (PageIsNew(page))
return buffer; /* OK to use, if never initialized */
return buffer; /* OK to use, if never initialized */
if (GinPageIsDeleted(page))
return buffer; /* OK to use */
return buffer; /* OK to use */
LockBuffer(buffer, GIN_UNLOCK);
}
@@ -95,36 +100,39 @@ GinNewBuffer(Relation index) {
}
void
GinInitPage(Page page, uint32 f, Size pageSize) {
GinInitPage(Page page, uint32 f, Size pageSize)
{
GinPageOpaque opaque;
PageInit(page, pageSize, sizeof(GinPageOpaqueData));
opaque = GinPageGetOpaque(page);
memset( opaque, 0, sizeof(GinPageOpaqueData) );
opaque->flags = f;
memset(opaque, 0, sizeof(GinPageOpaqueData));
opaque->flags = f;
opaque->rightlink = InvalidBlockNumber;
}
void
GinInitBuffer(Buffer b, uint32 f) {
GinInitPage( BufferGetPage(b), f, BufferGetPageSize(b) );
GinInitBuffer(Buffer b, uint32 f)
{
GinInitPage(BufferGetPage(b), f, BufferGetPageSize(b));
}
int
compareEntries(GinState *ginstate, Datum a, Datum b) {
compareEntries(GinState *ginstate, Datum a, Datum b)
{
return DatumGetInt32(
FunctionCall2(
&ginstate->compareFn,
a, b
)
FunctionCall2(
&ginstate->compareFn,
a, b
)
);
}
static FmgrInfo* cmpDatumPtr=NULL;
static FmgrInfo *cmpDatumPtr = NULL;
#if defined(__INTEL_COMPILER) && (defined(__ia64__) || defined(__ia64))
/*
#if defined(__INTEL_COMPILER) && (defined(__ia64__) || defined(__ia64))
/*
* Intel Compiler on Intel Itanium with -O2 has a bug around
* change static variable by user function called from
* libc func: it doesn't change. So mark it as volatile.
@@ -132,7 +140,7 @@ static FmgrInfo* cmpDatumPtr=NULL;
* It's a pity, but it's impossible to define optimization
* level here.
*/
#define VOLATILE volatile
#define VOLATILE volatile
#else
#define VOLATILE
#endif
@@ -140,57 +148,64 @@ static FmgrInfo* cmpDatumPtr=NULL;
static bool VOLATILE needUnique = FALSE;
static int
cmpEntries(const void * a, const void * b) {
int res = DatumGetInt32(
FunctionCall2(
cmpDatumPtr,
*(Datum*)a,
*(Datum*)b
)
cmpEntries(const void *a, const void *b)
{
int res = DatumGetInt32(
FunctionCall2(
cmpDatumPtr,
*(Datum *) a,
*(Datum *) b
)
);
if ( res == 0 )
if (res == 0)
needUnique = TRUE;
return res;
}
Datum*
extractEntriesS(GinState *ginstate, Datum value, uint32 *nentries) {
Datum *entries;
Datum *
extractEntriesS(GinState *ginstate, Datum value, uint32 *nentries)
{
Datum *entries;
entries = (Datum*)DatumGetPointer(
FunctionCall2(
&ginstate->extractValueFn,
value,
PointerGetDatum( nentries )
)
);
entries = (Datum *) DatumGetPointer(
FunctionCall2(
&ginstate->extractValueFn,
value,
PointerGetDatum(nentries)
)
);
if ( entries == NULL )
if (entries == NULL)
*nentries = 0;
if ( *nentries > 1 ) {
if (*nentries > 1)
{
cmpDatumPtr = &ginstate->compareFn;
needUnique = FALSE;
qsort(entries, *nentries, sizeof(Datum), cmpEntries);
qsort(entries, *nentries, sizeof(Datum), cmpEntries);
}
return entries;
}
Datum*
extractEntriesSU(GinState *ginstate, Datum value, uint32 *nentries) {
Datum *entries = extractEntriesS(ginstate, value, nentries);
Datum *
extractEntriesSU(GinState *ginstate, Datum value, uint32 *nentries)
{
Datum *entries = extractEntriesS(ginstate, value, nentries);
if ( *nentries>1 && needUnique ) {
Datum *ptr, *res;
if (*nentries > 1 && needUnique)
{
Datum *ptr,
*res;
ptr = res = entries;
while( ptr - entries < *nentries ) {
if ( compareEntries(ginstate, *ptr, *res ) != 0 )
while (ptr - entries < *nentries)
{
if (compareEntries(ginstate, *ptr, *res) != 0)
*(++res) = *ptr++;
else
ptr++;
@@ -206,13 +221,14 @@ extractEntriesSU(GinState *ginstate, Datum value, uint32 *nentries) {
* It's analog of PageGetTempPage(), but copies whole page
*/
Page
GinPageGetCopyPage( Page page ) {
Size pageSize = PageGetPageSize( page );
Page tmppage;
GinPageGetCopyPage(Page page)
{
Size pageSize = PageGetPageSize(page);
Page tmppage;
tmppage = (Page) palloc(pageSize);
memcpy(tmppage, page, pageSize);
tmppage=(Page)palloc( pageSize );
memcpy( tmppage, page, pageSize );
return tmppage;
}
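
[Editorial note, not part of this commit: extractEntriesS()/extractEntriesSU() above implement a sort-then-uniquify pass over the extracted entries, and record whether the uniquify pass is needed from inside the qsort comparator. A small standalone sketch of the same pattern on plain integers follows; the names (need_unique, cmp_int) are hypothetical and no PostgreSQL types are used.]

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

static bool need_unique = false;

/* qsort comparator that also records whether any duplicates were seen */
static int
cmp_int(const void *a, const void *b)
{
    int  av = *(const int *) a;
    int  bv = *(const int *) b;

    if (av == bv)
        need_unique = true;
    return (av > bv) - (av < bv);
}

int
main(void)
{
    int     vals[] = {7, 3, 7, 1, 3, 9};
    size_t  n = sizeof(vals) / sizeof(vals[0]);
    size_t  i;

    qsort(vals, n, sizeof(int), cmp_int);

    /* collapse adjacent duplicates in place, as extractEntriesSU does */
    if (n > 1 && need_unique)
    {
        size_t w = 0;

        for (i = 1; i < n; i++)
            if (vals[i] != vals[w])
                vals[++w] = vals[i];
        n = w + 1;
    }

    for (i = 0; i < n; i++)
        printf("%d ", vals[i]);
    printf("\n");
    return 0;
}
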

@@ -1,14 +1,14 @@
/*-------------------------------------------------------------------------
*
* ginvacuum.c
* delete & vacuum routines for the postgres GIN
* delete & vacuum routines for the postgres GIN
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.6 2006/09/21 20:31:21 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.7 2006/10/04 00:29:48 momjian Exp $
*-------------------------------------------------------------------------
*/
@@ -21,42 +21,50 @@
#include "storage/freespace.h"
#include "commands/vacuum.h"
typedef struct {
Relation index;
IndexBulkDeleteResult *result;
IndexBulkDeleteCallback callback;
void *callback_state;
GinState ginstate;
typedef struct
{
Relation index;
IndexBulkDeleteResult *result;
IndexBulkDeleteCallback callback;
void *callback_state;
GinState ginstate;
} GinVacuumState;
/*
* Cleans array of ItemPointer (removes dead pointers)
* Results are always stored in *cleaned, which will be allocated
* if its needed. In case of *cleaned!=NULL caller is resposible to
* if its needed. In case of *cleaned!=NULL caller is resposible to
* enough space. *cleaned and items may point to the same
* memory addres.
*/
static uint32
ginVacuumPostingList( GinVacuumState *gvs, ItemPointerData *items, uint32 nitem, ItemPointerData **cleaned ) {
uint32 i,j=0;
ginVacuumPostingList(GinVacuumState *gvs, ItemPointerData *items, uint32 nitem, ItemPointerData **cleaned)
{
uint32 i,
j = 0;
/*
* just scan over ItemPointer array
*/
for(i=0;i<nitem;i++) {
if ( gvs->callback(items+i, gvs->callback_state) ) {
for (i = 0; i < nitem; i++)
{
if (gvs->callback(items + i, gvs->callback_state))
{
gvs->result->tuples_removed += 1;
if ( !*cleaned ) {
*cleaned = (ItemPointerData*)palloc(sizeof(ItemPointerData)*nitem);
if ( i!=0 )
memcpy( *cleaned, items, sizeof(ItemPointerData)*i);
if (!*cleaned)
{
*cleaned = (ItemPointerData *) palloc(sizeof(ItemPointerData) * nitem);
if (i != 0)
memcpy(*cleaned, items, sizeof(ItemPointerData) * i);
}
} else {
}
else
{
gvs->result->num_index_tuples += 1;
if (i!=j)
if (i != j)
(*cleaned)[j] = items[i];
j++;
}
@@ -69,56 +77,65 @@ ginVacuumPostingList( GinVacuumState *gvs, ItemPointerData *items, uint32 nitem,
* fills WAL record for vacuum leaf page
*/
static void
xlogVacuumPage(Relation index, Buffer buffer) {
Page page = BufferGetPage( buffer );
XLogRecPtr recptr;
xlogVacuumPage(Relation index, Buffer buffer)
{
Page page = BufferGetPage(buffer);
XLogRecPtr recptr;
XLogRecData rdata[3];
ginxlogVacuumPage data;
char *backup;
char itups[BLCKSZ];
uint32 len=0;
ginxlogVacuumPage data;
char *backup;
char itups[BLCKSZ];
uint32 len = 0;
Assert( GinPageIsLeaf( page ) );
Assert(GinPageIsLeaf(page));
if (index->rd_istemp)
return;
return;
data.node = index->rd_node;
data.blkno = BufferGetBlockNumber(buffer);
if ( GinPageIsData( page ) ) {
backup = GinDataPageGetData( page );
data.nitem = GinPageGetOpaque( page )->maxoff;
if ( data.nitem )
len = MAXALIGN( sizeof(ItemPointerData)*data.nitem );
} else {
char *ptr;
if (GinPageIsData(page))
{
backup = GinDataPageGetData(page);
data.nitem = GinPageGetOpaque(page)->maxoff;
if (data.nitem)
len = MAXALIGN(sizeof(ItemPointerData) * data.nitem);
}
else
{
char *ptr;
OffsetNumber i;
ptr = backup = itups;
for(i=FirstOffsetNumber;i<=PageGetMaxOffsetNumber(page);i++) {
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
memcpy( ptr, itup, IndexTupleSize( itup ) );
ptr += MAXALIGN( IndexTupleSize( itup ) );
for (i = FirstOffsetNumber; i <= PageGetMaxOffsetNumber(page); i++)
{
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
memcpy(ptr, itup, IndexTupleSize(itup));
ptr += MAXALIGN(IndexTupleSize(itup));
}
data.nitem = PageGetMaxOffsetNumber(page);
len = ptr-backup;
len = ptr - backup;
}
rdata[0].buffer = buffer;
rdata[0].buffer_std = ( GinPageIsData( page ) ) ? FALSE : TRUE;
rdata[0].buffer_std = (GinPageIsData(page)) ? FALSE : TRUE;
rdata[0].len = 0;
rdata[0].data = NULL;
rdata[0].next = rdata + 1;
rdata[1].buffer = InvalidBuffer;
rdata[1].len = sizeof(ginxlogVacuumPage);
rdata[1].data = (char*)&data;
rdata[1].data = (char *) &data;
if ( len == 0 ) {
if (len == 0)
{
rdata[1].next = NULL;
} else {
}
else
{
rdata[1].next = rdata + 2;
rdata[2].buffer = InvalidBuffer;
@@ -133,71 +150,84 @@ xlogVacuumPage(Relation index, Buffer buffer) {
}
static bool
ginVacuumPostingTreeLeaves( GinVacuumState *gvs, BlockNumber blkno, bool isRoot, Buffer *rootBuffer ) {
Buffer buffer = ReadBuffer( gvs->index, blkno );
Page page = BufferGetPage( buffer );
bool hasVoidPage = FALSE;
ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, Buffer *rootBuffer)
{
Buffer buffer = ReadBuffer(gvs->index, blkno);
Page page = BufferGetPage(buffer);
bool hasVoidPage = FALSE;
/*
/*
* We should be sure that we don't concurrent with inserts, insert process
* never release root page until end (but it can unlock it and lock again).
* If we lock root with with LockBufferForCleanup, new scan process can't begin,
* but previous may run.
* ginmarkpos/start* keeps buffer pinned, so we will wait for it.
* We lock only one posting tree in whole index, so, it's concurrent enough..
* Side effect: after this is full complete, tree is unused by any other process
* never release root page until end (but it can unlock it and lock
* again). If we lock root with with LockBufferForCleanup, new scan
* process can't begin, but previous may run. ginmarkpos/start* keeps
* buffer pinned, so we will wait for it. We lock only one posting tree in
* whole index, so, it's concurrent enough.. Side effect: after this is
* full complete, tree is unused by any other process
*/
LockBufferForCleanup( buffer );
LockBufferForCleanup(buffer);
Assert( GinPageIsData(page) );
Assert(GinPageIsData(page));
if ( GinPageIsLeaf(page) ) {
OffsetNumber newMaxOff, oldMaxOff = GinPageGetOpaque(page)->maxoff;
if (GinPageIsLeaf(page))
{
OffsetNumber newMaxOff,
oldMaxOff = GinPageGetOpaque(page)->maxoff;
ItemPointerData *cleaned = NULL;
newMaxOff = ginVacuumPostingList( gvs,
(ItemPointer)GinDataPageGetData(page), oldMaxOff, &cleaned );
newMaxOff = ginVacuumPostingList(gvs,
(ItemPointer) GinDataPageGetData(page), oldMaxOff, &cleaned);
/* saves changes about deleted tuple ... */
if ( oldMaxOff != newMaxOff ) {
if (oldMaxOff != newMaxOff)
{
START_CRIT_SECTION();
if ( newMaxOff > 0 )
memcpy( GinDataPageGetData(page), cleaned, sizeof(ItemPointerData) * newMaxOff );
pfree( cleaned );
if (newMaxOff > 0)
memcpy(GinDataPageGetData(page), cleaned, sizeof(ItemPointerData) * newMaxOff);
pfree(cleaned);
GinPageGetOpaque(page)->maxoff = newMaxOff;
xlogVacuumPage(gvs->index, buffer);
xlogVacuumPage(gvs->index, buffer);
MarkBufferDirty( buffer );
MarkBufferDirty(buffer);
END_CRIT_SECTION();
/* if root is a leaf page, we don't desire futher processing */
if ( !isRoot && GinPageGetOpaque(page)->maxoff < FirstOffsetNumber )
/* if root is a leaf page, we don't desire futher processing */
if (!isRoot && GinPageGetOpaque(page)->maxoff < FirstOffsetNumber)
hasVoidPage = TRUE;
}
} else {
}
else
{
OffsetNumber i;
bool isChildHasVoid = FALSE;
bool isChildHasVoid = FALSE;
for( i=FirstOffsetNumber ; i <= GinPageGetOpaque(page)->maxoff ; i++ ) {
PostingItem *pitem = (PostingItem*)GinDataPageGetItem(page, i);
if ( ginVacuumPostingTreeLeaves( gvs, PostingItemGetBlockNumber(pitem), FALSE, NULL ) )
for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++)
{
PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, i);
if (ginVacuumPostingTreeLeaves(gvs, PostingItemGetBlockNumber(pitem), FALSE, NULL))
isChildHasVoid = TRUE;
}
if ( isChildHasVoid )
if (isChildHasVoid)
hasVoidPage = TRUE;
}
/* if we have root and theres void pages in tree, then we don't release lock
to go further processing and guarantee that tree is unused */
if ( !(isRoot && hasVoidPage) ) {
UnlockReleaseBuffer( buffer );
} else {
Assert( rootBuffer );
/*
* if we have root and theres void pages in tree, then we don't release
* lock to go further processing and guarantee that tree is unused
*/
if (!(isRoot && hasVoidPage))
{
UnlockReleaseBuffer(buffer);
}
else
{
Assert(rootBuffer);
*rootBuffer = buffer;
}
@@ -205,49 +235,54 @@ ginVacuumPostingTreeLeaves( GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
}
static void
ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkno,
BlockNumber parentBlkno, OffsetNumber myoff, bool isParentRoot ) {
Buffer dBuffer = ReadBuffer( gvs->index, deleteBlkno );
Buffer lBuffer = (leftBlkno==InvalidBlockNumber) ? InvalidBuffer : ReadBuffer( gvs->index, leftBlkno );
Buffer pBuffer = ReadBuffer( gvs->index, parentBlkno );
Page page, parentPage;
ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkno,
BlockNumber parentBlkno, OffsetNumber myoff, bool isParentRoot)
{
Buffer dBuffer = ReadBuffer(gvs->index, deleteBlkno);
Buffer lBuffer = (leftBlkno == InvalidBlockNumber) ? InvalidBuffer : ReadBuffer(gvs->index, leftBlkno);
Buffer pBuffer = ReadBuffer(gvs->index, parentBlkno);
Page page,
parentPage;
LockBuffer( dBuffer, GIN_EXCLUSIVE );
if ( !isParentRoot ) /* parent is already locked by LockBufferForCleanup() */
LockBuffer( pBuffer, GIN_EXCLUSIVE );
LockBuffer(dBuffer, GIN_EXCLUSIVE);
if (!isParentRoot) /* parent is already locked by
* LockBufferForCleanup() */
LockBuffer(pBuffer, GIN_EXCLUSIVE);
START_CRIT_SECTION();
if ( leftBlkno!= InvalidBlockNumber ) {
if (leftBlkno != InvalidBlockNumber)
{
BlockNumber rightlink;
LockBuffer( lBuffer, GIN_EXCLUSIVE );
LockBuffer(lBuffer, GIN_EXCLUSIVE);
page = BufferGetPage( dBuffer );
page = BufferGetPage(dBuffer);
rightlink = GinPageGetOpaque(page)->rightlink;
page = BufferGetPage( lBuffer );
page = BufferGetPage(lBuffer);
GinPageGetOpaque(page)->rightlink = rightlink;
}
parentPage = BufferGetPage( pBuffer );
parentPage = BufferGetPage(pBuffer);
PageDeletePostingItem(parentPage, myoff);
page = BufferGetPage( dBuffer );
page = BufferGetPage(dBuffer);
GinPageGetOpaque(page)->flags = GIN_DELETED;
if (!gvs->index->rd_istemp) {
XLogRecPtr recptr;
if (!gvs->index->rd_istemp)
{
XLogRecPtr recptr;
XLogRecData rdata[4];
ginxlogDeletePage data;
int n;
ginxlogDeletePage data;
int n;
data.node = gvs->index->rd_node;
data.blkno = deleteBlkno;
data.parentBlkno = parentBlkno;
data.parentOffset = myoff;
data.leftBlkno = leftBlkno;
data.rightLink = GinPageGetOpaque(page)->rightlink;
data.leftBlkno = leftBlkno;
data.rightLink = GinPageGetOpaque(page)->rightlink;
rdata[0].buffer = dBuffer;
rdata[0].buffer_std = FALSE;
@@ -261,20 +296,22 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
rdata[1].len = 0;
rdata[1].next = rdata + 2;
if ( leftBlkno!= InvalidBlockNumber ) {
if (leftBlkno != InvalidBlockNumber)
{
rdata[2].buffer = lBuffer;
rdata[2].buffer_std = FALSE;
rdata[2].data = NULL;
rdata[2].len = 0;
rdata[2].next = rdata + 3;
n = 3;
} else
}
else
n = 2;
rdata[n].buffer = InvalidBuffer;
rdata[n].buffer_std = FALSE;
rdata[n].len = sizeof(ginxlogDeletePage);
rdata[n].data = (char*)&data;
rdata[n].data = (char *) &data;
rdata[n].next = NULL;
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE, rdata);
@@ -282,122 +319,141 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
PageSetTLI(page, ThisTimeLineID);
PageSetLSN(parentPage, recptr);
PageSetTLI(parentPage, ThisTimeLineID);
if ( leftBlkno!= InvalidBlockNumber ) {
page = BufferGetPage( lBuffer );
if (leftBlkno != InvalidBlockNumber)
{
page = BufferGetPage(lBuffer);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
}
}
MarkBufferDirty( pBuffer );
if ( !isParentRoot )
LockBuffer( pBuffer, GIN_UNLOCK );
ReleaseBuffer( pBuffer );
MarkBufferDirty(pBuffer);
if (!isParentRoot)
LockBuffer(pBuffer, GIN_UNLOCK);
ReleaseBuffer(pBuffer);
if ( leftBlkno!= InvalidBlockNumber ) {
MarkBufferDirty( lBuffer );
UnlockReleaseBuffer( lBuffer );
if (leftBlkno != InvalidBlockNumber)
{
MarkBufferDirty(lBuffer);
UnlockReleaseBuffer(lBuffer);
}
MarkBufferDirty( dBuffer );
UnlockReleaseBuffer( dBuffer );
MarkBufferDirty(dBuffer);
UnlockReleaseBuffer(dBuffer);
END_CRIT_SECTION();
gvs->result->pages_deleted++;
}
typedef struct DataPageDeleteStack {
struct DataPageDeleteStack *child;
struct DataPageDeleteStack *parent;
typedef struct DataPageDeleteStack
{
struct DataPageDeleteStack *child;
struct DataPageDeleteStack *parent;
BlockNumber blkno;
bool isRoot;
BlockNumber blkno;
bool isRoot;
} DataPageDeleteStack;
/*
* scans posting tree and deletes empty pages
*/
static bool
ginScanToDelete( GinVacuumState *gvs, BlockNumber blkno, bool isRoot, DataPageDeleteStack *parent, OffsetNumber myoff ) {
DataPageDeleteStack *me;
Buffer buffer;
Page page;
bool meDelete = FALSE;
ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, DataPageDeleteStack *parent, OffsetNumber myoff)
{
DataPageDeleteStack *me;
Buffer buffer;
Page page;
bool meDelete = FALSE;
if ( isRoot ) {
if (isRoot)
{
me = parent;
} else {
if ( ! parent->child ) {
me = (DataPageDeleteStack*)palloc0(sizeof(DataPageDeleteStack));
me->parent=parent;
}
else
{
if (!parent->child)
{
me = (DataPageDeleteStack *) palloc0(sizeof(DataPageDeleteStack));
me->parent = parent;
parent->child = me;
me->blkno = InvalidBlockNumber;
} else
}
else
me = parent->child;
}
buffer = ReadBuffer( gvs->index, blkno );
page = BufferGetPage( buffer );
buffer = ReadBuffer(gvs->index, blkno);
page = BufferGetPage(buffer);
Assert( GinPageIsData(page) );
Assert(GinPageIsData(page));
if ( !GinPageIsLeaf(page) ) {
if (!GinPageIsLeaf(page))
{
OffsetNumber i;
for(i=FirstOffsetNumber;i<=GinPageGetOpaque(page)->maxoff;i++) {
PostingItem *pitem = (PostingItem*)GinDataPageGetItem(page, i);
for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++)
{
PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, i);
if ( ginScanToDelete( gvs, PostingItemGetBlockNumber(pitem), FALSE, me, i ) )
if (ginScanToDelete(gvs, PostingItemGetBlockNumber(pitem), FALSE, me, i))
i--;
}
}
if ( GinPageGetOpaque(page)->maxoff < FirstOffsetNumber ) {
if ( !( me->blkno == InvalidBlockNumber && GinPageRightMost(page) ) ) {
if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber)
{
if (!(me->blkno == InvalidBlockNumber && GinPageRightMost(page)))
{
/* we never delete right most branch */
Assert( !isRoot );
if ( GinPageGetOpaque(page)->maxoff < FirstOffsetNumber ) {
ginDeletePage( gvs, blkno, me->blkno, me->parent->blkno, myoff, me->parent->isRoot );
Assert(!isRoot);
if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber)
{
ginDeletePage(gvs, blkno, me->blkno, me->parent->blkno, myoff, me->parent->isRoot);
meDelete = TRUE;
}
}
}
ReleaseBuffer( buffer );
ReleaseBuffer(buffer);
if ( !meDelete )
if (!meDelete)
me->blkno = blkno;
return meDelete;
}
static void
ginVacuumPostingTree( GinVacuumState *gvs, BlockNumber rootBlkno ) {
Buffer rootBuffer = InvalidBuffer;
DataPageDeleteStack root, *ptr, *tmp;
ginVacuumPostingTree(GinVacuumState *gvs, BlockNumber rootBlkno)
{
Buffer rootBuffer = InvalidBuffer;
DataPageDeleteStack root,
*ptr,
*tmp;
if ( ginVacuumPostingTreeLeaves(gvs, rootBlkno, TRUE, &rootBuffer)==FALSE ) {
Assert( rootBuffer == InvalidBuffer );
if (ginVacuumPostingTreeLeaves(gvs, rootBlkno, TRUE, &rootBuffer) == FALSE)
{
Assert(rootBuffer == InvalidBuffer);
return;
}
memset(&root,0,sizeof(DataPageDeleteStack));
memset(&root, 0, sizeof(DataPageDeleteStack));
root.blkno = rootBlkno;
root.isRoot = TRUE;
vacuum_delay_point();
ginScanToDelete( gvs, rootBlkno, TRUE, &root, InvalidOffsetNumber );
ginScanToDelete(gvs, rootBlkno, TRUE, &root, InvalidOffsetNumber);
ptr = root.child;
while( ptr ) {
while (ptr)
{
tmp = ptr->child;
pfree( ptr );
pfree(ptr);
ptr = tmp;
}
UnlockReleaseBuffer( rootBuffer );
UnlockReleaseBuffer(rootBuffer);
}
/*
@@ -406,48 +462,65 @@ ginVacuumPostingTree( GinVacuumState *gvs, BlockNumber rootBlkno ) {
* then page is copied into temprorary one.
*/
static Page
ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint32 *nroot) {
Page origpage = BufferGetPage( buffer ), tmppage;
OffsetNumber i, maxoff = PageGetMaxOffsetNumber( origpage );
ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint32 *nroot)
{
Page origpage = BufferGetPage(buffer),
tmppage;
OffsetNumber i,
maxoff = PageGetMaxOffsetNumber(origpage);
tmppage = origpage;
*nroot=0;
*nroot = 0;
for(i=FirstOffsetNumber; i<= maxoff; i++) {
IndexTuple itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i));
for (i = FirstOffsetNumber; i <= maxoff; i++)
{
IndexTuple itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i));
if ( GinIsPostingTree(itup) ) {
/* store posting tree's roots for further processing,
we can't vacuum it just now due to risk of deadlocks with scans/inserts */
roots[ *nroot ] = GinItemPointerGetBlockNumber(&itup->t_tid);
if (GinIsPostingTree(itup))
{
/*
* store posting tree's roots for further processing, we can't
* vacuum it just now due to risk of deadlocks with scans/inserts
*/
roots[*nroot] = GinItemPointerGetBlockNumber(&itup->t_tid);
(*nroot)++;
} else if ( GinGetNPosting(itup) > 0 ) {
/* if we already create temrorary page, we will make changes in place */
ItemPointerData *cleaned = (tmppage==origpage) ? NULL : GinGetPosting(itup );
uint32 newN = ginVacuumPostingList( gvs, GinGetPosting(itup), GinGetNPosting(itup), &cleaned );
if ( GinGetNPosting(itup) != newN ) {
bool isnull;
Datum value;
}
else if (GinGetNPosting(itup) > 0)
{
/*
* if we already create temrorary page, we will make changes in
* place
*/
ItemPointerData *cleaned = (tmppage == origpage) ? NULL : GinGetPosting(itup);
uint32 newN = ginVacuumPostingList(gvs, GinGetPosting(itup), GinGetNPosting(itup), &cleaned);
if (GinGetNPosting(itup) != newN)
{
bool isnull;
Datum value;
/*
* Some ItemPointers was deleted, so we should remake our tuple
* Some ItemPointers was deleted, so we should remake our
* tuple
*/
if ( tmppage==origpage ) {
if (tmppage == origpage)
{
/*
* On first difference we create temprorary page in memory
* and copies content in to it.
*/
tmppage=GinPageGetCopyPage ( origpage );
tmppage = GinPageGetCopyPage(origpage);
if ( newN > 0 ) {
Size pos = ((char*)GinGetPosting(itup)) - ((char*)origpage);
memcpy( tmppage+pos, cleaned, sizeof(ItemPointerData)*newN );
if (newN > 0)
{
Size pos = ((char *) GinGetPosting(itup)) - ((char *) origpage);
memcpy(tmppage + pos, cleaned, sizeof(ItemPointerData) * newN);
}
pfree( cleaned );
pfree(cleaned);
/* set itup pointer to new page */
itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i));
@@ -457,30 +530,31 @@ ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint3
itup = GinFormTuple(&gvs->ginstate, value, GinGetPosting(itup), newN);
PageIndexTupleDelete(tmppage, i);
if ( PageAddItem( tmppage, (Item)itup, IndexTupleSize(itup), i, LP_USED ) != i )
elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(gvs->index));
if (PageAddItem(tmppage, (Item) itup, IndexTupleSize(itup), i, LP_USED) != i)
elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(gvs->index));
pfree( itup );
pfree(itup);
}
}
}
return ( tmppage==origpage ) ? NULL : tmppage;
return (tmppage == origpage) ? NULL : tmppage;
}
Datum
ginbulkdelete(PG_FUNCTION_ARGS) {
ginbulkdelete(PG_FUNCTION_ARGS)
{
IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2);
void *callback_state = (void *) PG_GETARG_POINTER(3);
Relation index = info->index;
BlockNumber blkno = GIN_ROOT_BLKNO;
GinVacuumState gvs;
Buffer buffer;
BlockNumber rootOfPostingTree[ BLCKSZ/ (sizeof(IndexTupleData)+sizeof(ItemId)) ];
uint32 nRoot;
BlockNumber blkno = GIN_ROOT_BLKNO;
GinVacuumState gvs;
Buffer buffer;
BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))];
uint32 nRoot;
/* first time through? */
if (stats == NULL)
@@ -494,107 +568,117 @@ ginbulkdelete(PG_FUNCTION_ARGS) {
gvs.callback_state = callback_state;
initGinState(&gvs.ginstate, index);
buffer = ReadBuffer( index, blkno );
buffer = ReadBuffer(index, blkno);
/* find leaf page */
for(;;) {
Page page = BufferGetPage( buffer );
IndexTuple itup;
for (;;)
{
Page page = BufferGetPage(buffer);
IndexTuple itup;
LockBuffer(buffer,GIN_SHARE);
LockBuffer(buffer, GIN_SHARE);
Assert( !GinPageIsData(page) );
Assert(!GinPageIsData(page));
if ( GinPageIsLeaf(page) ) {
LockBuffer(buffer,GIN_UNLOCK);
LockBuffer(buffer,GIN_EXCLUSIVE);
if (GinPageIsLeaf(page))
{
LockBuffer(buffer, GIN_UNLOCK);
LockBuffer(buffer, GIN_EXCLUSIVE);
if ( blkno==GIN_ROOT_BLKNO && !GinPageIsLeaf(page) ) {
LockBuffer(buffer,GIN_UNLOCK);
continue; /* check it one more */
if (blkno == GIN_ROOT_BLKNO && !GinPageIsLeaf(page))
{
LockBuffer(buffer, GIN_UNLOCK);
continue; /* check it one more */
}
break;
break;
}
Assert( PageGetMaxOffsetNumber(page) >= FirstOffsetNumber );
Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
blkno = GinItemPointerGetBlockNumber(&(itup)->t_tid);
Assert( blkno!= InvalidBlockNumber );
Assert(blkno != InvalidBlockNumber);
LockBuffer(buffer,GIN_UNLOCK);
buffer = ReleaseAndReadBuffer( buffer, index, blkno );
LockBuffer(buffer, GIN_UNLOCK);
buffer = ReleaseAndReadBuffer(buffer, index, blkno);
}
/* right now we found leftmost page in entry's BTree */
for(;;) {
Page page = BufferGetPage( buffer );
Page resPage;
uint32 i;
for (;;)
{
Page page = BufferGetPage(buffer);
Page resPage;
uint32 i;
Assert( !GinPageIsData(page) );
Assert(!GinPageIsData(page));
resPage = ginVacuumEntryPage(&gvs, buffer, rootOfPostingTree, &nRoot);
blkno = GinPageGetOpaque( page )->rightlink;
blkno = GinPageGetOpaque(page)->rightlink;
if ( resPage ) {
if (resPage)
{
START_CRIT_SECTION();
PageRestoreTempPage( resPage, page );
xlogVacuumPage(gvs.index, buffer);
MarkBufferDirty( buffer );
PageRestoreTempPage(resPage, page);
xlogVacuumPage(gvs.index, buffer);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
END_CRIT_SECTION();
} else {
}
else
{
UnlockReleaseBuffer(buffer);
}
vacuum_delay_point();
for(i=0; i<nRoot; i++) {
ginVacuumPostingTree( &gvs, rootOfPostingTree[i] );
for (i = 0; i < nRoot; i++)
{
ginVacuumPostingTree(&gvs, rootOfPostingTree[i]);
vacuum_delay_point();
}
if ( blkno==InvalidBlockNumber ) /*rightmost page*/
if (blkno == InvalidBlockNumber) /* rightmost page */
break;
buffer = ReadBuffer( index, blkno );
LockBuffer(buffer,GIN_EXCLUSIVE);
buffer = ReadBuffer(index, blkno);
LockBuffer(buffer, GIN_EXCLUSIVE);
}
PG_RETURN_POINTER(gvs.result);
}
Datum
ginvacuumcleanup(PG_FUNCTION_ARGS) {
Datum
ginvacuumcleanup(PG_FUNCTION_ARGS)
{
IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
Relation index = info->index;
bool needLock;
BlockNumber npages,
Relation index = info->index;
bool needLock;
BlockNumber npages,
blkno;
BlockNumber totFreePages,
nFreePages,
*freePages,
maxFreePages;
maxFreePages;
BlockNumber lastBlock = GIN_ROOT_BLKNO,
lastFilledBlock = GIN_ROOT_BLKNO;
lastFilledBlock = GIN_ROOT_BLKNO;
/* Set up all-zero stats if ginbulkdelete wasn't called */
if (stats == NULL)
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
/*
* XXX we always report the heap tuple count as the number of index
* entries. This is bogus if the index is partial, but it's real hard
* to tell how many distinct heap entries are referenced by a GIN index.
* entries. This is bogus if the index is partial, but it's real hard to
* tell how many distinct heap entries are referenced by a GIN index.
*/
stats->num_index_tuples = info->num_heap_tuples;
/*
* If vacuum full, we already have exclusive lock on the index.
* Otherwise, need lock unless it's local to this backend.
* If vacuum full, we already have exclusive lock on the index. Otherwise,
* need lock unless it's local to this backend.
*/
if (info->vacuum_full)
needLock = false;
@@ -614,32 +698,38 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) {
totFreePages = nFreePages = 0;
freePages = (BlockNumber *) palloc(sizeof(BlockNumber) * maxFreePages);
for (blkno = GIN_ROOT_BLKNO + 1; blkno < npages; blkno++) {
Buffer buffer;
Page page;
for (blkno = GIN_ROOT_BLKNO + 1; blkno < npages; blkno++)
{
Buffer buffer;
Page page;
vacuum_delay_point();
buffer = ReadBuffer(index, blkno);
LockBuffer(buffer, GIN_SHARE);
page = (Page) BufferGetPage(buffer);
if ( GinPageIsDeleted(page) ) {
if (GinPageIsDeleted(page))
{
if (nFreePages < maxFreePages)
freePages[nFreePages++] = blkno;
totFreePages++;
} else
}
else
lastFilledBlock = blkno;
UnlockReleaseBuffer(buffer);
}
lastBlock = npages - 1;
if (info->vacuum_full && nFreePages > 0) {
if (info->vacuum_full && nFreePages > 0)
{
/* try to truncate index */
int i;
for (i = 0; i < nFreePages; i++)
if (freePages[i] >= lastFilledBlock) {
int i;
for (i = 0; i < nFreePages; i++)
if (freePages[i] >= lastFilledBlock)
{
totFreePages = nFreePages = i;
break;
}
@@ -661,4 +751,3 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) {
PG_RETURN_POINTER(stats);
}
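
[Editorial note, not part of this commit: ginVacuumPostingList() above filters dead item pointers out of a posting list, allocating the result array lazily -- only when the first dead pointer is found -- and copying the survivors seen so far at that moment. The standalone sketch below shows that lazy-copy pattern; is_dead() and vacuum_list() are hypothetical stand-ins for the bulk-delete callback and the real function, and plain ints replace ItemPointerData.]

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

/* hypothetical stand-in for the bulk-delete callback */
static bool
is_dead(int item)
{
    return item % 2 == 0;       /* pretend even items are dead */
}

/*
 * Copy the live items of 'items' into '*cleaned', allocating it only
 * when the first dead item is met; returns the number of live items.
 * If no item is dead, *cleaned stays NULL and the caller keeps using
 * the original array.
 */
static unsigned
vacuum_list(const int *items, unsigned nitem, int **cleaned)
{
    unsigned i, j = 0;

    for (i = 0; i < nitem; i++)
    {
        if (is_dead(items[i]))
        {
            if (*cleaned == NULL)
            {
                *cleaned = malloc(sizeof(int) * nitem);
                if (i != 0)
                    memcpy(*cleaned, items, sizeof(int) * i);
                /* j == i here: every earlier item was live and copied */
            }
        }
        else
        {
            if (*cleaned != NULL)
                (*cleaned)[j] = items[i];
            j++;
        }
    }
    return j;
}

int
main(void)
{
    int      items[] = {1, 2, 3, 4, 5, 7};
    int     *cleaned = NULL;
    unsigned n = vacuum_list(items, 6, &cleaned);
    unsigned i;

    for (i = 0; i < n; i++)
        printf("%d ", cleaned ? cleaned[i] : items[i]);
    printf("\n");
    free(cleaned);
    return 0;
}
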

@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginxlog.c,v 1.4 2006/08/07 16:57:56 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gin/ginxlog.c,v 1.5 2006/10/04 00:29:48 momjian Exp $
*-------------------------------------------------------------------------
*/
#include "postgres.h"
@@ -17,12 +17,13 @@
#include "access/heapam.h"
#include "utils/memutils.h"
static MemoryContext opCtx; /* working memory for operations */
static MemoryContext opCtx; /* working memory for operations */
static MemoryContext topCtx;
typedef struct ginIncompleteSplit {
RelFileNode node;
BlockNumber leftBlkno;
typedef struct ginIncompleteSplit
{
RelFileNode node;
BlockNumber leftBlkno;
BlockNumber rightBlkno;
BlockNumber rootBlkno;
} ginIncompleteSplit;
@@ -30,10 +31,11 @@ typedef struct ginIncompleteSplit {
static List *incomplete_splits;
static void
pushIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber rightBlkno, BlockNumber rootBlkno) {
ginIncompleteSplit *split;
pushIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber rightBlkno, BlockNumber rootBlkno)
{
ginIncompleteSplit *split;
MemoryContextSwitchTo( topCtx );
MemoryContextSwitchTo(topCtx);
split = palloc(sizeof(ginIncompleteSplit));
@@ -44,17 +46,20 @@ pushIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber rightBl
incomplete_splits = lappend(incomplete_splits, split);
MemoryContextSwitchTo( opCtx );
MemoryContextSwitchTo(opCtx);
}
static void
forgetIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber updateBlkno) {
forgetIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber updateBlkno)
{
ListCell *l;
foreach(l, incomplete_splits) {
ginIncompleteSplit *split = (ginIncompleteSplit *) lfirst(l);
foreach(l, incomplete_splits)
{
ginIncompleteSplit *split = (ginIncompleteSplit *) lfirst(l);
if ( RelFileNodeEquals(node, split->node) && leftBlkno == split->leftBlkno && updateBlkno == split->rightBlkno ) {
if (RelFileNodeEquals(node, split->node) && leftBlkno == split->leftBlkno && updateBlkno == split->rightBlkno)
{
incomplete_splits = list_delete_ptr(incomplete_splits, split);
break;
}
@@ -62,7 +67,8 @@ forgetIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber updat
}
static void
ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) {
ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
Relation reln;
Buffer buffer;
@@ -83,9 +89,10 @@ ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) {
}
static void
ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record) {
ginxlogCreatePostingTree *data = (ginxlogCreatePostingTree*)XLogRecGetData(record);
ItemPointerData *items = (ItemPointerData*)(XLogRecGetData(record) + sizeof(ginxlogCreatePostingTree));
ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record)
{
ginxlogCreatePostingTree *data = (ginxlogCreatePostingTree *) XLogRecGetData(record);
ItemPointerData *items = (ItemPointerData *) (XLogRecGetData(record) + sizeof(ginxlogCreatePostingTree));
Relation reln;
Buffer buffer;
Page page;
@@ -95,8 +102,8 @@ ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record) {
Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer);
GinInitBuffer(buffer, GIN_DATA|GIN_LEAF);
memcpy( GinDataPageGetData(page), items, sizeof(ItemPointerData) * data->nitem );
GinInitBuffer(buffer, GIN_DATA | GIN_LEAF);
memcpy(GinDataPageGetData(page), items, sizeof(ItemPointerData) * data->nitem);
GinPageGetOpaque(page)->maxoff = data->nitem;
PageSetLSN(page, lsn);
@@ -107,8 +114,9 @@ ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record) {
}
static void
ginRedoInsert(XLogRecPtr lsn, XLogRecord *record) {
ginxlogInsert *data = (ginxlogInsert*)XLogRecGetData(record);
ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
{
ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record);
Relation reln;
Buffer buffer;
Page page;
@@ -122,64 +130,73 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record) {
Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer);
if ( data->isData ) {
Assert( data->isDelete == FALSE );
Assert( GinPageIsData( page ) );
if (data->isData)
{
Assert(data->isDelete == FALSE);
Assert(GinPageIsData(page));
if ( data->isLeaf ) {
if (data->isLeaf)
{
OffsetNumber i;
ItemPointerData *items = (ItemPointerData*)( XLogRecGetData(record) + sizeof(ginxlogInsert) );
ItemPointerData *items = (ItemPointerData *) (XLogRecGetData(record) + sizeof(ginxlogInsert));
Assert( GinPageIsLeaf( page ) );
Assert( data->updateBlkno == InvalidBlockNumber );
Assert(GinPageIsLeaf(page));
Assert(data->updateBlkno == InvalidBlockNumber);
for(i=0;i<data->nitem;i++)
GinDataPageAddItem( page, items+i, data->offset + i );
} else {
for (i = 0; i < data->nitem; i++)
GinDataPageAddItem(page, items + i, data->offset + i);
}
else
{
PostingItem *pitem;
Assert( !GinPageIsLeaf( page ) );
Assert(!GinPageIsLeaf(page));
if ( data->updateBlkno != InvalidBlockNumber ) {
/* update link to right page after split */
pitem = (PostingItem*)GinDataPageGetItem(page, data->offset);
PostingItemSetBlockNumber( pitem, data->updateBlkno );
if (data->updateBlkno != InvalidBlockNumber)
{
/* update link to right page after split */
pitem = (PostingItem *) GinDataPageGetItem(page, data->offset);
PostingItemSetBlockNumber(pitem, data->updateBlkno);
}
pitem = (PostingItem*)( XLogRecGetData(record) + sizeof(ginxlogInsert) );
pitem = (PostingItem *) (XLogRecGetData(record) + sizeof(ginxlogInsert));
GinDataPageAddItem( page, pitem, data->offset );
GinDataPageAddItem(page, pitem, data->offset);
if ( data->updateBlkno != InvalidBlockNumber )
forgetIncompleteSplit(data->node, PostingItemGetBlockNumber( pitem ), data->updateBlkno);
if (data->updateBlkno != InvalidBlockNumber)
forgetIncompleteSplit(data->node, PostingItemGetBlockNumber(pitem), data->updateBlkno);
}
} else {
IndexTuple itup;
}
else
{
IndexTuple itup;
Assert( !GinPageIsData( page ) );
Assert(!GinPageIsData(page));
if ( data->updateBlkno != InvalidBlockNumber ) {
/* update link to right page after split */
Assert( !GinPageIsLeaf( page ) );
Assert( data->offset>=FirstOffsetNumber && data->offset<=PageGetMaxOffsetNumber(page) );
if (data->updateBlkno != InvalidBlockNumber)
{
/* update link to right page after split */
Assert(!GinPageIsLeaf(page));
Assert(data->offset >= FirstOffsetNumber && data->offset <= PageGetMaxOffsetNumber(page));
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, data->offset));
ItemPointerSet(&itup->t_tid, data->updateBlkno, InvalidOffsetNumber);
}
if ( data->isDelete ) {
Assert( GinPageIsLeaf( page ) );
Assert( data->offset>=FirstOffsetNumber && data->offset<=PageGetMaxOffsetNumber(page) );
if (data->isDelete)
{
Assert(GinPageIsLeaf(page));
Assert(data->offset >= FirstOffsetNumber && data->offset <= PageGetMaxOffsetNumber(page));
PageIndexTupleDelete(page, data->offset);
}
itup = (IndexTuple)( XLogRecGetData(record) + sizeof(ginxlogInsert) );
itup = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsert));
if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), data->offset, LP_USED) == InvalidOffsetNumber )
elog(ERROR, "failed to add item to index page in %u/%u/%u",
data->node.spcNode, data->node.dbNode, data->node.relNode );
if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), data->offset, LP_USED) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to index page in %u/%u/%u",
data->node.spcNode, data->node.dbNode, data->node.relNode);
if ( !data->isLeaf && data->updateBlkno != InvalidBlockNumber )
forgetIncompleteSplit(data->node, GinItemPointerGetBlockNumber( &itup->t_tid ), data->updateBlkno);
if (!data->isLeaf && data->updateBlkno != InvalidBlockNumber)
forgetIncompleteSplit(data->node, GinItemPointerGetBlockNumber(&itup->t_tid), data->updateBlkno);
}
PageSetLSN(page, lsn);
@@ -190,18 +207,21 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record) {
}
static void
ginRedoSplit(XLogRecPtr lsn, XLogRecord *record) {
ginxlogSplit *data = (ginxlogSplit*)XLogRecGetData(record);
ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
{
ginxlogSplit *data = (ginxlogSplit *) XLogRecGetData(record);
Relation reln;
Buffer lbuffer, rbuffer;
Page lpage, rpage;
Buffer lbuffer,
rbuffer;
Page lpage,
rpage;
uint32 flags = 0;
reln = XLogOpenRelation(data->node);
if ( data->isLeaf )
if (data->isLeaf)
flags |= GIN_LEAF;
if ( data->isData )
if (data->isData)
flags |= GIN_DATA;
lbuffer = XLogReadBuffer(reln, data->lblkno, data->isRootSplit);
@@ -214,50 +234,57 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record) {
rpage = (Page) BufferGetPage(rbuffer);
GinInitBuffer(rbuffer, flags);
GinPageGetOpaque(lpage)->rightlink = BufferGetBlockNumber( rbuffer );
GinPageGetOpaque(lpage)->rightlink = BufferGetBlockNumber(rbuffer);
GinPageGetOpaque(rpage)->rightlink = data->rrlink;
if ( data->isData ) {
char *ptr = XLogRecGetData(record) + sizeof(ginxlogSplit);
Size sizeofitem = GinSizeOfItem(lpage);
if (data->isData)
{
char *ptr = XLogRecGetData(record) + sizeof(ginxlogSplit);
Size sizeofitem = GinSizeOfItem(lpage);
OffsetNumber i;
ItemPointer bound;
ItemPointer bound;
for(i=0;i<data->separator;i++) {
GinDataPageAddItem( lpage, ptr, InvalidOffsetNumber );
for (i = 0; i < data->separator; i++)
{
GinDataPageAddItem(lpage, ptr, InvalidOffsetNumber);
ptr += sizeofitem;
}
for(i=data->separator;i<data->nitem;i++) {
GinDataPageAddItem( rpage, ptr, InvalidOffsetNumber );
for (i = data->separator; i < data->nitem; i++)
{
GinDataPageAddItem(rpage, ptr, InvalidOffsetNumber);
ptr += sizeofitem;
}
/* set up right key */
bound = GinDataPageGetRightBound(lpage);
if ( data->isLeaf )
*bound = *(ItemPointerData*)GinDataPageGetItem(lpage, GinPageGetOpaque(lpage)->maxoff);
if (data->isLeaf)
*bound = *(ItemPointerData *) GinDataPageGetItem(lpage, GinPageGetOpaque(lpage)->maxoff);
else
*bound = ((PostingItem*)GinDataPageGetItem(lpage, GinPageGetOpaque(lpage)->maxoff))->key;
*bound = ((PostingItem *) GinDataPageGetItem(lpage, GinPageGetOpaque(lpage)->maxoff))->key;
bound = GinDataPageGetRightBound(rpage);
*bound = data->rightbound;
} else {
IndexTuple itup = (IndexTuple)( XLogRecGetData(record) + sizeof(ginxlogSplit) );
}
else
{
IndexTuple itup = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogSplit));
OffsetNumber i;
for(i=0;i<data->separator;i++) {
if ( PageAddItem( lpage, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber )
elog(ERROR, "failed to add item to index page in %u/%u/%u",
data->node.spcNode, data->node.dbNode, data->node.relNode );
itup = (IndexTuple)( ((char*)itup) + MAXALIGN( IndexTupleSize(itup) ) );
for (i = 0; i < data->separator; i++)
{
if (PageAddItem(lpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to index page in %u/%u/%u",
data->node.spcNode, data->node.dbNode, data->node.relNode);
itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
}
for(i=data->separator;i<data->nitem;i++) {
if ( PageAddItem( rpage, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber )
elog(ERROR, "failed to add item to index page in %u/%u/%u",
data->node.spcNode, data->node.dbNode, data->node.relNode );
itup = (IndexTuple)( ((char*)itup) + MAXALIGN( IndexTupleSize(itup) ) );
for (i = data->separator; i < data->nitem; i++)
{
if (PageAddItem(rpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to index page in %u/%u/%u",
data->node.spcNode, data->node.dbNode, data->node.relNode);
itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
}
}
@@ -269,20 +296,24 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record) {
PageSetTLI(lpage, ThisTimeLineID);
MarkBufferDirty(lbuffer);
if ( !data->isLeaf && data->updateBlkno != InvalidBlockNumber )
if (!data->isLeaf && data->updateBlkno != InvalidBlockNumber)
forgetIncompleteSplit(data->node, data->leftChildBlkno, data->updateBlkno);
if ( data->isRootSplit ) {
Buffer rootBuf = XLogReadBuffer(reln, data->rootBlkno, false);
Page rootPage = BufferGetPage( rootBuf );
if (data->isRootSplit)
{
Buffer rootBuf = XLogReadBuffer(reln, data->rootBlkno, false);
Page rootPage = BufferGetPage(rootBuf);
GinInitBuffer( rootBuf, flags & ~GIN_LEAF );
GinInitBuffer(rootBuf, flags & ~GIN_LEAF);
if ( data->isData ) {
Assert( data->rootBlkno != GIN_ROOT_BLKNO );
if (data->isData)
{
Assert(data->rootBlkno != GIN_ROOT_BLKNO);
dataFillRoot(NULL, rootBuf, lbuffer, rbuffer);
} else {
Assert( data->rootBlkno == GIN_ROOT_BLKNO );
}
else
{
Assert(data->rootBlkno == GIN_ROOT_BLKNO);
entryFillRoot(NULL, rootBuf, lbuffer, rbuffer);
}
@@ -291,7 +322,8 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record) {
MarkBufferDirty(rootBuf);
UnlockReleaseBuffer(rootBuf);
} else
}
else
pushIncompleteSplit(data->node, data->lblkno, data->rblkno, data->rootBlkno);
UnlockReleaseBuffer(rbuffer);
@@ -299,8 +331,9 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record) {
}
static void
ginRedoVacuumPage(XLogRecPtr lsn, XLogRecord *record) {
ginxlogVacuumPage *data = (ginxlogVacuumPage*)XLogRecGetData(record);
ginRedoVacuumPage(XLogRecPtr lsn, XLogRecord *record)
{
ginxlogVacuumPage *data = (ginxlogVacuumPage *) XLogRecGetData(record);
Relation reln;
Buffer buffer;
Page page;
@@ -314,25 +347,30 @@ ginRedoVacuumPage(XLogRecPtr lsn, XLogRecord *record) {
Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer);
if ( GinPageIsData( page ) ) {
memcpy( GinDataPageGetData(page), XLogRecGetData(record) + sizeof(ginxlogVacuumPage),
GinSizeOfItem(page) * data->nitem );
if (GinPageIsData(page))
{
memcpy(GinDataPageGetData(page), XLogRecGetData(record) + sizeof(ginxlogVacuumPage),
GinSizeOfItem(page) *data->nitem);
GinPageGetOpaque(page)->maxoff = data->nitem;
} else {
OffsetNumber i, *tod;
IndexTuple itup = (IndexTuple)( XLogRecGetData(record) + sizeof(ginxlogVacuumPage) );
}
else
{
OffsetNumber i,
*tod;
IndexTuple itup = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogVacuumPage));
tod = (OffsetNumber*)palloc( sizeof(OffsetNumber) * PageGetMaxOffsetNumber(page) );
for(i=FirstOffsetNumber;i<=PageGetMaxOffsetNumber(page);i++)
tod[i-1] = i;
tod = (OffsetNumber *) palloc(sizeof(OffsetNumber) * PageGetMaxOffsetNumber(page));
for (i = FirstOffsetNumber; i <= PageGetMaxOffsetNumber(page); i++)
tod[i - 1] = i;
PageIndexMultiDelete(page, tod, PageGetMaxOffsetNumber(page));
for(i=0;i<data->nitem;i++) {
if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber )
elog(ERROR, "failed to add item to index page in %u/%u/%u",
data->node.spcNode, data->node.dbNode, data->node.relNode );
itup = (IndexTuple)( ((char*)itup) + MAXALIGN( IndexTupleSize(itup) ) );
PageIndexMultiDelete(page, tod, PageGetMaxOffsetNumber(page));
for (i = 0; i < data->nitem; i++)
{
if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to index page in %u/%u/%u",
data->node.spcNode, data->node.dbNode, data->node.relNode);
itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
}
}
@@ -344,17 +382,19 @@ ginRedoVacuumPage(XLogRecPtr lsn, XLogRecord *record) {
}
static void
ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record) {
ginxlogDeletePage *data = (ginxlogDeletePage*)XLogRecGetData(record);
ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
{
ginxlogDeletePage *data = (ginxlogDeletePage *) XLogRecGetData(record);
Relation reln;
Buffer buffer;
Page page;
reln = XLogOpenRelation(data->node);
if ( !( record->xl_info & XLR_BKP_BLOCK_1) ) {
if (!(record->xl_info & XLR_BKP_BLOCK_1))
{
buffer = XLogReadBuffer(reln, data->blkno, false);
page = BufferGetPage( buffer );
page = BufferGetPage(buffer);
Assert(GinPageIsData(page));
GinPageGetOpaque(page)->flags = GIN_DELETED;
PageSetLSN(page, lsn);
@@ -363,9 +403,10 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record) {
UnlockReleaseBuffer(buffer);
}
if ( !( record->xl_info & XLR_BKP_BLOCK_2) ) {
if (!(record->xl_info & XLR_BKP_BLOCK_2))
{
buffer = XLogReadBuffer(reln, data->parentBlkno, false);
page = BufferGetPage( buffer );
page = BufferGetPage(buffer);
Assert(GinPageIsData(page));
Assert(!GinPageIsLeaf(page));
PageDeletePostingItem(page, data->parentOffset);
@@ -375,9 +416,10 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record) {
UnlockReleaseBuffer(buffer);
}
if ( !( record->xl_info & XLR_BKP_BLOCK_2) && data->leftBlkno != InvalidBlockNumber ) {
if (!(record->xl_info & XLR_BKP_BLOCK_2) && data->leftBlkno != InvalidBlockNumber)
{
buffer = XLogReadBuffer(reln, data->leftBlkno, false);
page = BufferGetPage( buffer );
page = BufferGetPage(buffer);
Assert(GinPageIsData(page));
GinPageGetOpaque(page)->rightlink = data->rightLink;
PageSetLSN(page, lsn);
@@ -387,28 +429,30 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record) {
}
}
void
gin_redo(XLogRecPtr lsn, XLogRecord *record) {
uint8 info = record->xl_info & ~XLR_INFO_MASK;
void
gin_redo(XLogRecPtr lsn, XLogRecord *record)
{
uint8 info = record->xl_info & ~XLR_INFO_MASK;
topCtx = MemoryContextSwitchTo(opCtx);
switch (info) {
case XLOG_GIN_CREATE_INDEX:
switch (info)
{
case XLOG_GIN_CREATE_INDEX:
ginRedoCreateIndex(lsn, record);
break;
case XLOG_GIN_CREATE_PTREE:
case XLOG_GIN_CREATE_PTREE:
ginRedoCreatePTree(lsn, record);
break;
case XLOG_GIN_INSERT:
case XLOG_GIN_INSERT:
ginRedoInsert(lsn, record);
break;
case XLOG_GIN_SPLIT:
case XLOG_GIN_SPLIT:
ginRedoSplit(lsn, record);
break;
case XLOG_GIN_VACUUM_PAGE:
case XLOG_GIN_VACUUM_PAGE:
ginRedoVacuumPage(lsn, record);
break;
case XLOG_GIN_DELETE_PAGE:
case XLOG_GIN_DELETE_PAGE:
ginRedoDeletePage(lsn, record);
break;
default:
@@ -419,110 +463,122 @@ gin_redo(XLogRecPtr lsn, XLogRecord *record) {
}
static void
desc_node( StringInfo buf, RelFileNode node, BlockNumber blkno ) {
appendStringInfo(buf,"node: %u/%u/%u blkno: %u",
node.spcNode, node.dbNode, node.relNode, blkno);
desc_node(StringInfo buf, RelFileNode node, BlockNumber blkno)
{
appendStringInfo(buf, "node: %u/%u/%u blkno: %u",
node.spcNode, node.dbNode, node.relNode, blkno);
}
void
gin_desc(StringInfo buf, uint8 xl_info, char *rec) {
uint8 info = xl_info & ~XLR_INFO_MASK;
void
gin_desc(StringInfo buf, uint8 xl_info, char *rec)
{
uint8 info = xl_info & ~XLR_INFO_MASK;
switch (info) {
case XLOG_GIN_CREATE_INDEX:
appendStringInfo(buf,"Create index, ");
desc_node(buf, *(RelFileNode*)rec, GIN_ROOT_BLKNO );
switch (info)
{
case XLOG_GIN_CREATE_INDEX:
appendStringInfo(buf, "Create index, ");
desc_node(buf, *(RelFileNode *) rec, GIN_ROOT_BLKNO);
break;
case XLOG_GIN_CREATE_PTREE:
appendStringInfo(buf,"Create posting tree, ");
desc_node(buf, ((ginxlogCreatePostingTree*)rec)->node, ((ginxlogCreatePostingTree*)rec)->blkno );
case XLOG_GIN_CREATE_PTREE:
appendStringInfo(buf, "Create posting tree, ");
desc_node(buf, ((ginxlogCreatePostingTree *) rec)->node, ((ginxlogCreatePostingTree *) rec)->blkno);
break;
case XLOG_GIN_INSERT:
appendStringInfo(buf,"Insert item, ");
desc_node(buf, ((ginxlogInsert*)rec)->node, ((ginxlogInsert*)rec)->blkno );
appendStringInfo(buf," offset: %u nitem: %u isdata: %c isleaf %c isdelete %c updateBlkno:%u",
((ginxlogInsert*)rec)->offset,
((ginxlogInsert*)rec)->nitem,
( ((ginxlogInsert*)rec)->isData ) ? 'T' : 'F',
( ((ginxlogInsert*)rec)->isLeaf ) ? 'T' : 'F',
( ((ginxlogInsert*)rec)->isDelete ) ? 'T' : 'F',
((ginxlogInsert*)rec)->updateBlkno
);
case XLOG_GIN_INSERT:
appendStringInfo(buf, "Insert item, ");
desc_node(buf, ((ginxlogInsert *) rec)->node, ((ginxlogInsert *) rec)->blkno);
appendStringInfo(buf, " offset: %u nitem: %u isdata: %c isleaf %c isdelete %c updateBlkno:%u",
((ginxlogInsert *) rec)->offset,
((ginxlogInsert *) rec)->nitem,
(((ginxlogInsert *) rec)->isData) ? 'T' : 'F',
(((ginxlogInsert *) rec)->isLeaf) ? 'T' : 'F',
(((ginxlogInsert *) rec)->isDelete) ? 'T' : 'F',
((ginxlogInsert *) rec)->updateBlkno
);
break;
case XLOG_GIN_SPLIT:
appendStringInfo(buf,"Page split, ");
desc_node(buf, ((ginxlogSplit*)rec)->node, ((ginxlogSplit*)rec)->lblkno );
appendStringInfo(buf," isrootsplit: %c", ( ((ginxlogSplit*)rec)->isRootSplit ) ? 'T' : 'F');
case XLOG_GIN_SPLIT:
appendStringInfo(buf, "Page split, ");
desc_node(buf, ((ginxlogSplit *) rec)->node, ((ginxlogSplit *) rec)->lblkno);
appendStringInfo(buf, " isrootsplit: %c", (((ginxlogSplit *) rec)->isRootSplit) ? 'T' : 'F');
break;
case XLOG_GIN_VACUUM_PAGE:
appendStringInfo(buf,"Vacuum page, ");
desc_node(buf, ((ginxlogVacuumPage*)rec)->node, ((ginxlogVacuumPage*)rec)->blkno );
case XLOG_GIN_VACUUM_PAGE:
appendStringInfo(buf, "Vacuum page, ");
desc_node(buf, ((ginxlogVacuumPage *) rec)->node, ((ginxlogVacuumPage *) rec)->blkno);
break;
case XLOG_GIN_DELETE_PAGE:
appendStringInfo(buf,"Delete page, ");
desc_node(buf, ((ginxlogDeletePage*)rec)->node, ((ginxlogDeletePage*)rec)->blkno );
case XLOG_GIN_DELETE_PAGE:
appendStringInfo(buf, "Delete page, ");
desc_node(buf, ((ginxlogDeletePage *) rec)->node, ((ginxlogDeletePage *) rec)->blkno);
break;
default:
elog(PANIC, "gin_desc: unknown op code %u", info);
}
}
void
gin_xlog_startup(void) {
void
gin_xlog_startup(void)
{
incomplete_splits = NIL;
opCtx = AllocSetContextCreate(CurrentMemoryContext,
"GIN recovery temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
"GIN recovery temporary context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
}
static void
ginContinueSplit( ginIncompleteSplit *split ) {
ginContinueSplit(ginIncompleteSplit *split)
{
GinBtreeData btree;
Relation reln;
Buffer buffer;
GinBtreeStack stack;
GinBtreeStack stack;
/* elog(NOTICE,"ginContinueSplit root:%u l:%u r:%u", split->rootBlkno, split->leftBlkno, split->rightBlkno); */
/*
* elog(NOTICE,"ginContinueSplit root:%u l:%u r:%u", split->rootBlkno,
* split->leftBlkno, split->rightBlkno);
*/
reln = XLogOpenRelation(split->node);
buffer = XLogReadBuffer(reln, split->leftBlkno, false);
buffer = XLogReadBuffer(reln, split->leftBlkno, false);
if ( split->rootBlkno == GIN_ROOT_BLKNO ) {
prepareEntryScan( &btree, reln, (Datum)0, NULL );
btree.entry = ginPageGetLinkItup( buffer );
} else {
Page page = BufferGetPage( buffer );
if (split->rootBlkno == GIN_ROOT_BLKNO)
{
prepareEntryScan(&btree, reln, (Datum) 0, NULL);
btree.entry = ginPageGetLinkItup(buffer);
}
else
{
Page page = BufferGetPage(buffer);
prepareDataScan( &btree, reln );
prepareDataScan(&btree, reln);
PostingItemSetBlockNumber( &(btree.pitem), split->leftBlkno );
if ( GinPageIsLeaf(page) )
btree.pitem.key = *(ItemPointerData*)GinDataPageGetItem(page,
GinPageGetOpaque(page)->maxoff);
PostingItemSetBlockNumber(&(btree.pitem), split->leftBlkno);
if (GinPageIsLeaf(page))
btree.pitem.key = *(ItemPointerData *) GinDataPageGetItem(page,
GinPageGetOpaque(page)->maxoff);
else
btree.pitem.key = ((PostingItem*)GinDataPageGetItem(page,
GinPageGetOpaque(page)->maxoff))->key;
btree.pitem.key = ((PostingItem *) GinDataPageGetItem(page,
GinPageGetOpaque(page)->maxoff))->key;
}
btree.rightblkno = split->rightBlkno;
btree.rightblkno = split->rightBlkno;
stack.blkno = split->leftBlkno;
stack.buffer = buffer;
stack.off = InvalidOffsetNumber;
stack.parent = NULL;
findParents( &btree, &stack, split->rootBlkno);
ginInsertValue( &btree, stack.parent );
findParents(&btree, &stack, split->rootBlkno);
ginInsertValue(&btree, stack.parent);
UnlockReleaseBuffer( buffer );
UnlockReleaseBuffer(buffer);
}
void
gin_xlog_cleanup(void) {
void
gin_xlog_cleanup(void)
{
ListCell *l;
MemoryContext topCtx;
@@ -531,8 +587,9 @@ gin_xlog_cleanup(void) {
foreach(l, incomplete_splits)
{
ginIncompleteSplit *split = (ginIncompleteSplit *) lfirst(l);
ginContinueSplit( split );
MemoryContextReset( opCtx );
ginContinueSplit(split);
MemoryContextReset(opCtx);
}
MemoryContextSwitchTo(topCtx);
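The gin_redo() and gin_desc() hunks above are pure re-indentation of a common WAL pattern: mask the info bits, switch on the opcode, and either replay or describe the record, panicking on anything unknown. A minimal standalone sketch of that dispatch shape (the record struct, opcode names and helpers below are invented for illustration and are not PostgreSQL's xlog interface) looks like this:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical opcodes, loosely modeled on the XLOG_GIN_* constants above. */
enum rec_op { OP_CREATE_INDEX, OP_INSERT, OP_SPLIT, OP_VACUUM_PAGE, OP_DELETE_PAGE };

struct rec
{
    enum rec_op op;
    unsigned    blkno;
};

/* Replay one record: switch on the opcode, bail out on anything unknown. */
static void redo(const struct rec *r)
{
    switch (r->op)
    {
        case OP_CREATE_INDEX:
            printf("redo: create index\n");
            break;
        case OP_INSERT:
            printf("redo: insert into block %u\n", r->blkno);
            break;
        case OP_SPLIT:
            printf("redo: split block %u\n", r->blkno);
            break;
        case OP_VACUUM_PAGE:
            printf("redo: vacuum block %u\n", r->blkno);
            break;
        case OP_DELETE_PAGE:
            printf("redo: delete block %u\n", r->blkno);
            break;
        default:
            fprintf(stderr, "unknown op code %d\n", (int) r->op);
            exit(1);
    }
}

int main(void)
{
    struct rec r = {OP_INSERT, 42};

    redo(&r);
    return 0;
}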
View File
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.142 2006/07/14 14:52:16 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.143 2006/10/04 00:29:48 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -185,7 +185,7 @@ gistbuildCallback(Relation index,
/* form an index tuple and point it at the heap tuple */
itup = gistFormTuple(&buildstate->giststate, index,
values, isnull, true /* size is currently bogus */);
values, isnull, true /* size is currently bogus */ );
itup->t_tid = htup->t_self;
/*
@@ -199,7 +199,7 @@ gistbuildCallback(Relation index,
* after initial build do not.
*/
gistdoinsert(index, itup,
RelationGetTargetPageFreeSpace(index, GIST_DEFAULT_FILLFACTOR),
RelationGetTargetPageFreeSpace(index, GIST_DEFAULT_FILLFACTOR),
&buildstate->giststate);
buildstate->indtuples += 1;
@@ -236,7 +236,7 @@ gistinsert(PG_FUNCTION_ARGS)
initGISTstate(&giststate, r);
itup = gistFormTuple(&giststate, r,
values, isnull, true /* size is currently bogus */);
values, isnull, true /* size is currently bogus */ );
itup->t_tid = *ht_ctid;
gistdoinsert(r, itup, 0, &giststate);
@@ -285,18 +285,17 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
bool is_leaf = (GistPageIsLeaf(state->stack->page)) ? true : false;
/*
* if (!is_leaf) remove old key:
* This node's key has been modified, either because a child split
* occurred or because we needed to adjust our key for an insert in a
* child node. Therefore, remove the old version of this node's key.
* if (!is_leaf) remove old key: This node's key has been modified, either
* because a child split occurred or because we needed to adjust our key
* for an insert in a child node. Therefore, remove the old version of
* this node's key.
*
* for WAL replay, in the non-split case we handle this by
* setting up a one-element todelete array; in the split case, it's
* handled implicitly because the tuple vector passed to gistSplit
* won't include this tuple.
* for WAL replay, in the non-split case we handle this by setting up a
* one-element todelete array; in the split case, it's handled implicitly
* because the tuple vector passed to gistSplit won't include this tuple.
*
* XXX: If we want to change fillfactors between node and leaf,
* fillfactor = (is_leaf ? state->leaf_fillfactor : state->node_fillfactor)
* XXX: If we want to change fillfactors between node and leaf, fillfactor
* = (is_leaf ? state->leaf_fillfactor : state->node_fillfactor)
*/
if (gistnospace(state->stack->page, state->itup, state->ituplen,
is_leaf ? InvalidOffsetNumber : state->stack->childoffnum,
@@ -307,80 +306,88 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
int tlen;
SplitedPageLayout *dist = NULL,
*ptr;
BlockNumber rrlink = InvalidBlockNumber;
BlockNumber rrlink = InvalidBlockNumber;
GistNSN oldnsn;
is_splitted = true;
/*
* Form index tuples vector to split:
* remove old tuple if t's needed and add new tuples to vector
* Form index tuples vector to split: remove old tuple if t's needed
* and add new tuples to vector
*/
itvec = gistextractpage(state->stack->page, &tlen);
if ( !is_leaf ) {
if (!is_leaf)
{
/* on inner page we should remove old tuple */
int pos = state->stack->childoffnum - FirstOffsetNumber;
int pos = state->stack->childoffnum - FirstOffsetNumber;
tlen--;
if ( pos != tlen )
memmove( itvec+pos, itvec + pos + 1, sizeof( IndexTuple ) * (tlen-pos) );
tlen--;
if (pos != tlen)
memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
}
itvec = gistjoinvector(itvec, &tlen, state->itup, state->ituplen);
dist = gistSplit(state->r, state->stack->page, itvec, tlen, giststate);
state->itup = (IndexTuple*)palloc( sizeof(IndexTuple) * tlen);
state->itup = (IndexTuple *) palloc(sizeof(IndexTuple) * tlen);
state->ituplen = 0;
if (state->stack->blkno != GIST_ROOT_BLKNO) {
/* if non-root split then we should not allocate new buffer,
but we must create temporary page to operate */
if (state->stack->blkno != GIST_ROOT_BLKNO)
{
/*
* if non-root split then we should not allocate new buffer, but
* we must create temporary page to operate
*/
dist->buffer = state->stack->buffer;
dist->page = PageGetTempPage( BufferGetPage(dist->buffer), sizeof(GISTPageOpaqueData) );
dist->page = PageGetTempPage(BufferGetPage(dist->buffer), sizeof(GISTPageOpaqueData));
/*clean all flags except F_LEAF */
/* clean all flags except F_LEAF */
GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;
}
/* make new pages and fills them */
for (ptr = dist; ptr; ptr = ptr->next) {
int i;
char *data;
for (ptr = dist; ptr; ptr = ptr->next)
{
int i;
char *data;
/* get new page */
if ( ptr->buffer == InvalidBuffer ) {
ptr->buffer = gistNewBuffer( state->r );
GISTInitBuffer( ptr->buffer, (is_leaf) ? F_LEAF : 0 );
if (ptr->buffer == InvalidBuffer)
{
ptr->buffer = gistNewBuffer(state->r);
GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
ptr->page = BufferGetPage(ptr->buffer);
}
ptr->block.blkno = BufferGetBlockNumber( ptr->buffer );
ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
/* fill page, we can do it becouse all this pages are new (ie not linked in tree
or masked by temp page */
data = (char*)(ptr->list);
for(i=0;i<ptr->block.num;i++) {
if ( PageAddItem(ptr->page, (Item)data, IndexTupleSize((IndexTuple)data), i+FirstOffsetNumber, LP_USED) == InvalidOffsetNumber )
/*
* fill page, we can do it becouse all this pages are new (ie not
* linked in tree or masked by temp page
*/
data = (char *) (ptr->list);
for (i = 0; i < ptr->block.num; i++)
{
if (PageAddItem(ptr->page, (Item) data, IndexTupleSize((IndexTuple) data), i + FirstOffsetNumber, LP_USED) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(state->r));
data += IndexTupleSize((IndexTuple)data);
data += IndexTupleSize((IndexTuple) data);
}
/* set up ItemPointer and remmeber it for parent */
ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
state->itup[ state->ituplen ] = ptr->itup;
state->itup[state->ituplen] = ptr->itup;
state->ituplen++;
}
/* saves old rightlink */
if ( state->stack->blkno != GIST_ROOT_BLKNO )
rrlink = GistPageGetOpaque(dist->page)->rightlink;
if (state->stack->blkno != GIST_ROOT_BLKNO)
rrlink = GistPageGetOpaque(dist->page)->rightlink;
START_CRIT_SECTION();
/*
* must mark buffers dirty before XLogInsert, even though we'll
* still be changing their opaque fields below.
* set up right links.
* must mark buffers dirty before XLogInsert, even though we'll still
* be changing their opaque fields below. set up right links.
*/
for (ptr = dist; ptr; ptr = ptr->next)
for (ptr = dist; ptr; ptr = ptr->next)
{
MarkBufferDirty(ptr->buffer);
GistPageGetOpaque(ptr->page)->rightlink = (ptr->next) ?
@@ -388,9 +395,10 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
}
/* restore splitted non-root page */
if ( state->stack->blkno != GIST_ROOT_BLKNO ) {
PageRestoreTempPage( dist->page, BufferGetPage( dist->buffer ) );
dist->page = BufferGetPage( dist->buffer );
if (state->stack->blkno != GIST_ROOT_BLKNO)
{
PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
dist->page = BufferGetPage(dist->buffer);
}
if (!state->r->rd_istemp)
@@ -419,25 +427,27 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
/* set up NSN */
oldnsn = GistPageGetOpaque(dist->page)->nsn;
if ( state->stack->blkno == GIST_ROOT_BLKNO )
if (state->stack->blkno == GIST_ROOT_BLKNO)
/* if root split we should put initial value */
oldnsn = PageGetLSN(dist->page);
for (ptr = dist; ptr; ptr = ptr->next) {
for (ptr = dist; ptr; ptr = ptr->next)
{
/* only for last set oldnsn */
GistPageGetOpaque(ptr->page)->nsn = (ptr->next) ?
PageGetLSN(ptr->page) : oldnsn;
}
/*
* release buffers, if it was a root split then
* release all buffers because we create all buffers
/*
* release buffers, if it was a root split then release all buffers
* because we create all buffers
*/
ptr = ( state->stack->blkno == GIST_ROOT_BLKNO ) ? dist : dist->next;
for(; ptr; ptr = ptr->next)
ptr = (state->stack->blkno == GIST_ROOT_BLKNO) ? dist : dist->next;
for (; ptr; ptr = ptr->next)
UnlockReleaseBuffer(ptr->buffer);
if (state->stack->blkno == GIST_ROOT_BLKNO) {
if (state->stack->blkno == GIST_ROOT_BLKNO)
{
gistnewroot(state->r, state->stack->buffer, state->itup, state->ituplen, &(state->key));
state->needInsertComplete = false;
}
@@ -470,7 +480,7 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
}
rdata = formUpdateRdata(state->r->rd_node, state->stack->buffer,
offs, noffs,
offs, noffs,
state->itup, state->ituplen,
&(state->key));
@@ -922,16 +932,16 @@ gistSplit(Relation r,
GistSplitVector v;
GistEntryVector *entryvec;
int i;
SplitedPageLayout *res = NULL;
SplitedPageLayout *res = NULL;
/* generate the item array */
entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
entryvec->n = len + 1;
memset( v.spl_lisnull, TRUE, sizeof(bool) * giststate->tupdesc->natts );
memset( v.spl_risnull, TRUE, sizeof(bool) * giststate->tupdesc->natts );
gistSplitByKey(r, page, itup, len, giststate,
&v, entryvec, 0);
memset(v.spl_lisnull, TRUE, sizeof(bool) * giststate->tupdesc->natts);
memset(v.spl_risnull, TRUE, sizeof(bool) * giststate->tupdesc->natts);
gistSplitByKey(r, page, itup, len, giststate,
&v, entryvec, 0);
/* form left and right vector */
lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
@@ -952,19 +962,20 @@ gistSplit(Relation r,
{
ROTATEDIST(res);
res->block.num = v.splitVector.spl_nright;
res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &( res->lenlist ) );
res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &(res->lenlist));
res->itup = (v.spl_rightvalid) ? gistFormTuple(giststate, r, v.spl_rattr, v.spl_risnull, false)
: gist_form_invalid_tuple(GIST_ROOT_BLKNO);
}
if (!gistfitpage(lvectup, v.splitVector.spl_nleft))
{
SplitedPageLayout *resptr, *subres;
SplitedPageLayout *resptr,
*subres;
resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, giststate);
/* install on list's tail */
while( resptr->next )
/* install on list's tail */
while (resptr->next)
resptr = resptr->next;
resptr->next = res;
@@ -974,7 +985,7 @@ gistSplit(Relation r,
{
ROTATEDIST(res);
res->block.num = v.splitVector.spl_nleft;
res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &( res->lenlist ) );
res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &(res->lenlist));
res->itup = (v.spl_leftvalid) ? gistFormTuple(giststate, r, v.spl_lattr, v.spl_lisnull, false)
: gist_form_invalid_tuple(GIST_ROOT_BLKNO);
}
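The split path in gistplacetopage() above fills each freshly allocated page by walking a flat buffer of variable-length tuples, advancing with data += IndexTupleSize(...). Outside the backend that walk is just iteration over length-prefixed records; the one-byte length header below is an invented stand-in for the real IndexTuple header and its MAXALIGN padding:

#include <stdio.h>
#include <string.h>

/*
 * Toy length-prefixed record: a 1-byte length followed by that many bytes
 * of payload.  Real IndexTuples carry their size in the tuple header and
 * are MAXALIGNed, which this sketch ignores.
 */
static void walk_records(const unsigned char *buf, size_t buflen)
{
    size_t off = 0;

    while (off < buflen)
    {
        unsigned char len = buf[off];

        printf("record at offset %zu, payload %u bytes: %.*s\n",
               off, (unsigned) len, (int) len, (const char *) (buf + off + 1));
        off += 1 + len;         /* advance by header + payload, like data += IndexTupleSize(data) */
    }
}

int main(void)
{
    unsigned char buf[32];
    size_t n = 0;
    const char *items[] = {"foo", "quux", "ab"};

    /* pack three records back to back */
    for (int i = 0; i < 3; i++)
    {
        size_t len = strlen(items[i]);

        buf[n++] = (unsigned char) len;
        memcpy(buf + n, items[i], len);
        n += len;
    }

    walk_records(buf, n);
    return 0;
}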
View File
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.60 2006/07/14 14:52:16 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.61 2006/10/04 00:29:48 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -360,8 +360,7 @@ gistindex_keytest(IndexTuple tuple,
IncrIndexProcessed();
/*
* Tuple doesn't restore after crash recovery because of incomplete
* insert
* Tuple doesn't restore after crash recovery because of incomplete insert
*/
if (!GistPageIsLeaf(p) && GistTupleIsInvalid(tuple))
return true;
@@ -378,14 +377,18 @@ gistindex_keytest(IndexTuple tuple,
giststate->tupdesc,
&isNull);
if ( key->sk_flags & SK_ISNULL ) {
/* is the compared-to datum NULL? on non-leaf page it's possible
to have nulls in childs :( */
if (key->sk_flags & SK_ISNULL)
{
/*
* is the compared-to datum NULL? on non-leaf page it's possible
* to have nulls in childs :(
*/
if ( isNull || !GistPageIsLeaf(p) )
if (isNull || !GistPageIsLeaf(p))
return true;
return false;
} else if ( isNull )
}
else if (isNull)
return false;
gistdentryinit(giststate, key->sk_attno - 1, &de,
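The NULL handling reflowed in gistindex_keytest() above boils down to a small decision table: an IS NULL scan key matches a NULL entry, must still descend through non-leaf pages (their union keys can hide NULLs in children), and rejects a non-NULL leaf entry. Reduced to booleans, and covering only the IS NULL case, the logic is:

#include <stdbool.h>
#include <stdio.h>

/*
 * Simplified version of the NULL branch in gistindex_keytest():
 * the scan key is "IS NULL", the stored entry may or may not be NULL,
 * and the page may be a leaf.  On internal pages we must return true
 * (descend) even for a non-NULL union, because a child below could
 * still contain NULLs.
 */
static bool null_key_matches(bool entry_is_null, bool page_is_leaf)
{
    if (entry_is_null || !page_is_leaf)
        return true;
    return false;
}

int main(void)
{
    printf("NULL entry, leaf:      %d\n", null_key_matches(true, true));   /* 1 */
    printf("non-NULL entry, leaf:  %d\n", null_key_matches(false, true));  /* 0 */
    printf("non-NULL entry, inner: %d\n", null_key_matches(false, false)); /* 1 */
    return 0;
}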
View File
@@ -10,7 +10,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistproc.c,v 1.8 2006/09/10 00:29:34 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistproc.c,v 1.9 2006/10/04 00:29:48 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -112,7 +112,8 @@ gist_box_consistent(PG_FUNCTION_ARGS)
}
static void
adjustBox( BOX *b, BOX *addon ) {
adjustBox(BOX *b, BOX *addon)
{
if (b->high.x < addon->high.x)
b->high.x = addon->high.x;
if (b->low.x > addon->low.x)
@@ -146,7 +147,7 @@ gist_box_union(PG_FUNCTION_ARGS)
for (i = 1; i < numranges; i++)
{
cur = DatumGetBoxP(entryvec->vector[i].key);
adjustBox( pageunion, cur );
adjustBox(pageunion, cur);
}
*sizep = sizeof(BOX);
@@ -210,67 +211,79 @@ compare_KB(const void *a, const void *b)
}
static void
chooseLR( GIST_SPLITVEC *v,
OffsetNumber *list1, int nlist1, BOX *union1,
OffsetNumber *list2, int nlist2, BOX *union2 )
chooseLR(GIST_SPLITVEC *v,
OffsetNumber *list1, int nlist1, BOX *union1,
OffsetNumber *list2, int nlist2, BOX *union2)
{
bool firstToLeft = true;
bool firstToLeft = true;
if ( v->spl_ldatum_exists || v->spl_rdatum_exists ) {
if ( v->spl_ldatum_exists && v->spl_rdatum_exists ) {
BOX LRl = *union1, LRr = *union2;
BOX RLl = *union2, RLr = *union1;
double sizeLR, sizeRL;
if (v->spl_ldatum_exists || v->spl_rdatum_exists)
{
if (v->spl_ldatum_exists && v->spl_rdatum_exists)
{
BOX LRl = *union1,
LRr = *union2;
BOX RLl = *union2,
RLr = *union1;
double sizeLR,
sizeRL;
adjustBox( &LRl, DatumGetBoxP( v->spl_ldatum ) );
adjustBox( &LRr, DatumGetBoxP( v->spl_rdatum ) );
adjustBox( &RLl, DatumGetBoxP( v->spl_ldatum ) );
adjustBox( &RLr, DatumGetBoxP( v->spl_rdatum ) );
adjustBox(&LRl, DatumGetBoxP(v->spl_ldatum));
adjustBox(&LRr, DatumGetBoxP(v->spl_rdatum));
adjustBox(&RLl, DatumGetBoxP(v->spl_ldatum));
adjustBox(&RLr, DatumGetBoxP(v->spl_rdatum));
sizeLR = size_box( DirectFunctionCall2(rt_box_inter, BoxPGetDatum(&LRl), BoxPGetDatum(&LRr)) );
sizeRL = size_box( DirectFunctionCall2(rt_box_inter, BoxPGetDatum(&RLl), BoxPGetDatum(&RLr)) );
sizeLR = size_box(DirectFunctionCall2(rt_box_inter, BoxPGetDatum(&LRl), BoxPGetDatum(&LRr)));
sizeRL = size_box(DirectFunctionCall2(rt_box_inter, BoxPGetDatum(&RLl), BoxPGetDatum(&RLr)));
if ( sizeLR > sizeRL )
if (sizeLR > sizeRL)
firstToLeft = false;
} else {
float p1, p2;
GISTENTRY oldUnion, addon;
}
else
{
float p1,
p2;
GISTENTRY oldUnion,
addon;
gistentryinit(oldUnion, ( v->spl_ldatum_exists ) ? v->spl_ldatum : v->spl_rdatum,
gistentryinit(oldUnion, (v->spl_ldatum_exists) ? v->spl_ldatum : v->spl_rdatum,
NULL, NULL, InvalidOffsetNumber, FALSE);
gistentryinit(addon, BoxPGetDatum(union1), NULL, NULL, InvalidOffsetNumber, FALSE);
DirectFunctionCall3(gist_box_penalty, PointerGetDatum(&oldUnion), PointerGetDatum(&union1), PointerGetDatum(&p1));
DirectFunctionCall3(gist_box_penalty, PointerGetDatum(&oldUnion), PointerGetDatum(&union1), PointerGetDatum(&p1));
gistentryinit(addon, BoxPGetDatum(union2), NULL, NULL, InvalidOffsetNumber, FALSE);
DirectFunctionCall3(gist_box_penalty, PointerGetDatum(&oldUnion), PointerGetDatum(&union2), PointerGetDatum(&p2));
if ( (v->spl_ldatum_exists && p1 > p2) || (v->spl_rdatum_exists && p1 < p2) )
firstToLeft = false;
if ((v->spl_ldatum_exists && p1 > p2) || (v->spl_rdatum_exists && p1 < p2))
firstToLeft = false;
}
}
if ( firstToLeft ) {
if (firstToLeft)
{
v->spl_left = list1;
v->spl_right = list2;
v->spl_nleft = nlist1;
v->spl_nright = nlist2;
if ( v->spl_ldatum_exists )
adjustBox(union1, DatumGetBoxP( v->spl_ldatum ) );
if (v->spl_ldatum_exists)
adjustBox(union1, DatumGetBoxP(v->spl_ldatum));
v->spl_ldatum = BoxPGetDatum(union1);
if ( v->spl_rdatum_exists )
adjustBox(union2, DatumGetBoxP( v->spl_rdatum ) );
if (v->spl_rdatum_exists)
adjustBox(union2, DatumGetBoxP(v->spl_rdatum));
v->spl_rdatum = BoxPGetDatum(union2);
} else {
}
else
{
v->spl_left = list2;
v->spl_right = list1;
v->spl_nleft = nlist2;
v->spl_nright = nlist1;
if ( v->spl_ldatum_exists )
adjustBox(union2, DatumGetBoxP( v->spl_ldatum ) );
if (v->spl_ldatum_exists)
adjustBox(union2, DatumGetBoxP(v->spl_ldatum));
v->spl_ldatum = BoxPGetDatum(union2);
if ( v->spl_rdatum_exists )
adjustBox(union1, DatumGetBoxP( v->spl_rdatum ) );
if (v->spl_rdatum_exists)
adjustBox(union1, DatumGetBoxP(v->spl_rdatum));
v->spl_rdatum = BoxPGetDatum(union1);
}
@@ -326,7 +339,7 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
))
allisequal = false;
adjustBox( &pageunion, cur );
adjustBox(&pageunion, cur);
}
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
@@ -359,12 +372,12 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
}
}
if ( v->spl_ldatum_exists )
adjustBox( unionL, DatumGetBoxP( v->spl_ldatum ) );
if (v->spl_ldatum_exists)
adjustBox(unionL, DatumGetBoxP(v->spl_ldatum));
v->spl_ldatum = BoxPGetDatum(unionL);
if ( v->spl_rdatum_exists )
adjustBox( unionR, DatumGetBoxP( v->spl_rdatum ) );
if (v->spl_rdatum_exists)
adjustBox(unionR, DatumGetBoxP(v->spl_rdatum));
v->spl_rdatum = BoxPGetDatum(unionR);
v->spl_ldatum_exists = v->spl_rdatum_exists = false;
@@ -471,13 +484,13 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
}
if (direction == 'x')
chooseLR( v,
listL, posL, unionL,
listR, posR, unionR );
else
chooseLR( v,
listB, posB, unionB,
listT, posT, unionT );
chooseLR(v,
listL, posL, unionL,
listR, posR, unionR);
else
chooseLR(v,
listB, posB, unionB,
listT, posT, unionT);
PG_RETURN_POINTER(v);
}
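adjustBox() above simply stretches one bounding box far enough to cover another, and gist_box_union() folds it over every entry on the page. The same idea works standalone; the Box struct here uses plain doubles instead of the backend's geometric BOX type, so it is only a sketch of the logic:

#include <stdio.h>

typedef struct
{
    double low_x, low_y;
    double high_x, high_y;
} Box;

/* Enlarge *b just enough to also cover *addon (same logic as adjustBox). */
static void adjust_box(Box *b, const Box *addon)
{
    if (b->high_x < addon->high_x)
        b->high_x = addon->high_x;
    if (b->low_x > addon->low_x)
        b->low_x = addon->low_x;
    if (b->high_y < addon->high_y)
        b->high_y = addon->high_y;
    if (b->low_y > addon->low_y)
        b->low_y = addon->low_y;
}

int main(void)
{
    Box boxes[] = {
        {0, 0, 2, 2},
        {1, -1, 3, 1},
        {-2, 0, 0, 4},
    };
    Box un = boxes[0];

    /* fold adjust_box over the vector, as gist_box_union does */
    for (int i = 1; i < 3; i++)
        adjust_box(&un, &boxes[i]);

    printf("union: (%g,%g) - (%g,%g)\n", un.low_x, un.low_y, un.high_x, un.high_y);
    return 0;
}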
View File
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.64 2006/07/14 14:52:16 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.65 2006/10/04 00:29:48 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -228,12 +228,12 @@ gistendscan(PG_FUNCTION_ARGS)
static void
gistfreestack(GISTSearchStack *s)
{
{
while (s != NULL)
{
GISTSearchStack *p = s->next;
pfree(s);
s = p;
}
}
View File
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistsplit.c,v 1.2 2006/07/14 14:52:16 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistsplit.c,v 1.3 2006/10/04 00:29:48 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -16,12 +16,13 @@
#include "access/gist_private.h"
typedef struct {
Datum *attr;
int len;
typedef struct
{
Datum *attr;
int len;
OffsetNumber *entries;
bool *isnull;
bool *equiv;
bool *isnull;
bool *equiv;
} GistSplitUnion;
@@ -29,25 +30,28 @@ typedef struct {
* Forms unions of subkeys after page split, but
* uses only tuples aren't in groups of equalent tuples
*/
static void
gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
GistSplitUnion *gsvp, int startkey) {
IndexTuple *cleanedItVec;
int i, cleanedLen=0;
static void
gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
GistSplitUnion *gsvp, int startkey)
{
IndexTuple *cleanedItVec;
int i,
cleanedLen = 0;
cleanedItVec = (IndexTuple*)palloc(sizeof(IndexTuple) * gsvp->len);
cleanedItVec = (IndexTuple *) palloc(sizeof(IndexTuple) * gsvp->len);
for(i=0;i<gsvp->len;i++) {
if ( gsvp->equiv && gsvp->equiv[gsvp->entries[i]])
for (i = 0; i < gsvp->len; i++)
{
if (gsvp->equiv && gsvp->equiv[gsvp->entries[i]])
continue;
cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1];
}
gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen, startkey,
gsvp->attr, gsvp->isnull);
gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen, startkey,
gsvp->attr, gsvp->isnull);
pfree( cleanedItVec );
pfree(cleanedItVec);
}
/*
@@ -56,7 +60,7 @@ gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
static void
gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl, int attno)
{
GistSplitUnion gsvp;
GistSplitUnion gsvp;
gsvp.equiv = spl->spl_equiv;
@@ -76,34 +80,40 @@ gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl, i
}
/*
* find group in vector with equivalent value
* find group in vector with equivalent value
*/
static int
gistfindgroup(Relation r, GISTSTATE *giststate, GISTENTRY *valvec, GistSplitVector *spl, int attno)
{
int i;
GISTENTRY entry;
int len=0;
int len = 0;
/*
* attno key is always not null (see gistSplitByKey), so we may not check for
* nulls
* attno key is always not null (see gistSplitByKey), so we may not check
* for nulls
*/
gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL, (OffsetNumber) 0, FALSE);
for (i = 0; i < spl->splitVector.spl_nleft; i++) {
float penalty = gistpenalty(giststate, attno, &entry, false,
&valvec[spl->splitVector.spl_left[i]], false);
if ( penalty == 0.0 ) {
for (i = 0; i < spl->splitVector.spl_nleft; i++)
{
float penalty = gistpenalty(giststate, attno, &entry, false,
&valvec[spl->splitVector.spl_left[i]], false);
if (penalty == 0.0)
{
spl->spl_equiv[spl->splitVector.spl_left[i]] = true;
len++;
}
}
gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL, (OffsetNumber) 0, FALSE);
for (i = 0; i < spl->splitVector.spl_nright; i++) {
float penalty = gistpenalty(giststate, attno, &entry, false,
&valvec[spl->splitVector.spl_right[i]], false);
if ( penalty == 0.0 ) {
for (i = 0; i < spl->splitVector.spl_nright; i++)
{
float penalty = gistpenalty(giststate, attno, &entry, false,
&valvec[spl->splitVector.spl_right[i]], false);
if (penalty == 0.0)
{
spl->spl_equiv[spl->splitVector.spl_right[i]] = true;
len++;
}
@@ -113,24 +123,32 @@ gistfindgroup(Relation r, GISTSTATE *giststate, GISTENTRY *valvec, GistSplitVect
}
static void
cleanupOffsets( OffsetNumber *a, int *len, bool *equiv, int *LenEquiv ) {
int curlen,i;
OffsetNumber *curwpos;
cleanupOffsets(OffsetNumber *a, int *len, bool *equiv, int *LenEquiv)
{
int curlen,
i;
OffsetNumber *curwpos;
curlen = *len;
curwpos = a;
for (i = 0; i < *len; i++) {
if ( equiv[ a[i] ] == FALSE ) {
for (i = 0; i < *len; i++)
{
if (equiv[a[i]] == FALSE)
{
*curwpos = a[i];
curwpos++;
} else {
}
else
{
/* corner case: we shouldn't make void array */
if ( curlen==1 ) {
equiv[ a[i] ] = FALSE; /* mark item as non-equivalent */
i--; /* redo the same */
if (curlen == 1)
{
equiv[a[i]] = FALSE; /* mark item as non-equivalent */
i--; /* redo the same */
*LenEquiv -= 1;
continue;
} else
}
else
curlen--;
}
}
@@ -139,33 +157,37 @@ cleanupOffsets( OffsetNumber *a, int *len, bool *equiv, int *LenEquiv ) {
}
static void
placeOne( Relation r, GISTSTATE *giststate, GistSplitVector *v, IndexTuple itup, OffsetNumber off, int attno ) {
placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v, IndexTuple itup, OffsetNumber off, int attno)
{
GISTENTRY identry[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
bool toLeft = true;
bool toLeft = true;
gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0, identry, isnull);
for(;attno<giststate->tupdesc->natts;attno++) {
float lpenalty, rpenalty;
for (; attno < giststate->tupdesc->natts; attno++)
{
float lpenalty,
rpenalty;
GISTENTRY entry;
gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, FALSE);
lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno], identry+attno, isnull[ attno ]);
gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, FALSE);
rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno], identry+attno, isnull[ attno ]);
gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, FALSE);
lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno], identry + attno, isnull[attno]);
gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, FALSE);
rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno], identry + attno, isnull[attno]);
if ( lpenalty != rpenalty ) {
if ( lpenalty > rpenalty )
if (lpenalty != rpenalty)
{
if (lpenalty > rpenalty)
toLeft = false;
break;
}
}
if ( toLeft )
v->splitVector.spl_left[ v->splitVector.spl_nleft++ ] = off;
if (toLeft)
v->splitVector.spl_left[v->splitVector.spl_nleft++] = off;
else
v->splitVector.spl_right[ v->splitVector.spl_nright++ ] = off;
v->splitVector.spl_right[v->splitVector.spl_nright++] = off;
}
#define SWAPVAR( s, d, t ) \
@@ -176,71 +198,83 @@ do { \
} while(0)
/*
* adjust left and right unions according to splits by previous
* split by firsts columns. This function is called only in case
* adjust left and right unions according to splits by previous
* split by firsts columns. This function is called only in case
* when pickSplit doesn't support subspplit.
*/
static void
supportSecondarySplit( Relation r, GISTSTATE *giststate, int attno, GIST_SPLITVEC *sv, Datum oldL, Datum oldR ) {
bool leaveOnLeft = true, tmpBool;
GISTENTRY entryL, entryR, entrySL, entrySR;
gistentryinit(entryL, oldL, r, NULL, 0, FALSE);
gistentryinit(entryR, oldR, r, NULL, 0, FALSE);
gistentryinit(entrySL, sv->spl_ldatum , r, NULL, 0, FALSE);
gistentryinit(entrySR, sv->spl_rdatum , r, NULL, 0, FALSE);
supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno, GIST_SPLITVEC *sv, Datum oldL, Datum oldR)
{
bool leaveOnLeft = true,
tmpBool;
GISTENTRY entryL,
entryR,
entrySL,
entrySR;
if ( sv->spl_ldatum_exists && sv->spl_rdatum_exists ) {
float penalty1, penalty2;
gistentryinit(entryL, oldL, r, NULL, 0, FALSE);
gistentryinit(entryR, oldR, r, NULL, 0, FALSE);
gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, FALSE);
gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, FALSE);
if (sv->spl_ldatum_exists && sv->spl_rdatum_exists)
{
float penalty1,
penalty2;
penalty1 = gistpenalty(giststate, attno, &entryL, false, &entrySL, false) +
gistpenalty(giststate, attno, &entryR, false, &entrySR, false);
gistpenalty(giststate, attno, &entryR, false, &entrySR, false);
penalty2 = gistpenalty(giststate, attno, &entryL, false, &entrySR, false) +
gistpenalty(giststate, attno, &entryR, false, &entrySL, false);
gistpenalty(giststate, attno, &entryR, false, &entrySL, false);
if ( penalty1 > penalty2 )
if (penalty1 > penalty2)
leaveOnLeft = false;
} else {
GISTENTRY *entry1 = (sv->spl_ldatum_exists) ? &entryL : &entryR;
float penalty1, penalty2;
}
else
{
GISTENTRY *entry1 = (sv->spl_ldatum_exists) ? &entryL : &entryR;
float penalty1,
penalty2;
/*
* there is only one previously defined union,
* so we just choose swap or not by lowest penalty
* there is only one previously defined union, so we just choose swap
* or not by lowest penalty
*/
penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false);
penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false);
if ( penalty1 < penalty2 )
leaveOnLeft = ( sv->spl_ldatum_exists ) ? true : false;
if (penalty1 < penalty2)
leaveOnLeft = (sv->spl_ldatum_exists) ? true : false;
else
leaveOnLeft = ( sv->spl_rdatum_exists ) ? true : false;
leaveOnLeft = (sv->spl_rdatum_exists) ? true : false;
}
if ( leaveOnLeft == false ) {
if (leaveOnLeft == false)
{
/*
* swap left and right
* swap left and right
*/
OffsetNumber *off, noff;
Datum datum;
OffsetNumber *off,
noff;
Datum datum;
SWAPVAR( sv->spl_left, sv->spl_right, off );
SWAPVAR( sv->spl_nleft, sv->spl_nright, noff );
SWAPVAR( sv->spl_ldatum, sv->spl_rdatum, datum );
gistentryinit(entrySL, sv->spl_ldatum , r, NULL, 0, FALSE);
gistentryinit(entrySR, sv->spl_rdatum , r, NULL, 0, FALSE);
SWAPVAR(sv->spl_left, sv->spl_right, off);
SWAPVAR(sv->spl_nleft, sv->spl_nright, noff);
SWAPVAR(sv->spl_ldatum, sv->spl_rdatum, datum);
gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, FALSE);
gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, FALSE);
}
if ( sv->spl_ldatum_exists )
if (sv->spl_ldatum_exists)
gistMakeUnionKey(giststate, attno, &entryL, false, &entrySL, false,
&sv->spl_ldatum, &tmpBool);
&sv->spl_ldatum, &tmpBool);
if ( sv->spl_rdatum_exists )
if (sv->spl_rdatum_exists)
gistMakeUnionKey(giststate, attno, &entryR, false, &entrySR, false,
&sv->spl_rdatum, &tmpBool);
&sv->spl_rdatum, &tmpBool);
sv->spl_ldatum_exists = sv->spl_rdatum_exists = false;
}
@@ -251,20 +285,21 @@ supportSecondarySplit( Relation r, GISTSTATE *giststate, int attno, GIST_SPLITVE
* get better split.
* Returns TRUE and v->spl_equiv = NULL if left and right unions of attno columns are the same,
* so caller may find better split
* Returns TRUE and v->spl_equiv != NULL if there is tuples which may be freely moved
* Returns TRUE and v->spl_equiv != NULL if there is tuples which may be freely moved
*/
static bool
gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v,
IndexTuple *itup, int len, GISTSTATE *giststate)
{
GIST_SPLITVEC *sv = &v->splitVector;
/*
* now let the user-defined picksplit function set up the split vector; in
* entryvec have no null value!!
*/
sv->spl_ldatum_exists = ( v->spl_lisnull[ attno ] ) ? false : true;
sv->spl_rdatum_exists = ( v->spl_risnull[ attno ] ) ? false : true;
sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true;
sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true;
sv->spl_ldatum = v->spl_lattr[attno];
sv->spl_rdatum = v->spl_rattr[attno];
@@ -278,11 +313,12 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber)
sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1);
if( sv->spl_ldatum_exists || sv->spl_rdatum_exists ) {
elog(LOG,"PickSplit method of %d columns of index '%s' doesn't support secondary split",
attno + 1, RelationGetRelationName(r) );
if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
{
elog(LOG, "PickSplit method of %d columns of index '%s' doesn't support secondary split",
attno + 1, RelationGetRelationName(r));
supportSecondarySplit( r, giststate, attno, sv, v->spl_lattr[attno], v->spl_rattr[attno] );
supportSecondarySplit(r, giststate, attno, sv, v->spl_lattr[attno], v->spl_rattr[attno]);
}
v->spl_lattr[attno] = sv->spl_ldatum;
@@ -296,53 +332,64 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
*/
v->spl_equiv = NULL;
if (giststate->tupdesc->natts > 1 && attno+1 != giststate->tupdesc->natts)
if (giststate->tupdesc->natts > 1 && attno + 1 != giststate->tupdesc->natts)
{
if ( gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum) ) {
if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum))
{
/*
* Left and right key's unions are equial, so
* we can get better split by following columns. Note,
* unions for attno columns are already done.
* Left and right key's unions are equial, so we can get better
* split by following columns. Note, unions for attno columns are
* already done.
*/
return true;
} else {
}
else
{
int LenEquiv;
v->spl_equiv = (bool *) palloc0(sizeof(bool) * (entryvec->n+1));
v->spl_equiv = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1));
LenEquiv = gistfindgroup(r, giststate, entryvec->vector, v, attno);
/*
* if possible, we should distribute equivalent tuples
*/
if (LenEquiv == 0 ) {
* if possible, we should distribute equivalent tuples
*/
if (LenEquiv == 0)
{
gistunionsubkey(giststate, itup, v, attno + 1);
} else {
cleanupOffsets( sv->spl_left, &sv->spl_nleft, v->spl_equiv, &LenEquiv );
cleanupOffsets( sv->spl_right, &sv->spl_nright, v->spl_equiv, &LenEquiv );
}
else
{
cleanupOffsets(sv->spl_left, &sv->spl_nleft, v->spl_equiv, &LenEquiv);
cleanupOffsets(sv->spl_right, &sv->spl_nright, v->spl_equiv, &LenEquiv);
gistunionsubkey(giststate, itup, v, attno + 1);
if (LenEquiv == 1 ) {
if (LenEquiv == 1)
{
/*
* In case with one tuple we just choose left-right
* by penalty. It's simplify user-defined pickSplit
* In case with one tuple we just choose left-right by
* penalty. It's simplify user-defined pickSplit
*/
OffsetNumber toMove = InvalidOffsetNumber;
for(toMove=FirstOffsetNumber;toMove<entryvec->n;toMove++)
if ( v->spl_equiv[ toMove ] )
for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++)
if (v->spl_equiv[toMove])
break;
Assert( toMove < entryvec->n );
placeOne( r, giststate, v, itup[ toMove-1 ], toMove, attno+1 );
/* redo gistunionsubkey(): it will not degradate performance,
* because it's very rarely */
Assert(toMove < entryvec->n);
placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1);
/*
* redo gistunionsubkey(): it will not degradate
* performance, because it's very rarely
*/
v->spl_equiv = NULL;
gistunionsubkey(giststate, itup, v, attno + 1);
return false;
} else if ( LenEquiv > 1 )
}
else if (LenEquiv > 1)
return true;
}
}
@@ -352,60 +399,65 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
}
/*
* simple split page
* simple split page
*/
static void
gistSplitHalf(GIST_SPLITVEC *v, int len) {
int i;
gistSplitHalf(GIST_SPLITVEC *v, int len)
{
int i;
v->spl_nright = v->spl_nleft = 0;
v->spl_nright = v->spl_nleft = 0;
v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
v->spl_right= (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
for(i = 1; i <= len; i++)
if ( i<len/2 )
v->spl_right[ v->spl_nright++ ] = i;
v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
for (i = 1; i <= len; i++)
if (i < len / 2)
v->spl_right[v->spl_nright++] = i;
else
v->spl_left[ v->spl_nleft++ ] = i;
v->spl_left[v->spl_nleft++] = i;
}
/*
* if it was invalid tuple then we need special processing.
* We move all invalid tuples on right page.
* We move all invalid tuples on right page.
*
* if there is no place on left page, gistSplit will be called one more
* if there is no place on left page, gistSplit will be called one more
* time for left page.
*
* Normally, we never exec this code, but after crash replay it's possible
* to get 'invalid' tuples (probability is low enough)
*/
static void
gistSplitByInvalid(GISTSTATE *giststate, GistSplitVector *v, IndexTuple *itup, int len) {
int i;
static OffsetNumber offInvTuples[ MaxOffsetNumber ];
int nOffInvTuples = 0;
gistSplitByInvalid(GISTSTATE *giststate, GistSplitVector *v, IndexTuple *itup, int len)
{
int i;
static OffsetNumber offInvTuples[MaxOffsetNumber];
int nOffInvTuples = 0;
for (i = 1; i <= len; i++)
if ( GistTupleIsInvalid(itup[i - 1]) )
offInvTuples[ nOffInvTuples++ ] = i;
if (GistTupleIsInvalid(itup[i - 1]))
offInvTuples[nOffInvTuples++] = i;
if ( nOffInvTuples == len ) {
if (nOffInvTuples == len)
{
/* corner case, all tuples are invalid */
v->spl_rightvalid= v->spl_leftvalid = false;
gistSplitHalf( &v->splitVector, len );
} else {
GistSplitUnion gsvp;
v->spl_rightvalid = v->spl_leftvalid = false;
gistSplitHalf(&v->splitVector, len);
}
else
{
GistSplitUnion gsvp;
v->splitVector.spl_right = offInvTuples;
v->splitVector.spl_nright = nOffInvTuples;
v->spl_rightvalid = false;
v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
v->splitVector.spl_nleft = 0;
for(i = 1; i <= len; i++)
if ( !GistTupleIsInvalid(itup[i - 1]) )
v->splitVector.spl_left[ v->splitVector.spl_nleft++ ] = i;
for (i = 1; i <= len; i++)
if (!GistTupleIsInvalid(itup[i - 1]))
v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
v->spl_leftvalid = true;
gsvp.equiv = NULL;
gsvp.attr = v->spl_lattr;
gsvp.len = v->splitVector.spl_nleft;
@@ -418,52 +470,58 @@ gistSplitByInvalid(GISTSTATE *giststate, GistSplitVector *v, IndexTuple *itup, i
/*
* trys to split page by attno key, in a case of null
* values move its to separate page.
* values move its to separate page.
*/
void
gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate,
GistSplitVector *v, GistEntryVector *entryvec, int attno) {
int i;
static OffsetNumber offNullTuples[ MaxOffsetNumber ];
int nOffNullTuples = 0;
gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate,
GistSplitVector *v, GistEntryVector *entryvec, int attno)
{
int i;
static OffsetNumber offNullTuples[MaxOffsetNumber];
int nOffNullTuples = 0;
for (i = 1; i <= len; i++) {
Datum datum;
bool IsNull;
for (i = 1; i <= len; i++)
{
Datum datum;
bool IsNull;
if (!GistPageIsLeaf(page) && GistTupleIsInvalid(itup[i - 1])) {
if (!GistPageIsLeaf(page) && GistTupleIsInvalid(itup[i - 1]))
{
gistSplitByInvalid(giststate, v, itup, len);
return;
}
datum = index_getattr(itup[i - 1], attno+1, giststate->tupdesc, &IsNull);
datum = index_getattr(itup[i - 1], attno + 1, giststate->tupdesc, &IsNull);
gistdentryinit(giststate, attno, &(entryvec->vector[i]),
datum, r, page, i,
FALSE, IsNull);
if ( IsNull )
offNullTuples[ nOffNullTuples++ ] = i;
if (IsNull)
offNullTuples[nOffNullTuples++] = i;
}
v->spl_leftvalid = v->spl_rightvalid = true;
if ( nOffNullTuples == len ) {
/*
if (nOffNullTuples == len)
{
/*
* Corner case: All keys in attno column are null, we should try to
* split by keys in next column. It all keys in all columns
* are NULL just split page half by half
* split by keys in next column. It all keys in all columns are NULL
* just split page half by half
*/
v->spl_risnull[attno] = v->spl_lisnull[attno] = TRUE;
if ( attno+1 == r->rd_att->natts )
gistSplitHalf( &v->splitVector, len );
else
gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno+1);
} else if ( nOffNullTuples > 0 ) {
int j=0;
/*
* We don't want to mix NULLs and not-NULLs keys
* on one page, so move nulls to right page
if (attno + 1 == r->rd_att->natts)
gistSplitHalf(&v->splitVector, len);
else
gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno + 1);
}
else if (nOffNullTuples > 0)
{
int j = 0;
/*
* We don't want to mix NULLs and not-NULLs keys on one page, so move
* nulls to right page
*/
v->splitVector.spl_right = offNullTuples;
v->splitVector.spl_nright = nOffNullTuples;
@@ -471,61 +529,71 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *gist
v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
v->splitVector.spl_nleft = 0;
for(i = 1; i <= len; i++)
if ( j<v->splitVector.spl_nright && offNullTuples[j] == i )
for (i = 1; i <= len; i++)
if (j < v->splitVector.spl_nright && offNullTuples[j] == i)
j++;
else
v->splitVector.spl_left[ v->splitVector.spl_nleft++ ] = i;
v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
v->spl_equiv = NULL;
gistunionsubkey(giststate, itup, v, attno);
} else {
}
else
{
/*
* all keys are not-null
*/
entryvec->n = len+1;
entryvec->n = len + 1;
if ( gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate) && attno+1 != r->rd_att->natts ) {
if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate) && attno + 1 != r->rd_att->natts)
{
/*
* Splitting on attno column is not optimized: there is a tuples which can be freely
* left or right page, we will try to split page by
* following columns
* Splitting on attno column is not optimized: there is a tuples
* which can be freely left or right page, we will try to split
* page by following columns
*/
if ( v->spl_equiv == NULL ) {
/* simple case: left and right keys for attno column are equial */
gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno+1);
} else {
if (v->spl_equiv == NULL)
{
/*
* simple case: left and right keys for attno column are
* equial
*/
gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno + 1);
}
else
{
/* we should clean up vector from already distributed tuples */
IndexTuple *newitup = (IndexTuple*)palloc((len + 1) * sizeof(IndexTuple));
OffsetNumber *map = (OffsetNumber*)palloc((len + 1) * sizeof(IndexTuple));
int newlen = 0;
IndexTuple *newitup = (IndexTuple *) palloc((len + 1) * sizeof(IndexTuple));
OffsetNumber *map = (OffsetNumber *) palloc((len + 1) * sizeof(IndexTuple));
int newlen = 0;
GIST_SPLITVEC backupSplit = v->splitVector;
for(i=0; i<len; i++)
if ( v->spl_equiv[i+1] ) {
map[ newlen ] = i+1;
newitup[ newlen++ ] = itup[i];
for (i = 0; i < len; i++)
if (v->spl_equiv[i + 1])
{
map[newlen] = i + 1;
newitup[newlen++] = itup[i];
}
Assert( newlen>0 );
Assert(newlen > 0);
backupSplit.spl_left = (OffsetNumber*)palloc(sizeof(OffsetNumber)*len);
memcpy( backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber)*v->splitVector.spl_nleft);
backupSplit.spl_right = (OffsetNumber*)palloc(sizeof(OffsetNumber)*len);
memcpy( backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber)*v->splitVector.spl_nright);
backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
memcpy(backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber) * v->splitVector.spl_nleft);
backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
memcpy(backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber) * v->splitVector.spl_nright);
gistSplitByKey(r, page, newitup, newlen, giststate, v, entryvec, attno+1);
gistSplitByKey(r, page, newitup, newlen, giststate, v, entryvec, attno + 1);
/* merge result of subsplit */
for(i=0;i<v->splitVector.spl_nleft;i++)
backupSplit.spl_left[ backupSplit.spl_nleft++ ] = map[ v->splitVector.spl_left[i]-1 ];
for(i=0;i<v->splitVector.spl_nright;i++)
backupSplit.spl_right[ backupSplit.spl_nright++ ] = map[ v->splitVector.spl_right[i]-1 ];
for (i = 0; i < v->splitVector.spl_nleft; i++)
backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1];
for (i = 0; i < v->splitVector.spl_nright; i++)
backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1];
v->splitVector = backupSplit;
/* reunion left and right datums */
gistunionsubkey(giststate, itup, v, attno);
}
}
}
}
}
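gistSplitHalf() above is the fallback used when nothing distinguishes the tuples (all keys NULL, or all tuples invalid): offsets below len/2 go to the right page and everything else to the left. With OffsetNumber reduced to int and the split vector returned through out parameters, the same fallback can be sketched as:

#include <stdio.h>
#include <stdlib.h>

/*
 * Mirror of gistSplitHalf(): split offsets 1..len into two groups with no
 * regard for key values.  Offsets below len/2 land on the right page,
 * everything else on the left, matching the original loop.
 */
static void split_half(int len, int **left, int *nleft, int **right, int *nright)
{
    *nleft = *nright = 0;
    *left = malloc(len * sizeof(int));
    *right = malloc(len * sizeof(int));

    for (int i = 1; i <= len; i++)
    {
        if (i < len / 2)
            (*right)[(*nright)++] = i;
        else
            (*left)[(*nleft)++] = i;
    }
}

int main(void)
{
    int *l, *r, nl, nr;

    split_half(7, &l, &nl, &r, &nr);
    printf("left (%d):", nl);
    for (int i = 0; i < nl; i++)
        printf(" %d", l[i]);
    printf("\nright (%d):", nr);
    for (int i = 0; i < nr; i++)
        printf(" %d", r[i]);
    printf("\n");
    free(l);
    free(r);
    return 0;
}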
View File
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.19 2006/07/14 14:52:16 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.20 2006/10/04 00:29:48 momjian Exp $
*-------------------------------------------------------------------------
*/
#include "postgres.h"
@@ -22,8 +22,8 @@
* static *S used for temrorary storage (saves stack and palloc() call)
*/
static Datum attrS[INDEX_MAX_KEYS];
static bool isnullS[INDEX_MAX_KEYS];
static Datum attrS[INDEX_MAX_KEYS];
static bool isnullS[INDEX_MAX_KEYS];
/*
* Write itup vector to page, has no control of free space
@@ -57,14 +57,17 @@ gistfillbuffer(Relation r, Page page, IndexTuple *itup,
bool
gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)
{
unsigned int size = freespace, deleted = 0;
unsigned int size = freespace,
deleted = 0;
int i;
for (i = 0; i < len; i++)
size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
if ( todelete != InvalidOffsetNumber ) {
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, todelete));
if (todelete != InvalidOffsetNumber)
{
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, todelete));
deleted = IndexTupleSize(itup) + sizeof(ItemIdData);
}
@@ -72,11 +75,12 @@ gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size f
}
bool
gistfitpage(IndexTuple *itvec, int len) {
int i;
Size size=0;
gistfitpage(IndexTuple *itvec, int len)
{
int i;
Size size = 0;
for(i=0;i<len;i++)
for (i = 0; i < len; i++)
size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
/* TODO: Consider fillfactor */
@@ -119,56 +123,64 @@ gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
*/
IndexTupleData *
gistfillitupvec(IndexTuple *vec, int veclen, int *memlen) {
char *ptr, *ret;
int i;
gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
{
char *ptr,
*ret;
int i;
*memlen = 0;
*memlen=0;
for (i = 0; i < veclen; i++)
*memlen += IndexTupleSize(vec[i]);
ptr = ret = palloc(*memlen);
for (i = 0; i < veclen; i++) {
for (i = 0; i < veclen; i++)
{
memcpy(ptr, vec[i], IndexTupleSize(vec[i]));
ptr += IndexTupleSize(vec[i]);
}
return (IndexTupleData*)ret;
return (IndexTupleData *) ret;
}
/*
* Make unions of keys in IndexTuple vector, return FALSE if itvec contains
* Make unions of keys in IndexTuple vector, return FALSE if itvec contains
* invalid tuple. Resulting Datums aren't compressed.
*/
bool
gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startkey,
Datum *attr, bool *isnull ) {
bool
gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startkey,
Datum *attr, bool *isnull)
{
int i;
GistEntryVector *evec;
int attrsize;
int attrsize;
evec = (GistEntryVector *) palloc( ( len + 2 ) * sizeof(GISTENTRY) + GEVHDRSZ);
evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ);
for (i = startkey; i < giststate->tupdesc->natts; i++) {
int j;
for (i = startkey; i < giststate->tupdesc->natts; i++)
{
int j;
evec->n = 0;
if ( !isnull[i] ) {
gistentryinit( evec->vector[evec->n], attr[i],
NULL, NULL, (OffsetNumber) 0,
FALSE);
if (!isnull[i])
{
gistentryinit(evec->vector[evec->n], attr[i],
NULL, NULL, (OffsetNumber) 0,
FALSE);
evec->n++;
}
for (j = 0; j < len; j++) {
Datum datum;
bool IsNull;
for (j = 0; j < len; j++)
{
Datum datum;
bool IsNull;
if (GistTupleIsInvalid(itvec[j]))
return FALSE; /* signals that union with invalid tuple => result is invalid */
if (GistTupleIsInvalid(itvec[j]))
return FALSE; /* signals that union with invalid tuple =>
* result is invalid */
datum = index_getattr(itvec[j], i + 1, giststate->tupdesc, &IsNull);
if (IsNull)
@@ -183,19 +195,23 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startke
}
/* If this tuple vector was all NULLs, the union is NULL */
if ( evec->n == 0 ) {
if (evec->n == 0)
{
attr[i] = (Datum) 0;
isnull[i] = TRUE;
} else {
if (evec->n == 1) {
}
else
{
if (evec->n == 1)
{
evec->n = 2;
evec->vector[1] = evec->vector[0];
}
}
/* Make union and store in attr array */
attr[i] = FunctionCall2(&giststate->unionFn[i],
PointerGetDatum(evec),
PointerGetDatum(&attrsize));
PointerGetDatum(evec),
PointerGetDatum(&attrsize));
isnull[i] = FALSE;
}
@@ -213,57 +229,67 @@ gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
{
memset(isnullS, TRUE, sizeof(bool) * giststate->tupdesc->natts);
if ( !gistMakeUnionItVec(giststate, itvec, len, 0, attrS, isnullS ) )
return gist_form_invalid_tuple(InvalidBlockNumber);
if (!gistMakeUnionItVec(giststate, itvec, len, 0, attrS, isnullS))
return gist_form_invalid_tuple(InvalidBlockNumber);
return gistFormTuple(giststate, r, attrS, isnullS, false);
return gistFormTuple(giststate, r, attrS, isnullS, false);
}
/*
/*
* makes union of two key
*/
void
gistMakeUnionKey( GISTSTATE *giststate, int attno,
GISTENTRY *entry1, bool isnull1,
GISTENTRY *entry2, bool isnull2,
Datum *dst, bool *dstisnull ) {
gistMakeUnionKey(GISTSTATE *giststate, int attno,
GISTENTRY *entry1, bool isnull1,
GISTENTRY *entry2, bool isnull2,
Datum *dst, bool *dstisnull)
{
int dstsize;
int dstsize;
static char storage[ 2 * sizeof(GISTENTRY) + GEVHDRSZ ];
GistEntryVector *evec = (GistEntryVector*)storage;
static char storage[2 * sizeof(GISTENTRY) + GEVHDRSZ];
GistEntryVector *evec = (GistEntryVector *) storage;
evec->n = 2;
if ( isnull1 && isnull2 ) {
if (isnull1 && isnull2)
{
*dstisnull = TRUE;
*dst = (Datum)0;
} else {
if ( isnull1 == FALSE && isnull2 == FALSE ) {
*dst = (Datum) 0;
}
else
{
if (isnull1 == FALSE && isnull2 == FALSE)
{
evec->vector[0] = *entry1;
evec->vector[1] = *entry2;
} else if ( isnull1 == FALSE ) {
}
else if (isnull1 == FALSE)
{
evec->vector[0] = *entry1;
evec->vector[1] = *entry1;
} else {
}
else
{
evec->vector[0] = *entry2;
evec->vector[1] = *entry2;
}
*dstisnull = FALSE;
*dst = FunctionCall2(&giststate->unionFn[attno],
PointerGetDatum(evec),
PointerGetDatum(&dstsize));
PointerGetDatum(evec),
PointerGetDatum(&dstsize));
}
}
bool
gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b) {
bool result;
gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b)
{
bool result;
FunctionCall3(&giststate->equalFn[attno],
a, b,
PointerGetDatum(&result));
a, b,
PointerGetDatum(&result));
return result;
}
@@ -309,22 +335,24 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
gistDeCompressAtt(giststate, r, addtup, NULL,
(OffsetNumber) 0, addentries, addisnull);
for(i = 0; i < r->rd_att->natts; i++) {
gistMakeUnionKey( giststate, i,
oldentries + i, oldisnull[i],
addentries + i, addisnull[i],
attrS + i, isnullS + i );
for (i = 0; i < r->rd_att->natts; i++)
{
gistMakeUnionKey(giststate, i,
oldentries + i, oldisnull[i],
addentries + i, addisnull[i],
attrS + i, isnullS + i);
if ( neednew )
if (neednew)
/* we already need new key, so we can skip check */
continue;
if ( isnullS[i] )
if (isnullS[i])
/* union of key may be NULL if and only if both keys are NULL */
continue;
if ( !addisnull[i] ) {
if ( oldisnull[i] || gistKeyIsEQ(giststate, i, oldentries[i].key, attrS[i])==false )
if (!addisnull[i])
{
if (oldisnull[i] || gistKeyIsEQ(giststate, i, oldentries[i].key, attrS[i]) == false)
neednew = true;
}
}
@@ -363,8 +391,8 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
it, NULL, (OffsetNumber) 0,
identry, isnull);
Assert( maxoff >= FirstOffsetNumber );
Assert( !GistPageIsLeaf(p) );
Assert(maxoff >= FirstOffsetNumber);
Assert(!GistPageIsLeaf(p));
for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i))
{
@@ -484,7 +512,7 @@ gistFormTuple(GISTSTATE *giststate, Relation r,
{
gistcentryinit(giststate, i, &centry[i], attdata[i],
r, NULL, (OffsetNumber) 0,
newValues,
newValues,
FALSE);
compatt[i] = centry[i].key;
}
@@ -500,18 +528,19 @@ gistpenalty(GISTSTATE *giststate, int attno,
GISTENTRY *orig, bool isNullOrig,
GISTENTRY *add, bool isNullAdd)
{
float penalty = 0.0;
float penalty = 0.0;
if ( giststate->penaltyFn[attno].fn_strict==FALSE || ( isNullOrig == FALSE && isNullAdd == FALSE ) )
if (giststate->penaltyFn[attno].fn_strict == FALSE || (isNullOrig == FALSE && isNullAdd == FALSE))
FunctionCall3(&giststate->penaltyFn[attno],
PointerGetDatum(orig),
PointerGetDatum(add),
PointerGetDatum(&penalty));
else if ( isNullOrig && isNullAdd )
else if (isNullOrig && isNullAdd)
penalty = 0.0;
else
penalty = 1e10; /* try to prevent to mix null and non-null value */
penalty = 1e10; /* try to prevent to mix null and non-null
* value */
return penalty;
}
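gistpenalty() above wraps the user-supplied penalty function in NULL handling: a strict penalty function is only invoked when both keys are non-NULL, two NULLs cost nothing, and mixing a NULL with a non-NULL key is priced at 1e10 so the chooser keeps them apart. The callback type and toy penalty below are stand-ins for the GISTENTRY/fmgr machinery, not the real interface:

#include <stdbool.h>
#include <stdio.h>

typedef float (*penalty_fn) (double orig, double add);

/*
 * NULL-aware wrapper, same shape as gistpenalty(): strict penalty
 * functions never see a NULL input, NULL-vs-NULL is free, and
 * NULL-vs-non-NULL gets a huge penalty so the two are kept apart.
 */
static float penalty(penalty_fn fn, bool fn_strict,
                     double orig, bool orig_isnull,
                     double add, bool add_isnull)
{
    if (!fn_strict || (!orig_isnull && !add_isnull))
        return fn(orig, add);
    if (orig_isnull && add_isnull)
        return 0.0f;
    return 1e10f;               /* try to prevent mixing null and non-null */
}

/* toy penalty: how much the interval [0, orig] must grow to cover add */
static float grow(double orig, double add)
{
    return (add > orig) ? (float) (add - orig) : 0.0f;
}

int main(void)
{
    printf("%g\n", penalty(grow, true, 5.0, false, 8.0, false)); /* 3 */
    printf("%g\n", penalty(grow, true, 5.0, false, 0.0, true));  /* 1e+10 */
    printf("%g\n", penalty(grow, true, 0.0, true, 0.0, true));   /* 0 */
    return 0;
}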
View File
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.27 2006/09/21 20:31:21 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.28 2006/10/04 00:29:48 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -45,19 +45,24 @@ typedef struct
} ArrayTuple;
/*
* Make union of keys on page
* Make union of keys on page
*/
static IndexTuple
PageMakeUnionKey(GistVacuum *gv, Buffer buffer) {
Page page = BufferGetPage( buffer );
PageMakeUnionKey(GistVacuum *gv, Buffer buffer)
{
Page page = BufferGetPage(buffer);
IndexTuple *vec,
tmp, res;
tmp,
res;
int veclen = 0;
MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx);
vec = gistextractpage(page, &veclen);
/* we call gistunion() in temprorary context because user-defined functions called in gistunion()
may do not free all memory */
/*
* we call gistunion() in temprorary context because user-defined
* functions called in gistunion() may do not free all memory
*/
tmp = gistunion(gv->index, vec, veclen, &(gv->giststate));
MemoryContextSwitchTo(oldCtx);
@@ -73,21 +78,25 @@ PageMakeUnionKey(GistVacuum *gv, Buffer buffer) {
}
static void
gistDeleteSubtree( GistVacuum *gv, BlockNumber blkno ) {
Buffer buffer;
Page page;
gistDeleteSubtree(GistVacuum *gv, BlockNumber blkno)
{
Buffer buffer;
Page page;
buffer = ReadBuffer(gv->index, blkno);
LockBuffer(buffer, GIST_EXCLUSIVE);
page = (Page) BufferGetPage(buffer);
if ( !GistPageIsLeaf(page) ) {
int i;
if (!GistPageIsLeaf(page))
{
int i;
for (i = FirstOffsetNumber; i <= PageGetMaxOffsetNumber(page); i = OffsetNumberNext(i)) {
ItemId iid = PageGetItemId(page, i);
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
gistDeleteSubtree(gv, ItemPointerGetBlockNumber(&(idxtuple->t_tid)));
for (i = FirstOffsetNumber; i <= PageGetMaxOffsetNumber(page); i = OffsetNumberNext(i))
{
ItemId iid = PageGetItemId(page, i);
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
gistDeleteSubtree(gv, ItemPointerGetBlockNumber(&(idxtuple->t_tid)));
}
}
@@ -103,7 +112,7 @@ gistDeleteSubtree( GistVacuum *gv, BlockNumber blkno ) {
{
XLogRecData rdata[2];
XLogRecPtr recptr;
gistxlogPageDelete xlrec;
gistxlogPageDelete xlrec;
xlrec.node = gv->index->rd_node;
xlrec.blkno = blkno;
@@ -125,31 +134,34 @@ gistDeleteSubtree( GistVacuum *gv, BlockNumber blkno ) {
}
else
PageSetLSN(page, XLogRecPtrForTemp);
END_CRIT_SECTION();
UnlockReleaseBuffer(buffer);
}
static Page
GistPageGetCopyPage( Page page ) {
Size pageSize = PageGetPageSize( page );
Page tmppage;
static Page
GistPageGetCopyPage(Page page)
{
Size pageSize = PageGetPageSize(page);
Page tmppage;
tmppage=(Page)palloc( pageSize );
memcpy( tmppage, page, pageSize );
tmppage = (Page) palloc(pageSize);
memcpy(tmppage, page, pageSize);
return tmppage;
}
static ArrayTuple
vacuumSplitPage(GistVacuum *gv, Page tempPage, Buffer buffer, IndexTuple *addon, int curlenaddon) {
vacuumSplitPage(GistVacuum *gv, Page tempPage, Buffer buffer, IndexTuple *addon, int curlenaddon)
{
ArrayTuple res = {NULL, 0, false};
IndexTuple *vec;
SplitedPageLayout *dist = NULL,
*ptr;
int i, veclen=0;
BlockNumber blkno = BufferGetBlockNumber(buffer);
*ptr;
int i,
veclen = 0;
BlockNumber blkno = BufferGetBlockNumber(buffer);
MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx);
vec = gistextractpage(tempPage, &veclen);
@@ -158,67 +170,73 @@ vacuumSplitPage(GistVacuum *gv, Page tempPage, Buffer buffer, IndexTuple *addon,
MemoryContextSwitchTo(oldCtx);
if (blkno != GIST_ROOT_BLKNO) {
if (blkno != GIST_ROOT_BLKNO)
{
/* if non-root split then we should not allocate new buffer */
dist->buffer = buffer;
dist->page = tempPage;
/* during vacuum we never split leaf page */
GistPageGetOpaque(dist->page)->flags = 0;
} else
}
else
pfree(tempPage);
res.itup = (IndexTuple *) palloc(sizeof(IndexTuple) * veclen);
res.ituplen = 0;
/* make new pages and fill them */
for (ptr = dist; ptr; ptr = ptr->next) {
char *data;
for (ptr = dist; ptr; ptr = ptr->next)
{
char *data;
if ( ptr->buffer == InvalidBuffer ) {
ptr->buffer = gistNewBuffer( gv->index );
GISTInitBuffer( ptr->buffer, 0 );
if (ptr->buffer == InvalidBuffer)
{
ptr->buffer = gistNewBuffer(gv->index);
GISTInitBuffer(ptr->buffer, 0);
ptr->page = BufferGetPage(ptr->buffer);
}
ptr->block.blkno = BufferGetBlockNumber( ptr->buffer );
ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
data = (char*)(ptr->list);
for(i=0;i<ptr->block.num;i++) {
if ( PageAddItem(ptr->page, (Item)data, IndexTupleSize((IndexTuple)data), i+FirstOffsetNumber, LP_USED) == InvalidOffsetNumber )
data = (char *) (ptr->list);
for (i = 0; i < ptr->block.num; i++)
{
if (PageAddItem(ptr->page, (Item) data, IndexTupleSize((IndexTuple) data), i + FirstOffsetNumber, LP_USED) == InvalidOffsetNumber)
elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(gv->index));
data += IndexTupleSize((IndexTuple)data);
data += IndexTupleSize((IndexTuple) data);
}
ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
res.itup[ res.ituplen ] = (IndexTuple)palloc(IndexTupleSize(ptr->itup));
memcpy( res.itup[ res.ituplen ], ptr->itup, IndexTupleSize(ptr->itup) );
res.itup[res.ituplen] = (IndexTuple) palloc(IndexTupleSize(ptr->itup));
memcpy(res.itup[res.ituplen], ptr->itup, IndexTupleSize(ptr->itup));
res.ituplen++;
}
START_CRIT_SECTION();
for (ptr = dist; ptr; ptr = ptr->next) {
for (ptr = dist; ptr; ptr = ptr->next)
{
MarkBufferDirty(ptr->buffer);
GistPageGetOpaque(ptr->page)->rightlink = InvalidBlockNumber;
}
/* restore the split non-root page */
if (blkno != GIST_ROOT_BLKNO) {
PageRestoreTempPage( dist->page, BufferGetPage( dist->buffer ) );
dist->page = BufferGetPage( dist->buffer );
if (blkno != GIST_ROOT_BLKNO)
{
PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
dist->page = BufferGetPage(dist->buffer);
}
if (!gv->index->rd_istemp)
{
XLogRecPtr recptr;
XLogRecData *rdata;
ItemPointerData key; /* set key for incomplete
* insert */
ItemPointerData key; /* set key for incomplete insert */
char *xlinfo;
ItemPointerSet(&key, blkno, TUPLE_IS_VALID);
rdata = formSplitRdata(gv->index->rd_node, blkno,
false, &key, dist);
false, &key, dist);
xlinfo = rdata->data;
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);
@@ -241,13 +259,12 @@ vacuumSplitPage(GistVacuum *gv, Page tempPage, Buffer buffer, IndexTuple *addon,
{
/* we must keep the buffer pin on the head page */
if (BufferGetBlockNumber(ptr->buffer) != blkno)
UnlockReleaseBuffer( ptr->buffer );
UnlockReleaseBuffer(ptr->buffer);
}
if (blkno == GIST_ROOT_BLKNO)
{
ItemPointerData key; /* set key for incomplete
* insert */
ItemPointerData key; /* set key for incomplete insert */
ItemPointerSet(&key, blkno, TUPLE_IS_VALID);
@@ -266,7 +283,8 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
{
ArrayTuple res = {NULL, 0, false};
Buffer buffer;
Page page, tempPage = NULL;
Page page,
tempPage = NULL;
OffsetNumber i,
maxoff;
ItemId iid;
@@ -278,7 +296,7 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
*addon = NULL;
bool needwrite = false;
OffsetNumber offToDelete[MaxOffsetNumber];
BlockNumber blkToDelete[MaxOffsetNumber];
BlockNumber blkToDelete[MaxOffsetNumber];
ItemPointerData *completed = NULL;
int ncompleted = 0,
lencompleted = 16;
@@ -322,7 +340,7 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
if (chldtuple.ituplen || chldtuple.emptypage)
{
/* update tuple or/and inserts new */
if ( chldtuple.emptypage )
if (chldtuple.emptypage)
blkToDelete[nBlkToDelete++] = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
offToDelete[nOffToDelete++] = i;
PageIndexTupleDelete(tempPage, i);
@@ -333,7 +351,7 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
if (chldtuple.ituplen)
{
Assert( chldtuple.emptypage == false );
Assert(chldtuple.emptypage == false);
while (curlenaddon + chldtuple.ituplen >= lenaddon)
{
lenaddon *= 2;
@@ -367,56 +385,63 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
}
}
}
Assert( maxoff == PageGetMaxOffsetNumber(tempPage) );
Assert(maxoff == PageGetMaxOffsetNumber(tempPage));
if (curlenaddon)
{
/* insert updated tuples */
if (gistnospace(tempPage, addon, curlenaddon, InvalidOffsetNumber, 0)) {
if (gistnospace(tempPage, addon, curlenaddon, InvalidOffsetNumber, 0))
{
/* there is no space on page to insert tuples */
res = vacuumSplitPage(gv, tempPage, buffer, addon, curlenaddon);
tempPage=NULL; /* vacuumSplitPage() free tempPage */
needwrite = needunion = false; /* gistSplit already forms unions and writes pages */
} else
tempPage = NULL; /* vacuumSplitPage() frees tempPage */
needwrite = needunion = false; /* gistSplit already forms
* unions and writes pages */
}
else
/* enough free space */
gistfillbuffer(gv->index, tempPage, addon, curlenaddon, InvalidOffsetNumber);
}
}
/*
* If page is empty, we should remove pointer to it before
* deleting page (except root)
/*
* If page is empty, we should remove pointer to it before deleting page
* (except root)
*/
if ( blkno != GIST_ROOT_BLKNO && ( PageIsEmpty(page) || (tempPage && PageIsEmpty(tempPage)) ) ) {
if (blkno != GIST_ROOT_BLKNO && (PageIsEmpty(page) || (tempPage && PageIsEmpty(tempPage))))
{
/*
* New version of page is empty, so leave it unchanged,
* upper call will mark our page as deleted.
* In case of page split we never will be here...
* The new version of the page is empty, so leave it unchanged; the upper
* call will mark our page as deleted. In case of a page split we never
* get here...
*
* If page was empty it can't become non-empty during processing
* If page was empty it can't become non-empty during processing
*/
res.emptypage = true;
UnlockReleaseBuffer(buffer);
} else {
}
else
{
/* write the page and remove its children if needed */
START_CRIT_SECTION();
if ( tempPage && needwrite ) {
if (tempPage && needwrite)
{
PageRestoreTempPage(tempPage, page);
tempPage = NULL;
}
/* Empty index */
if (PageIsEmpty(page) && blkno == GIST_ROOT_BLKNO )
/* Empty index */
if (PageIsEmpty(page) && blkno == GIST_ROOT_BLKNO)
{
needwrite = true;
GistPageSetLeaf(page);
}
if (needwrite)
{
MarkBufferDirty(buffer);
@@ -446,7 +471,7 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
END_CRIT_SECTION();
if ( needunion && !PageIsEmpty(page) )
if (needunion && !PageIsEmpty(page))
{
res.itup = (IndexTuple *) palloc(sizeof(IndexTuple));
res.ituplen = 1;
@@ -456,7 +481,7 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
UnlockReleaseBuffer(buffer);
/* delete empty children; we no longer have any links to the pointed-to subtrees */
for(i=0;i<nBlkToDelete;i++)
for (i = 0; i < nBlkToDelete; i++)
gistDeleteSubtree(gv, blkToDelete[i]);
if (ncompleted && !gv->index->rd_istemp)
@@ -506,9 +531,10 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
/* use heap's tuple count */
Assert(info->num_heap_tuples >= 0);
stats->std.num_index_tuples = info->num_heap_tuples;
/*
* XXX the above is wrong if index is partial. Would it be OK to
* just return NULL, or is there work we must do below?
* XXX the above is wrong if index is partial. Would it be OK to just
* return NULL, or is there work we must do below?
*/
}
@@ -545,8 +571,8 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
RelationGetRelationName(rel))));
/*
* If vacuum full, we already have exclusive lock on the index.
* Otherwise, need lock unless it's local to this backend.
* If vacuum full, we already have exclusive lock on the index. Otherwise,
* need lock unless it's local to this backend.
*/
if (info->vacuum_full)
needLock = false;
@@ -725,7 +751,7 @@ gistbulkdelete(PG_FUNCTION_ARGS)
if (callback(&(idxtuple->t_tid), callback_state))
{
todelete[ntodelete] = i-ntodelete;
todelete[ntodelete] = i - ntodelete;
ntodelete++;
stats->std.tuples_removed += 1;
}
@@ -739,7 +765,7 @@ gistbulkdelete(PG_FUNCTION_ARGS)
MarkBufferDirty(buffer);
for(i=0;i<ntodelete;i++)
for (i = 0; i < ntodelete; i++)
PageIndexTupleDelete(page, todelete[i]);
GistMarkTuplesDeleted(page);
@@ -750,7 +776,7 @@ gistbulkdelete(PG_FUNCTION_ARGS)
gistxlogPageUpdate *xlinfo;
rdata = formUpdateRdata(rel->rd_node, buffer,
todelete, ntodelete,
todelete, ntodelete,
NULL, 0,
NULL);
xlinfo = (gistxlogPageUpdate *) rdata->next->data;
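/*
 * A standalone sketch of the offset bookkeeping gistbulkdelete() does above:
 * each victim is recorded as "todelete[ntodelete] = i - ntodelete" because
 * PageIndexTupleDelete() compacts the page, so every later victim shifts left
 * by the number of items already removed.  A 0-based int array stands in for
 * the page here (the real code uses 1-based offsets and the page API).
 */
#include <stdio.h>

#define NITEMS 8

/* compacting delete: remove the element at off and shift the tail left */
static void
page_delete(int *items, int *nitems, int off)
{
    int j;

    for (j = off; j < *nitems - 1; j++)
        items[j] = items[j + 1];
    (*nitems)--;
}

int
main(void)
{
    int items[NITEMS] = {10, 11, 12, 13, 14, 15, 16, 17};
    int nitems = NITEMS;
    int todelete[NITEMS];
    int ntodelete = 0;
    int i;

    /* pass 1: decide what dies (every even value), recording the position
     * the item will have once earlier deletions have compacted the array */
    for (i = 0; i < nitems; i++)
    {
        if (items[i] % 2 == 0)
        {
            todelete[ntodelete] = i - ntodelete;
            ntodelete++;
        }
    }

    /* pass 2: apply the deletions in order */
    for (i = 0; i < ntodelete; i++)
        page_delete(items, &nitems, todelete[i]);

    for (i = 0; i < nitems; i++)
        printf("%d ", items[i]);    /* prints: 11 13 15 17 */
    printf("\n");
    return 0;
}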

View File

@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistxlog.c,v 1.23 2006/08/07 16:57:56 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistxlog.c,v 1.24 2006/10/04 00:29:48 momjian Exp $
*-------------------------------------------------------------------------
*/
#include "postgres.h"
@@ -55,11 +55,11 @@ typedef struct gistIncompleteInsert
static MemoryContext opCtx; /* working memory for operations */
static MemoryContext insertCtx; /* holds incomplete_inserts list */
static MemoryContext insertCtx; /* holds incomplete_inserts list */
static List *incomplete_inserts;
#define ItemPointerEQ(a, b) \
#define ItemPointerEQ(a, b) \
( ItemPointerGetOffsetNumber(a) == ItemPointerGetOffsetNumber(b) && \
ItemPointerGetBlockNumber (a) == ItemPointerGetBlockNumber(b) )
@@ -72,8 +72,9 @@ pushIncompleteInsert(RelFileNode node, XLogRecPtr lsn, ItemPointerData key,
MemoryContext oldCxt;
gistIncompleteInsert *ninsert;
if ( !ItemPointerIsValid(&key) )
/*
if (!ItemPointerIsValid(&key))
/*
* if key is null then we should not store insertion as incomplete,
* because it's a vacuum operation..
*/
@@ -108,8 +109,8 @@ pushIncompleteInsert(RelFileNode node, XLogRecPtr lsn, ItemPointerData key,
/*
* Stick the new incomplete insert onto the front of the list, not the
* back. This is so that gist_xlog_cleanup will process incompletions
* in last-in-first-out order.
* back. This is so that gist_xlog_cleanup will process incompletions in
* last-in-first-out order.
*/
incomplete_inserts = lcons(ninsert, incomplete_inserts);
@@ -121,10 +122,10 @@ forgetIncompleteInsert(RelFileNode node, ItemPointerData key)
{
ListCell *l;
if ( !ItemPointerIsValid(&key) )
if (!ItemPointerIsValid(&key))
return;
if (incomplete_inserts==NIL)
if (incomplete_inserts == NIL)
return;
foreach(l, incomplete_inserts)
@@ -241,9 +242,12 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record, bool isnewroot)
if (GistPageIsLeaf(page) && xlrec.len == 0 && xlrec.data->ntodelete == 0)
GistClearTuplesDeleted(page);
if ( !GistPageIsLeaf(page) && PageGetMaxOffsetNumber(page) == InvalidOffsetNumber && xldata->blkno == GIST_ROOT_BLKNO )
/* all links on non-leaf root page was deleted by vacuum full,
so root page becomes a leaf */
if (!GistPageIsLeaf(page) && PageGetMaxOffsetNumber(page) == InvalidOffsetNumber && xldata->blkno == GIST_ROOT_BLKNO)
/*
* all links on the non-leaf root page were deleted by vacuum full, so the
* root page becomes a leaf
*/
GistPageSetLeaf(page);
GistPageGetOpaque(page)->rightlink = InvalidBlockNumber;
@@ -432,11 +436,11 @@ static void
out_target(StringInfo buf, RelFileNode node, ItemPointerData key)
{
appendStringInfo(buf, "rel %u/%u/%u",
node.spcNode, node.dbNode, node.relNode);
if ( ItemPointerIsValid( &key ) )
node.spcNode, node.dbNode, node.relNode);
if (ItemPointerIsValid(&key))
appendStringInfo(buf, "; tid %u/%u",
ItemPointerGetBlockNumber(&key),
ItemPointerGetOffsetNumber(&key));
ItemPointerGetBlockNumber(&key),
ItemPointerGetOffsetNumber(&key));
}
static void
@@ -450,8 +454,8 @@ static void
out_gistxlogPageDelete(StringInfo buf, gistxlogPageDelete *xlrec)
{
appendStringInfo(buf, "page_delete: rel %u/%u/%u; blkno %u",
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
xlrec->blkno);
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
xlrec->blkno);
}
static void
@@ -460,7 +464,7 @@ out_gistxlogPageSplit(StringInfo buf, gistxlogPageSplit *xlrec)
appendStringInfo(buf, "page_split: ");
out_target(buf, xlrec->node, xlrec->key);
appendStringInfo(buf, "; block number %u splits to %d pages",
xlrec->origblkno, xlrec->npage);
xlrec->origblkno, xlrec->npage);
}
void
@@ -486,15 +490,15 @@ gist_desc(StringInfo buf, uint8 xl_info, char *rec)
break;
case XLOG_GIST_CREATE_INDEX:
appendStringInfo(buf, "create_index: rel %u/%u/%u",
((RelFileNode *) rec)->spcNode,
((RelFileNode *) rec)->dbNode,
((RelFileNode *) rec)->relNode);
((RelFileNode *) rec)->spcNode,
((RelFileNode *) rec)->dbNode,
((RelFileNode *) rec)->relNode);
break;
case XLOG_GIST_INSERT_COMPLETE:
appendStringInfo(buf, "complete_insert: rel %u/%u/%u",
((gistxlogInsertComplete *) rec)->node.spcNode,
((gistxlogInsertComplete *) rec)->node.dbNode,
((gistxlogInsertComplete *) rec)->node.relNode);
((gistxlogInsertComplete *) rec)->node.spcNode,
((gistxlogInsertComplete *) rec)->node.dbNode,
((gistxlogInsertComplete *) rec)->node.relNode);
break;
default:
appendStringInfo(buf, "unknown gist op code %u", info);
@@ -547,22 +551,25 @@ gistxlogFindPath(Relation index, gistIncompleteInsert *insert)
elog(ERROR, "lost parent for block %u", insert->origblkno);
}
static SplitedPageLayout*
gistMakePageLayout(Buffer *buffers, int nbuffers) {
SplitedPageLayout *res=NULL, *resptr;
static SplitedPageLayout *
gistMakePageLayout(Buffer *buffers, int nbuffers)
{
SplitedPageLayout *res = NULL,
*resptr;
while( nbuffers-- > 0 ) {
Page page = BufferGetPage( buffers[ nbuffers ] );
IndexTuple* vec;
int veclen;
while (nbuffers-- > 0)
{
Page page = BufferGetPage(buffers[nbuffers]);
IndexTuple *vec;
int veclen;
resptr = (SplitedPageLayout*)palloc0( sizeof(SplitedPageLayout) );
resptr = (SplitedPageLayout *) palloc0(sizeof(SplitedPageLayout));
resptr->block.blkno = BufferGetBlockNumber( buffers[ nbuffers ] );
resptr->block.num = PageGetMaxOffsetNumber( page );
resptr->block.blkno = BufferGetBlockNumber(buffers[nbuffers]);
resptr->block.num = PageGetMaxOffsetNumber(page);
vec = gistextractpage( page, &veclen );
resptr->list = gistfillitupvec( vec, veclen, &(resptr->lenlist) );
vec = gistextractpage(page, &veclen);
resptr->list = gistfillitupvec(vec, veclen, &(resptr->lenlist));
resptr->next = res;
res = resptr;
@@ -580,7 +587,7 @@ gistMakePageLayout(Buffer *buffers, int nbuffers) {
* Note that we assume the index is now in a valid state, except for the
* unfinished insertion. In particular it's safe to invoke gistFindPath();
* there shouldn't be any garbage pages for it to run into.
*
*
* To complete insert we can't use basic insertion algorithm because
* during insertion we can't call user-defined support functions of opclass.
* So, we insert 'invalid' tuples without real key and do it by separate algorithm.
@@ -607,7 +614,7 @@ gistContinueInsert(gistIncompleteInsert *insert)
itup[i] = gist_form_invalid_tuple(insert->blkno[i]);
/*
* any insertion of itup[] should make LOG message about
* any insertion of itup[] should make LOG message about
*/
if (insert->origblkno == GIST_ROOT_BLKNO)
@@ -626,7 +633,7 @@ gistContinueInsert(gistIncompleteInsert *insert)
Buffer *buffers;
Page *pages;
int numbuffer;
OffsetNumber *todelete;
OffsetNumber *todelete;
/* construct path */
gistxlogFindPath(index, insert);
@@ -642,21 +649,22 @@ gistContinueInsert(gistIncompleteInsert *insert)
int j,
k,
pituplen = 0;
XLogRecData *rdata;
XLogRecPtr recptr;
Buffer tempbuffer = InvalidBuffer;
int ntodelete = 0;
XLogRecData *rdata;
XLogRecPtr recptr;
Buffer tempbuffer = InvalidBuffer;
int ntodelete = 0;
numbuffer = 1;
buffers[0] = ReadBuffer(index, insert->path[i]);
LockBuffer(buffers[0], GIST_EXCLUSIVE);
/*
* we check buffer, because we restored page earlier
*/
gistcheckpage(index, buffers[0]);
pages[0] = BufferGetPage(buffers[0]);
Assert( !GistPageIsLeaf(pages[0]) );
Assert(!GistPageIsLeaf(pages[0]));
pituplen = PageGetMaxOffsetNumber(pages[0]);
@@ -678,12 +686,12 @@ gistContinueInsert(gistIncompleteInsert *insert)
}
}
if ( ntodelete == 0 )
elog(PANIC,"gistContinueInsert: can't find pointer to page(s)");
if (ntodelete == 0)
elog(PANIC, "gistContinueInsert: can't find pointer to page(s)");
/*
* we check space with subtraction only first tuple to delete, hope,
* that wiil be enough space....
* we check the space subtracting only the first tuple to delete,
* hoping that will be enough space....
*/
if (gistnospace(pages[0], itup, lenitup, *todelete, 0))
@@ -699,7 +707,7 @@ gistContinueInsert(gistIncompleteInsert *insert)
if (BufferGetBlockNumber(buffers[0]) == GIST_ROOT_BLKNO)
{
Buffer tmp;
Buffer tmp;
/*
* we split root, just copy content from root to new page
@@ -713,44 +721,48 @@ gistContinueInsert(gistIncompleteInsert *insert)
/* fill new page, root will be changed later */
tempbuffer = ReadBuffer(index, P_NEW);
LockBuffer(tempbuffer, GIST_EXCLUSIVE);
memcpy( BufferGetPage(tempbuffer), pages[0], BufferGetPageSize(tempbuffer) );
memcpy(BufferGetPage(tempbuffer), pages[0], BufferGetPageSize(tempbuffer));
/* swap buffers[0] (was root) and temp buffer */
tmp = buffers[0];
buffers[0] = tempbuffer;
tempbuffer = tmp; /* now in tempbuffer GIST_ROOT_BLKNO, it is still unchanged */
tempbuffer = tmp; /* now in tempbuffer GIST_ROOT_BLKNO,
* it is still unchanged */
pages[0] = BufferGetPage(buffers[0]);
}
START_CRIT_SECTION();
for(j=0;j<ntodelete;j++)
for (j = 0; j < ntodelete; j++)
PageIndexTupleDelete(pages[0], todelete[j]);
rdata = formSplitRdata(index->rd_node, insert->path[i],
false, &(insert->key),
gistMakePageLayout( buffers, numbuffer ) );
false, &(insert->key),
gistMakePageLayout(buffers, numbuffer));
} else {
}
else
{
START_CRIT_SECTION();
for(j=0;j<ntodelete;j++)
for (j = 0; j < ntodelete; j++)
PageIndexTupleDelete(pages[0], todelete[j]);
gistfillbuffer(index, pages[0], itup, lenitup, InvalidOffsetNumber);
rdata = formUpdateRdata(index->rd_node, buffers[0],
todelete, ntodelete,
itup, lenitup, &(insert->key));
rdata = formUpdateRdata(index->rd_node, buffers[0],
todelete, ntodelete,
itup, lenitup, &(insert->key));
}
/*
* use insert->key as mark for completion of insert (form*Rdata() above)
* for following possible replays
/*
* use insert->key as mark for completion of insert (form*Rdata()
* above) for following possible replays
*/
/* write pages; we should mark them dirty before XLogInsert() */
for (j = 0; j < numbuffer; j++) {
for (j = 0; j < numbuffer; j++)
{
GistPageGetOpaque(pages[j])->rightlink = InvalidBlockNumber;
MarkBufferDirty(buffers[j]);
}
@@ -764,12 +776,14 @@ gistContinueInsert(gistIncompleteInsert *insert)
END_CRIT_SECTION();
lenitup = numbuffer;
for (j = 0; j < numbuffer; j++) {
for (j = 0; j < numbuffer; j++)
{
itup[j] = gist_form_invalid_tuple(BufferGetBlockNumber(buffers[j]));
UnlockReleaseBuffer(buffers[j]);
}
if ( tempbuffer != InvalidBuffer ) {
if (tempbuffer != InvalidBuffer)
{
/*
* it was a root split, so fill it by new values
*/
@@ -780,9 +794,9 @@ gistContinueInsert(gistIncompleteInsert *insert)
}
ereport(LOG,
(errmsg("index %u/%u/%u needs VACUUM FULL or REINDEX to finish crash recovery",
(errmsg("index %u/%u/%u needs VACUUM FULL or REINDEX to finish crash recovery",
insert->node.spcNode, insert->node.dbNode, insert->node.relNode),
errdetail("Incomplete insertion detected during crash replay.")));
errdetail("Incomplete insertion detected during crash replay.")));
}
void
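/*
 * A small sketch of the last-in-first-out bookkeeping pushIncompleteInsert()
 * relies on above: new entries are prepended (lcons), so walking the list
 * from the head processes the newest incomplete insertion first.  An
 * ordinary singly linked list stands in for the backend's List type; the
 * struct and names are illustrative only.
 */
#include <stdio.h>
#include <stdlib.h>

typedef struct Insert
{
    int            blkno;       /* stand-in for one incomplete insertion */
    struct Insert *next;
} Insert;

/* prepend, like lcons(): the newest entry becomes the list head */
static Insert *
push_incomplete(Insert *head, int blkno)
{
    Insert *n = (Insert *) malloc(sizeof(Insert));

    n->blkno = blkno;
    n->next = head;
    return n;
}

int
main(void)
{
    Insert *list = NULL;
    Insert *p;
    int     blkno;

    /* replay records arrive in order 1, 2, 3 ... */
    for (blkno = 1; blkno <= 3; blkno++)
        list = push_incomplete(list, blkno);

    /* ... and cleanup walks the list head-first, i.e. newest first */
    for (p = list; p != NULL; p = p->next)
        printf("finishing insert for block %d\n", p->blkno);    /* 3, 2, 1 */
    return 0;
}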

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.47 2006/03/05 15:58:20 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.48 2006/10/04 00:29:48 momjian Exp $
*
* NOTES
* These functions are stored in pg_amproc. For each operator class
@@ -138,9 +138,9 @@ hashtext(PG_FUNCTION_ARGS)
Datum result;
/*
* Note: this is currently identical in behavior to hashvarlena, but
* keep it as a separate function in case we someday want to do something
* different in non-C locales. (See also hashbpchar, if so.)
* Note: this is currently identical in behavior to hashvarlena, but keep
* it as a separate function in case we someday want to do something
* different in non-C locales. (See also hashbpchar, if so.)
*/
result = hash_any((unsigned char *) VARDATA(key),
VARSIZE(key) - VARHDRSZ);

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.59 2006/07/03 22:45:36 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.60 2006/10/04 00:29:48 momjian Exp $
*
* NOTES
* Postgres hash pages look like ordinary relation pages. The opaque
@@ -224,7 +224,7 @@ _hash_metapinit(Relation rel)
/*
* Determine the target fill factor (in tuples per bucket) for this index.
* The idea is to make the fill factor correspond to pages about as full
* as the user-settable fillfactor parameter says. We can compute it
* as the user-settable fillfactor parameter says. We can compute it
* exactly if the index datatype is fixed-width, but for var-width there's
* some guessing involved.
*/
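/*
 * A rough worked example of the fill-factor arithmetic the comment above
 * describes: turn the user-settable fillfactor percentage into a target
 * number of fixed-width tuples per bucket.  The constants are illustrative,
 * not the values _hash_metapinit() actually derives from the page layout.
 */
#include <stdio.h>

static int
target_tuples_per_bucket(int usable_page_bytes, int item_bytes,
                         int fillfactor_pct)
{
    return (usable_page_bytes * fillfactor_pct / 100) / item_bytes;
}

int
main(void)
{
    /* e.g. ~8kB page minus headers, 20-byte index entries, fillfactor 75 */
    printf("%d tuples per bucket\n",
           target_tuples_per_bucket(8000, 20, 75));     /* prints 300 */
    return 0;
}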

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.219 2006/08/18 16:09:08 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.220 2006/10/04 00:29:48 momjian Exp $
*
*
* INTERFACE ROUTINES
@@ -133,9 +133,9 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
snapshot = scan->rs_snapshot;
/*
* We must hold share lock on the buffer content while examining
* tuple visibility. Afterwards, however, the tuples we have found
* to be visible are guaranteed good as long as we hold the buffer pin.
* We must hold share lock on the buffer content while examining tuple
* visibility. Afterwards, however, the tuples we have found to be
* visible are guaranteed good as long as we hold the buffer pin.
*/
LockBuffer(buffer, BUFFER_LOCK_SHARE);
@@ -223,7 +223,7 @@ heapgettup(HeapScanDesc scan,
tuple->t_data = NULL;
return;
}
page = 0; /* first page */
page = 0; /* first page */
heapgetpage(scan, page);
lineoff = FirstOffsetNumber; /* first offnum */
scan->rs_inited = true;
@@ -231,8 +231,8 @@ heapgettup(HeapScanDesc scan,
else
{
/* continue from previously returned page/tuple */
page = scan->rs_cblock; /* current page */
lineoff = /* next offnum */
page = scan->rs_cblock; /* current page */
lineoff = /* next offnum */
OffsetNumberNext(ItemPointerGetOffsetNumber(&(tuple->t_self)));
}
@@ -263,7 +263,7 @@ heapgettup(HeapScanDesc scan,
else
{
/* continue from previously returned page/tuple */
page = scan->rs_cblock; /* current page */
page = scan->rs_cblock; /* current page */
}
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
@@ -273,12 +273,12 @@ heapgettup(HeapScanDesc scan,
if (!scan->rs_inited)
{
lineoff = lines; /* final offnum */
lineoff = lines; /* final offnum */
scan->rs_inited = true;
}
else
{
lineoff = /* previous offnum */
lineoff = /* previous offnum */
OffsetNumberPrev(ItemPointerGetOffsetNumber(&(tuple->t_self)));
}
/* page and lineoff now reference the physically previous tid */
@@ -450,7 +450,7 @@ heapgettup_pagemode(HeapScanDesc scan,
tuple->t_data = NULL;
return;
}
page = 0; /* first page */
page = 0; /* first page */
heapgetpage(scan, page);
lineindex = 0;
scan->rs_inited = true;
@@ -458,7 +458,7 @@ heapgettup_pagemode(HeapScanDesc scan,
else
{
/* continue from previously returned page/tuple */
page = scan->rs_cblock; /* current page */
page = scan->rs_cblock; /* current page */
lineindex = scan->rs_cindex + 1;
}
@@ -487,7 +487,7 @@ heapgettup_pagemode(HeapScanDesc scan,
else
{
/* continue from previously returned page/tuple */
page = scan->rs_cblock; /* current page */
page = scan->rs_cblock; /* current page */
}
dp = (Page) BufferGetPage(scan->rs_cbuf);
@@ -721,8 +721,8 @@ try_relation_open(Oid relationId, LOCKMODE lockmode)
LockRelationOid(relationId, lockmode);
/*
* Now that we have the lock, probe to see if the relation really
* exists or not.
* Now that we have the lock, probe to see if the relation really exists
* or not.
*/
if (!SearchSysCacheExists(RELOID,
ObjectIdGetDatum(relationId),
@@ -764,7 +764,7 @@ relation_open_nowait(Oid relationId, LOCKMODE lockmode)
if (!ConditionalLockRelationOid(relationId, lockmode))
{
/* try to throw error by name; relation could be deleted... */
char *relname = get_rel_name(relationId);
char *relname = get_rel_name(relationId);
if (relname)
ereport(ERROR,
@@ -774,8 +774,8 @@ relation_open_nowait(Oid relationId, LOCKMODE lockmode)
else
ereport(ERROR,
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
errmsg("could not obtain lock on relation with OID %u",
relationId)));
errmsg("could not obtain lock on relation with OID %u",
relationId)));
}
}
@@ -801,8 +801,8 @@ relation_openrv(const RangeVar *relation, LOCKMODE lockmode)
/*
* Check for shared-cache-inval messages before trying to open the
* relation. This is needed to cover the case where the name identifies
* a rel that has been dropped and recreated since the start of our
* relation. This is needed to cover the case where the name identifies a
* rel that has been dropped and recreated since the start of our
* transaction: if we don't flush the old syscache entry then we'll latch
* onto that entry and suffer an error when we do RelationIdGetRelation.
* Note that relation_open does not need to do this, since a relation's
@@ -2723,7 +2723,7 @@ l3:
* heap_inplace_update - update a tuple "in place" (ie, overwrite it)
*
* Overwriting violates both MVCC and transactional safety, so the uses
* of this function in Postgres are extremely limited. Nonetheless we
* of this function in Postgres are extremely limited. Nonetheless we
* find some places to use it.
*
* The tuple cannot change size, and therefore it's reasonable to assume
@@ -2840,6 +2840,7 @@ heap_restrpos(HeapScanDesc scan)
if (!ItemPointerIsValid(&scan->rs_mctid))
{
scan->rs_ctup.t_data = NULL;
/*
* unpin scan buffers
*/
@@ -2852,7 +2853,7 @@ heap_restrpos(HeapScanDesc scan)
else
{
/*
* If we reached end of scan, rs_inited will now be false. We must
* If we reached end of scan, rs_inited will now be false. We must
* reset it to true to keep heapgettup from doing the wrong thing.
*/
scan->rs_inited = true;
@@ -2862,13 +2863,13 @@ heap_restrpos(HeapScanDesc scan)
scan->rs_cindex = scan->rs_mindex;
heapgettup_pagemode(scan,
NoMovementScanDirection,
0, /* needn't recheck scan keys */
0, /* needn't recheck scan keys */
NULL);
}
else
heapgettup(scan,
NoMovementScanDirection,
0, /* needn't recheck scan keys */
0, /* needn't recheck scan keys */
NULL);
}
}
@@ -2920,7 +2921,7 @@ log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *unused, int uncnt)
}
/*
* Perform XLogInsert for a heap-update operation. Caller must already
* Perform XLogInsert for a heap-update operation. Caller must already
* have modified the buffer(s) and marked them dirty.
*/
static XLogRecPtr
@@ -3173,8 +3174,8 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
if (record->xl_info & XLOG_HEAP_INIT_PAGE)
{
buffer = XLogReadBuffer(reln,
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
true);
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
true);
Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer);
@@ -3183,13 +3184,13 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
else
{
buffer = XLogReadBuffer(reln,
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
false);
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
false);
if (!BufferIsValid(buffer))
return;
page = (Page) BufferGetPage(buffer);
if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */
if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */
{
UnlockReleaseBuffer(buffer);
return;
@@ -3308,6 +3309,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool move)
/* Set forward chain link in t_ctid */
htup->t_ctid = xlrec->newtid;
}
/*
* this test is ugly, but necessary to avoid thinking that insert change
* is already applied
@@ -3345,7 +3347,7 @@ newt:;
return;
page = (Page) BufferGetPage(buffer);
if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */
if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */
{
UnlockReleaseBuffer(buffer);
return;
@@ -3548,9 +3550,9 @@ static void
out_target(StringInfo buf, xl_heaptid *target)
{
appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
target->node.spcNode, target->node.dbNode, target->node.relNode,
ItemPointerGetBlockNumber(&(target->tid)),
ItemPointerGetOffsetNumber(&(target->tid)));
target->node.spcNode, target->node.dbNode, target->node.relNode,
ItemPointerGetBlockNumber(&(target->tid)),
ItemPointerGetOffsetNumber(&(target->tid)));
}
void
@@ -3586,8 +3588,8 @@ heap_desc(StringInfo buf, uint8 xl_info, char *rec)
appendStringInfo(buf, "update: ");
out_target(buf, &(xlrec->target));
appendStringInfo(buf, "; new %u/%u",
ItemPointerGetBlockNumber(&(xlrec->newtid)),
ItemPointerGetOffsetNumber(&(xlrec->newtid)));
ItemPointerGetBlockNumber(&(xlrec->newtid)),
ItemPointerGetOffsetNumber(&(xlrec->newtid)));
}
else if (info == XLOG_HEAP_MOVE)
{
@@ -3599,24 +3601,24 @@ heap_desc(StringInfo buf, uint8 xl_info, char *rec)
appendStringInfo(buf, "move: ");
out_target(buf, &(xlrec->target));
appendStringInfo(buf, "; new %u/%u",
ItemPointerGetBlockNumber(&(xlrec->newtid)),
ItemPointerGetOffsetNumber(&(xlrec->newtid)));
ItemPointerGetBlockNumber(&(xlrec->newtid)),
ItemPointerGetOffsetNumber(&(xlrec->newtid)));
}
else if (info == XLOG_HEAP_CLEAN)
{
xl_heap_clean *xlrec = (xl_heap_clean *) rec;
appendStringInfo(buf, "clean: rel %u/%u/%u; blk %u",
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode, xlrec->block);
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode, xlrec->block);
}
else if (info == XLOG_HEAP_NEWPAGE)
{
xl_heap_newpage *xlrec = (xl_heap_newpage *) rec;
appendStringInfo(buf, "newpage: rel %u/%u/%u; blk %u",
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode, xlrec->blkno);
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode, xlrec->blkno);
}
else if (info == XLOG_HEAP_LOCK)
{
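/*
 * A simplified sketch of the "changes are applied" test the heap_xlog_*
 * routines make above: redo is idempotent because a page whose LSN is
 * already at or past the record being replayed is left alone.  A plain
 * 64-bit number stands in for XLogRecPtr and a toy struct for the buffer
 * page; the names are invented for illustration.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t Lsn;

typedef struct
{
    Lsn page_lsn;               /* LSN of the last record applied to the page */
    int payload;
} ToyPage;

static void
redo_set_payload(ToyPage *page, Lsn record_lsn, int new_payload)
{
    if (record_lsn <= page->page_lsn)
        return;                 /* changes are already applied: skip */
    page->payload = new_payload;
    page->page_lsn = record_lsn;
}

int
main(void)
{
    ToyPage p = {100, 1};

    redo_set_payload(&p, 90, 42);   /* older record: ignored */
    printf("%d\n", p.payload);      /* 1 */
    redo_set_payload(&p, 110, 42);  /* newer record: applied */
    printf("%d\n", p.payload);      /* 42 */
    return 0;
}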

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.64 2006/09/10 23:33:22 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.65 2006/10/04 00:29:48 momjian Exp $
*
*
* INTERFACE ROUTINES
@@ -1331,7 +1331,7 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length)
VARATT_SIZEP(result) |= VARATT_FLAG_COMPRESSED;
if (length == 0)
return result; /* Can save a lot of work at this point! */
return result; /* Can save a lot of work at this point! */
startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE;
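/*
 * A short worked example of the chunk arithmetic toast_fetch_datum_slice()
 * uses above: a requested byte range maps to every chunk whose range it
 * intersects.  The chunk size below is made up; the real
 * TOAST_MAX_CHUNK_SIZE is derived from the block size.
 */
#include <stdio.h>

#define CHUNK_SIZE 2000

int
main(void)
{
    int sliceoffset = 3500;     /* byte range requested from the toasted value */
    int length = 1000;

    int startchunk = sliceoffset / CHUNK_SIZE;
    int endchunk = (sliceoffset + length - 1) / CHUNK_SIZE;

    printf("need chunks %d through %d\n", startchunk, endchunk);    /* 1..2 */
    return 0;
}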

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/index/genam.c,v 1.58 2006/07/31 20:08:59 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/index/genam.c,v 1.59 2006/10/04 00:29:48 momjian Exp $
*
* NOTES
* many of the old access method routines have been turned into
@@ -86,7 +86,7 @@ RelationGetIndexScan(Relation indexRelation,
else
scan->keyData = NULL;
scan->is_multiscan = false; /* caller may change this */
scan->is_multiscan = false; /* caller may change this */
scan->kill_prior_tuple = false;
scan->ignore_killed_tuples = true; /* default setting */

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.94 2006/07/31 20:08:59 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.95 2006/10/04 00:29:48 momjian Exp $
*
* INTERFACE ROUTINES
* index_open - open an index relation by relation OID
@@ -122,7 +122,7 @@ static IndexScanDesc index_beginscan_internal(Relation indexRelation,
* index_open - open an index relation by relation OID
*
* If lockmode is not "NoLock", the specified kind of lock is
* obtained on the index. (Generally, NoLock should only be
* obtained on the index. (Generally, NoLock should only be
* used if the caller knows it has some appropriate lock on the
* index already.)
*
@@ -209,7 +209,7 @@ index_insert(Relation indexRelation,
* index_getnext on this scan; index_getnext_indexitem will not use the
* heapRelation link (nor the snapshot). However, the caller had better
* be holding some kind of lock on the heap relation in any case, to ensure
* no one deletes it (or the index) out from under us. Caller must also
* no one deletes it (or the index) out from under us. Caller must also
* be holding a lock on the index.
*/
IndexScanDesc
@@ -553,7 +553,7 @@ index_getmulti(IndexScanDesc scan,
*
* callback routine tells whether a given main-heap tuple is
* to be deleted
*
*
* return value is an optional palloc'd struct of statistics
* ----------------
*/

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.143 2006/08/25 04:06:46 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.144 2006/10/04 00:29:48 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -252,7 +252,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
*/
htup.t_self = itup->t_tid;
if (heap_fetch(heapRel, SnapshotSelf, &htup, &hbuffer,
false, NULL))
false, NULL))
{
/* Normal case --- it's still live */
ReleaseBuffer(hbuffer);
@@ -355,7 +355,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
* + updates the metapage if a true root or fast root is split.
*
* On entry, we must have the right buffer in which to do the
* insertion, and the buffer must be pinned and write-locked. On return,
* insertion, and the buffer must be pinned and write-locked. On return,
* we will have dropped both the pin and the lock on the buffer.
*
* If 'afteritem' is >0 then the new tuple must be inserted after the
@@ -608,7 +608,7 @@ _bt_insertonpg(Relation rel,
if (!rel->rd_istemp)
{
xl_btree_insert xlrec;
BlockNumber xldownlink;
BlockNumber xldownlink;
xl_btree_metadata xlmeta;
uint8 xlinfo;
XLogRecPtr recptr;
@@ -888,16 +888,17 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
sopaque = (BTPageOpaque) PageGetSpecialPointer(spage);
if (sopaque->btpo_prev != ropaque->btpo_prev)
elog(PANIC, "right sibling's left-link doesn't match");
/*
* Check to see if we can set the SPLIT_END flag in the right-hand
* split page; this can save some I/O for vacuum since it need not
* proceed to the right sibling. We can set the flag if the right
* sibling has a different cycleid: that means it could not be part
* of a group of pages that were all split off from the same ancestor
* sibling has a different cycleid: that means it could not be part of
* a group of pages that were all split off from the same ancestor
* page. If you're confused, imagine that page A splits to A B and
* then again, yielding A C B, while vacuum is in progress. Tuples
* originally in A could now be in either B or C, hence vacuum must
* examine both pages. But if D, our right sibling, has a different
* examine both pages. But if D, our right sibling, has a different
* cycleid then it could not contain any tuples that were in A when
* the vacuum started.
*/
@@ -911,8 +912,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
*
* NO EREPORT(ERROR) till right sibling is updated. We can get away with
* not starting the critical section till here because we haven't been
* scribbling on the original page yet, and we don't care about the
* new sibling until it's linked into the btree.
* scribbling on the original page yet, and we don't care about the new
* sibling until it's linked into the btree.
*/
START_CRIT_SECTION();
@@ -947,8 +948,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
* Direct access to page is not good but faster - we should implement
* some new func in page API. Note we only store the tuples
* themselves, knowing that the item pointers are in the same order
* and can be reconstructed by scanning the tuples. See comments
* for _bt_restore_page().
* and can be reconstructed by scanning the tuples. See comments for
* _bt_restore_page().
*/
xlrec.leftlen = ((PageHeader) leftpage)->pd_special -
((PageHeader) leftpage)->pd_upper;
@@ -1708,17 +1709,17 @@ _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum,
static void
_bt_vacuum_one_page(Relation rel, Buffer buffer)
{
OffsetNumber deletable[MaxOffsetNumber];
int ndeletable = 0;
OffsetNumber offnum,
minoff,
maxoff;
Page page = BufferGetPage(buffer);
BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
OffsetNumber deletable[MaxOffsetNumber];
int ndeletable = 0;
OffsetNumber offnum,
minoff,
maxoff;
Page page = BufferGetPage(buffer);
BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
/*
* Scan over all items to see which ones need deleted
* according to LP_DELETE flags.
* Scan over all items to see which ones need to be deleted according to
* the LP_DELETE flags.
*/
minoff = P_FIRSTDATAKEY(opaque);
maxoff = PageGetMaxOffsetNumber(page);
@@ -1726,7 +1727,7 @@ _bt_vacuum_one_page(Relation rel, Buffer buffer)
offnum <= maxoff;
offnum = OffsetNumberNext(offnum))
{
ItemId itemId = PageGetItemId(page, offnum);
ItemId itemId = PageGetItemId(page, offnum);
if (ItemIdDeleted(itemId))
deletable[ndeletable++] = offnum;
@@ -1734,10 +1735,11 @@ _bt_vacuum_one_page(Relation rel, Buffer buffer)
if (ndeletable > 0)
_bt_delitems(rel, buffer, deletable, ndeletable);
/*
* Note: if we didn't find any LP_DELETE items, then the page's
* BTP_HAS_GARBAGE hint bit is falsely set. We do not bother
* expending a separate write to clear it, however. We will clear
* it when we split the page.
* BTP_HAS_GARBAGE hint bit is falsely set. We do not bother expending a
* separate write to clear it, however. We will clear it when we split
* the page.
*/
}
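/*
 * A standalone sketch of the collect-then-batch pattern _bt_vacuum_one_page()
 * uses above: gather every item whose LP_DELETE hint is set, then purge them
 * in one batch so the real code can issue a single _bt_delitems() call (and
 * one WAL record) per page.  A plain array stands in for the btree page; the
 * names and layout are illustrative.
 */
#include <stdbool.h>
#include <stdio.h>

#define MAX_ITEMS 8

typedef struct
{
    int  key;
    bool lp_delete;             /* "known dead" hint set by earlier scans */
} ToyItem;

static int
vacuum_one_page(ToyItem *items, int *nitems)
{
    int deletable[MAX_ITEMS];
    int ndeletable = 0;
    int i, d, j;

    /* pass 1: collect hinted-dead slots */
    for (i = 0; i < *nitems; i++)
        if (items[i].lp_delete)
            deletable[ndeletable++] = i;

    /* pass 2: batch delete, walking backwards so earlier indexes stay valid */
    for (d = ndeletable - 1; d >= 0; d--)
    {
        for (j = deletable[d]; j < *nitems - 1; j++)
            items[j] = items[j + 1];
        (*nitems)--;
    }
    return ndeletable;
}

int
main(void)
{
    ToyItem items[MAX_ITEMS] = {{1, false}, {2, true}, {3, false}, {4, true}};
    int     nitems = 4;
    int     i;

    printf("removed %d\n", vacuum_one_page(items, &nitems));    /* removed 2 */
    for (i = 0; i < nitems; i++)
        printf("%d ", items[i].key);                            /* 1 3 */
    printf("\n");
    return 0;
}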

View File

@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.99 2006/07/25 19:13:00 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.100 2006/10/04 00:29:49 momjian Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
@@ -124,10 +124,10 @@ _bt_getroot(Relation rel, int access)
/*
* Since the cache might be stale, we check the page more carefully
* here than normal. We *must* check that it's not deleted.
* If it's not alone on its level, then we reject too --- this
* may be overly paranoid but better safe than sorry. Note we
* don't check P_ISROOT, because that's not set in a "fast root".
* here than normal. We *must* check that it's not deleted. If it's
* not alone on its level, then we reject too --- this may be overly
* paranoid but better safe than sorry. Note we don't check P_ISROOT,
* because that's not set in a "fast root".
*/
if (!P_IGNORE(rootopaque) &&
rootopaque->btpo.level == rootlevel &&
@@ -662,18 +662,18 @@ _bt_delitems(Relation rel, Buffer buf,
PageIndexMultiDelete(page, itemnos, nitems);
/*
* We can clear the vacuum cycle ID since this page has certainly
* been processed by the current vacuum scan.
* We can clear the vacuum cycle ID since this page has certainly been
* processed by the current vacuum scan.
*/
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
opaque->btpo_cycleid = 0;
/*
* Mark the page as not containing any LP_DELETE items. This is not
* certainly true (there might be some that have recently been marked,
* but weren't included in our target-item list), but it will almost
* always be true and it doesn't seem worth an additional page scan
* to check it. Remember that BTP_HAS_GARBAGE is only a hint anyway.
* certainly true (there might be some that have recently been marked, but
* weren't included in our target-item list), but it will almost always be
* true and it doesn't seem worth an additional page scan to check it.
* Remember that BTP_HAS_GARBAGE is only a hint anyway.
*/
opaque->btpo_flags &= ~BTP_HAS_GARBAGE;

View File

@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.151 2006/09/21 20:31:22 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.152 2006/10/04 00:29:49 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -55,7 +55,7 @@ typedef struct
BlockNumber *freePages;
int nFreePages; /* number of entries in freePages[] */
int maxFreePages; /* allocated size of freePages[] */
BlockNumber totFreePages; /* true total # of free pages */
BlockNumber totFreePages; /* true total # of free pages */
MemoryContext pagedelcontext;
} BTVacState;
@@ -70,7 +70,7 @@ static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
IndexBulkDeleteCallback callback, void *callback_state,
BTCycleId cycleid);
static void btvacuumpage(BTVacState *vstate, BlockNumber blkno,
BlockNumber orig_blkno);
BlockNumber orig_blkno);
/*
@@ -109,8 +109,8 @@ btbuild(PG_FUNCTION_ARGS)
buildstate.spool = _bt_spoolinit(index, indexInfo->ii_Unique, false);
/*
* If building a unique index, put dead tuples in a second spool to
* keep them out of the uniqueness check.
* If building a unique index, put dead tuples in a second spool to keep
* them out of the uniqueness check.
*/
if (indexInfo->ii_Unique)
buildstate.spool2 = _bt_spoolinit(index, false, true);
@@ -146,11 +146,11 @@ btbuild(PG_FUNCTION_ARGS)
#endif /* BTREE_BUILD_STATS */
/*
* If we are reindexing a pre-existing index, it is critical to send out
* a relcache invalidation SI message to ensure all backends re-read the
* index metapage. We expect that the caller will ensure that happens
* (typically as a side effect of updating index stats, but it must
* happen even if the stats don't change!)
* If we are reindexing a pre-existing index, it is critical to send out a
* relcache invalidation SI message to ensure all backends re-read the
* index metapage. We expect that the caller will ensure that happens
* (typically as a side effect of updating index stats, but it must happen
* even if the stats don't change!)
*/
/*
@@ -252,11 +252,11 @@ btgettuple(PG_FUNCTION_ARGS)
if (scan->kill_prior_tuple)
{
/*
* Yes, remember it for later. (We'll deal with all such tuples
* Yes, remember it for later. (We'll deal with all such tuples
* at once right before leaving the index page.) The test for
* numKilled overrun is not just paranoia: if the caller reverses
* direction in the indexscan then the same item might get entered
* multiple times. It's not worth trying to optimize that, so we
* multiple times. It's not worth trying to optimize that, so we
* don't detect it, but instead just forget any excess entries.
*/
if (so->killedItems == NULL)
@@ -316,8 +316,8 @@ btgetmulti(PG_FUNCTION_ARGS)
while (ntids < max_tids)
{
/*
* Advance to next tuple within page. This is the same as the
* easy case in _bt_next().
* Advance to next tuple within page. This is the same as the easy
* case in _bt_next().
*/
if (++so->currPos.itemIndex > so->currPos.lastItem)
{
@@ -373,7 +373,7 @@ btrescan(PG_FUNCTION_ARGS)
so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
else
so->keyData = NULL;
so->killedItems = NULL; /* until needed */
so->killedItems = NULL; /* until needed */
so->numKilled = 0;
scan->opaque = so;
}
@@ -461,9 +461,9 @@ btmarkpos(PG_FUNCTION_ARGS)
/*
* Just record the current itemIndex. If we later step to next page
* before releasing the marked position, _bt_steppage makes a full copy
* of the currPos struct in markPos. If (as often happens) the mark is
* moved before we leave the page, we don't have to do that work.
* before releasing the marked position, _bt_steppage makes a full copy of
* the currPos struct in markPos. If (as often happens) the mark is moved
* before we leave the page, we don't have to do that work.
*/
if (BTScanPosIsValid(so->currPos))
so->markItemIndex = so->currPos.itemIndex;
@@ -485,11 +485,11 @@ btrestrpos(PG_FUNCTION_ARGS)
if (so->markItemIndex >= 0)
{
/*
* The mark position is on the same page we are currently on.
* Just restore the itemIndex.
* The mark position is on the same page we are currently on. Just
* restore the itemIndex.
*/
so->currPos.itemIndex = so->markItemIndex;
}
}
else
{
/* we aren't holding any read locks, but gotta drop the pin */
@@ -527,7 +527,7 @@ Datum
btbulkdelete(PG_FUNCTION_ARGS)
{
IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
IndexBulkDeleteResult * volatile stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
IndexBulkDeleteResult *volatile stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2);
void *callback_state = (void *) PG_GETARG_POINTER(3);
Relation rel = info->index;
@@ -569,10 +569,10 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
/*
* If btbulkdelete was called, we need not do anything, just return
* the stats from the latest btbulkdelete call. If it wasn't called,
* we must still do a pass over the index, to recycle any newly-recyclable
* pages and to obtain index statistics.
* If btbulkdelete was called, we need not do anything, just return the
* stats from the latest btbulkdelete call. If it wasn't called, we must
* still do a pass over the index, to recycle any newly-recyclable pages
* and to obtain index statistics.
*
* Since we aren't going to actually delete any leaf items, there's no
* need to go through all the vacuum-cycle-ID pushups.
@@ -586,8 +586,8 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
/*
* During a non-FULL vacuum it's quite possible for us to be fooled by
* concurrent page splits into double-counting some index tuples, so
* disbelieve any total that exceeds the underlying heap's count.
* (We can't check this during btbulkdelete.)
* disbelieve any total that exceeds the underlying heap's count. (We
* can't check this during btbulkdelete.)
*/
if (!info->vacuum_full)
{
@@ -622,8 +622,8 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
bool needLock;
/*
* Reset counts that will be incremented during the scan; needed in
* case of multiple scans during a single VACUUM command
* Reset counts that will be incremented during the scan; needed in case
* of multiple scans during a single VACUUM command
*/
stats->num_index_tuples = 0;
stats->pages_deleted = 0;
@@ -647,24 +647,24 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
ALLOCSET_DEFAULT_MAXSIZE);
/*
* The outer loop iterates over all index pages except the metapage,
* in physical order (we hope the kernel will cooperate in providing
* The outer loop iterates over all index pages except the metapage, in
* physical order (we hope the kernel will cooperate in providing
* read-ahead for speed). It is critical that we visit all leaf pages,
* including ones added after we start the scan, else we might fail to
* delete some deletable tuples. Hence, we must repeatedly check the
* relation length. We must acquire the relation-extension lock while
* doing so to avoid a race condition: if someone else is extending the
* relation, there is a window where bufmgr/smgr have created a new
* all-zero page but it hasn't yet been write-locked by _bt_getbuf().
* If we manage to scan such a page here, we'll improperly assume it can
* be recycled. Taking the lock synchronizes things enough to prevent a
* all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
* we manage to scan such a page here, we'll improperly assume it can be
* recycled. Taking the lock synchronizes things enough to prevent a
* problem: either num_pages won't include the new page, or _bt_getbuf
* already has write lock on the buffer and it will be fully initialized
* before we can examine it. (See also vacuumlazy.c, which has the same
* issue.) Also, we need not worry if a page is added immediately after
* issue.) Also, we need not worry if a page is added immediately after
* we look; the page splitting code already has write-lock on the left
* page before it adds a right page, so we must already have processed
* any tuples due to be moved into such a page.
* page before it adds a right page, so we must already have processed any
* tuples due to be moved into such a page.
*
* We can skip locking for new or temp relations, however, since no one
* else could be accessing them.
@@ -771,7 +771,7 @@ btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
void *callback_state = vstate->callback_state;
Relation rel = info->index;
bool delete_now;
BlockNumber recurse_to;
BlockNumber recurse_to;
Buffer buf;
Page page;
BTPageOpaque opaque;
@@ -796,10 +796,10 @@ restart:
_bt_checkpage(rel, buf);
/*
* If we are recursing, the only case we want to do anything with is
* a live leaf page having the current vacuum cycle ID. Any other state
* implies we already saw the page (eg, deleted it as being empty).
* In particular, we don't want to risk adding it to freePages twice.
* If we are recursing, the only case we want to do anything with is a
* live leaf page having the current vacuum cycle ID. Any other state
* implies we already saw the page (eg, deleted it as being empty). In
* particular, we don't want to risk adding it to freePages twice.
*/
if (blkno != orig_blkno)
{
@@ -838,25 +838,24 @@ restart:
OffsetNumber deletable[MaxOffsetNumber];
int ndeletable;
OffsetNumber offnum,
minoff,
maxoff;
minoff,
maxoff;
/*
* Trade in the initial read lock for a super-exclusive write
* lock on this page. We must get such a lock on every leaf page
* over the course of the vacuum scan, whether or not it actually
* contains any deletable tuples --- see nbtree/README.
* Trade in the initial read lock for a super-exclusive write lock on
* this page. We must get such a lock on every leaf page over the
* course of the vacuum scan, whether or not it actually contains any
* deletable tuples --- see nbtree/README.
*/
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
LockBufferForCleanup(buf);
/*
* Check whether we need to recurse back to earlier pages. What
* we are concerned about is a page split that happened since we
* started the vacuum scan. If the split moved some tuples to a
* lower page then we might have missed 'em. If so, set up for
* tail recursion. (Must do this before possibly clearing
* btpo_cycleid below!)
* Check whether we need to recurse back to earlier pages. What we
* are concerned about is a page split that happened since we started
* the vacuum scan. If the split moved some tuples to a lower page
* then we might have missed 'em. If so, set up for tail recursion.
* (Must do this before possibly clearing btpo_cycleid below!)
*/
if (vstate->cycleid != 0 &&
opaque->btpo_cycleid == vstate->cycleid &&
@@ -866,8 +865,8 @@ restart:
recurse_to = opaque->btpo_next;
/*
* Scan over all items to see which ones need deleted
* according to the callback function.
* Scan over all items to see which ones need to be deleted according to
* the callback function.
*/
ndeletable = 0;
minoff = P_FIRSTDATAKEY(opaque);
@@ -890,8 +889,8 @@ restart:
}
/*
* Apply any needed deletes. We issue just one _bt_delitems()
* call per page, so as to minimize WAL traffic.
* Apply any needed deletes. We issue just one _bt_delitems() call
* per page, so as to minimize WAL traffic.
*/
if (ndeletable > 0)
{
@@ -908,8 +907,8 @@ restart:
* have any deletions to do. (If we do, _bt_delitems takes care
* of this.) This ensures we won't process the page again.
*
* We treat this like a hint-bit update because there's no need
* to WAL-log it.
* We treat this like a hint-bit update because there's no need to
* WAL-log it.
*/
if (vstate->cycleid != 0 &&
opaque->btpo_cycleid == vstate->cycleid)
@@ -920,10 +919,10 @@ restart:
}
/*
* If it's now empty, try to delete; else count the live tuples.
* We don't delete when recursing, though, to avoid putting entries
* into freePages out-of-order (doesn't seem worth any extra code to
* handle the case).
* If it's now empty, try to delete; else count the live tuples. We
* don't delete when recursing, though, to avoid putting entries into
* freePages out-of-order (doesn't seem worth any extra code to handle
* the case).
*/
if (minoff > maxoff)
delete_now = (blkno == orig_blkno);
@@ -947,13 +946,12 @@ restart:
stats->pages_deleted++;
/*
* During VACUUM FULL it's okay to recycle deleted pages
* immediately, since there can be no other transactions scanning
* the index. Note that we will only recycle the current page and
* not any parent pages that _bt_pagedel might have recursed to;
* this seems reasonable in the name of simplicity. (Trying to do
* otherwise would mean we'd have to sort the list of recyclable
* pages we're building.)
* During VACUUM FULL it's okay to recycle deleted pages immediately,
* since there can be no other transactions scanning the index. Note
* that we will only recycle the current page and not any parent pages
* that _bt_pagedel might have recursed to; this seems reasonable in
* the name of simplicity. (Trying to do otherwise would mean we'd
* have to sort the list of recyclable pages we're building.)
*/
if (ndel && info->vacuum_full)
{
@@ -969,11 +967,11 @@ restart:
_bt_relbuf(rel, buf);
/*
* This is really tail recursion, but if the compiler is too stupid
* to optimize it as such, we'd eat an uncomfortably large amount of
* stack space per recursion level (due to the deletable[] array).
* A failure is improbable since the number of levels isn't likely to be
* large ... but just in case, let's hand-optimize into a loop.
* This is really tail recursion, but if the compiler is too stupid to
* optimize it as such, we'd eat an uncomfortably large amount of stack
* space per recursion level (due to the deletable[] array). A failure is
* improbable since the number of levels isn't likely to be large ... but
* just in case, let's hand-optimize into a loop.
*/
if (recurse_to != P_NONE)
{
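/*
 * A minimal sketch of the hand-optimized tail recursion the comment above
 * describes: instead of a recursive call that would keep a large local array
 * alive per level, loop on the block number.  visit() and visit_pages() are
 * invented names; only the loop shape is the point.
 */
#include <stdio.h>

#define P_NONE 0

/* visit one block; return an earlier block that must be revisited, or P_NONE */
static int
visit(int blkno)
{
    printf("visiting block %d\n", blkno);
    return (blkno == 7) ? 3 : P_NONE;   /* pretend block 7 asks us to redo 3 */
}

/* the tail call "visit_pages(recurse_to)" rewritten as a loop */
static void
visit_pages(int blkno)
{
    while (blkno != P_NONE)
        blkno = visit(blkno);
}

int
main(void)
{
    visit_pages(7);             /* visiting block 7, then visiting block 3 */
    return 0;
}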

View File

@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.106 2006/08/24 01:18:34 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.107 2006/10/04 00:29:49 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -22,7 +22,7 @@
static bool _bt_readpage(IndexScanDesc scan, ScanDirection dir,
OffsetNumber offnum);
OffsetNumber offnum);
static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir);
static Buffer _bt_walk_left(Relation rel, Buffer buf);
static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
@@ -417,7 +417,7 @@ _bt_compare(Relation rel,
* _bt_first() -- Find the first item in a scan.
*
* We need to be clever about the direction of scan, the search
* conditions, and the tree ordering. We find the first item (or,
* conditions, and the tree ordering. We find the first item (or,
* if backwards scan, the last item) in the tree that satisfies the
* qualifications in the scan key. On success exit, the page containing
* the current index tuple is pinned but not locked, and data about
@@ -604,7 +604,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
{
ScanKey cur = startKeys[i];
Assert(cur->sk_attno == i+1);
Assert(cur->sk_attno == i + 1);
if (cur->sk_flags & SK_ROW_HEADER)
{
@@ -612,16 +612,17 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* Row comparison header: look to the first row member instead.
*
* The member scankeys are already in insertion format (ie, they
* have sk_func = 3-way-comparison function), but we have to
* watch out for nulls, which _bt_preprocess_keys didn't check.
* A null in the first row member makes the condition unmatchable,
* just like qual_ok = false.
* have sk_func = 3-way-comparison function), but we have to watch
* out for nulls, which _bt_preprocess_keys didn't check. A null
* in the first row member makes the condition unmatchable, just
* like qual_ok = false.
*/
cur = (ScanKey) DatumGetPointer(cur->sk_argument);
Assert(cur->sk_flags & SK_ROW_MEMBER);
if (cur->sk_flags & SK_ISNULL)
return false;
memcpy(scankeys + i, cur, sizeof(ScanKeyData));
/*
* If the row comparison is the last positioning key we accepted,
* try to add additional keys from the lower-order row members.
@@ -833,10 +834,10 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
*
* The actually desired starting point is either this item or the prior
* one, or in the end-of-page case it's the first item on the next page or
* the last item on this page. Adjust the starting offset if needed.
* (If this results in an offset before the first item or after the last
* one, _bt_readpage will report no items found, and then we'll step to
* the next page as needed.)
* the last item on this page. Adjust the starting offset if needed. (If
* this results in an offset before the first item or after the last one,
* _bt_readpage will report no items found, and then we'll step to the
* next page as needed.)
*/
if (goback)
offnum = OffsetNumberPrev(offnum);
@@ -882,8 +883,8 @@ _bt_next(IndexScanDesc scan, ScanDirection dir)
BTScanOpaque so = (BTScanOpaque) scan->opaque;
/*
* Advance to next tuple on current page; or if there's no more,
* try to step to the next page with data.
* Advance to next tuple on current page; or if there's no more, try to
* step to the next page with data.
*/
if (ScanDirectionIsForward(dir))
{
@@ -954,8 +955,8 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
/*
* we must save the page's right-link while scanning it; this tells us
* where to step right to after we're done with these items. There is
* no corresponding need for the left-link, since splits always go right.
* where to step right to after we're done with these items. There is no
* corresponding need for the left-link, since splits always go right.
*/
so->currPos.nextPage = opaque->btpo_next;
@@ -1055,8 +1056,8 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir)
_bt_killitems(scan, true);
/*
* Before we modify currPos, make a copy of the page data if there
* was a mark position that needs it.
* Before we modify currPos, make a copy of the page data if there was a
* mark position that needs it.
*/
if (so->markItemIndex >= 0)
{
@@ -1112,11 +1113,11 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir)
so->currPos.moreRight = true;
/*
* Walk left to the next page with data. This is much more
* complex than the walk-right case because of the possibility
* that the page to our left splits while we are in flight to it,
* plus the possibility that the page we were on gets deleted
* after we leave it. See nbtree/README for details.
* Walk left to the next page with data. This is much more complex
* than the walk-right case because of the possibility that the page
* to our left splits while we are in flight to it, plus the
* possibility that the page we were on gets deleted after we leave
* it. See nbtree/README for details.
*/
for (;;)
{
@@ -1136,9 +1137,9 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir)
return false;
/*
* Okay, we managed to move left to a non-deleted page.
* Done if it's not half-dead and contains matching tuples.
* Else loop back and do it all again.
* Okay, we managed to move left to a non-deleted page. Done if
* it's not half-dead and contains matching tuples. Else loop back
* and do it all again.
*/
page = BufferGetPage(so->currPos.buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);


@@ -57,7 +57,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.106 2006/07/14 14:52:17 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.107 2006/10/04 00:29:49 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -125,7 +125,7 @@ static void _bt_slideleft(Page page);
static void _bt_sortaddtup(Page page, Size itemsize,
IndexTuple itup, OffsetNumber itup_off);
static void _bt_buildadd(BTWriteState *wstate, BTPageState *state,
IndexTuple itup);
IndexTuple itup);
static void _bt_uppershutdown(BTWriteState *wstate, BTPageState *state);
static void _bt_load(BTWriteState *wstate,
BTSpool *btspool, BTSpool *btspool2);
@@ -351,7 +351,7 @@ _bt_pagestate(BTWriteState *wstate, uint32 level)
state->btps_full = (BLCKSZ * (100 - BTREE_NONLEAF_FILLFACTOR) / 100);
else
state->btps_full = RelationGetTargetPageFreeSpace(wstate->index,
BTREE_DEFAULT_FILLFACTOR);
BTREE_DEFAULT_FILLFACTOR);
/* no parent level, yet */
state->btps_next = NULL;
@@ -464,8 +464,8 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
Size itupsz;
/*
* This is a handy place to check for cancel interrupts during the
* btree load phase of index creation.
* This is a handy place to check for cancel interrupts during the btree
* load phase of index creation.
*/
CHECK_FOR_INTERRUPTS();
@@ -499,10 +499,10 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
"or use full text indexing.")));
/*
* Check to see if page is "full". It's definitely full if the item
* won't fit. Otherwise, compare to the target freespace derived from
* the fillfactor. However, we must put at least two items on each
* page, so disregard fillfactor if we don't have that many.
* Check to see if page is "full". It's definitely full if the item won't
* fit. Otherwise, compare to the target freespace derived from the
* fillfactor. However, we must put at least two items on each page, so
* disregard fillfactor if we don't have that many.
*/
if (pgspc < itupsz || (pgspc < state->btps_full && last_off > P_FIRSTKEY))
{


@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtutils.c,v 1.78 2006/07/25 19:13:00 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtutils.c,v 1.79 2006/10/04 00:29:49 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -28,8 +28,8 @@
static void _bt_mark_scankey_required(ScanKey skey);
static bool _bt_check_rowcompare(ScanKey skey,
IndexTuple tuple, TupleDesc tupdesc,
ScanDirection dir, bool *continuescan);
IndexTuple tuple, TupleDesc tupdesc,
ScanDirection dir, bool *continuescan);
/*
@@ -83,7 +83,7 @@ _bt_mkscankey(Relation rel, IndexTuple itup)
* comparison data ultimately used must match the key datatypes.
*
* The result cannot be used with _bt_compare(), unless comparison
* data is first stored into the key entries. Currently this
* data is first stored into the key entries. Currently this
* routine is only called by nbtsort.c and tuplesort.c, which have
* their own comparison routines.
*/
@@ -388,7 +388,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
/*
* Emit the cleaned-up keys into the outkeys[] array, and then
* mark them if they are required. They are required (possibly
* mark them if they are required. They are required (possibly
* only in one direction) if all attrs before this one had "=".
*/
for (j = BTMaxStrategyNumber; --j >= 0;)
@@ -461,7 +461,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
* Mark a scankey as "required to continue the scan".
*
* Depending on the operator type, the key may be required for both scan
* directions or just one. Also, if the key is a row comparison header,
* directions or just one. Also, if the key is a row comparison header,
* we have to mark the appropriate subsidiary ScanKeys as required. In
* such cases, the first subsidiary key is required, but subsequent ones
* are required only as long as they correspond to successive index columns.
@@ -472,12 +472,12 @@ _bt_preprocess_keys(IndexScanDesc scan)
* scribbling on a data structure belonging to the index AM's caller, not on
* our private copy. This should be OK because the marking will not change
* from scan to scan within a query, and so we'd just re-mark the same way
* anyway on a rescan. Something to keep an eye on though.
* anyway on a rescan. Something to keep an eye on though.
*/
static void
_bt_mark_scankey_required(ScanKey skey)
{
int addflags;
int addflags;
switch (skey->sk_strategy)
{
@@ -503,8 +503,8 @@ _bt_mark_scankey_required(ScanKey skey)
if (skey->sk_flags & SK_ROW_HEADER)
{
ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
AttrNumber attno = skey->sk_attno;
ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
AttrNumber attno = skey->sk_attno;
/* First subkey should be same as the header says */
Assert(subkey->sk_attno == attno);
@@ -558,12 +558,12 @@ _bt_checkkeys(IndexScanDesc scan,
*continuescan = true; /* default assumption */
/*
* If the scan specifies not to return killed tuples, then we treat
* a killed tuple as not passing the qual. Most of the time, it's a
* win to not bother examining the tuple's index keys, but just return
* If the scan specifies not to return killed tuples, then we treat a
* killed tuple as not passing the qual. Most of the time, it's a win to
* not bother examining the tuple's index keys, but just return
* immediately with continuescan = true to proceed to the next tuple.
* However, if this is the last tuple on the page, we should check
* the index keys to prevent uselessly advancing to the next page.
* However, if this is the last tuple on the page, we should check the
* index keys to prevent uselessly advancing to the next page.
*/
if (scan->ignore_killed_tuples && ItemIdDeleted(iid))
{
@@ -580,9 +580,10 @@ _bt_checkkeys(IndexScanDesc scan,
if (offnum > P_FIRSTDATAKEY(opaque))
return false;
}
/*
* OK, we want to check the keys, but we'll return FALSE even
* if the tuple passes the key tests.
* OK, we want to check the keys, but we'll return FALSE even if the
* tuple passes the key tests.
*/
tuple_valid = false;
}
@@ -734,10 +735,9 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
{
/*
* Unlike the simple-scankey case, this isn't a disallowed case.
* But it can never match. If all the earlier row comparison
* columns are required for the scan direction, we can stop
* the scan, because there can't be another tuple that will
* succeed.
* But it can never match. If all the earlier row comparison
* columns are required for the scan direction, we can stop the
* scan, because there can't be another tuple that will succeed.
*/
if (subkey != (ScanKey) DatumGetPointer(skey->sk_argument))
subkey--;
@@ -771,7 +771,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
*/
switch (subkey->sk_strategy)
{
/* EQ and NE cases aren't allowed here */
/* EQ and NE cases aren't allowed here */
case BTLessStrategyNumber:
result = (cmpresult < 0);
break;
@@ -795,8 +795,8 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
{
/*
* Tuple fails this qual. If it's a required qual for the current
* scan direction, then we can conclude no further tuples will
* pass, either. Note we have to look at the deciding column, not
* scan direction, then we can conclude no further tuples will pass,
* either. Note we have to look at the deciding column, not
* necessarily the first or last column of the row condition.
*/
if ((subkey->sk_flags & SK_BT_REQFWD) &&
@@ -822,7 +822,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
* is sufficient for setting LP_DELETE hint bits.
*
* We match items by heap TID before assuming they are the right ones to
* delete. We cope with cases where items have moved right due to insertions.
* delete. We cope with cases where items have moved right due to insertions.
* If an item has moved off the current page due to a split, we'll fail to
* find it and do nothing (this is not an error case --- we assume the item
* will eventually get marked in a future indexscan). Note that because we
@@ -856,9 +856,9 @@ _bt_killitems(IndexScanDesc scan, bool haveLock)
for (i = 0; i < so->numKilled; i++)
{
int itemIndex = so->killedItems[i];
BTScanPosItem *kitem = &so->currPos.items[itemIndex];
OffsetNumber offnum = kitem->indexOffset;
int itemIndex = so->killedItems[i];
BTScanPosItem *kitem = &so->currPos.items[itemIndex];
OffsetNumber offnum = kitem->indexOffset;
Assert(itemIndex >= so->currPos.firstItem &&
itemIndex <= so->currPos.lastItem);
@@ -881,9 +881,9 @@ _bt_killitems(IndexScanDesc scan, bool haveLock)
}
/*
* Since this can be redone later if needed, it's treated the same
* as a commit-hint-bit status update for heap tuples: we mark the
* buffer dirty but don't make a WAL log entry.
* Since this can be redone later if needed, it's treated the same as a
* commit-hint-bit status update for heap tuples: we mark the buffer dirty
* but don't make a WAL log entry.
*
* Whenever we mark anything LP_DELETEd, we also set the page's
* BTP_HAS_GARBAGE flag, which is likewise just a hint.
@@ -898,8 +898,8 @@ _bt_killitems(IndexScanDesc scan, bool haveLock)
LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
/*
* Always reset the scan state, so we don't look for same items
* on other pages.
* Always reset the scan state, so we don't look for same items on other
* pages.
*/
so->numKilled = 0;
}
@@ -908,8 +908,8 @@ _bt_killitems(IndexScanDesc scan, bool haveLock)
/*
* The following routines manage a shared-memory area in which we track
* assignment of "vacuum cycle IDs" to currently-active btree vacuuming
* operations. There is a single counter which increments each time we
* start a vacuum to assign it a cycle ID. Since multiple vacuums could
* operations. There is a single counter which increments each time we
* start a vacuum to assign it a cycle ID. Since multiple vacuums could
* be active concurrently, we have to track the cycle ID for each active
* vacuum; this requires at most MaxBackends entries (usually far fewer).
* We assume at most one vacuum can be active for a given index.
@@ -987,7 +987,8 @@ _bt_start_vacuum(Relation rel)
LWLockAcquire(BtreeVacuumLock, LW_EXCLUSIVE);
/* Assign the next cycle ID, being careful to avoid zero */
do {
do
{
result = ++(btvacinfo->cycle_ctr);
} while (result == 0);


@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.37 2006/08/07 16:57:56 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.38 2006/10/04 00:29:49 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -82,7 +82,7 @@ forget_matching_split(RelFileNode node, BlockNumber downlink, bool is_root)
* in correct itemno sequence, but physically the opposite order from the
* original, because we insert them in the opposite of itemno order. This
* does not matter in any current btree code, but it's something to keep an
* eye on. Is it worth changing just on general principles?
* eye on. Is it worth changing just on general principles?
*/
static void
_bt_restore_page(Page page, char *from, int len)
@@ -155,7 +155,7 @@ btree_xlog_insert(bool isleaf, bool ismeta,
char *datapos;
int datalen;
xl_btree_metadata md;
BlockNumber downlink = 0;
BlockNumber downlink = 0;
datapos = (char *) xlrec + SizeOfBtreeInsert;
datalen = record->xl_len - SizeOfBtreeInsert;
@@ -180,7 +180,7 @@ btree_xlog_insert(bool isleaf, bool ismeta,
if (!(record->xl_info & XLR_BKP_BLOCK_1))
{
buffer = XLogReadBuffer(reln,
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
false);
if (BufferIsValid(buffer))
{
@@ -193,7 +193,7 @@ btree_xlog_insert(bool isleaf, bool ismeta,
else
{
if (PageAddItem(page, (Item) datapos, datalen,
ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
LP_USED) == InvalidOffsetNumber)
elog(PANIC, "btree_insert_redo: failed to add item");
@@ -225,7 +225,7 @@ btree_xlog_split(bool onleft, bool isroot,
OffsetNumber targetoff;
BlockNumber leftsib;
BlockNumber rightsib;
BlockNumber downlink = 0;
BlockNumber downlink = 0;
Buffer buffer;
Page page;
BTPageOpaque pageop;
@@ -376,8 +376,8 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
}
/*
* Mark the page as not containing any LP_DELETE items --- see comments
* in _bt_delitems().
* Mark the page as not containing any LP_DELETE items --- see comments in
* _bt_delitems().
*/
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
@@ -543,7 +543,7 @@ btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record)
Buffer buffer;
Page page;
BTPageOpaque pageop;
BlockNumber downlink = 0;
BlockNumber downlink = 0;
reln = XLogOpenRelation(xlrec->node);
buffer = XLogReadBuffer(reln, xlrec->rootblk, true);
@@ -637,9 +637,9 @@ static void
out_target(StringInfo buf, xl_btreetid *target)
{
appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
target->node.spcNode, target->node.dbNode, target->node.relNode,
ItemPointerGetBlockNumber(&(target->tid)),
ItemPointerGetOffsetNumber(&(target->tid)));
target->node.spcNode, target->node.dbNode, target->node.relNode,
ItemPointerGetBlockNumber(&(target->tid)),
ItemPointerGetOffsetNumber(&(target->tid)));
}
void
@@ -680,7 +680,7 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
appendStringInfo(buf, "split_l: ");
out_target(buf, &(xlrec->target));
appendStringInfo(buf, "; oth %u; rgh %u",
xlrec->otherblk, xlrec->rightblk);
xlrec->otherblk, xlrec->rightblk);
break;
}
case XLOG_BTREE_SPLIT_R:
@@ -690,7 +690,7 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
appendStringInfo(buf, "split_r: ");
out_target(buf, &(xlrec->target));
appendStringInfo(buf, "; oth %u; rgh %u",
xlrec->otherblk, xlrec->rightblk);
xlrec->otherblk, xlrec->rightblk);
break;
}
case XLOG_BTREE_SPLIT_L_ROOT:
@@ -700,7 +700,7 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
appendStringInfo(buf, "split_l_root: ");
out_target(buf, &(xlrec->target));
appendStringInfo(buf, "; oth %u; rgh %u",
xlrec->otherblk, xlrec->rightblk);
xlrec->otherblk, xlrec->rightblk);
break;
}
case XLOG_BTREE_SPLIT_R_ROOT:
@@ -710,7 +710,7 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
appendStringInfo(buf, "split_r_root: ");
out_target(buf, &(xlrec->target));
appendStringInfo(buf, "; oth %u; rgh %u",
xlrec->otherblk, xlrec->rightblk);
xlrec->otherblk, xlrec->rightblk);
break;
}
case XLOG_BTREE_DELETE:
@@ -718,8 +718,8 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
xl_btree_delete *xlrec = (xl_btree_delete *) rec;
appendStringInfo(buf, "delete: rel %u/%u/%u; blk %u",
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode, xlrec->block);
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode, xlrec->block);
break;
}
case XLOG_BTREE_DELETE_PAGE:
@@ -730,7 +730,7 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
appendStringInfo(buf, "delete_page: ");
out_target(buf, &(xlrec->target));
appendStringInfo(buf, "; dead %u; left %u; right %u",
xlrec->deadblk, xlrec->leftblk, xlrec->rightblk);
xlrec->deadblk, xlrec->leftblk, xlrec->rightblk);
break;
}
case XLOG_BTREE_NEWROOT:
@@ -738,9 +738,9 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
xl_btree_newroot *xlrec = (xl_btree_newroot *) rec;
appendStringInfo(buf, "newroot: rel %u/%u/%u; root %u lev %u",
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode,
xlrec->rootblk, xlrec->level);
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode,
xlrec->rootblk, xlrec->level);
break;
}
default:


@@ -24,7 +24,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.39 2006/07/13 16:49:12 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.40 2006/10/04 00:29:49 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -414,7 +414,7 @@ clog_redo(XLogRecPtr lsn, XLogRecord *record)
void
clog_desc(StringInfo buf, uint8 xl_info, char *rec)
{
uint8 info = xl_info & ~XLR_INFO_MASK;
uint8 info = xl_info & ~XLR_INFO_MASK;
if (info == CLOG_ZEROPAGE)
{


@@ -42,7 +42,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.20 2006/07/20 00:46:42 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.21 2006/10/04 00:29:49 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1493,10 +1493,10 @@ CheckPointMultiXact(void)
/*
* Truncate the SLRU files. This could be done at any time, but
* checkpoint seems a reasonable place for it. There is one exception:
* if we are called during xlog recovery, then shared->latest_page_number
* isn't valid (because StartupMultiXact hasn't been called yet) and
* so SimpleLruTruncate would get confused. It seems best not to risk
* checkpoint seems a reasonable place for it. There is one exception: if
* we are called during xlog recovery, then shared->latest_page_number
* isn't valid (because StartupMultiXact hasn't been called yet) and so
* SimpleLruTruncate would get confused. It seems best not to risk
* removing any data during recovery anyway, so don't truncate.
*/
if (!InRecovery)
@@ -1917,7 +1917,7 @@ multixact_desc(StringInfo buf, uint8 xl_info, char *rec)
int i;
appendStringInfo(buf, "create multixact %u offset %u:",
xlrec->mid, xlrec->moff);
xlrec->mid, xlrec->moff);
for (i = 0; i < xlrec->nxids; i++)
appendStringInfo(buf, " %u", xlrec->xids[i]);
}


@@ -41,7 +41,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.38 2006/07/14 14:52:17 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.39 2006/10/04 00:29:49 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -92,8 +92,8 @@
typedef struct SlruFlushData
{
int num_files; /* # files actually open */
int fd[MAX_FLUSH_BUFFERS]; /* their FD's */
int num_files; /* # files actually open */
int fd[MAX_FLUSH_BUFFERS]; /* their FD's */
int segno[MAX_FLUSH_BUFFERS]; /* their log seg#s */
} SlruFlushData;
@@ -113,7 +113,7 @@ typedef struct SlruFlushData
* page_lru_count entries to be "reset" to lower values than they should have,
* in case a process is delayed while it executes this macro. With care in
* SlruSelectLRUPage(), this does little harm, and in any case the absolute
* worst possible consequence is a nonoptimal choice of page to evict. The
* worst possible consequence is a nonoptimal choice of page to evict. The
* gain from allowing concurrent reads of SLRU pages seems worth it.
*/
#define SlruRecentlyUsed(shared, slotno) \
@@ -158,13 +158,13 @@ SimpleLruShmemSize(int nslots)
/* we assume nslots isn't so large as to risk overflow */
sz = MAXALIGN(sizeof(SlruSharedData));
sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */
sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
sz += MAXALIGN(nslots * sizeof(LWLockId)); /* buffer_locks[] */
sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */
sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
sz += MAXALIGN(nslots * sizeof(LWLockId)); /* buffer_locks[] */
return BUFFERALIGN(sz) + BLCKSZ * nslots;
}
@@ -653,9 +653,9 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
* Easiest way to deal with that is to accept references to
* nonexistent files here and in SlruPhysicalReadPage.)
*
* Note: it is possible for more than one backend to be executing
* this code simultaneously for different pages of the same file.
* Hence, don't use O_EXCL or O_TRUNC or anything like that.
* Note: it is possible for more than one backend to be executing this
* code simultaneously for different pages of the same file. Hence,
* don't use O_EXCL or O_TRUNC or anything like that.
*/
SlruFileName(ctl, path, segno);
fd = BasicOpenFile(path, O_RDWR | O_CREAT | PG_BINARY,
@@ -759,22 +759,22 @@ SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not access status of transaction %u", xid),
errdetail("Could not seek in file \"%s\" to offset %u: %m.",
path, offset)));
errdetail("Could not seek in file \"%s\" to offset %u: %m.",
path, offset)));
break;
case SLRU_READ_FAILED:
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not access status of transaction %u", xid),
errdetail("Could not read from file \"%s\" at offset %u: %m.",
path, offset)));
errdetail("Could not read from file \"%s\" at offset %u: %m.",
path, offset)));
break;
case SLRU_WRITE_FAILED:
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not access status of transaction %u", xid),
errdetail("Could not write to file \"%s\" at offset %u: %m.",
path, offset)));
errdetail("Could not write to file \"%s\" at offset %u: %m.",
path, offset)));
break;
case SLRU_FSYNC_FAILED:
ereport(ERROR,


@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.23 2006/10/03 21:21:35 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.24 2006/10/04 00:29:49 momjian Exp $
*
* NOTES
* Each global transaction is associated with a global transaction
@@ -1250,8 +1250,8 @@ RemoveTwoPhaseFile(TransactionId xid, bool giveWarning)
if (errno != ENOENT || giveWarning)
ereport(WARNING,
(errcode_for_file_access(),
errmsg("could not remove twophase state file \"%s\": %m",
path)));
errmsg("could not remove twophase state file \"%s\": %m",
path)));
}
/*


@@ -6,7 +6,7 @@
* Copyright (c) 2000-2006, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/transam/varsup.c,v 1.74 2006/09/26 17:21:39 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/varsup.c,v 1.75 2006/10/04 00:29:49 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -133,10 +133,10 @@ GetNewTransactionId(bool isSubXact)
{
/*
* Use volatile pointer to prevent code rearrangement; other backends
* could be examining my subxids info concurrently, and we don't
* want them to see an invalid intermediate state, such as
* incrementing nxids before filling the array entry. Note we are
* assuming that TransactionId and int fetch/store are atomic.
* could be examining my subxids info concurrently, and we don't want
* them to see an invalid intermediate state, such as incrementing
* nxids before filling the array entry. Note we are assuming that
* TransactionId and int fetch/store are atomic.
*/
volatile PGPROC *myproc = MyProc;
@@ -144,7 +144,7 @@ GetNewTransactionId(bool isSubXact)
myproc->xid = xid;
else
{
int nxids = myproc->subxids.nxids;
int nxids = myproc->subxids.nxids;
if (nxids < PGPROC_MAX_CACHED_SUBXIDS)
{
@@ -196,7 +196,7 @@ SetTransactionIdLimit(TransactionId oldest_datminxid,
* The place where we actually get into deep trouble is halfway around
* from the oldest existing XID. (This calculation is probably off by one
* or two counts, because the special XIDs reduce the size of the loop a
* little bit. But we throw in plenty of slop below, so it doesn't
* little bit. But we throw in plenty of slop below, so it doesn't
* matter.)
*/
xidWrapLimit = oldest_datminxid + (MaxTransactionId >> 1);


@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.226 2006/08/27 19:11:46 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.227 2006/10/04 00:29:49 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1376,12 +1376,12 @@ StartTransaction(void)
XactLockTableInsert(s->transactionId);
PG_TRACE1 (transaction__start, s->transactionId);
PG_TRACE1(transaction__start, s->transactionId);
/*
* set transaction_timestamp() (a/k/a now()). We want this to be the
* same as the first command's statement_timestamp(), so don't do a
* fresh GetCurrentTimestamp() call (which'd be expensive anyway).
* set transaction_timestamp() (a/k/a now()). We want this to be the same
* as the first command's statement_timestamp(), so don't do a fresh
* GetCurrentTimestamp() call (which'd be expensive anyway).
*/
xactStartTimestamp = stmtStartTimestamp;
@@ -1521,7 +1521,7 @@ CommitTransaction(void)
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId;
MyProc->inVacuum = false; /* must be cleared with xid/xmin */
MyProc->inVacuum = false; /* must be cleared with xid/xmin */
/* Clear the subtransaction-XID cache too while holding the lock */
MyProc->subxids.nxids = 0;
@@ -1530,7 +1530,7 @@ CommitTransaction(void)
LWLockRelease(ProcArrayLock);
}
PG_TRACE1 (transaction__commit, s->transactionId);
PG_TRACE1(transaction__commit, s->transactionId);
/*
* This is all post-commit cleanup. Note that if an error is raised here,
@@ -1921,7 +1921,7 @@ AbortTransaction(void)
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId;
MyProc->inVacuum = false; /* must be cleared with xid/xmin */
MyProc->inVacuum = false; /* must be cleared with xid/xmin */
/* Clear the subtransaction-XID cache too while holding the lock */
MyProc->subxids.nxids = 0;
@@ -1930,7 +1930,7 @@ AbortTransaction(void)
LWLockRelease(ProcArrayLock);
}
PG_TRACE1 (transaction__abort, s->transactionId);
PG_TRACE1(transaction__abort, s->transactionId);
/*
* Post-abort cleanup. See notes in CommitTransaction() concerning
@@ -4206,8 +4206,8 @@ xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec)
int i;
appendStringInfo(buf, "%04u-%02u-%02u %02u:%02u:%02u",
tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
tm->tm_hour, tm->tm_min, tm->tm_sec);
tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
tm->tm_hour, tm->tm_min, tm->tm_sec);
if (xlrec->nrels > 0)
{
appendStringInfo(buf, "; rels:");
@@ -4216,7 +4216,7 @@ xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec)
RelFileNode rnode = xlrec->xnodes[i];
appendStringInfo(buf, " %u/%u/%u",
rnode.spcNode, rnode.dbNode, rnode.relNode);
rnode.spcNode, rnode.dbNode, rnode.relNode);
}
}
if (xlrec->nsubxacts > 0)
@@ -4237,8 +4237,8 @@ xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec)
int i;
appendStringInfo(buf, "%04u-%02u-%02u %02u:%02u:%02u",
tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
tm->tm_hour, tm->tm_min, tm->tm_sec);
tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
tm->tm_hour, tm->tm_min, tm->tm_sec);
if (xlrec->nrels > 0)
{
appendStringInfo(buf, "; rels:");
@@ -4247,7 +4247,7 @@ xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec)
RelFileNode rnode = xlrec->xnodes[i];
appendStringInfo(buf, " %u/%u/%u",
rnode.spcNode, rnode.dbNode, rnode.relNode);
rnode.spcNode, rnode.dbNode, rnode.relNode);
}
}
if (xlrec->nsubxacts > 0)
@@ -4264,7 +4264,7 @@ xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec)
void
xact_desc(StringInfo buf, uint8 xl_info, char *rec)
{
uint8 info = xl_info & ~XLR_INFO_MASK;
uint8 info = xl_info & ~XLR_INFO_MASK;
if (info == XLOG_XACT_COMMIT)
{


@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.249 2006/08/21 16:16:31 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.250 2006/10/04 00:29:49 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -349,9 +349,9 @@ typedef struct XLogCtlInsert
*/
typedef struct XLogCtlWrite
{
XLogwrtResult LogwrtResult; /* current value of LogwrtResult */
int curridx; /* cache index of next block to write */
time_t lastSegSwitchTime; /* time of last xlog segment switch */
XLogwrtResult LogwrtResult; /* current value of LogwrtResult */
int curridx; /* cache index of next block to write */
time_t lastSegSwitchTime; /* time of last xlog segment switch */
} XLogCtlWrite;
/*
@@ -481,7 +481,7 @@ static bool InstallXLogFileSegment(uint32 *log, uint32 *seg, char *tmppath,
bool use_lock);
static int XLogFileOpen(uint32 log, uint32 seg);
static int XLogFileRead(uint32 log, uint32 seg, int emode);
static void XLogFileClose(void);
static void XLogFileClose(void);
static bool RestoreArchivedFile(char *path, const char *xlogfname,
const char *recovername, off_t expectedSize);
static int PreallocXlogFiles(XLogRecPtr endptr);
@@ -506,7 +506,7 @@ static void issue_xlog_fsync(void);
static void xlog_outrec(StringInfo buf, XLogRecord *record);
#endif
static bool read_backup_label(XLogRecPtr *checkPointLoc,
XLogRecPtr *minRecoveryLoc);
XLogRecPtr *minRecoveryLoc);
static void rm_redo_error_callback(void *arg);
@@ -697,9 +697,9 @@ begin:;
/*
* NOTE: We disallow len == 0 because it provides a useful bit of extra
* error checking in ReadRecord. This means that all callers of
* XLogInsert must supply at least some not-in-a-buffer data. However,
* we make an exception for XLOG SWITCH records because we don't want
* them to ever cross a segment boundary.
* XLogInsert must supply at least some not-in-a-buffer data. However, we
* make an exception for XLOG SWITCH records because we don't want them to
* ever cross a segment boundary.
*/
if (len == 0 && !isLogSwitch)
elog(PANIC, "invalid xlog record length %u", len);
@@ -752,8 +752,8 @@ begin:;
* checkpoint, so it's better to be slow in this case and fast otherwise.
*
* If we aren't doing full-page writes then RedoRecPtr doesn't actually
* affect the contents of the XLOG record, so we'll update our local
* copy but not force a recomputation.
* affect the contents of the XLOG record, so we'll update our local copy
* but not force a recomputation.
*/
if (!XLByteEQ(RedoRecPtr, Insert->RedoRecPtr))
{
@@ -782,10 +782,10 @@ begin:;
}
/*
* Also check to see if forcePageWrites was just turned on; if we
* weren't already doing full-page writes then go back and recompute.
* (If it was just turned off, we could recompute the record without
* full pages, but we choose not to bother.)
* Also check to see if forcePageWrites was just turned on; if we weren't
* already doing full-page writes then go back and recompute. (If it was
* just turned off, we could recompute the record without full pages, but
* we choose not to bother.)
*/
if (Insert->forcePageWrites && !doPageWrites)
{
@@ -870,11 +870,11 @@ begin:;
INSERT_RECPTR(RecPtr, Insert, curridx);
/*
* If the record is an XLOG_SWITCH, and we are exactly at the start
* of a segment, we need not insert it (and don't want to because
* we'd like consecutive switch requests to be no-ops). Instead,
* make sure everything is written and flushed through the end of
* the prior segment, and return the prior segment's end address.
* If the record is an XLOG_SWITCH, and we are exactly at the start of a
* segment, we need not insert it (and don't want to because we'd like
* consecutive switch requests to be no-ops). Instead, make sure
* everything is written and flushed through the end of the prior segment,
* and return the prior segment's end address.
*/
if (isLogSwitch &&
(RecPtr.xrecoff % XLogSegSize) == SizeOfXLogLongPHD)
@@ -926,7 +926,7 @@ begin:;
#ifdef WAL_DEBUG
if (XLOG_DEBUG)
{
StringInfoData buf;
StringInfoData buf;
initStringInfo(&buf);
appendStringInfo(&buf, "INSERT @ %X/%X: ",
@@ -1019,8 +1019,8 @@ begin:;
LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
/*
* Flush through the end of the page containing XLOG_SWITCH,
* and perform end-of-segment actions (eg, notifying archiver).
* Flush through the end of the page containing XLOG_SWITCH, and
* perform end-of-segment actions (eg, notifying archiver).
*/
WriteRqst = XLogCtl->xlblocks[curridx];
FlushRqst.Write = WriteRqst;
@@ -1667,8 +1667,8 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible, bool xlog_switch)
* switch.
*
* This is also the right place to notify the Archiver that the
* segment is ready to copy to archival storage, and to update
* the timer for archive_timeout.
* segment is ready to copy to archival storage, and to update the
* timer for archive_timeout.
*/
if (finishing_seg || (xlog_switch && last_iteration))
{
@@ -2300,36 +2300,35 @@ XLogFileClose(void)
Assert(openLogFile >= 0);
/*
* posix_fadvise is problematic on many platforms: on older x86 Linux
* it just dumps core, and there are reports of problems on PPC platforms
* as well. The following is therefore disabled for the time being.
* We could consider some kind of configure test to see if it's safe to
* use, but since we lack hard evidence that there's any useful performance
* gain to be had, spending time on that seems unprofitable for now.
* posix_fadvise is problematic on many platforms: on older x86 Linux it
* just dumps core, and there are reports of problems on PPC platforms as
* well. The following is therefore disabled for the time being. We could
* consider some kind of configure test to see if it's safe to use, but
* since we lack hard evidence that there's any useful performance gain to
* be had, spending time on that seems unprofitable for now.
*/
#ifdef NOT_USED
/*
* WAL segment files will not be re-read in normal operation, so we advise
* OS to release any cached pages. But do not do so if WAL archiving is
* OS to release any cached pages. But do not do so if WAL archiving is
* active, because archiver process could use the cache to read the WAL
* segment.
*
* While O_DIRECT works for O_SYNC, posix_fadvise() works for fsync()
* and O_SYNC, and some platforms only have posix_fadvise().
* While O_DIRECT works for O_SYNC, posix_fadvise() works for fsync() and
* O_SYNC, and some platforms only have posix_fadvise().
*/
#if defined(HAVE_DECL_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
if (!XLogArchivingActive())
posix_fadvise(openLogFile, 0, 0, POSIX_FADV_DONTNEED);
#endif
#endif /* NOT_USED */
#endif /* NOT_USED */
if (close(openLogFile))
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not close log file %u, segment %u: %m",
openLogId, openLogSeg)));
(errcode_for_file_access(),
errmsg("could not close log file %u, segment %u: %m",
openLogId, openLogSeg)));
openLogFile = -1;
}
@@ -2978,8 +2977,8 @@ ReadRecord(XLogRecPtr *RecPtr, int emode)
got_record:;
/*
* xl_len == 0 is bad data for everything except XLOG SWITCH, where
* it is required.
* xl_len == 0 is bad data for everything except XLOG SWITCH, where it is
* required.
*/
if (record->xl_rmid == RM_XLOG_ID && record->xl_info == XLOG_SWITCH)
{
@@ -3168,6 +3167,7 @@ got_record:;
EndRecPtr.xrecoff = RecPtr->xrecoff + MAXALIGN(total_len);
ReadRecPtr = *RecPtr;
memcpy(buffer, record, total_len);
/*
* Special processing if it's an XLOG SWITCH record
*/
@@ -3177,10 +3177,11 @@ got_record:;
EndRecPtr.xrecoff += XLogSegSize - 1;
EndRecPtr.xrecoff -= EndRecPtr.xrecoff % XLogSegSize;
nextRecord = NULL; /* definitely not on same page */
/*
* Pretend that readBuf contains the last page of the segment.
* This is just to avoid Assert failure in StartupXLOG if XLOG
* ends with this segment.
* Pretend that readBuf contains the last page of the segment. This is
* just to avoid Assert failure in StartupXLOG if XLOG ends with this
* segment.
*/
readOff = XLogSegSize - XLOG_BLCKSZ;
}
@@ -3661,7 +3662,7 @@ static void
WriteControlFile(void)
{
int fd;
char buffer[PG_CONTROL_SIZE]; /* need not be aligned */
char buffer[PG_CONTROL_SIZE]; /* need not be aligned */
char *localeptr;
/*
@@ -3846,9 +3847,9 @@ ReadControlFile(void)
if (ControlFile->xlog_blcksz != XLOG_BLCKSZ)
ereport(FATAL,
(errmsg("database files are incompatible with server"),
errdetail("The database cluster was initialized with XLOG_BLCKSZ %d,"
" but the server was compiled with XLOG_BLCKSZ %d.",
ControlFile->xlog_blcksz, XLOG_BLCKSZ),
errdetail("The database cluster was initialized with XLOG_BLCKSZ %d,"
" but the server was compiled with XLOG_BLCKSZ %d.",
ControlFile->xlog_blcksz, XLOG_BLCKSZ),
errhint("It looks like you need to recompile or initdb.")));
if (ControlFile->xlog_seg_size != XLOG_SEG_SIZE)
ereport(FATAL,
@@ -4027,7 +4028,7 @@ XLOGShmemInit(void)
* Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
* in additional info.)
*/
XLogCtl->XLogCacheByte = (Size) XLOG_BLCKSZ * XLOGbuffers;
XLogCtl->XLogCacheByte = (Size) XLOG_BLCKSZ *XLOGbuffers;
XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
@@ -4649,10 +4650,10 @@ StartupXLOG(void)
" you will have to use the last backup for recovery.")));
else if (ControlFile->state == DB_IN_ARCHIVE_RECOVERY)
ereport(LOG,
(errmsg("database system was interrupted while in recovery at log time %s",
str_time(ControlFile->checkPointCopy.time)),
errhint("If this has occurred more than once some data may be corrupted"
" and you may need to choose an earlier recovery target.")));
(errmsg("database system was interrupted while in recovery at log time %s",
str_time(ControlFile->checkPointCopy.time)),
errhint("If this has occurred more than once some data may be corrupted"
" and you may need to choose an earlier recovery target.")));
else if (ControlFile->state == DB_IN_PRODUCTION)
ereport(LOG,
(errmsg("database system was interrupted at %s",
@@ -4812,10 +4813,10 @@ StartupXLOG(void)
int rmid;
/*
* Update pg_control to show that we are recovering and to show
* the selected checkpoint as the place we are starting from.
* We also mark pg_control with any minimum recovery stop point
* obtained from a backup history file.
* Update pg_control to show that we are recovering and to show the
* selected checkpoint as the place we are starting from. We also mark
* pg_control with any minimum recovery stop point obtained from a
* backup history file.
*/
if (InArchiveRecovery)
{
@@ -4839,12 +4840,12 @@ StartupXLOG(void)
UpdateControlFile();
/*
* If there was a backup label file, it's done its job and the
* info has now been propagated into pg_control. We must get rid of
* the label file so that if we crash during recovery, we'll pick up
* at the latest recovery restartpoint instead of going all the way
* back to the backup start point. It seems prudent though to just
* rename the file out of the way rather than delete it completely.
* If there was a backup label file, it's done its job and the info
* has now been propagated into pg_control. We must get rid of the
* label file so that if we crash during recovery, we'll pick up at
* the latest recovery restartpoint instead of going all the way back
* to the backup start point. It seems prudent though to just rename
* the file out of the way rather than delete it completely.
*/
if (haveBackupLabel)
{
@@ -4884,7 +4885,7 @@ StartupXLOG(void)
{
bool recoveryContinue = true;
bool recoveryApply = true;
ErrorContextCallback errcontext;
ErrorContextCallback errcontext;
InRedo = true;
ereport(LOG,
@@ -4899,17 +4900,17 @@ StartupXLOG(void)
#ifdef WAL_DEBUG
if (XLOG_DEBUG)
{
StringInfoData buf;
StringInfoData buf;
initStringInfo(&buf);
appendStringInfo(&buf, "REDO @ %X/%X; LSN %X/%X: ",
ReadRecPtr.xlogid, ReadRecPtr.xrecoff,
EndRecPtr.xlogid, EndRecPtr.xrecoff);
ReadRecPtr.xlogid, ReadRecPtr.xrecoff,
EndRecPtr.xlogid, EndRecPtr.xrecoff);
xlog_outrec(&buf, record);
appendStringInfo(&buf, " - ");
RmgrTable[record->xl_rmid].rm_desc(&buf,
record->xl_info,
XLogRecGetData(record));
XLogRecGetData(record));
elog(LOG, "%s", buf.data);
pfree(buf.data);
}
@@ -5383,9 +5384,9 @@ GetRecentNextXid(void)
void
GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch)
{
uint32 ckptXidEpoch;
TransactionId ckptXid;
TransactionId nextXid;
uint32 ckptXidEpoch;
TransactionId ckptXid;
TransactionId nextXid;
/* Must read checkpoint info first, else have race condition */
{
@@ -5718,7 +5719,7 @@ CheckPointGuts(XLogRecPtr checkPointRedo)
CheckPointCLOG();
CheckPointSUBTRANS();
CheckPointMultiXact();
FlushBufferPool(); /* performs all required fsyncs */
FlushBufferPool(); /* performs all required fsyncs */
/* We deliberately delay 2PC checkpointing as long as possible */
CheckPointTwoPhase(checkPointRedo);
}
@@ -5735,12 +5736,12 @@ CheckPointGuts(XLogRecPtr checkPointRedo)
static void
RecoveryRestartPoint(const CheckPoint *checkPoint)
{
int elapsed_secs;
int rmid;
int elapsed_secs;
int rmid;
/*
* Do nothing if the elapsed time since the last restartpoint is less
* than half of checkpoint_timeout. (We use a value less than
* Do nothing if the elapsed time since the last restartpoint is less than
* half of checkpoint_timeout. (We use a value less than
* checkpoint_timeout so that variations in the timing of checkpoints on
* the master, or speed of transmission of WAL segments to a slave, won't
* make the slave skip a restartpoint once it's synced with the master.)
@@ -5770,9 +5771,9 @@ RecoveryRestartPoint(const CheckPoint *checkPoint)
CheckPointGuts(checkPoint->redo);
/*
* Update pg_control so that any subsequent crash will restart from
* this checkpoint. Note: ReadRecPtr gives the XLOG address of the
* checkpoint record itself.
* Update pg_control so that any subsequent crash will restart from this
* checkpoint. Note: ReadRecPtr gives the XLOG address of the checkpoint
* record itself.
*/
ControlFile->prevCheckPoint = ControlFile->checkPoint;
ControlFile->checkPoint = ReadRecPtr;
@@ -5926,7 +5927,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
void
xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
{
uint8 info = xl_info & ~XLR_INFO_MASK;
uint8 info = xl_info & ~XLR_INFO_MASK;
if (info == XLOG_CHECKPOINT_SHUTDOWN ||
info == XLOG_CHECKPOINT_ONLINE)
@@ -5934,15 +5935,15 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
CheckPoint *checkpoint = (CheckPoint *) rec;
appendStringInfo(buf, "checkpoint: redo %X/%X; undo %X/%X; "
"tli %u; xid %u/%u; oid %u; multi %u; offset %u; %s",
checkpoint->redo.xlogid, checkpoint->redo.xrecoff,
checkpoint->undo.xlogid, checkpoint->undo.xrecoff,
checkpoint->ThisTimeLineID,
checkpoint->nextXidEpoch, checkpoint->nextXid,
checkpoint->nextOid,
checkpoint->nextMulti,
checkpoint->nextMultiOffset,
(info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online");
"tli %u; xid %u/%u; oid %u; multi %u; offset %u; %s",
checkpoint->redo.xlogid, checkpoint->redo.xrecoff,
checkpoint->undo.xlogid, checkpoint->undo.xrecoff,
checkpoint->ThisTimeLineID,
checkpoint->nextXidEpoch, checkpoint->nextXid,
checkpoint->nextOid,
checkpoint->nextMulti,
checkpoint->nextMultiOffset,
(info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online");
}
else if (info == XLOG_NEXTOID)
{
@@ -5973,7 +5974,7 @@ xlog_outrec(StringInfo buf, XLogRecord *record)
for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
{
if (record->xl_info & XLR_SET_BKP_BLOCK(i))
appendStringInfo(buf, "; bkpb%d", i+1);
appendStringInfo(buf, "; bkpb%d", i + 1);
}
appendStringInfo(buf, ": %s", RmgrTable[record->xl_rmid].rm_name);
@@ -6142,18 +6143,18 @@ pg_start_backup(PG_FUNCTION_ARGS)
* Mark backup active in shared memory. We must do full-page WAL writes
* during an on-line backup even if not doing so at other times, because
* it's quite possible for the backup dump to obtain a "torn" (partially
* written) copy of a database page if it reads the page concurrently
* with our write to the same page. This can be fixed as long as the
* first write to the page in the WAL sequence is a full-page write.
* Hence, we turn on forcePageWrites and then force a CHECKPOINT, to
* ensure there are no dirty pages in shared memory that might get
* dumped while the backup is in progress without having a corresponding
* WAL record. (Once the backup is complete, we need not force full-page
* writes anymore, since we expect that any pages not modified during
* the backup interval must have been correctly captured by the backup.)
* written) copy of a database page if it reads the page concurrently with
* our write to the same page. This can be fixed as long as the first
* write to the page in the WAL sequence is a full-page write. Hence, we
* turn on forcePageWrites and then force a CHECKPOINT, to ensure there
* are no dirty pages in shared memory that might get dumped while the
* backup is in progress without having a corresponding WAL record. (Once
* the backup is complete, we need not force full-page writes anymore,
* since we expect that any pages not modified during the backup interval
* must have been correctly captured by the backup.)
*
* We must hold WALInsertLock to change the value of forcePageWrites,
* to ensure adequate interlocking against XLogInsert().
* We must hold WALInsertLock to change the value of forcePageWrites, to
* ensure adequate interlocking against XLogInsert().
*/
LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
if (XLogCtl->Insert.forcePageWrites)
@@ -6171,7 +6172,7 @@ pg_start_backup(PG_FUNCTION_ARGS)
PG_TRY();
{
/*
* Force a CHECKPOINT. Aside from being necessary to prevent torn
* Force a CHECKPOINT. Aside from being necessary to prevent torn
* page problems, this guarantees that two successive backup runs will
* have different checkpoint positions and hence different history
* file names, even if nothing happened in between.
@@ -6303,10 +6304,9 @@ pg_stop_backup(PG_FUNCTION_ARGS)
LWLockRelease(WALInsertLock);
/*
* Force a switch to a new xlog segment file, so that the backup
* is valid as soon as archiver moves out the current segment file.
* We'll report the end address of the XLOG SWITCH record as the backup
* stopping point.
* Force a switch to a new xlog segment file, so that the backup is valid
* as soon as archiver moves out the current segment file. We'll report
* the end address of the XLOG SWITCH record as the backup stopping point.
*/
stoppoint = RequestXLogSwitch();
@@ -6392,9 +6392,9 @@ pg_stop_backup(PG_FUNCTION_ARGS)
BACKUP_LABEL_FILE)));
/*
* Clean out any no-longer-needed history files. As a side effect,
* this will post a .ready file for the newly created history file,
* notifying the archiver that history file may be archived immediately.
* Clean out any no-longer-needed history files. As a side effect, this
* will post a .ready file for the newly created history file, notifying
* the archiver that history file may be archived immediately.
*/
CleanupBackupHistory();
@@ -6415,7 +6415,7 @@ Datum
pg_switch_xlog(PG_FUNCTION_ARGS)
{
text *result;
XLogRecPtr switchpoint;
XLogRecPtr switchpoint;
char location[MAXFNAMELEN];
if (!superuser())
@@ -6514,17 +6514,17 @@ pg_xlogfile_name_offset(PG_FUNCTION_ARGS)
uint32 xrecoff;
XLogRecPtr locationpoint;
char xlogfilename[MAXFNAMELEN];
Datum values[2];
bool isnull[2];
TupleDesc resultTupleDesc;
HeapTuple resultHeapTuple;
Datum result;
Datum values[2];
bool isnull[2];
TupleDesc resultTupleDesc;
HeapTuple resultHeapTuple;
Datum result;
/*
* Read input and parse
*/
locationstr = DatumGetCString(DirectFunctionCall1(textout,
PointerGetDatum(location)));
PointerGetDatum(location)));
if (sscanf(locationstr, "%X/%X", &uxlogid, &uxrecoff) != 2)
ereport(ERROR,
@@ -6536,8 +6536,8 @@ pg_xlogfile_name_offset(PG_FUNCTION_ARGS)
locationpoint.xrecoff = uxrecoff;
/*
* Construct a tuple descriptor for the result row. This must match
* this function's pg_proc entry!
* Construct a tuple descriptor for the result row. This must match this
* function's pg_proc entry!
*/
resultTupleDesc = CreateTemplateTupleDesc(2, false);
TupleDescInitEntry(resultTupleDesc, (AttrNumber) 1, "file_name",
@@ -6593,7 +6593,7 @@ pg_xlogfile_name(PG_FUNCTION_ARGS)
char xlogfilename[MAXFNAMELEN];
locationstr = DatumGetCString(DirectFunctionCall1(textout,
PointerGetDatum(location)));
PointerGetDatum(location)));
if (sscanf(locationstr, "%X/%X", &uxlogid, &uxrecoff) != 2)
ereport(ERROR,
@@ -6608,7 +6608,7 @@ pg_xlogfile_name(PG_FUNCTION_ARGS)
XLogFileName(xlogfilename, ThisTimeLineID, xlogid, xlogseg);
result = DatumGetTextP(DirectFunctionCall1(textin,
CStringGetDatum(xlogfilename)));
CStringGetDatum(xlogfilename)));
PG_RETURN_TEXT_P(result);
}
@@ -6734,8 +6734,8 @@ read_backup_label(XLogRecPtr *checkPointLoc, XLogRecPtr *minRecoveryLoc)
static void
rm_redo_error_callback(void *arg)
{
XLogRecord *record = (XLogRecord *) arg;
StringInfoData buf;
XLogRecord *record = (XLogRecord *) arg;
StringInfoData buf;
initStringInfo(&buf);
RmgrTable[record->xl_rmid].rm_desc(&buf,


@@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.47 2006/07/14 14:52:17 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.48 2006/10/04 00:29:49 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -172,8 +172,8 @@ XLogCheckInvalidPages(void)
hash_seq_init(&status, invalid_page_tab);
/*
* Our strategy is to emit WARNING messages for all remaining entries
* and only PANIC after we've dumped all the available info.
* Our strategy is to emit WARNING messages for all remaining entries and
* only PANIC after we've dumped all the available info.
*/
while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
{
@@ -255,7 +255,7 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
if (!init)
{
/* check that page has been initialized */
Page page = (Page) BufferGetPage(buffer);
Page page = (Page) BufferGetPage(buffer);
if (PageIsNew((PageHeader) page))
{