mirror of
https://github.com/postgres/postgres.git
synced 2025-11-10 17:42:29 +03:00
pgindent run for 8.2.
This commit is contained in:
@@ -16,7 +16,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/common/heaptuple.c,v 1.110 2006/07/14 14:52:16 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/common/heaptuple.c,v 1.111 2006/10/04 00:29:47 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -1294,7 +1294,7 @@ slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull)
|
||||
{
|
||||
if (tuple == NULL) /* internal error */
|
||||
elog(ERROR, "cannot extract system attribute from virtual tuple");
|
||||
if (slot->tts_mintuple) /* internal error */
|
||||
if (slot->tts_mintuple) /* internal error */
|
||||
elog(ERROR, "cannot extract system attribute from minimal tuple");
|
||||
return heap_getsysattr(tuple, attnum, tupleDesc, isnull);
|
||||
}
|
||||
@@ -1480,7 +1480,7 @@ slot_attisnull(TupleTableSlot *slot, int attnum)
|
||||
{
|
||||
if (tuple == NULL) /* internal error */
|
||||
elog(ERROR, "cannot extract system attribute from virtual tuple");
|
||||
if (slot->tts_mintuple) /* internal error */
|
||||
if (slot->tts_mintuple) /* internal error */
|
||||
elog(ERROR, "cannot extract system attribute from minimal tuple");
|
||||
return heap_attisnull(tuple, attnum);
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/common/printtup.c,v 1.98 2006/08/12 02:52:03 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/common/printtup.c,v 1.99 2006/10/04 00:29:47 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -127,8 +127,8 @@ printtup_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are supposed to emit row descriptions,
|
||||
* then send the tuple descriptor of the tuples.
|
||||
* If we are supposed to emit row descriptions, then send the tuple
|
||||
* descriptor of the tuples.
|
||||
*/
|
||||
if (myState->sendDescrip)
|
||||
SendRowDescriptionMessage(typeinfo,
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/common/reloptions.c,v 1.1 2006/07/03 22:45:36 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/common/reloptions.c,v 1.2 2006/10/04 00:29:47 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -59,7 +59,7 @@ transformRelOptions(Datum oldOptions, List *defList,
|
||||
/* Copy any oldOptions that aren't to be replaced */
|
||||
if (oldOptions != (Datum) 0)
|
||||
{
|
||||
ArrayType *array = DatumGetArrayTypeP(oldOptions);
|
||||
ArrayType *array = DatumGetArrayTypeP(oldOptions);
|
||||
Datum *oldoptions;
|
||||
int noldoptions;
|
||||
int i;
|
||||
@@ -71,15 +71,15 @@ transformRelOptions(Datum oldOptions, List *defList,
|
||||
|
||||
for (i = 0; i < noldoptions; i++)
|
||||
{
|
||||
text *oldoption = DatumGetTextP(oldoptions[i]);
|
||||
char *text_str = (char *) VARATT_DATA(oldoption);
|
||||
int text_len = VARATT_SIZE(oldoption) - VARHDRSZ;
|
||||
text *oldoption = DatumGetTextP(oldoptions[i]);
|
||||
char *text_str = (char *) VARATT_DATA(oldoption);
|
||||
int text_len = VARATT_SIZE(oldoption) - VARHDRSZ;
|
||||
|
||||
/* Search for a match in defList */
|
||||
foreach(cell, defList)
|
||||
{
|
||||
DefElem *def = lfirst(cell);
|
||||
int kw_len = strlen(def->defname);
|
||||
DefElem *def = lfirst(cell);
|
||||
int kw_len = strlen(def->defname);
|
||||
|
||||
if (text_len > kw_len && text_str[kw_len] == '=' &&
|
||||
pg_strncasecmp(text_str, def->defname, kw_len) == 0)
|
||||
@@ -96,33 +96,33 @@ transformRelOptions(Datum oldOptions, List *defList,
|
||||
}
|
||||
|
||||
/*
|
||||
* If CREATE/SET, add new options to array; if RESET, just check that
|
||||
* the user didn't say RESET (option=val). (Must do this because the
|
||||
* grammar doesn't enforce it.)
|
||||
* If CREATE/SET, add new options to array; if RESET, just check that the
|
||||
* user didn't say RESET (option=val). (Must do this because the grammar
|
||||
* doesn't enforce it.)
|
||||
*/
|
||||
foreach(cell, defList)
|
||||
{
|
||||
DefElem *def = lfirst(cell);
|
||||
DefElem *def = lfirst(cell);
|
||||
|
||||
if (isReset)
|
||||
{
|
||||
if (def->arg != NULL)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("RESET must not include values for parameters")));
|
||||
errmsg("RESET must not include values for parameters")));
|
||||
}
|
||||
else
|
||||
{
|
||||
text *t;
|
||||
text *t;
|
||||
const char *value;
|
||||
Size len;
|
||||
Size len;
|
||||
|
||||
if (ignoreOids && pg_strcasecmp(def->defname, "oids") == 0)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Flatten the DefElem into a text string like "name=arg".
|
||||
* If we have just "name", assume "name=true" is meant.
|
||||
* Flatten the DefElem into a text string like "name=arg". If we
|
||||
* have just "name", assume "name=true" is meant.
|
||||
*/
|
||||
if (def->arg != NULL)
|
||||
value = defGetString(def);
|
||||
@@ -163,10 +163,10 @@ transformRelOptions(Datum oldOptions, List *defList,
|
||||
* containing the corresponding value, or NULL if the keyword does not appear.
|
||||
*/
|
||||
void
|
||||
parseRelOptions(Datum options, int numkeywords, const char * const *keywords,
|
||||
parseRelOptions(Datum options, int numkeywords, const char *const * keywords,
|
||||
char **values, bool validate)
|
||||
{
|
||||
ArrayType *array;
|
||||
ArrayType *array;
|
||||
Datum *optiondatums;
|
||||
int noptions;
|
||||
int i;
|
||||
@@ -187,21 +187,21 @@ parseRelOptions(Datum options, int numkeywords, const char * const *keywords,
|
||||
|
||||
for (i = 0; i < noptions; i++)
|
||||
{
|
||||
text *optiontext = DatumGetTextP(optiondatums[i]);
|
||||
char *text_str = (char *) VARATT_DATA(optiontext);
|
||||
int text_len = VARATT_SIZE(optiontext) - VARHDRSZ;
|
||||
int j;
|
||||
text *optiontext = DatumGetTextP(optiondatums[i]);
|
||||
char *text_str = (char *) VARATT_DATA(optiontext);
|
||||
int text_len = VARATT_SIZE(optiontext) - VARHDRSZ;
|
||||
int j;
|
||||
|
||||
/* Search for a match in keywords */
|
||||
for (j = 0; j < numkeywords; j++)
|
||||
{
|
||||
int kw_len = strlen(keywords[j]);
|
||||
int kw_len = strlen(keywords[j]);
|
||||
|
||||
if (text_len > kw_len && text_str[kw_len] == '=' &&
|
||||
pg_strncasecmp(text_str, keywords[j], kw_len) == 0)
|
||||
{
|
||||
char *value;
|
||||
int value_len;
|
||||
char *value;
|
||||
int value_len;
|
||||
|
||||
if (values[j] && validate)
|
||||
ereport(ERROR,
|
||||
@@ -218,8 +218,8 @@ parseRelOptions(Datum options, int numkeywords, const char * const *keywords,
|
||||
}
|
||||
if (j >= numkeywords && validate)
|
||||
{
|
||||
char *s;
|
||||
char *p;
|
||||
char *s;
|
||||
char *p;
|
||||
|
||||
s = DatumGetCString(DirectFunctionCall1(textout, optiondatums[i]));
|
||||
p = strchr(s, '=');
|
||||
@@ -240,17 +240,17 @@ bytea *
|
||||
default_reloptions(Datum reloptions, bool validate,
|
||||
int minFillfactor, int defaultFillfactor)
|
||||
{
|
||||
static const char * const default_keywords[1] = { "fillfactor" };
|
||||
char *values[1];
|
||||
int32 fillfactor;
|
||||
static const char *const default_keywords[1] = {"fillfactor"};
|
||||
char *values[1];
|
||||
int32 fillfactor;
|
||||
StdRdOptions *result;
|
||||
|
||||
parseRelOptions(reloptions, 1, default_keywords, values, validate);
|
||||
|
||||
/*
|
||||
* If no options, we can just return NULL rather than doing anything.
|
||||
* (defaultFillfactor is thus not used, but we require callers to pass
|
||||
* it anyway since we would need it if more options were added.)
|
||||
* (defaultFillfactor is thus not used, but we require callers to pass it
|
||||
* anyway since we would need it if more options were added.)
|
||||
*/
|
||||
if (values[0] == NULL)
|
||||
return NULL;
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* ginarrayproc.c
|
||||
* support functions for GIN's indexing of any array
|
||||
* support functions for GIN's indexing of any array
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginarrayproc.c,v 1.5 2006/09/10 20:14:20 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginarrayproc.c,v 1.6 2006/10/04 00:29:47 momjian Exp $
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
@@ -23,64 +23,73 @@
|
||||
#define GinContainedStrategy 3
|
||||
#define GinEqualStrategy 4
|
||||
|
||||
#define ARRAYCHECK(x) do { \
|
||||
#define ARRAYCHECK(x) do { \
|
||||
if ( ARR_HASNULL(x) ) \
|
||||
ereport(ERROR, \
|
||||
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), \
|
||||
errmsg("array must not contain nulls"))); \
|
||||
} while(0)
|
||||
ereport(ERROR, \
|
||||
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), \
|
||||
errmsg("array must not contain nulls"))); \
|
||||
} while(0)
|
||||
|
||||
|
||||
/*
|
||||
* Function used as extractValue and extractQuery both
|
||||
*/
|
||||
Datum
|
||||
ginarrayextract(PG_FUNCTION_ARGS) {
|
||||
ArrayType *array;
|
||||
uint32 *nentries = (uint32*)PG_GETARG_POINTER(1);
|
||||
Datum *entries = NULL;
|
||||
int16 elmlen;
|
||||
bool elmbyval;
|
||||
char elmalign;
|
||||
ginarrayextract(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ArrayType *array;
|
||||
uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1);
|
||||
Datum *entries = NULL;
|
||||
int16 elmlen;
|
||||
bool elmbyval;
|
||||
char elmalign;
|
||||
|
||||
/* we should guarantee that array will not be destroyed during all operation */
|
||||
/*
|
||||
* we should guarantee that array will not be destroyed during all
|
||||
* operation
|
||||
*/
|
||||
array = PG_GETARG_ARRAYTYPE_P_COPY(0);
|
||||
|
||||
ARRAYCHECK(array);
|
||||
|
||||
get_typlenbyvalalign(ARR_ELEMTYPE(array),
|
||||
&elmlen, &elmbyval, &elmalign);
|
||||
&elmlen, &elmbyval, &elmalign);
|
||||
|
||||
deconstruct_array(array,
|
||||
ARR_ELEMTYPE(array),
|
||||
elmlen, elmbyval, elmalign,
|
||||
&entries, NULL, (int*)nentries);
|
||||
ARR_ELEMTYPE(array),
|
||||
elmlen, elmbyval, elmalign,
|
||||
&entries, NULL, (int *) nentries);
|
||||
|
||||
/* we should not free array, entries[i] points into it */
|
||||
PG_RETURN_POINTER(entries);
|
||||
}
|
||||
|
||||
Datum
|
||||
ginarrayconsistent(PG_FUNCTION_ARGS) {
|
||||
bool *check = (bool*)PG_GETARG_POINTER(0);
|
||||
StrategyNumber strategy = PG_GETARG_UINT16(1);
|
||||
ArrayType *query = PG_GETARG_ARRAYTYPE_P(2);
|
||||
int res, i, nentries;
|
||||
ginarrayconsistent(PG_FUNCTION_ARGS)
|
||||
{
|
||||
bool *check = (bool *) PG_GETARG_POINTER(0);
|
||||
StrategyNumber strategy = PG_GETARG_UINT16(1);
|
||||
ArrayType *query = PG_GETARG_ARRAYTYPE_P(2);
|
||||
int res,
|
||||
i,
|
||||
nentries;
|
||||
|
||||
/* ARRAYCHECK was already done by previous ginarrayextract call */
|
||||
|
||||
switch( strategy ) {
|
||||
switch (strategy)
|
||||
{
|
||||
case GinOverlapStrategy:
|
||||
case GinContainedStrategy:
|
||||
/* at least one element in check[] is true, so result = true */
|
||||
/* at least one element in check[] is true, so result = true */
|
||||
res = TRUE;
|
||||
break;
|
||||
case GinContainsStrategy:
|
||||
case GinEqualStrategy:
|
||||
nentries=ArrayGetNItems(ARR_NDIM(query), ARR_DIMS(query));
|
||||
nentries = ArrayGetNItems(ARR_NDIM(query), ARR_DIMS(query));
|
||||
res = TRUE;
|
||||
for(i=0;i<nentries;i++)
|
||||
if ( !check[i] ) {
|
||||
for (i = 0; i < nentries; i++)
|
||||
if (!check[i])
|
||||
{
|
||||
res = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* ginbtree.c
|
||||
* page utilities routines for the postgres inverted index access method.
|
||||
* page utilities routines for the postgres inverted index access method.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginbtree.c,v 1.4 2006/07/14 14:52:16 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginbtree.c,v 1.5 2006/10/04 00:29:47 momjian Exp $
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
@@ -20,24 +20,29 @@
|
||||
* Locks buffer by needed method for search.
|
||||
*/
|
||||
static int
|
||||
ginTraverseLock(Buffer buffer, bool searchMode) {
|
||||
Page page;
|
||||
int access=GIN_SHARE;
|
||||
ginTraverseLock(Buffer buffer, bool searchMode)
|
||||
{
|
||||
Page page;
|
||||
int access = GIN_SHARE;
|
||||
|
||||
LockBuffer(buffer, GIN_SHARE);
|
||||
page = BufferGetPage( buffer );
|
||||
if ( GinPageIsLeaf(page) ) {
|
||||
if ( searchMode == FALSE ) {
|
||||
page = BufferGetPage(buffer);
|
||||
if (GinPageIsLeaf(page))
|
||||
{
|
||||
if (searchMode == FALSE)
|
||||
{
|
||||
/* we should relock our page */
|
||||
LockBuffer(buffer, GIN_UNLOCK);
|
||||
LockBuffer(buffer, GIN_EXCLUSIVE);
|
||||
|
||||
/* But root can become non-leaf during relock */
|
||||
if ( !GinPageIsLeaf(page) ) {
|
||||
/* resore old lock type (very rare) */
|
||||
if (!GinPageIsLeaf(page))
|
||||
{
|
||||
/* restore old lock type (very rare) */
|
||||
LockBuffer(buffer, GIN_UNLOCK);
|
||||
LockBuffer(buffer, GIN_SHARE);
|
||||
} else
|
||||
}
|
||||
else
|
||||
access = GIN_EXCLUSIVE;
|
||||
}
|
||||
}
|
||||
@@ -45,9 +50,10 @@ ginTraverseLock(Buffer buffer, bool searchMode) {
|
||||
return access;
|
||||
}
|
||||
|
||||
GinBtreeStack*
|
||||
ginPrepareFindLeafPage(GinBtree btree, BlockNumber blkno) {
|
||||
GinBtreeStack *stack = (GinBtreeStack*)palloc(sizeof(GinBtreeStack));
|
||||
GinBtreeStack *
|
||||
ginPrepareFindLeafPage(GinBtree btree, BlockNumber blkno)
|
||||
{
|
||||
GinBtreeStack *stack = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
|
||||
|
||||
stack->blkno = blkno;
|
||||
stack->buffer = ReadBuffer(btree->index, stack->blkno);
|
||||
@@ -62,63 +68,73 @@ ginPrepareFindLeafPage(GinBtree btree, BlockNumber blkno) {
|
||||
/*
|
||||
* Locates the leaf page that contains the tuple
|
||||
*/
|
||||
GinBtreeStack*
|
||||
ginFindLeafPage(GinBtree btree, GinBtreeStack *stack) {
|
||||
bool isfirst=TRUE;
|
||||
GinBtreeStack *
|
||||
ginFindLeafPage(GinBtree btree, GinBtreeStack *stack)
|
||||
{
|
||||
bool isfirst = TRUE;
|
||||
BlockNumber rootBlkno;
|
||||
|
||||
if ( !stack )
|
||||
if (!stack)
|
||||
stack = ginPrepareFindLeafPage(btree, GIN_ROOT_BLKNO);
|
||||
rootBlkno = stack->blkno;
|
||||
|
||||
for(;;) {
|
||||
Page page;
|
||||
for (;;)
|
||||
{
|
||||
Page page;
|
||||
BlockNumber child;
|
||||
int access=GIN_SHARE;
|
||||
int access = GIN_SHARE;
|
||||
|
||||
stack->off = InvalidOffsetNumber;
|
||||
|
||||
page = BufferGetPage( stack->buffer );
|
||||
|
||||
if ( isfirst ) {
|
||||
if ( GinPageIsLeaf(page) && !btree->searchMode )
|
||||
page = BufferGetPage(stack->buffer);
|
||||
|
||||
if (isfirst)
|
||||
{
|
||||
if (GinPageIsLeaf(page) && !btree->searchMode)
|
||||
access = GIN_EXCLUSIVE;
|
||||
isfirst = FALSE;
|
||||
} else
|
||||
}
|
||||
else
|
||||
access = ginTraverseLock(stack->buffer, btree->searchMode);
|
||||
|
||||
/* ok, page is correctly locked, we should check to move right ..,
|
||||
root never has a right link, so small optimization */
|
||||
while( btree->fullScan==FALSE && stack->blkno != rootBlkno && btree->isMoveRight(btree, page) ) {
|
||||
/*
|
||||
* ok, page is correctly locked, we should check to move right ..,
|
||||
* root never has a right link, so small optimization
|
||||
*/
|
||||
while (btree->fullScan == FALSE && stack->blkno != rootBlkno && btree->isMoveRight(btree, page))
|
||||
{
|
||||
BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;
|
||||
|
||||
if ( rightlink==InvalidBlockNumber )
|
||||
if (rightlink == InvalidBlockNumber)
|
||||
/* rightmost page */
|
||||
break;
|
||||
|
||||
stack->blkno = rightlink;
|
||||
LockBuffer(stack->buffer, GIN_UNLOCK);
|
||||
stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
|
||||
LockBuffer(stack->buffer, access);
|
||||
page = BufferGetPage( stack->buffer );
|
||||
LockBuffer(stack->buffer, access);
|
||||
page = BufferGetPage(stack->buffer);
|
||||
}
|
||||
|
||||
if ( GinPageIsLeaf(page) ) /* we found, return locked page */
|
||||
if (GinPageIsLeaf(page)) /* we found, return locked page */
|
||||
return stack;
|
||||
|
||||
/* now we have correct buffer, try to find child */
|
||||
child = btree->findChildPage(btree, stack);
|
||||
|
||||
LockBuffer(stack->buffer, GIN_UNLOCK);
|
||||
Assert( child != InvalidBlockNumber );
|
||||
Assert( stack->blkno != child );
|
||||
Assert(child != InvalidBlockNumber);
|
||||
Assert(stack->blkno != child);
|
||||
|
||||
if ( btree->searchMode ) {
|
||||
if (btree->searchMode)
|
||||
{
|
||||
/* in search mode we may forget path to leaf */
|
||||
stack->blkno = child;
|
||||
stack->buffer = ReleaseAndReadBuffer( stack->buffer, btree->index, stack->blkno );
|
||||
} else {
|
||||
GinBtreeStack *ptr = (GinBtreeStack*)palloc(sizeof(GinBtreeStack));
|
||||
stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
|
||||
}
|
||||
else
|
||||
{
|
||||
GinBtreeStack *ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
|
||||
|
||||
ptr->parent = stack;
|
||||
stack = ptr;
|
||||
@@ -133,93 +149,110 @@ ginFindLeafPage(GinBtree btree, GinBtreeStack *stack) {
|
||||
}
|
||||
|
||||
void
|
||||
freeGinBtreeStack( GinBtreeStack *stack ) {
|
||||
while(stack) {
|
||||
GinBtreeStack *tmp = stack->parent;
|
||||
if ( stack->buffer != InvalidBuffer )
|
||||
freeGinBtreeStack(GinBtreeStack *stack)
|
||||
{
|
||||
while (stack)
|
||||
{
|
||||
GinBtreeStack *tmp = stack->parent;
|
||||
|
||||
if (stack->buffer != InvalidBuffer)
|
||||
ReleaseBuffer(stack->buffer);
|
||||
|
||||
pfree( stack );
|
||||
pfree(stack);
|
||||
stack = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to find parent for current stack position, returns correct
|
||||
* Try to find parent for current stack position, returns correct
|
||||
* parent and child's offset in stack->parent.
|
||||
* Function should never release root page to prevent conflicts
|
||||
* with vacuum process
|
||||
*/
|
||||
void
|
||||
findParents( GinBtree btree, GinBtreeStack *stack,
|
||||
BlockNumber rootBlkno) {
|
||||
findParents(GinBtree btree, GinBtreeStack *stack,
|
||||
BlockNumber rootBlkno)
|
||||
{
|
||||
|
||||
Page page;
|
||||
Buffer buffer;
|
||||
BlockNumber blkno, leftmostBlkno;
|
||||
Page page;
|
||||
Buffer buffer;
|
||||
BlockNumber blkno,
|
||||
leftmostBlkno;
|
||||
OffsetNumber offset;
|
||||
GinBtreeStack *root = stack->parent;
|
||||
GinBtreeStack *ptr;
|
||||
GinBtreeStack *root = stack->parent;
|
||||
GinBtreeStack *ptr;
|
||||
|
||||
if ( !root ) {
|
||||
if (!root)
|
||||
{
|
||||
/* XLog mode... */
|
||||
root = (GinBtreeStack*)palloc(sizeof(GinBtreeStack));
|
||||
root = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
|
||||
root->blkno = rootBlkno;
|
||||
root->buffer = ReadBuffer(btree->index, rootBlkno);
|
||||
LockBuffer(root->buffer, GIN_EXCLUSIVE);
|
||||
root->parent = NULL;
|
||||
} else {
|
||||
/* find root, we should not release root page until update is finished!! */
|
||||
while( root->parent ) {
|
||||
ReleaseBuffer( root->buffer );
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* find root, we should not release root page until update is
|
||||
* finished!!
|
||||
*/
|
||||
while (root->parent)
|
||||
{
|
||||
ReleaseBuffer(root->buffer);
|
||||
root = root->parent;
|
||||
}
|
||||
|
||||
Assert( root->blkno == rootBlkno );
|
||||
Assert( BufferGetBlockNumber(root->buffer) == rootBlkno );
|
||||
Assert(root->blkno == rootBlkno);
|
||||
Assert(BufferGetBlockNumber(root->buffer) == rootBlkno);
|
||||
LockBuffer(root->buffer, GIN_EXCLUSIVE);
|
||||
}
|
||||
root->off = InvalidOffsetNumber;
|
||||
|
||||
page = BufferGetPage(root->buffer);
|
||||
Assert( !GinPageIsLeaf(page) );
|
||||
Assert(!GinPageIsLeaf(page));
|
||||
|
||||
/* check trivial case */
|
||||
if ( (root->off = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) != InvalidOffsetNumber ) {
|
||||
if ((root->off = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) != InvalidOffsetNumber)
|
||||
{
|
||||
stack->parent = root;
|
||||
return;
|
||||
}
|
||||
|
||||
leftmostBlkno = blkno = btree->getLeftMostPage(btree, page);
|
||||
LockBuffer(root->buffer, GIN_UNLOCK );
|
||||
Assert( blkno!=InvalidBlockNumber );
|
||||
LockBuffer(root->buffer, GIN_UNLOCK);
|
||||
Assert(blkno != InvalidBlockNumber);
|
||||
|
||||
|
||||
for(;;) {
|
||||
for (;;)
|
||||
{
|
||||
buffer = ReadBuffer(btree->index, blkno);
|
||||
LockBuffer(buffer, GIN_EXCLUSIVE);
|
||||
page = BufferGetPage(buffer);
|
||||
if ( GinPageIsLeaf(page) )
|
||||
if (GinPageIsLeaf(page))
|
||||
elog(ERROR, "Lost path");
|
||||
|
||||
leftmostBlkno = btree->getLeftMostPage(btree, page);
|
||||
|
||||
while( (offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber))==InvalidOffsetNumber ) {
|
||||
while ((offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) == InvalidOffsetNumber)
|
||||
{
|
||||
blkno = GinPageGetOpaque(page)->rightlink;
|
||||
LockBuffer(buffer,GIN_UNLOCK);
|
||||
LockBuffer(buffer, GIN_UNLOCK);
|
||||
ReleaseBuffer(buffer);
|
||||
if ( blkno == InvalidBlockNumber )
|
||||
if (blkno == InvalidBlockNumber)
|
||||
break;
|
||||
buffer = ReadBuffer(btree->index, blkno);
|
||||
LockBuffer(buffer, GIN_EXCLUSIVE);
|
||||
page = BufferGetPage(buffer);
|
||||
}
|
||||
|
||||
if ( blkno != InvalidBlockNumber ) {
|
||||
ptr = (GinBtreeStack*)palloc(sizeof(GinBtreeStack));
|
||||
if (blkno != InvalidBlockNumber)
|
||||
{
|
||||
ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
|
||||
ptr->blkno = blkno;
|
||||
ptr->buffer = buffer;
|
||||
ptr->parent = root; /* it's may be wrong, but in next call we will correct */
|
||||
ptr->parent = root; /* this may be wrong, but the next call will
|
||||
* correct it */
|
||||
ptr->off = offset;
|
||||
stack->parent = ptr;
|
||||
return;
|
||||
@@ -233,79 +266,94 @@ findParents( GinBtree btree, GinBtreeStack *stack,
|
||||
* Insert value (stored in GinBtree) to tree described by stack
|
||||
*/
|
||||
void
|
||||
ginInsertValue(GinBtree btree, GinBtreeStack *stack) {
|
||||
GinBtreeStack *parent = stack;
|
||||
BlockNumber rootBlkno = InvalidBuffer;
|
||||
Page page, rpage, lpage;
|
||||
ginInsertValue(GinBtree btree, GinBtreeStack *stack)
|
||||
{
|
||||
GinBtreeStack *parent = stack;
|
||||
BlockNumber rootBlkno = InvalidBuffer;
|
||||
Page page,
|
||||
rpage,
|
||||
lpage;
|
||||
|
||||
/* remember root BlockNumber */
|
||||
while( parent ) {
|
||||
while (parent)
|
||||
{
|
||||
rootBlkno = parent->blkno;
|
||||
parent = parent->parent;
|
||||
}
|
||||
|
||||
while( stack ) {
|
||||
while (stack)
|
||||
{
|
||||
XLogRecData *rdata;
|
||||
BlockNumber savedRightLink;
|
||||
BlockNumber savedRightLink;
|
||||
|
||||
page = BufferGetPage( stack->buffer );
|
||||
page = BufferGetPage(stack->buffer);
|
||||
savedRightLink = GinPageGetOpaque(page)->rightlink;
|
||||
|
||||
if ( btree->isEnoughSpace( btree, stack->buffer, stack->off ) ) {
|
||||
if (btree->isEnoughSpace(btree, stack->buffer, stack->off))
|
||||
{
|
||||
START_CRIT_SECTION();
|
||||
btree->placeToPage( btree, stack->buffer, stack->off, &rdata );
|
||||
btree->placeToPage(btree, stack->buffer, stack->off, &rdata);
|
||||
|
||||
if (!btree->index->rd_istemp) {
|
||||
XLogRecPtr recptr;
|
||||
if (!btree->index->rd_istemp)
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata);
|
||||
PageSetLSN(page, recptr);
|
||||
PageSetTLI(page, ThisTimeLineID);
|
||||
}
|
||||
}
|
||||
|
||||
MarkBufferDirty( stack->buffer );
|
||||
MarkBufferDirty(stack->buffer);
|
||||
UnlockReleaseBuffer(stack->buffer);
|
||||
END_CRIT_SECTION();
|
||||
|
||||
freeGinBtreeStack(stack->parent);
|
||||
return;
|
||||
} else {
|
||||
Buffer rbuffer = GinNewBuffer(btree->index);
|
||||
Page newlpage;
|
||||
}
|
||||
else
|
||||
{
|
||||
Buffer rbuffer = GinNewBuffer(btree->index);
|
||||
Page newlpage;
|
||||
|
||||
/* newlpage is a pointer to memory page, it does'nt assosiates with buffer,
|
||||
stack->buffer shoud be untouched */
|
||||
newlpage = btree->splitPage( btree, stack->buffer, rbuffer, stack->off, &rdata );
|
||||
/*
|
||||
* newlpage is a pointer to a memory page, it isn't associated
|
||||
* with a buffer, stack->buffer should be untouched
|
||||
*/
|
||||
newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, &rdata);
|
||||
|
||||
|
||||
((ginxlogSplit*)(rdata->data))->rootBlkno = rootBlkno;
|
||||
((ginxlogSplit *) (rdata->data))->rootBlkno = rootBlkno;
|
||||
|
||||
parent = stack->parent;
|
||||
|
||||
if ( parent == NULL ) {
|
||||
/* split root, so we need to allocate new left page and
|
||||
place pointer on root to left and right page */
|
||||
Buffer lbuffer = GinNewBuffer(btree->index);
|
||||
if (parent == NULL)
|
||||
{
|
||||
/*
|
||||
* split root, so we need to allocate new left page and place
|
||||
* pointer on root to left and right page
|
||||
*/
|
||||
Buffer lbuffer = GinNewBuffer(btree->index);
|
||||
|
||||
((ginxlogSplit*)(rdata->data))->isRootSplit = TRUE;
|
||||
((ginxlogSplit*)(rdata->data))->rrlink = InvalidBlockNumber;
|
||||
((ginxlogSplit *) (rdata->data))->isRootSplit = TRUE;
|
||||
((ginxlogSplit *) (rdata->data))->rrlink = InvalidBlockNumber;
|
||||
|
||||
|
||||
page = BufferGetPage( stack->buffer );
|
||||
lpage = BufferGetPage( lbuffer );
|
||||
rpage = BufferGetPage( rbuffer );
|
||||
page = BufferGetPage(stack->buffer);
|
||||
lpage = BufferGetPage(lbuffer);
|
||||
rpage = BufferGetPage(rbuffer);
|
||||
|
||||
GinPageGetOpaque(rpage)->rightlink = InvalidBlockNumber;
|
||||
GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
|
||||
((ginxlogSplit*)(rdata->data))->lblkno = BufferGetBlockNumber(lbuffer);
|
||||
((ginxlogSplit *) (rdata->data))->lblkno = BufferGetBlockNumber(lbuffer);
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
GinInitBuffer( stack->buffer, GinPageGetOpaque(newlpage)->flags & ~GIN_LEAF );
|
||||
PageRestoreTempPage( newlpage, lpage );
|
||||
btree->fillRoot( btree, stack->buffer, lbuffer, rbuffer );
|
||||
if (!btree->index->rd_istemp) {
|
||||
XLogRecPtr recptr;
|
||||
GinInitBuffer(stack->buffer, GinPageGetOpaque(newlpage)->flags & ~GIN_LEAF);
|
||||
PageRestoreTempPage(newlpage, lpage);
|
||||
btree->fillRoot(btree, stack->buffer, lbuffer, rbuffer);
|
||||
if (!btree->index->rd_istemp)
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
|
||||
PageSetLSN(page, recptr);
|
||||
@@ -324,23 +372,26 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack) {
|
||||
UnlockReleaseBuffer(stack->buffer);
|
||||
|
||||
END_CRIT_SECTION();
|
||||
|
||||
return;
|
||||
} else {
|
||||
/* split non-root page */
|
||||
((ginxlogSplit*)(rdata->data))->isRootSplit = FALSE;
|
||||
((ginxlogSplit*)(rdata->data))->rrlink = savedRightLink;
|
||||
|
||||
lpage = BufferGetPage( stack->buffer );
|
||||
rpage = BufferGetPage( rbuffer );
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* split non-root page */
|
||||
((ginxlogSplit *) (rdata->data))->isRootSplit = FALSE;
|
||||
((ginxlogSplit *) (rdata->data))->rrlink = savedRightLink;
|
||||
|
||||
lpage = BufferGetPage(stack->buffer);
|
||||
rpage = BufferGetPage(rbuffer);
|
||||
|
||||
GinPageGetOpaque(rpage)->rightlink = savedRightLink;
|
||||
GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
|
||||
|
||||
START_CRIT_SECTION();
|
||||
PageRestoreTempPage( newlpage, lpage );
|
||||
if (!btree->index->rd_istemp) {
|
||||
XLogRecPtr recptr;
|
||||
PageRestoreTempPage(newlpage, lpage);
|
||||
if (!btree->index->rd_istemp)
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
|
||||
PageSetLSN(lpage, recptr);
|
||||
@@ -350,7 +401,7 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack) {
|
||||
}
|
||||
MarkBufferDirty(rbuffer);
|
||||
UnlockReleaseBuffer(rbuffer);
|
||||
MarkBufferDirty( stack->buffer );
|
||||
MarkBufferDirty(stack->buffer);
|
||||
END_CRIT_SECTION();
|
||||
}
|
||||
}
|
||||
@@ -361,31 +412,33 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack) {
|
||||
LockBuffer(parent->buffer, GIN_EXCLUSIVE);
|
||||
|
||||
/* move right if it's needed */
|
||||
page = BufferGetPage( parent->buffer );
|
||||
while( (parent->off=btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber ) {
|
||||
page = BufferGetPage(parent->buffer);
|
||||
while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber)
|
||||
{
|
||||
BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;
|
||||
|
||||
LockBuffer(parent->buffer, GIN_UNLOCK);
|
||||
|
||||
if ( rightlink==InvalidBlockNumber ) {
|
||||
/* rightmost page, but we don't find parent, we should
|
||||
use plain search... */
|
||||
if (rightlink == InvalidBlockNumber)
|
||||
{
|
||||
/*
|
||||
* rightmost page, but we don't find parent, we should use
|
||||
* plain search...
|
||||
*/
|
||||
findParents(btree, stack, rootBlkno);
|
||||
parent=stack->parent;
|
||||
page = BufferGetPage( parent->buffer );
|
||||
parent = stack->parent;
|
||||
page = BufferGetPage(parent->buffer);
|
||||
break;
|
||||
}
|
||||
|
||||
parent->blkno = rightlink;
|
||||
parent->buffer = ReleaseAndReadBuffer(parent->buffer, btree->index, parent->blkno);
|
||||
LockBuffer(parent->buffer, GIN_EXCLUSIVE);
|
||||
page = BufferGetPage( parent->buffer );
|
||||
LockBuffer(parent->buffer, GIN_EXCLUSIVE);
|
||||
page = BufferGetPage(parent->buffer);
|
||||
}
|
||||
|
||||
UnlockReleaseBuffer(stack->buffer);
|
||||
pfree( stack );
|
||||
pfree(stack);
|
||||
stack = parent;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* ginbulk.c
|
||||
* routines for fast build of inverted index
|
||||
* routines for fast build of inverted index
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.5 2006/08/29 14:05:44 teodor Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.6 2006/10/04 00:29:47 momjian Exp $
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
@@ -22,7 +22,8 @@
|
||||
#define DEF_NPTR 4
|
||||
|
||||
void
|
||||
ginInitBA(BuildAccumulator *accum) {
|
||||
ginInitBA(BuildAccumulator *accum)
|
||||
{
|
||||
accum->maxdepth = 1;
|
||||
accum->stackpos = 0;
|
||||
accum->entries = NULL;
|
||||
@@ -31,11 +32,13 @@ ginInitBA(BuildAccumulator *accum) {
|
||||
accum->entryallocator = NULL;
|
||||
}
|
||||
|
||||
static EntryAccumulator*
|
||||
EAAllocate( BuildAccumulator *accum ) {
|
||||
if ( accum->entryallocator == NULL || accum->length>=DEF_NENTRY ) {
|
||||
accum->entryallocator = palloc(sizeof(EntryAccumulator)*DEF_NENTRY);
|
||||
accum->allocatedMemory += sizeof(EntryAccumulator)*DEF_NENTRY;
|
||||
static EntryAccumulator *
|
||||
EAAllocate(BuildAccumulator *accum)
|
||||
{
|
||||
if (accum->entryallocator == NULL || accum->length >= DEF_NENTRY)
|
||||
{
|
||||
accum->entryallocator = palloc(sizeof(EntryAccumulator) * DEF_NENTRY);
|
||||
accum->allocatedMemory += sizeof(EntryAccumulator) * DEF_NENTRY;
|
||||
accum->length = 0;
|
||||
}
|
||||
|
||||
@@ -48,24 +51,27 @@ EAAllocate( BuildAccumulator *accum ) {
|
||||
* item pointer are ordered
|
||||
*/
|
||||
static void
|
||||
ginInsertData(BuildAccumulator *accum, EntryAccumulator *entry, ItemPointer heapptr) {
|
||||
if ( entry->number >= entry->length ) {
|
||||
ginInsertData(BuildAccumulator *accum, EntryAccumulator *entry, ItemPointer heapptr)
|
||||
{
|
||||
if (entry->number >= entry->length)
|
||||
{
|
||||
accum->allocatedMemory += sizeof(ItemPointerData) * entry->length;
|
||||
entry->length *= 2;
|
||||
entry->list = (ItemPointerData*)repalloc(entry->list,
|
||||
sizeof(ItemPointerData)*entry->length);
|
||||
entry->list = (ItemPointerData *) repalloc(entry->list,
|
||||
sizeof(ItemPointerData) * entry->length);
|
||||
}
|
||||
|
||||
if ( entry->shouldSort==FALSE ) {
|
||||
int res = compareItemPointers( entry->list + entry->number - 1, heapptr );
|
||||
if (entry->shouldSort == FALSE)
|
||||
{
|
||||
int res = compareItemPointers(entry->list + entry->number - 1, heapptr);
|
||||
|
||||
Assert( res != 0 );
|
||||
Assert(res != 0);
|
||||
|
||||
if ( res > 0 )
|
||||
entry->shouldSort=TRUE;
|
||||
if (res > 0)
|
||||
entry->shouldSort = TRUE;
|
||||
}
|
||||
|
||||
entry->list[ entry->number ] = *heapptr;
|
||||
entry->list[entry->number] = *heapptr;
|
||||
entry->number++;
|
||||
}
|
||||
|
||||
@@ -74,7 +80,8 @@ ginInsertData(BuildAccumulator *accum, EntryAccumulator *entry, ItemPointer heap
|
||||
* to avoid computing the datum size twice.
|
||||
*/
|
||||
static Datum
|
||||
getDatumCopy(BuildAccumulator *accum, Datum value) {
|
||||
getDatumCopy(BuildAccumulator *accum, Datum value)
|
||||
{
|
||||
Form_pg_attribute *att = accum->ginstate->tupdesc->attrs;
|
||||
Datum res;
|
||||
|
||||
@@ -100,51 +107,58 @@ getDatumCopy(BuildAccumulator *accum, Datum value) {
|
||||
* Find/store one entry from indexed value.
|
||||
*/
|
||||
static void
|
||||
ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, Datum entry) {
|
||||
EntryAccumulator *ea = accum->entries, *pea = NULL;
|
||||
int res = 0;
|
||||
uint32 depth = 1;
|
||||
ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, Datum entry)
|
||||
{
|
||||
EntryAccumulator *ea = accum->entries,
|
||||
*pea = NULL;
|
||||
int res = 0;
|
||||
uint32 depth = 1;
|
||||
|
||||
while( ea ) {
|
||||
while (ea)
|
||||
{
|
||||
res = compareEntries(accum->ginstate, entry, ea->value);
|
||||
if ( res == 0 )
|
||||
break; /* found */
|
||||
else {
|
||||
if (res == 0)
|
||||
break; /* found */
|
||||
else
|
||||
{
|
||||
pea = ea;
|
||||
if ( res < 0 )
|
||||
if (res < 0)
|
||||
ea = ea->left;
|
||||
else
|
||||
ea = ea->right;
|
||||
}
|
||||
depth++;
|
||||
}
|
||||
|
||||
if ( depth > accum->maxdepth )
|
||||
|
||||
if (depth > accum->maxdepth)
|
||||
accum->maxdepth = depth;
|
||||
|
||||
if ( ea == NULL ) {
|
||||
if (ea == NULL)
|
||||
{
|
||||
ea = EAAllocate(accum);
|
||||
|
||||
ea->left = ea->right = NULL;
|
||||
ea->value = getDatumCopy(accum, entry);
|
||||
ea->value = getDatumCopy(accum, entry);
|
||||
ea->length = DEF_NPTR;
|
||||
ea->number = 1;
|
||||
ea->shouldSort = FALSE;
|
||||
ea->list = (ItemPointerData*)palloc(sizeof(ItemPointerData)*DEF_NPTR);
|
||||
ea->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR);
|
||||
ea->list[0] = *heapptr;
|
||||
accum->allocatedMemory += sizeof(ItemPointerData)*DEF_NPTR;
|
||||
accum->allocatedMemory += sizeof(ItemPointerData) * DEF_NPTR;
|
||||
|
||||
if ( pea == NULL )
|
||||
if (pea == NULL)
|
||||
accum->entries = ea;
|
||||
else {
|
||||
Assert( res != 0 );
|
||||
if ( res < 0 )
|
||||
else
|
||||
{
|
||||
Assert(res != 0);
|
||||
if (res < 0)
|
||||
pea->left = ea;
|
||||
else
|
||||
pea->right = ea;
|
||||
}
|
||||
} else
|
||||
ginInsertData( accum, ea, heapptr );
|
||||
}
|
||||
else
|
||||
ginInsertData(accum, ea, heapptr);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -152,22 +166,23 @@ ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, Datum entry) {
|
||||
* then calls itself for each parts
|
||||
*/
|
||||
static void
|
||||
ginChooseElem(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry,
|
||||
uint32 low, uint32 high, uint32 offset) {
|
||||
uint32 pos;
|
||||
uint32 middle = (low+high)>>1;
|
||||
ginChooseElem(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry,
|
||||
uint32 low, uint32 high, uint32 offset)
|
||||
{
|
||||
uint32 pos;
|
||||
uint32 middle = (low + high) >> 1;
|
||||
|
||||
pos = (low+middle)>>1;
|
||||
if ( low!=middle && pos>=offset && pos-offset < nentry )
|
||||
ginInsertEntry( accum, heapptr, entries[ pos-offset ]);
|
||||
pos = (high+middle+1)>>1;
|
||||
if ( middle+1 != high && pos>=offset && pos-offset < nentry )
|
||||
ginInsertEntry( accum, heapptr, entries[ pos-offset ]);
|
||||
pos = (low + middle) >> 1;
|
||||
if (low != middle && pos >= offset && pos - offset < nentry)
|
||||
ginInsertEntry(accum, heapptr, entries[pos - offset]);
|
||||
pos = (high + middle + 1) >> 1;
|
||||
if (middle + 1 != high && pos >= offset && pos - offset < nentry)
|
||||
ginInsertEntry(accum, heapptr, entries[pos - offset]);
|
||||
|
||||
if ( low!=middle )
|
||||
ginChooseElem(accum, heapptr, entries, nentry, low, middle, offset );
|
||||
if ( high!=middle+1 )
|
||||
ginChooseElem(accum, heapptr, entries, nentry, middle+1, high, offset );
|
||||
if (low != middle)
|
||||
ginChooseElem(accum, heapptr, entries, nentry, low, middle, offset);
|
||||
if (high != middle + 1)
|
||||
ginChooseElem(accum, heapptr, entries, nentry, middle + 1, high, offset);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -176,56 +191,71 @@ ginChooseElem(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint
|
||||
* next middle on left part and middle of right part.
|
||||
*/
|
||||
void
|
||||
ginInsertRecordBA( BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry ) {
|
||||
uint32 i, nbit=0, offset;
|
||||
ginInsertRecordBA(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry)
|
||||
{
|
||||
uint32 i,
|
||||
nbit = 0,
|
||||
offset;
|
||||
|
||||
if (nentry==0)
|
||||
if (nentry == 0)
|
||||
return;
|
||||
|
||||
i=nentry-1;
|
||||
for(;i>0;i>>=1) nbit++;
|
||||
i = nentry - 1;
|
||||
for (; i > 0; i >>= 1)
|
||||
nbit++;
|
||||
|
||||
nbit = 1<<nbit;
|
||||
offset = (nbit-nentry)/2;
|
||||
nbit = 1 << nbit;
|
||||
offset = (nbit - nentry) / 2;
|
||||
|
||||
ginInsertEntry( accum, heapptr, entries[ (nbit>>1)-offset ]);
|
||||
ginInsertEntry(accum, heapptr, entries[(nbit >> 1) - offset]);
|
||||
ginChooseElem(accum, heapptr, entries, nentry, 0, nbit, offset);
|
||||
}
|
||||
|
||||
static int
|
||||
qsortCompareItemPointers( const void *a, const void *b ) {
|
||||
int res = compareItemPointers( (ItemPointer)a, (ItemPointer)b );
|
||||
Assert( res!=0 );
|
||||
static int
|
||||
qsortCompareItemPointers(const void *a, const void *b)
|
||||
{
|
||||
int res = compareItemPointers((ItemPointer) a, (ItemPointer) b);
|
||||
|
||||
Assert(res != 0);
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
* walk on binary tree and returns ordered nodes
|
||||
*/
|
||||
static EntryAccumulator*
|
||||
walkTree( BuildAccumulator *accum ) {
|
||||
EntryAccumulator *entry = accum->stack[ accum->stackpos ];
|
||||
* walk on binary tree and returns ordered nodes
|
||||
*/
|
||||
static EntryAccumulator *
|
||||
walkTree(BuildAccumulator *accum)
|
||||
{
|
||||
EntryAccumulator *entry = accum->stack[accum->stackpos];
|
||||
|
||||
if ( entry->list != NULL ) {
|
||||
if (entry->list != NULL)
|
||||
{
|
||||
/* return entry itself: we already was at left sublink */
|
||||
return entry;
|
||||
} else if ( entry->right && entry->right != accum->stack[ accum->stackpos+1 ] ) {
|
||||
}
|
||||
else if (entry->right && entry->right != accum->stack[accum->stackpos + 1])
|
||||
{
|
||||
/* go on right sublink */
|
||||
accum->stackpos++;
|
||||
entry = entry->right;
|
||||
|
||||
/* find most-left value */
|
||||
for(;;) {
|
||||
accum->stack[ accum->stackpos ] = entry;
|
||||
if ( entry->left ) {
|
||||
for (;;)
|
||||
{
|
||||
accum->stack[accum->stackpos] = entry;
|
||||
if (entry->left)
|
||||
{
|
||||
accum->stackpos++;
|
||||
entry = entry->left;
|
||||
} else
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
/* we already return all left subtree, itself and right subtree */
|
||||
if ( accum->stackpos == 0 )
|
||||
if (accum->stackpos == 0)
|
||||
return 0;
|
||||
accum->stackpos--;
|
||||
return walkTree(accum);
|
||||
@@ -234,47 +264,53 @@ walkTree( BuildAccumulator *accum ) {
|
||||
return entry;
|
||||
}
|
||||
|
||||
ItemPointerData*
|
||||
ginGetEntry(BuildAccumulator *accum, Datum *value, uint32 *n) {
|
||||
EntryAccumulator *entry;
|
||||
ItemPointerData *
|
||||
ginGetEntry(BuildAccumulator *accum, Datum *value, uint32 *n)
|
||||
{
|
||||
EntryAccumulator *entry;
|
||||
ItemPointerData *list;
|
||||
|
||||
|
||||
if ( accum->stack == NULL ) {
|
||||
if (accum->stack == NULL)
|
||||
{
|
||||
/* first call */
|
||||
accum->stack = palloc0(sizeof(EntryAccumulator*)*(accum->maxdepth+1));
|
||||
accum->stack = palloc0(sizeof(EntryAccumulator *) * (accum->maxdepth + 1));
|
||||
entry = accum->entries;
|
||||
|
||||
if ( entry == NULL )
|
||||
if (entry == NULL)
|
||||
return NULL;
|
||||
|
||||
/* find most-left value */
|
||||
for(;;) {
|
||||
accum->stack[ accum->stackpos ] = entry;
|
||||
if ( entry->left ) {
|
||||
for (;;)
|
||||
{
|
||||
accum->stack[accum->stackpos] = entry;
|
||||
if (entry->left)
|
||||
{
|
||||
accum->stackpos++;
|
||||
entry = entry->left;
|
||||
} else
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
pfree( accum->stack[ accum->stackpos ]->list );
|
||||
accum->stack[ accum->stackpos ]->list = NULL;
|
||||
entry = walkTree( accum );
|
||||
}
|
||||
else
|
||||
{
|
||||
pfree(accum->stack[accum->stackpos]->list);
|
||||
accum->stack[accum->stackpos]->list = NULL;
|
||||
entry = walkTree(accum);
|
||||
}
|
||||
|
||||
if ( entry == NULL )
|
||||
if (entry == NULL)
|
||||
return NULL;
|
||||
|
||||
*n = entry->number;
|
||||
*value = entry->value;
|
||||
list = entry->list;
|
||||
*n = entry->number;
|
||||
*value = entry->value;
|
||||
list = entry->list;
|
||||
|
||||
Assert(list != NULL);
|
||||
|
||||
if ( entry->shouldSort && entry->number > 1 )
|
||||
if (entry->shouldSort && entry->number > 1)
|
||||
qsort(list, *n, sizeof(ItemPointerData), qsortCompareItemPointers);
|
||||
|
||||
return list;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* gindatapage.c
|
||||
* page utilities routines for the postgres inverted index access method.
|
||||
* page utilities routines for the postgres inverted index access method.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.3 2006/07/16 00:52:05 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.4 2006/10/04 00:29:47 momjian Exp $
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
@@ -16,50 +16,56 @@
|
||||
#include "access/gin.h"
|
||||
|
||||
int
|
||||
compareItemPointers( ItemPointer a, ItemPointer b ) {
|
||||
if ( GinItemPointerGetBlockNumber(a) == GinItemPointerGetBlockNumber(b) ) {
|
||||
if ( GinItemPointerGetOffsetNumber(a) == GinItemPointerGetOffsetNumber(b) )
|
||||
compareItemPointers(ItemPointer a, ItemPointer b)
|
||||
{
|
||||
if (GinItemPointerGetBlockNumber(a) == GinItemPointerGetBlockNumber(b))
|
||||
{
|
||||
if (GinItemPointerGetOffsetNumber(a) == GinItemPointerGetOffsetNumber(b))
|
||||
return 0;
|
||||
return ( GinItemPointerGetOffsetNumber(a) > GinItemPointerGetOffsetNumber(b) ) ? 1 : -1;
|
||||
}
|
||||
return (GinItemPointerGetOffsetNumber(a) > GinItemPointerGetOffsetNumber(b)) ? 1 : -1;
|
||||
}
|
||||
|
||||
return ( GinItemPointerGetBlockNumber(a) > GinItemPointerGetBlockNumber(b) ) ? 1 : -1;
|
||||
return (GinItemPointerGetBlockNumber(a) > GinItemPointerGetBlockNumber(b)) ? 1 : -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Merge two ordered array of itempointer
|
||||
*/
|
||||
void
|
||||
MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb) {
|
||||
void
|
||||
MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb)
|
||||
{
|
||||
ItemPointerData *dptr = dst;
|
||||
ItemPointerData *aptr = a, *bptr = b;
|
||||
ItemPointerData *aptr = a,
|
||||
*bptr = b;
|
||||
|
||||
while( aptr - a < na && bptr - b < nb ) {
|
||||
if ( compareItemPointers(aptr, bptr) > 0 )
|
||||
while (aptr - a < na && bptr - b < nb)
|
||||
{
|
||||
if (compareItemPointers(aptr, bptr) > 0)
|
||||
*dptr++ = *bptr++;
|
||||
else
|
||||
*dptr++ = *aptr++;
|
||||
}
|
||||
|
||||
while( aptr - a < na )
|
||||
while (aptr - a < na)
|
||||
*dptr++ = *aptr++;
|
||||
|
||||
while( bptr - b < nb )
|
||||
while (bptr - b < nb)
|
||||
*dptr++ = *bptr++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Checks, should we move to right link...
|
||||
* Checks, should we move to right link...
|
||||
* Compares inserting itemp pointer with right bound of current page
|
||||
*/
|
||||
static bool
|
||||
dataIsMoveRight(GinBtree btree, Page page) {
|
||||
ItemPointer iptr = GinDataPageGetRightBound(page);
|
||||
dataIsMoveRight(GinBtree btree, Page page)
|
||||
{
|
||||
ItemPointer iptr = GinDataPageGetRightBound(page);
|
||||
|
||||
if ( GinPageRightMost(page) )
|
||||
return FALSE;
|
||||
if (GinPageRightMost(page))
|
||||
return FALSE;
|
||||
|
||||
return ( compareItemPointers( btree->items + btree->curitem, iptr ) > 0 ) ? TRUE : FALSE;
|
||||
return (compareItemPointers(btree->items + btree->curitem, iptr) > 0) ? TRUE : FALSE;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -67,94 +73,113 @@ dataIsMoveRight(GinBtree btree, Page page) {
|
||||
* page correctly choosen and searching value SHOULD be on page
|
||||
*/
|
||||
static BlockNumber
|
||||
dataLocateItem(GinBtree btree, GinBtreeStack *stack) {
|
||||
OffsetNumber low, high, maxoff;
|
||||
PostingItem *pitem=NULL;
|
||||
int result;
|
||||
Page page = BufferGetPage( stack->buffer );
|
||||
dataLocateItem(GinBtree btree, GinBtreeStack *stack)
|
||||
{
|
||||
OffsetNumber low,
|
||||
high,
|
||||
maxoff;
|
||||
PostingItem *pitem = NULL;
|
||||
int result;
|
||||
Page page = BufferGetPage(stack->buffer);
|
||||
|
||||
Assert( !GinPageIsLeaf(page) );
|
||||
Assert( GinPageIsData(page) );
|
||||
Assert(!GinPageIsLeaf(page));
|
||||
Assert(GinPageIsData(page));
|
||||
|
||||
if ( btree->fullScan ) {
|
||||
if (btree->fullScan)
|
||||
{
|
||||
stack->off = FirstOffsetNumber;
|
||||
stack->predictNumber *= GinPageGetOpaque(page)->maxoff;
|
||||
return btree->getLeftMostPage(btree, page);
|
||||
}
|
||||
|
||||
low = FirstOffsetNumber;
|
||||
maxoff = high = GinPageGetOpaque(page)->maxoff;
|
||||
Assert( high >= low );
|
||||
maxoff = high = GinPageGetOpaque(page)->maxoff;
|
||||
Assert(high >= low);
|
||||
|
||||
high++;
|
||||
|
||||
while (high > low) {
|
||||
while (high > low)
|
||||
{
|
||||
OffsetNumber mid = low + ((high - low) / 2);
|
||||
pitem = (PostingItem*)GinDataPageGetItem(page,mid);
|
||||
|
||||
if ( mid == maxoff )
|
||||
/* Right infinity, page already correctly choosen
|
||||
with a help of dataIsMoveRight */
|
||||
pitem = (PostingItem *) GinDataPageGetItem(page, mid);
|
||||
|
||||
if (mid == maxoff)
|
||||
|
||||
/*
|
||||
* Right infinity, page already correctly choosen with a help of
|
||||
* dataIsMoveRight
|
||||
*/
|
||||
result = -1;
|
||||
else {
|
||||
pitem = (PostingItem*)GinDataPageGetItem(page,mid);
|
||||
result = compareItemPointers( btree->items + btree->curitem, &( pitem->key ) );
|
||||
else
|
||||
{
|
||||
pitem = (PostingItem *) GinDataPageGetItem(page, mid);
|
||||
result = compareItemPointers(btree->items + btree->curitem, &(pitem->key));
|
||||
}
|
||||
|
||||
if ( result == 0 ) {
|
||||
if (result == 0)
|
||||
{
|
||||
stack->off = mid;
|
||||
return PostingItemGetBlockNumber(pitem);
|
||||
} else if ( result > 0 )
|
||||
}
|
||||
else if (result > 0)
|
||||
low = mid + 1;
|
||||
else
|
||||
high = mid;
|
||||
}
|
||||
|
||||
Assert( high>=FirstOffsetNumber && high <= maxoff );
|
||||
Assert(high >= FirstOffsetNumber && high <= maxoff);
|
||||
|
||||
stack->off = high;
|
||||
pitem = (PostingItem*)GinDataPageGetItem(page,high);
|
||||
pitem = (PostingItem *) GinDataPageGetItem(page, high);
|
||||
return PostingItemGetBlockNumber(pitem);
|
||||
}
|
||||
|
||||
/*
|
||||
/*
|
||||
* Searches correct position for value on leaf page.
|
||||
* Page should be corrrectly choosen.
|
||||
* Page should be corrrectly choosen.
|
||||
* Returns true if value found on page.
|
||||
*/
|
||||
static bool
|
||||
dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack) {
|
||||
Page page = BufferGetPage( stack->buffer );
|
||||
OffsetNumber low, high;
|
||||
int result;
|
||||
dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack)
|
||||
{
|
||||
Page page = BufferGetPage(stack->buffer);
|
||||
OffsetNumber low,
|
||||
high;
|
||||
int result;
|
||||
|
||||
Assert( GinPageIsLeaf(page) );
|
||||
Assert( GinPageIsData(page) );
|
||||
Assert(GinPageIsLeaf(page));
|
||||
Assert(GinPageIsData(page));
|
||||
|
||||
if ( btree->fullScan ) {
|
||||
if (btree->fullScan)
|
||||
{
|
||||
stack->off = FirstOffsetNumber;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
low=FirstOffsetNumber;
|
||||
low = FirstOffsetNumber;
|
||||
high = GinPageGetOpaque(page)->maxoff;
|
||||
|
||||
if ( high < low ) {
|
||||
if (high < low)
|
||||
{
|
||||
stack->off = FirstOffsetNumber;
|
||||
return false;
|
||||
}
|
||||
|
||||
high++;
|
||||
|
||||
while (high > low) {
|
||||
while (high > low)
|
||||
{
|
||||
OffsetNumber mid = low + ((high - low) / 2);
|
||||
|
||||
result = compareItemPointers( btree->items + btree->curitem, (ItemPointer)GinDataPageGetItem(page,mid) );
|
||||
result = compareItemPointers(btree->items + btree->curitem, (ItemPointer) GinDataPageGetItem(page, mid));
|
||||
|
||||
if ( result == 0 ) {
|
||||
if (result == 0)
|
||||
{
|
||||
stack->off = mid;
|
||||
return true;
|
||||
} else if ( result > 0 )
|
||||
}
|
||||
else if (result > 0)
|
||||
low = mid + 1;
|
||||
else
|
||||
high = mid;
|
||||
@@ -169,34 +194,41 @@ dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack) {
|
||||
* offset of PostingItem
|
||||
*/
|
||||
static OffsetNumber
|
||||
dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) {
|
||||
OffsetNumber i, maxoff = GinPageGetOpaque(page)->maxoff;
|
||||
dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
|
||||
{
|
||||
OffsetNumber i,
|
||||
maxoff = GinPageGetOpaque(page)->maxoff;
|
||||
PostingItem *pitem;
|
||||
|
||||
Assert( !GinPageIsLeaf(page) );
|
||||
Assert( GinPageIsData(page) );
|
||||
Assert(!GinPageIsLeaf(page));
|
||||
Assert(GinPageIsData(page));
|
||||
|
||||
/* if page isn't changed, we returns storedOff */
|
||||
if ( storedOff>= FirstOffsetNumber && storedOff<=maxoff) {
|
||||
pitem = (PostingItem*)GinDataPageGetItem(page, storedOff);
|
||||
if ( PostingItemGetBlockNumber(pitem) == blkno )
|
||||
if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
|
||||
{
|
||||
pitem = (PostingItem *) GinDataPageGetItem(page, storedOff);
|
||||
if (PostingItemGetBlockNumber(pitem) == blkno)
|
||||
return storedOff;
|
||||
|
||||
/* we hope, that needed pointer goes to right. It's true
|
||||
if there wasn't a deletion */
|
||||
for( i=storedOff+1 ; i <= maxoff ; i++ ) {
|
||||
pitem = (PostingItem*)GinDataPageGetItem(page, i);
|
||||
if ( PostingItemGetBlockNumber(pitem) == blkno )
|
||||
/*
|
||||
* we hope, that needed pointer goes to right. It's true if there
|
||||
* wasn't a deletion
|
||||
*/
|
||||
for (i = storedOff + 1; i <= maxoff; i++)
|
||||
{
|
||||
pitem = (PostingItem *) GinDataPageGetItem(page, i);
|
||||
if (PostingItemGetBlockNumber(pitem) == blkno)
|
||||
return i;
|
||||
}
|
||||
|
||||
maxoff = storedOff-1;
|
||||
maxoff = storedOff - 1;
|
||||
}
|
||||
|
||||
/* last chance */
|
||||
for( i=FirstOffsetNumber; i <= maxoff ; i++ ) {
|
||||
pitem = (PostingItem*)GinDataPageGetItem(page, i);
|
||||
if ( PostingItemGetBlockNumber(pitem) == blkno )
|
||||
for (i = FirstOffsetNumber; i <= maxoff; i++)
|
||||
{
|
||||
pitem = (PostingItem *) GinDataPageGetItem(page, i);
|
||||
if (PostingItemGetBlockNumber(pitem) == blkno)
|
||||
return i;
|
||||
}
|
||||
|
||||
@@ -207,14 +239,15 @@ dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber stor
|
||||
* retunrs blkno of lefmost child
|
||||
*/
|
||||
static BlockNumber
|
||||
dataGetLeftMostPage(GinBtree btree, Page page) {
|
||||
dataGetLeftMostPage(GinBtree btree, Page page)
|
||||
{
|
||||
PostingItem *pitem;
|
||||
|
||||
Assert( !GinPageIsLeaf(page) );
|
||||
Assert( GinPageIsData(page) );
|
||||
Assert( GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber );
|
||||
Assert(!GinPageIsLeaf(page));
|
||||
Assert(GinPageIsData(page));
|
||||
Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber);
|
||||
|
||||
pitem = (PostingItem*)GinDataPageGetItem(page, FirstOffsetNumber);
|
||||
pitem = (PostingItem *) GinDataPageGetItem(page, FirstOffsetNumber);
|
||||
return PostingItemGetBlockNumber(pitem);
|
||||
}
|
||||
|
||||
@@ -223,18 +256,22 @@ dataGetLeftMostPage(GinBtree btree, Page page) {
|
||||
* correct value! depending on leaf or non-leaf page
|
||||
*/
|
||||
void
|
||||
GinDataPageAddItem( Page page, void *data, OffsetNumber offset ) {
|
||||
GinDataPageAddItem(Page page, void *data, OffsetNumber offset)
|
||||
{
|
||||
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
|
||||
char *ptr;
|
||||
char *ptr;
|
||||
|
||||
if ( offset == InvalidOffsetNumber ) {
|
||||
ptr = GinDataPageGetItem(page,maxoff+1);
|
||||
} else {
|
||||
ptr = GinDataPageGetItem(page,offset);
|
||||
if ( maxoff+1-offset != 0 )
|
||||
memmove( ptr+GinSizeOfItem(page), ptr, (maxoff-offset+1) * GinSizeOfItem(page) );
|
||||
if (offset == InvalidOffsetNumber)
|
||||
{
|
||||
ptr = GinDataPageGetItem(page, maxoff + 1);
|
||||
}
|
||||
memcpy( ptr, data, GinSizeOfItem(page) );
|
||||
else
|
||||
{
|
||||
ptr = GinDataPageGetItem(page, offset);
|
||||
if (maxoff + 1 - offset != 0)
|
||||
memmove(ptr + GinSizeOfItem(page), ptr, (maxoff - offset + 1) * GinSizeOfItem(page));
|
||||
}
|
||||
memcpy(ptr, data, GinSizeOfItem(page));
|
||||
|
||||
GinPageGetOpaque(page)->maxoff++;
|
||||
}
|
||||
@@ -243,15 +280,16 @@ GinDataPageAddItem( Page page, void *data, OffsetNumber offset ) {
|
||||
* Deletes posting item from non-leaf page
|
||||
*/
|
||||
void
|
||||
PageDeletePostingItem(Page page, OffsetNumber offset) {
|
||||
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
|
||||
PageDeletePostingItem(Page page, OffsetNumber offset)
|
||||
{
|
||||
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
|
||||
|
||||
Assert( !GinPageIsLeaf(page) );
|
||||
Assert( offset>=FirstOffsetNumber && offset <= maxoff );
|
||||
Assert(!GinPageIsLeaf(page));
|
||||
Assert(offset >= FirstOffsetNumber && offset <= maxoff);
|
||||
|
||||
if ( offset != maxoff )
|
||||
memmove( GinDataPageGetItem(page,offset), GinDataPageGetItem(page,offset+1),
|
||||
sizeof(PostingItem) * (maxoff-offset) );
|
||||
if (offset != maxoff)
|
||||
memmove(GinDataPageGetItem(page, offset), GinDataPageGetItem(page, offset + 1),
|
||||
sizeof(PostingItem) * (maxoff - offset));
|
||||
|
||||
GinPageGetOpaque(page)->maxoff--;
|
||||
}
|
||||
@@ -261,19 +299,24 @@ PageDeletePostingItem(Page page, OffsetNumber offset) {
|
||||
* item pointer never deletes!
|
||||
*/
|
||||
static bool
|
||||
dataIsEnoughSpace( GinBtree btree, Buffer buf, OffsetNumber off ) {
|
||||
Page page = BufferGetPage(buf);
|
||||
dataIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
|
||||
{
|
||||
Page page = BufferGetPage(buf);
|
||||
|
||||
Assert( GinPageIsData(page) );
|
||||
Assert( !btree->isDelete );
|
||||
Assert(GinPageIsData(page));
|
||||
Assert(!btree->isDelete);
|
||||
|
||||
if ( GinPageIsLeaf(page) ) {
|
||||
if ( GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff ) {
|
||||
if ( (btree->nitem - btree->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page) )
|
||||
if (GinPageIsLeaf(page))
|
||||
{
|
||||
if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
|
||||
{
|
||||
if ((btree->nitem - btree->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
|
||||
return true;
|
||||
} else if ( sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page) )
|
||||
}
|
||||
else if (sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
|
||||
return true;
|
||||
} else if ( sizeof(PostingItem) <= GinDataPageGetFreeSpace(page) )
|
||||
}
|
||||
else if (sizeof(PostingItem) <= GinDataPageGetFreeSpace(page))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
@@ -285,14 +328,17 @@ dataIsEnoughSpace( GinBtree btree, Buffer buf, OffsetNumber off ) {
|
||||
* item pointer never deletes!
|
||||
*/
|
||||
static BlockNumber
|
||||
dataPrepareData( GinBtree btree, Page page, OffsetNumber off) {
|
||||
dataPrepareData(GinBtree btree, Page page, OffsetNumber off)
|
||||
{
|
||||
BlockNumber ret = InvalidBlockNumber;
|
||||
|
||||
Assert( GinPageIsData(page) );
|
||||
Assert(GinPageIsData(page));
|
||||
|
||||
if ( !GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber ) {
|
||||
PostingItem *pitem = (PostingItem*)GinDataPageGetItem(page,off);
|
||||
PostingItemSetBlockNumber( pitem, btree->rightblkno );
|
||||
if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
|
||||
{
|
||||
PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, off);
|
||||
|
||||
PostingItemSetBlockNumber(pitem, btree->rightblkno);
|
||||
ret = btree->rightblkno;
|
||||
}
|
||||
|
||||
@@ -301,24 +347,25 @@ dataPrepareData( GinBtree btree, Page page, OffsetNumber off) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
/*
|
||||
* Places keys to page and fills WAL record. In case leaf page and
|
||||
* build mode puts all ItemPointers to page.
|
||||
*/
|
||||
static void
|
||||
dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata) {
|
||||
Page page = BufferGetPage(buf);
|
||||
dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
|
||||
{
|
||||
Page page = BufferGetPage(buf);
|
||||
static XLogRecData rdata[3];
|
||||
int sizeofitem = GinSizeOfItem(page);
|
||||
static ginxlogInsert data;
|
||||
int sizeofitem = GinSizeOfItem(page);
|
||||
static ginxlogInsert data;
|
||||
|
||||
*prdata = rdata;
|
||||
Assert( GinPageIsData(page) );
|
||||
Assert(GinPageIsData(page));
|
||||
|
||||
data.updateBlkno = dataPrepareData( btree, page, off );
|
||||
data.updateBlkno = dataPrepareData(btree, page, off);
|
||||
|
||||
data.node = btree->index->rd_node;
|
||||
data.blkno = BufferGetBlockNumber( buf );
|
||||
data.blkno = BufferGetBlockNumber(buf);
|
||||
data.offset = off;
|
||||
data.nitem = 1;
|
||||
data.isDelete = FALSE;
|
||||
@@ -337,109 +384,124 @@ dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prda
|
||||
rdata[1].next = &rdata[2];
|
||||
|
||||
rdata[2].buffer = InvalidBuffer;
|
||||
rdata[2].data = (GinPageIsLeaf(page)) ? ((char*)(btree->items+btree->curitem)) : ((char*)&(btree->pitem));
|
||||
rdata[2].data = (GinPageIsLeaf(page)) ? ((char *) (btree->items + btree->curitem)) : ((char *) &(btree->pitem));
|
||||
rdata[2].len = sizeofitem;
|
||||
rdata[2].next = NULL;
|
||||
|
||||
if ( GinPageIsLeaf(page) ) {
|
||||
if ( GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff ) {
|
||||
if (GinPageIsLeaf(page))
|
||||
{
|
||||
if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
|
||||
{
|
||||
/* usually, create index... */
|
||||
uint32 savedPos = btree->curitem;
|
||||
uint32 savedPos = btree->curitem;
|
||||
|
||||
while( btree->curitem < btree->nitem ) {
|
||||
GinDataPageAddItem(page, btree->items+btree->curitem, off);
|
||||
while (btree->curitem < btree->nitem)
|
||||
{
|
||||
GinDataPageAddItem(page, btree->items + btree->curitem, off);
|
||||
off++;
|
||||
btree->curitem++;
|
||||
}
|
||||
data.nitem = btree->curitem-savedPos;
|
||||
data.nitem = btree->curitem - savedPos;
|
||||
rdata[2].len = sizeofitem * data.nitem;
|
||||
} else {
|
||||
GinDataPageAddItem(page, btree->items+btree->curitem, off);
|
||||
}
|
||||
else
|
||||
{
|
||||
GinDataPageAddItem(page, btree->items + btree->curitem, off);
|
||||
btree->curitem++;
|
||||
}
|
||||
} else
|
||||
GinDataPageAddItem(page, &(btree->pitem), off);
|
||||
}
|
||||
else
|
||||
GinDataPageAddItem(page, &(btree->pitem), off);
|
||||
}
|
||||
|
||||
/*
|
||||
* split page and fills WAL record. original buffer(lbuf) leaves untouched,
|
||||
* returns shadow page of lbuf filled new data. In leaf page and build mode puts all
|
||||
* returns shadow page of lbuf filled new data. In leaf page and build mode puts all
|
||||
* ItemPointers to pages. Also, in build mode splits data by way to full fulled
|
||||
* left page
|
||||
*/
|
||||
static Page
|
||||
dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata) {
|
||||
dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
|
||||
{
|
||||
static ginxlogSplit data;
|
||||
static XLogRecData rdata[4];
|
||||
static char vector[2*BLCKSZ];
|
||||
char *ptr;
|
||||
static char vector[2 * BLCKSZ];
|
||||
char *ptr;
|
||||
OffsetNumber separator;
|
||||
ItemPointer bound;
|
||||
Page lpage = GinPageGetCopyPage( BufferGetPage( lbuf ) );
|
||||
ItemPointerData oldbound = *GinDataPageGetRightBound(lpage);
|
||||
int sizeofitem = GinSizeOfItem(lpage);
|
||||
ItemPointer bound;
|
||||
Page lpage = GinPageGetCopyPage(BufferGetPage(lbuf));
|
||||
ItemPointerData oldbound = *GinDataPageGetRightBound(lpage);
|
||||
int sizeofitem = GinSizeOfItem(lpage);
|
||||
OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff;
|
||||
Page rpage = BufferGetPage( rbuf );
|
||||
Size pageSize = PageGetPageSize( lpage );
|
||||
Size freeSpace;
|
||||
uint32 nCopied = 1;
|
||||
Page rpage = BufferGetPage(rbuf);
|
||||
Size pageSize = PageGetPageSize(lpage);
|
||||
Size freeSpace;
|
||||
uint32 nCopied = 1;
|
||||
|
||||
GinInitPage( rpage, GinPageGetOpaque(lpage)->flags, pageSize );
|
||||
GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
|
||||
freeSpace = GinDataPageGetFreeSpace(rpage);
|
||||
|
||||
*prdata = rdata;
|
||||
data.leftChildBlkno = ( GinPageIsLeaf(lpage) ) ?
|
||||
InvalidOffsetNumber : PostingItemGetBlockNumber( &(btree->pitem) );
|
||||
data.updateBlkno = dataPrepareData( btree, lpage, off );
|
||||
data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
|
||||
InvalidOffsetNumber : PostingItemGetBlockNumber(&(btree->pitem));
|
||||
data.updateBlkno = dataPrepareData(btree, lpage, off);
|
||||
|
||||
memcpy(vector, GinDataPageGetItem(lpage, FirstOffsetNumber),
|
||||
maxoff*sizeofitem);
|
||||
memcpy(vector, GinDataPageGetItem(lpage, FirstOffsetNumber),
|
||||
maxoff * sizeofitem);
|
||||
|
||||
if ( GinPageIsLeaf(lpage) && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff ) {
|
||||
if (GinPageIsLeaf(lpage) && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff)
|
||||
{
|
||||
nCopied = 0;
|
||||
while( btree->curitem < btree->nitem && maxoff*sizeof(ItemPointerData) < 2*(freeSpace - sizeof(ItemPointerData)) ) {
|
||||
memcpy( vector + maxoff*sizeof(ItemPointerData), btree->items+btree->curitem,
|
||||
sizeof(ItemPointerData) );
|
||||
while (btree->curitem < btree->nitem && maxoff * sizeof(ItemPointerData) < 2 * (freeSpace - sizeof(ItemPointerData)))
|
||||
{
|
||||
memcpy(vector + maxoff * sizeof(ItemPointerData), btree->items + btree->curitem,
|
||||
sizeof(ItemPointerData));
|
||||
maxoff++;
|
||||
nCopied++;
|
||||
btree->curitem++;
|
||||
}
|
||||
} else {
|
||||
ptr = vector + (off-1)*sizeofitem;
|
||||
if ( maxoff+1-off != 0 )
|
||||
memmove( ptr+sizeofitem, ptr, (maxoff-off+1) * sizeofitem );
|
||||
if ( GinPageIsLeaf(lpage) ) {
|
||||
memcpy(ptr, btree->items+btree->curitem, sizeofitem );
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr = vector + (off - 1) * sizeofitem;
|
||||
if (maxoff + 1 - off != 0)
|
||||
memmove(ptr + sizeofitem, ptr, (maxoff - off + 1) * sizeofitem);
|
||||
if (GinPageIsLeaf(lpage))
|
||||
{
|
||||
memcpy(ptr, btree->items + btree->curitem, sizeofitem);
|
||||
btree->curitem++;
|
||||
} else
|
||||
memcpy(ptr, &(btree->pitem), sizeofitem );
|
||||
|
||||
}
|
||||
else
|
||||
memcpy(ptr, &(btree->pitem), sizeofitem);
|
||||
|
||||
maxoff++;
|
||||
}
|
||||
|
||||
/* we suppose that during index creation table scaned from
|
||||
begin to end, so ItemPointers are monotonically increased.. */
|
||||
if ( btree->isBuild && GinPageRightMost(lpage) )
|
||||
separator=freeSpace/sizeofitem;
|
||||
/*
|
||||
* we suppose that during index creation table scaned from begin to end,
|
||||
* so ItemPointers are monotonically increased..
|
||||
*/
|
||||
if (btree->isBuild && GinPageRightMost(lpage))
|
||||
separator = freeSpace / sizeofitem;
|
||||
else
|
||||
separator=maxoff/2;
|
||||
separator = maxoff / 2;
|
||||
|
||||
GinInitPage( rpage, GinPageGetOpaque(lpage)->flags, pageSize );
|
||||
GinInitPage( lpage, GinPageGetOpaque(rpage)->flags, pageSize );
|
||||
GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
|
||||
GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
|
||||
|
||||
memcpy( GinDataPageGetItem(lpage, FirstOffsetNumber), vector, separator * sizeofitem );
|
||||
memcpy(GinDataPageGetItem(lpage, FirstOffsetNumber), vector, separator * sizeofitem);
|
||||
GinPageGetOpaque(lpage)->maxoff = separator;
|
||||
memcpy( GinDataPageGetItem(rpage, FirstOffsetNumber),
|
||||
vector + separator * sizeofitem, (maxoff-separator) * sizeofitem );
|
||||
GinPageGetOpaque(rpage)->maxoff = maxoff-separator;
|
||||
memcpy(GinDataPageGetItem(rpage, FirstOffsetNumber),
|
||||
vector + separator * sizeofitem, (maxoff - separator) * sizeofitem);
|
||||
GinPageGetOpaque(rpage)->maxoff = maxoff - separator;
|
||||
|
||||
PostingItemSetBlockNumber( &(btree->pitem), BufferGetBlockNumber(lbuf) );
|
||||
if ( GinPageIsLeaf(lpage) )
|
||||
btree->pitem.key = *(ItemPointerData*)GinDataPageGetItem(lpage,
|
||||
GinPageGetOpaque(lpage)->maxoff);
|
||||
else
|
||||
btree->pitem.key = ((PostingItem*)GinDataPageGetItem(lpage,
|
||||
GinPageGetOpaque(lpage)->maxoff))->key;
|
||||
PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf));
|
||||
if (GinPageIsLeaf(lpage))
|
||||
btree->pitem.key = *(ItemPointerData *) GinDataPageGetItem(lpage,
|
||||
GinPageGetOpaque(lpage)->maxoff);
|
||||
else
|
||||
btree->pitem.key = ((PostingItem *) GinDataPageGetItem(lpage,
|
||||
GinPageGetOpaque(lpage)->maxoff))->key;
|
||||
btree->rightblkno = BufferGetBlockNumber(rbuf);
|
||||
|
||||
/* set up right bound for left page */
|
||||
@@ -452,8 +514,8 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRe
|
||||
|
||||
data.node = btree->index->rd_node;
|
||||
data.rootBlkno = InvalidBlockNumber;
|
||||
data.lblkno = BufferGetBlockNumber( lbuf );
|
||||
data.rblkno = BufferGetBlockNumber( rbuf );
|
||||
data.lblkno = BufferGetBlockNumber(lbuf);
|
||||
data.rblkno = BufferGetBlockNumber(rbuf);
|
||||
data.separator = separator;
|
||||
data.nitem = maxoff;
|
||||
data.isData = TRUE;
|
||||
@@ -468,34 +530,37 @@ dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRe
|
||||
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
rdata[1].data = vector;
|
||||
rdata[1].len = MAXALIGN( maxoff * sizeofitem );
|
||||
rdata[1].len = MAXALIGN(maxoff * sizeofitem);
|
||||
rdata[1].next = NULL;
|
||||
|
||||
return lpage;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fills new root by right bound values from child.
|
||||
* Fills new root by right bound values from child.
|
||||
* Also called from ginxlog, should not use btree
|
||||
*/
|
||||
void
|
||||
dataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf) {
|
||||
Page page = BufferGetPage(root),
|
||||
lpage = BufferGetPage(lbuf),
|
||||
rpage = BufferGetPage(rbuf);
|
||||
PostingItem li, ri;
|
||||
dataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
|
||||
{
|
||||
Page page = BufferGetPage(root),
|
||||
lpage = BufferGetPage(lbuf),
|
||||
rpage = BufferGetPage(rbuf);
|
||||
PostingItem li,
|
||||
ri;
|
||||
|
||||
li.key = *GinDataPageGetRightBound(lpage);
|
||||
PostingItemSetBlockNumber( &li, BufferGetBlockNumber(lbuf) );
|
||||
GinDataPageAddItem(page, &li, InvalidOffsetNumber );
|
||||
PostingItemSetBlockNumber(&li, BufferGetBlockNumber(lbuf));
|
||||
GinDataPageAddItem(page, &li, InvalidOffsetNumber);
|
||||
|
||||
ri.key = *GinDataPageGetRightBound(rpage);
|
||||
PostingItemSetBlockNumber( &ri, BufferGetBlockNumber(rbuf) );
|
||||
GinDataPageAddItem(page, &ri, InvalidOffsetNumber );
|
||||
PostingItemSetBlockNumber(&ri, BufferGetBlockNumber(rbuf));
|
||||
GinDataPageAddItem(page, &ri, InvalidOffsetNumber);
|
||||
}
|
||||
|
||||
void
|
||||
prepareDataScan( GinBtree btree, Relation index) {
|
||||
prepareDataScan(GinBtree btree, Relation index)
|
||||
{
|
||||
memset(btree, 0, sizeof(GinBtreeData));
|
||||
btree->index = index;
|
||||
btree->isMoveRight = dataIsMoveRight;
|
||||
@@ -509,21 +574,22 @@ prepareDataScan( GinBtree btree, Relation index) {
|
||||
btree->fillRoot = dataFillRoot;
|
||||
|
||||
btree->searchMode = FALSE;
|
||||
btree->isDelete = FALSE;
|
||||
btree->isDelete = FALSE;
|
||||
btree->fullScan = FALSE;
|
||||
btree->isBuild= FALSE;
|
||||
btree->isBuild = FALSE;
|
||||
}
|
||||
|
||||
GinPostingTreeScan*
|
||||
prepareScanPostingTree( Relation index, BlockNumber rootBlkno, bool searchMode) {
|
||||
GinPostingTreeScan *gdi = (GinPostingTreeScan*)palloc0( sizeof(GinPostingTreeScan) );
|
||||
GinPostingTreeScan *
|
||||
prepareScanPostingTree(Relation index, BlockNumber rootBlkno, bool searchMode)
|
||||
{
|
||||
GinPostingTreeScan *gdi = (GinPostingTreeScan *) palloc0(sizeof(GinPostingTreeScan));
|
||||
|
||||
prepareDataScan(&gdi->btree, index);
|
||||
|
||||
prepareDataScan( &gdi->btree, index );
|
||||
|
||||
gdi->btree.searchMode = searchMode;
|
||||
gdi->btree.fullScan = searchMode;
|
||||
|
||||
gdi->stack = ginPrepareFindLeafPage( &gdi->btree, rootBlkno );
|
||||
gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno);
|
||||
|
||||
return gdi;
|
||||
}
|
||||
@@ -532,33 +598,35 @@ prepareScanPostingTree( Relation index, BlockNumber rootBlkno, bool searchMode)
|
||||
* Inserts array of item pointers, may execute several tree scan (very rare)
|
||||
*/
|
||||
void
|
||||
insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem) {
|
||||
insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem)
|
||||
{
|
||||
BlockNumber rootBlkno = gdi->stack->blkno;
|
||||
|
||||
gdi->btree.items = items;
|
||||
gdi->btree.nitem = nitem;
|
||||
gdi->btree.curitem = 0;
|
||||
|
||||
while( gdi->btree.curitem < gdi->btree.nitem ) {
|
||||
while (gdi->btree.curitem < gdi->btree.nitem)
|
||||
{
|
||||
if (!gdi->stack)
|
||||
gdi->stack = ginPrepareFindLeafPage( &gdi->btree, rootBlkno );
|
||||
gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno);
|
||||
|
||||
gdi->stack = ginFindLeafPage( &gdi->btree, gdi->stack );
|
||||
gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
|
||||
|
||||
if ( gdi->btree.findItem( &(gdi->btree), gdi->stack ) )
|
||||
elog(ERROR,"item pointer (%u,%d) already exists",
|
||||
ItemPointerGetBlockNumber(gdi->btree.items + gdi->btree.curitem),
|
||||
ItemPointerGetOffsetNumber(gdi->btree.items + gdi->btree.curitem));
|
||||
if (gdi->btree.findItem(&(gdi->btree), gdi->stack))
|
||||
elog(ERROR, "item pointer (%u,%d) already exists",
|
||||
ItemPointerGetBlockNumber(gdi->btree.items + gdi->btree.curitem),
|
||||
ItemPointerGetOffsetNumber(gdi->btree.items + gdi->btree.curitem));
|
||||
|
||||
ginInsertValue(&(gdi->btree), gdi->stack);
|
||||
|
||||
gdi->stack=NULL;
|
||||
gdi->stack = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
Buffer
|
||||
scanBeginPostingTree( GinPostingTreeScan *gdi ) {
|
||||
gdi->stack = ginFindLeafPage( &gdi->btree, gdi->stack );
|
||||
scanBeginPostingTree(GinPostingTreeScan *gdi)
|
||||
{
|
||||
gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
|
||||
return gdi->stack->buffer;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* ginentrypage.c
|
||||
* page utilities routines for the postgres inverted index access method.
|
||||
* page utilities routines for the postgres inverted index access method.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.3 2006/07/14 14:52:16 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.4 2006/10/04 00:29:47 momjian Exp $
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
@@ -23,48 +23,52 @@
|
||||
* 1) Posting list
|
||||
* - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usial
|
||||
* - ItemPointerGetBlockNumber(&itup->t_tid) contains original
|
||||
* size of tuple (without posting list).
|
||||
* size of tuple (without posting list).
|
||||
* Macroses: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n)
|
||||
* - ItemPointerGetOffsetNumber(&itup->t_tid) contains number
|
||||
* of elements in posting list (number of heap itempointer)
|
||||
* Macroses: GinGetNPosting(itup) / GinSetNPosting(itup,n)
|
||||
* - After usial part of tuple there is a posting list
|
||||
* - After usial part of tuple there is a posting list
|
||||
* Macros: GinGetPosting(itup)
|
||||
* 2) Posting tree
|
||||
* - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usial
|
||||
* - ItemPointerGetBlockNumber(&itup->t_tid) contains block number of
|
||||
* - ItemPointerGetBlockNumber(&itup->t_tid) contains block number of
|
||||
* root of posting tree
|
||||
* - ItemPointerGetOffsetNumber(&itup->t_tid) contains magick number GIN_TREE_POSTING
|
||||
*/
|
||||
IndexTuple
|
||||
GinFormTuple(GinState *ginstate, Datum key, ItemPointerData *ipd, uint32 nipd) {
|
||||
bool isnull=FALSE;
|
||||
GinFormTuple(GinState *ginstate, Datum key, ItemPointerData *ipd, uint32 nipd)
|
||||
{
|
||||
bool isnull = FALSE;
|
||||
IndexTuple itup;
|
||||
|
||||
itup = index_form_tuple(ginstate->tupdesc, &key, &isnull);
|
||||
itup = index_form_tuple(ginstate->tupdesc, &key, &isnull);
|
||||
|
||||
GinSetOrigSizePosting( itup, IndexTupleSize(itup) );
|
||||
GinSetOrigSizePosting(itup, IndexTupleSize(itup));
|
||||
|
||||
if ( nipd > 0 ) {
|
||||
uint32 newsize = MAXALIGN(SHORTALIGN(IndexTupleSize(itup)) + sizeof(ItemPointerData)*nipd);
|
||||
if (nipd > 0)
|
||||
{
|
||||
uint32 newsize = MAXALIGN(SHORTALIGN(IndexTupleSize(itup)) + sizeof(ItemPointerData) * nipd);
|
||||
|
||||
if ( newsize >= INDEX_SIZE_MASK )
|
||||
if (newsize >= INDEX_SIZE_MASK)
|
||||
return NULL;
|
||||
|
||||
if ( newsize > TOAST_INDEX_TARGET && nipd > 1 )
|
||||
if (newsize > TOAST_INDEX_TARGET && nipd > 1)
|
||||
return NULL;
|
||||
|
||||
itup = repalloc( itup, newsize );
|
||||
itup = repalloc(itup, newsize);
|
||||
|
||||
/* set new size */
|
||||
itup->t_info &= ~INDEX_SIZE_MASK;
|
||||
itup->t_info &= ~INDEX_SIZE_MASK;
|
||||
itup->t_info |= newsize;
|
||||
|
||||
if ( ipd )
|
||||
memcpy( GinGetPosting(itup), ipd, sizeof(ItemPointerData)*nipd );
|
||||
GinSetNPosting(itup, nipd);
|
||||
} else {
|
||||
GinSetNPosting(itup, 0);
|
||||
if (ipd)
|
||||
memcpy(GinGetPosting(itup), ipd, sizeof(ItemPointerData) * nipd);
|
||||
GinSetNPosting(itup, nipd);
|
||||
}
|
||||
else
|
||||
{
|
||||
GinSetNPosting(itup, 0);
|
||||
}
|
||||
return itup;
|
||||
}
|
||||
@@ -74,31 +78,35 @@ GinFormTuple(GinState *ginstate, Datum key, ItemPointerData *ipd, uint32 nipd) {
|
||||
* so we don't use right bound, we use rightest key instead.
|
||||
*/
|
||||
static IndexTuple
|
||||
getRightMostTuple(Page page) {
|
||||
getRightMostTuple(Page page)
|
||||
{
|
||||
OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
|
||||
|
||||
return (IndexTuple) PageGetItem(page, PageGetItemId(page, maxoff));
|
||||
}
|
||||
|
||||
Datum
|
||||
ginGetHighKey(GinState *ginstate, Page page) {
|
||||
IndexTuple itup;
|
||||
bool isnull;
|
||||
ginGetHighKey(GinState *ginstate, Page page)
|
||||
{
|
||||
IndexTuple itup;
|
||||
bool isnull;
|
||||
|
||||
itup = getRightMostTuple(page);
|
||||
|
||||
return index_getattr(itup, FirstOffsetNumber, ginstate->tupdesc, &isnull);
|
||||
return index_getattr(itup, FirstOffsetNumber, ginstate->tupdesc, &isnull);
|
||||
}
|
||||
|
||||
static bool
|
||||
entryIsMoveRight(GinBtree btree, Page page) {
|
||||
Datum highkey;
|
||||
static bool
|
||||
entryIsMoveRight(GinBtree btree, Page page)
|
||||
{
|
||||
Datum highkey;
|
||||
|
||||
if ( GinPageRightMost(page) )
|
||||
if (GinPageRightMost(page))
|
||||
return FALSE;
|
||||
|
||||
highkey = ginGetHighKey(btree->ginstate, page);
|
||||
|
||||
if ( compareEntries(btree->ginstate, btree->entryValue, highkey) > 0 )
|
||||
if (compareEntries(btree->ginstate, btree->entryValue, highkey) > 0)
|
||||
return TRUE;
|
||||
|
||||
return FALSE;
|
||||
@@ -109,16 +117,20 @@ entryIsMoveRight(GinBtree btree, Page page) {
|
||||
* page correctly choosen and searching value SHOULD be on page
|
||||
*/
|
||||
static BlockNumber
|
||||
entryLocateEntry(GinBtree btree, GinBtreeStack *stack) {
|
||||
OffsetNumber low, high, maxoff;
|
||||
IndexTuple itup = NULL;
|
||||
int result;
|
||||
Page page = BufferGetPage( stack->buffer );
|
||||
entryLocateEntry(GinBtree btree, GinBtreeStack *stack)
|
||||
{
|
||||
OffsetNumber low,
|
||||
high,
|
||||
maxoff;
|
||||
IndexTuple itup = NULL;
|
||||
int result;
|
||||
Page page = BufferGetPage(stack->buffer);
|
||||
|
||||
Assert( !GinPageIsLeaf(page) );
|
||||
Assert( !GinPageIsData(page) );
|
||||
Assert(!GinPageIsLeaf(page));
|
||||
Assert(!GinPageIsData(page));
|
||||
|
||||
if ( btree->fullScan ) {
|
||||
if (btree->fullScan)
|
||||
{
|
||||
stack->off = FirstOffsetNumber;
|
||||
stack->predictNumber *= PageGetMaxOffsetNumber(page);
|
||||
return btree->getLeftMostPage(btree, page);
|
||||
@@ -126,39 +138,43 @@ entryLocateEntry(GinBtree btree, GinBtreeStack *stack) {
|
||||
|
||||
low = FirstOffsetNumber;
|
||||
maxoff = high = PageGetMaxOffsetNumber(page);
|
||||
Assert( high >= low );
|
||||
Assert(high >= low);
|
||||
|
||||
high++;
|
||||
|
||||
while (high > low) {
|
||||
while (high > low)
|
||||
{
|
||||
OffsetNumber mid = low + ((high - low) / 2);
|
||||
|
||||
if ( mid == maxoff && GinPageRightMost(page) )
|
||||
if (mid == maxoff && GinPageRightMost(page))
|
||||
/* Right infinity */
|
||||
result = -1;
|
||||
else {
|
||||
bool isnull;
|
||||
else
|
||||
{
|
||||
bool isnull;
|
||||
|
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
|
||||
result = compareEntries(btree->ginstate, btree->entryValue,
|
||||
index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull) );
|
||||
result = compareEntries(btree->ginstate, btree->entryValue,
|
||||
index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull));
|
||||
}
|
||||
|
||||
if ( result == 0 ) {
|
||||
if (result == 0)
|
||||
{
|
||||
stack->off = mid;
|
||||
Assert( GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO );
|
||||
Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO);
|
||||
return GinItemPointerGetBlockNumber(&(itup)->t_tid);
|
||||
} else if ( result > 0 )
|
||||
}
|
||||
else if (result > 0)
|
||||
low = mid + 1;
|
||||
else
|
||||
high = mid;
|
||||
}
|
||||
|
||||
Assert( high>=FirstOffsetNumber && high <= maxoff );
|
||||
Assert(high >= FirstOffsetNumber && high <= maxoff);
|
||||
|
||||
stack->off = high;
|
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, high));
|
||||
Assert( GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO );
|
||||
Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO);
|
||||
return GinItemPointerGetBlockNumber(&(itup)->t_tid);
|
||||
}
|
||||
|
||||
@@ -168,15 +184,18 @@ entryLocateEntry(GinBtree btree, GinBtreeStack *stack) {
|
||||
* Returns true if value found on page.
|
||||
*/
|
||||
static bool
|
||||
entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack) {
|
||||
Page page = BufferGetPage( stack->buffer );
|
||||
OffsetNumber low, high;
|
||||
IndexTuple itup;
|
||||
entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack)
|
||||
{
|
||||
Page page = BufferGetPage(stack->buffer);
|
||||
OffsetNumber low,
|
||||
high;
|
||||
IndexTuple itup;
|
||||
|
||||
Assert( GinPageIsLeaf(page) );
|
||||
Assert( !GinPageIsData(page) );
|
||||
Assert(GinPageIsLeaf(page));
|
||||
Assert(!GinPageIsData(page));
|
||||
|
||||
if ( btree->fullScan ) {
|
||||
if (btree->fullScan)
|
||||
{
|
||||
stack->off = FirstOffsetNumber;
|
||||
return TRUE;
|
||||
}
|
||||
@@ -184,26 +203,30 @@ entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack) {
|
||||
low = FirstOffsetNumber;
|
||||
high = PageGetMaxOffsetNumber(page);
|
||||
|
||||
if ( high < low ) {
|
||||
if (high < low)
|
||||
{
|
||||
stack->off = FirstOffsetNumber;
|
||||
return false;
|
||||
}
|
||||
|
||||
high++;
|
||||
|
||||
while (high > low) {
|
||||
while (high > low)
|
||||
{
|
||||
OffsetNumber mid = low + ((high - low) / 2);
|
||||
bool isnull;
|
||||
int result;
|
||||
bool isnull;
|
||||
int result;
|
||||
|
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
|
||||
result = compareEntries(btree->ginstate, btree->entryValue,
|
||||
index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull) );
|
||||
index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull));
|
||||
|
||||
if ( result == 0 ) {
|
||||
if (result == 0)
|
||||
{
|
||||
stack->off = mid;
|
||||
return true;
|
||||
} else if ( result > 0 )
|
||||
}
|
||||
else if (result > 0)
|
||||
low = mid + 1;
|
||||
else
|
||||
high = mid;
|
||||
@@ -214,33 +237,40 @@ entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack) {
|
||||
}
|
||||
|
||||
static OffsetNumber
|
||||
entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) {
|
||||
OffsetNumber i, maxoff = PageGetMaxOffsetNumber(page);
|
||||
IndexTuple itup;
|
||||
entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
|
||||
{
|
||||
OffsetNumber i,
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
IndexTuple itup;
|
||||
|
||||
Assert( !GinPageIsLeaf(page) );
|
||||
Assert( !GinPageIsData(page) );
|
||||
Assert(!GinPageIsLeaf(page));
|
||||
Assert(!GinPageIsData(page));
|
||||
|
||||
/* if page isn't changed, we returns storedOff */
|
||||
if ( storedOff>= FirstOffsetNumber && storedOff<=maxoff) {
|
||||
if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
|
||||
{
|
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff));
|
||||
if ( GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno )
|
||||
if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
|
||||
return storedOff;
|
||||
|
||||
/* we hope, that needed pointer goes to right. It's true
|
||||
if there wasn't a deletion */
|
||||
for( i=storedOff+1 ; i <= maxoff ; i++ ) {
|
||||
/*
|
||||
* we hope, that needed pointer goes to right. It's true if there
|
||||
* wasn't a deletion
|
||||
*/
|
||||
for (i = storedOff + 1; i <= maxoff; i++)
|
||||
{
|
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
|
||||
if ( GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno )
|
||||
if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
|
||||
return i;
|
||||
}
|
||||
maxoff = storedOff-1;
|
||||
maxoff = storedOff - 1;
|
||||
}
|
||||
|
||||
/* last chance */
|
||||
for( i=FirstOffsetNumber; i <= maxoff ; i++ ) {
|
||||
for (i = FirstOffsetNumber; i <= maxoff; i++)
|
||||
{
|
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
|
||||
if ( GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno )
|
||||
if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
|
||||
return i;
|
||||
}
|
||||
|
||||
@@ -248,31 +278,35 @@ entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber sto
|
||||
}
|
||||
|
||||
static BlockNumber
|
||||
entryGetLeftMostPage(GinBtree btree, Page page) {
|
||||
IndexTuple itup;
|
||||
entryGetLeftMostPage(GinBtree btree, Page page)
|
||||
{
|
||||
IndexTuple itup;
|
||||
|
||||
Assert( !GinPageIsLeaf(page) );
|
||||
Assert( !GinPageIsData(page) );
|
||||
Assert( PageGetMaxOffsetNumber(page) >= FirstOffsetNumber );
|
||||
Assert(!GinPageIsLeaf(page));
|
||||
Assert(!GinPageIsData(page));
|
||||
Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);
|
||||
|
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
|
||||
return GinItemPointerGetBlockNumber(&(itup)->t_tid);
|
||||
return GinItemPointerGetBlockNumber(&(itup)->t_tid);
|
||||
}
|
||||
|
||||
static bool
|
||||
entryIsEnoughSpace( GinBtree btree, Buffer buf, OffsetNumber off ) {
|
||||
Size itupsz = 0;
|
||||
Page page = BufferGetPage(buf);
|
||||
entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
|
||||
{
|
||||
Size itupsz = 0;
|
||||
Page page = BufferGetPage(buf);
|
||||
|
||||
Assert( btree->entry );
|
||||
Assert( !GinPageIsData(page) );
|
||||
Assert(btree->entry);
|
||||
Assert(!GinPageIsData(page));
|
||||
|
||||
if ( btree->isDelete ) {
|
||||
IndexTuple itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, off));
|
||||
itupsz = MAXALIGN( IndexTupleSize( itup ) ) + sizeof(ItemIdData);
|
||||
if (btree->isDelete)
|
||||
{
|
||||
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
|
||||
|
||||
itupsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
|
||||
}
|
||||
|
||||
if ( PageGetFreeSpace(page) + itupsz >= MAXALIGN(IndexTupleSize(btree->entry)) + sizeof(ItemIdData) )
|
||||
if (PageGetFreeSpace(page) + itupsz >= MAXALIGN(IndexTupleSize(btree->entry)) + sizeof(ItemIdData))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
@@ -284,19 +318,23 @@ entryIsEnoughSpace( GinBtree btree, Buffer buf, OffsetNumber off ) {
|
||||
* if child split is occured
|
||||
*/
|
||||
static BlockNumber
|
||||
entryPreparePage( GinBtree btree, Page page, OffsetNumber off) {
|
||||
entryPreparePage(GinBtree btree, Page page, OffsetNumber off)
|
||||
{
|
||||
BlockNumber ret = InvalidBlockNumber;
|
||||
|
||||
Assert( btree->entry );
|
||||
Assert( !GinPageIsData(page) );
|
||||
Assert(btree->entry);
|
||||
Assert(!GinPageIsData(page));
|
||||
|
||||
if ( btree->isDelete ) {
|
||||
Assert( GinPageIsLeaf(page) );
|
||||
if (btree->isDelete)
|
||||
{
|
||||
Assert(GinPageIsLeaf(page));
|
||||
PageIndexTupleDelete(page, off);
|
||||
}
|
||||
|
||||
if ( !GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber ) {
|
||||
IndexTuple itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, off));
|
||||
if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
|
||||
{
|
||||
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
|
||||
|
||||
ItemPointerSet(&itup->t_tid, btree->rightblkno, InvalidOffsetNumber);
|
||||
ret = btree->rightblkno;
|
||||
}
|
||||
@@ -310,22 +348,23 @@ entryPreparePage( GinBtree btree, Page page, OffsetNumber off) {
|
||||
* Place tuple on page and fills WAL record
|
||||
*/
|
||||
static void
|
||||
entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata) {
|
||||
Page page = BufferGetPage(buf);
|
||||
entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
|
||||
{
|
||||
Page page = BufferGetPage(buf);
|
||||
static XLogRecData rdata[3];
|
||||
OffsetNumber placed;
|
||||
static ginxlogInsert data;
|
||||
OffsetNumber placed;
|
||||
static ginxlogInsert data;
|
||||
|
||||
*prdata = rdata;
|
||||
data.updateBlkno = entryPreparePage( btree, page, off );
|
||||
data.updateBlkno = entryPreparePage(btree, page, off);
|
||||
|
||||
placed = PageAddItem( page, (Item)btree->entry, IndexTupleSize(btree->entry), off, LP_USED);
|
||||
if ( placed != off )
|
||||
placed = PageAddItem(page, (Item) btree->entry, IndexTupleSize(btree->entry), off, LP_USED);
|
||||
if (placed != off)
|
||||
elog(ERROR, "failed to add item to index page in \"%s\"",
|
||||
RelationGetRelationName(btree->index));
|
||||
RelationGetRelationName(btree->index));
|
||||
|
||||
data.node = btree->index->rd_node;
|
||||
data.blkno = BufferGetBlockNumber( buf );
|
||||
data.blkno = BufferGetBlockNumber(buf);
|
||||
data.offset = off;
|
||||
data.nitem = 1;
|
||||
data.isDelete = btree->isDelete;
|
||||
@@ -358,87 +397,99 @@ entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prd
|
||||
* an equal number!
|
||||
*/
|
||||
static Page
|
||||
entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata) {
|
||||
entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
|
||||
{
|
||||
static XLogRecData rdata[2];
|
||||
OffsetNumber i, maxoff, separator=InvalidOffsetNumber;
|
||||
Size totalsize=0;
|
||||
Size lsize = 0, size;
|
||||
static char tupstore[ 2*BLCKSZ ];
|
||||
char *ptr;
|
||||
IndexTuple itup, leftrightmost=NULL;
|
||||
static ginxlogSplit data;
|
||||
Datum value;
|
||||
bool isnull;
|
||||
Page page;
|
||||
Page lpage = GinPageGetCopyPage( BufferGetPage( lbuf ) );
|
||||
Page rpage = BufferGetPage( rbuf );
|
||||
Size pageSize = PageGetPageSize( lpage );
|
||||
OffsetNumber i,
|
||||
maxoff,
|
||||
separator = InvalidOffsetNumber;
|
||||
Size totalsize = 0;
|
||||
Size lsize = 0,
|
||||
size;
|
||||
static char tupstore[2 * BLCKSZ];
|
||||
char *ptr;
|
||||
IndexTuple itup,
|
||||
leftrightmost = NULL;
|
||||
static ginxlogSplit data;
|
||||
Datum value;
|
||||
bool isnull;
|
||||
Page page;
|
||||
Page lpage = GinPageGetCopyPage(BufferGetPage(lbuf));
|
||||
Page rpage = BufferGetPage(rbuf);
|
||||
Size pageSize = PageGetPageSize(lpage);
|
||||
|
||||
*prdata = rdata;
|
||||
data.leftChildBlkno = ( GinPageIsLeaf(lpage) ) ?
|
||||
InvalidOffsetNumber : GinItemPointerGetBlockNumber( &(btree->entry->t_tid) );
|
||||
data.updateBlkno = entryPreparePage( btree, lpage, off );
|
||||
data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
|
||||
InvalidOffsetNumber : GinItemPointerGetBlockNumber(&(btree->entry->t_tid));
|
||||
data.updateBlkno = entryPreparePage(btree, lpage, off);
|
||||
|
||||
maxoff = PageGetMaxOffsetNumber(lpage);
|
||||
ptr = tupstore;
|
||||
ptr = tupstore;
|
||||
|
||||
for(i=FirstOffsetNumber; i<=maxoff; i++) {
|
||||
if ( i==off ) {
|
||||
size = MAXALIGN( IndexTupleSize(btree->entry) );
|
||||
for (i = FirstOffsetNumber; i <= maxoff; i++)
|
||||
{
|
||||
if (i == off)
|
||||
{
|
||||
size = MAXALIGN(IndexTupleSize(btree->entry));
|
||||
memcpy(ptr, btree->entry, size);
|
||||
ptr+=size;
|
||||
ptr += size;
|
||||
totalsize += size + sizeof(ItemIdData);
|
||||
}
|
||||
|
||||
itup = (IndexTuple)PageGetItem(lpage, PageGetItemId(lpage, i));
|
||||
size = MAXALIGN( IndexTupleSize(itup) );
|
||||
itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i));
|
||||
size = MAXALIGN(IndexTupleSize(itup));
|
||||
memcpy(ptr, itup, size);
|
||||
ptr+=size;
|
||||
ptr += size;
|
||||
totalsize += size + sizeof(ItemIdData);
|
||||
}
|
||||
|
||||
if ( off==maxoff+1 ) {
|
||||
size = MAXALIGN( IndexTupleSize(btree->entry) );
|
||||
if (off == maxoff + 1)
|
||||
{
|
||||
size = MAXALIGN(IndexTupleSize(btree->entry));
|
||||
memcpy(ptr, btree->entry, size);
|
||||
ptr+=size;
|
||||
ptr += size;
|
||||
totalsize += size + sizeof(ItemIdData);
|
||||
}
|
||||
|
||||
GinInitPage( rpage, GinPageGetOpaque(lpage)->flags, pageSize );
|
||||
GinInitPage( lpage, GinPageGetOpaque(rpage)->flags, pageSize );
|
||||
GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
|
||||
GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
|
||||
|
||||
ptr = tupstore;
|
||||
maxoff++;
|
||||
maxoff++;
|
||||
lsize = 0;
|
||||
|
||||
page = lpage;
|
||||
for(i=FirstOffsetNumber; i<=maxoff; i++) {
|
||||
itup = (IndexTuple)ptr;
|
||||
for (i = FirstOffsetNumber; i <= maxoff; i++)
|
||||
{
|
||||
itup = (IndexTuple) ptr;
|
||||
|
||||
if ( lsize > totalsize/2 ) {
|
||||
if ( separator==InvalidOffsetNumber )
|
||||
separator = i-1;
|
||||
if (lsize > totalsize / 2)
|
||||
{
|
||||
if (separator == InvalidOffsetNumber)
|
||||
separator = i - 1;
|
||||
page = rpage;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
leftrightmost = itup;
|
||||
lsize += MAXALIGN( IndexTupleSize(itup) ) + sizeof(ItemIdData);
|
||||
lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
|
||||
}
|
||||
|
||||
if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber )
|
||||
if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
|
||||
elog(ERROR, "failed to add item to index page in \"%s\"",
|
||||
RelationGetRelationName(btree->index));
|
||||
ptr += MAXALIGN( IndexTupleSize(itup) );
|
||||
RelationGetRelationName(btree->index));
|
||||
ptr += MAXALIGN(IndexTupleSize(itup));
|
||||
}
|
||||
|
||||
|
||||
value = index_getattr(leftrightmost, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull);
|
||||
btree->entry = GinFormTuple( btree->ginstate, value, NULL, 0);
|
||||
ItemPointerSet(&(btree->entry)->t_tid, BufferGetBlockNumber( lbuf ), InvalidOffsetNumber);
|
||||
btree->rightblkno = BufferGetBlockNumber( rbuf );
|
||||
|
||||
btree->entry = GinFormTuple(btree->ginstate, value, NULL, 0);
|
||||
ItemPointerSet(&(btree->entry)->t_tid, BufferGetBlockNumber(lbuf), InvalidOffsetNumber);
|
||||
btree->rightblkno = BufferGetBlockNumber(rbuf);
|
||||
|
||||
data.node = btree->index->rd_node;
|
||||
data.rootBlkno = InvalidBlockNumber;
|
||||
data.lblkno = BufferGetBlockNumber( lbuf );
|
||||
data.rblkno = BufferGetBlockNumber( rbuf );
|
||||
data.lblkno = BufferGetBlockNumber(lbuf);
|
||||
data.rblkno = BufferGetBlockNumber(rbuf);
|
||||
data.separator = separator;
|
||||
data.nitem = maxoff;
|
||||
data.isData = FALSE;
|
||||
@@ -458,23 +509,28 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR
|
||||
return lpage;
|
||||
}
|
||||
|
||||
/*
|
||||
/*
|
||||
* return newly allocated rightmost tuple
|
||||
*/
|
||||
IndexTuple
|
||||
ginPageGetLinkItup(Buffer buf) {
|
||||
IndexTuple itup, nitup;
|
||||
Page page = BufferGetPage(buf);
|
||||
ginPageGetLinkItup(Buffer buf)
|
||||
{
|
||||
IndexTuple itup,
|
||||
nitup;
|
||||
Page page = BufferGetPage(buf);
|
||||
|
||||
itup = getRightMostTuple( page );
|
||||
if ( GinPageIsLeaf(page) && !GinIsPostingTree(itup) ) {
|
||||
nitup = (IndexTuple)palloc( MAXALIGN(GinGetOrigSizePosting(itup)) );
|
||||
memcpy( nitup, itup, GinGetOrigSizePosting(itup) );
|
||||
itup = getRightMostTuple(page);
|
||||
if (GinPageIsLeaf(page) && !GinIsPostingTree(itup))
|
||||
{
|
||||
nitup = (IndexTuple) palloc(MAXALIGN(GinGetOrigSizePosting(itup)));
|
||||
memcpy(nitup, itup, GinGetOrigSizePosting(itup));
|
||||
nitup->t_info &= ~INDEX_SIZE_MASK;
|
||||
nitup->t_info |= GinGetOrigSizePosting(itup);
|
||||
} else {
|
||||
nitup = (IndexTuple)palloc( MAXALIGN(IndexTupleSize(itup)) );
|
||||
memcpy( nitup, itup, IndexTupleSize(itup) );
|
||||
}
|
||||
else
|
||||
{
|
||||
nitup = (IndexTuple) palloc(MAXALIGN(IndexTupleSize(itup)));
|
||||
memcpy(nitup, itup, IndexTupleSize(itup));
|
||||
}
|
||||
|
||||
ItemPointerSet(&nitup->t_tid, BufferGetBlockNumber(buf), InvalidOffsetNumber);
|
||||
@@ -486,23 +542,25 @@ ginPageGetLinkItup(Buffer buf) {
|
||||
* Also called from ginxlog, should not use btree
|
||||
*/
|
||||
void
|
||||
entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf) {
|
||||
Page page;
|
||||
IndexTuple itup;
|
||||
entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
|
||||
{
|
||||
Page page;
|
||||
IndexTuple itup;
|
||||
|
||||
page = BufferGetPage(root);
|
||||
|
||||
itup = ginPageGetLinkItup( lbuf );
|
||||
if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber )
|
||||
itup = ginPageGetLinkItup(lbuf);
|
||||
if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
|
||||
elog(ERROR, "failed to add item to index root page");
|
||||
|
||||
itup = ginPageGetLinkItup( rbuf );
|
||||
if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber )
|
||||
itup = ginPageGetLinkItup(rbuf);
|
||||
if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
|
||||
elog(ERROR, "failed to add item to index root page");
|
||||
}
|
||||
|
||||
void
|
||||
prepareEntryScan( GinBtree btree, Relation index, Datum value, GinState *ginstate) {
|
||||
prepareEntryScan(GinBtree btree, Relation index, Datum value, GinState *ginstate)
|
||||
{
|
||||
memset(btree, 0, sizeof(GinBtreeData));
|
||||
|
||||
btree->isMoveRight = entryIsMoveRight;
|
||||
@@ -524,4 +582,3 @@ prepareEntryScan( GinBtree btree, Relation index, Datum value, GinState *ginstat
|
||||
btree->fullScan = FALSE;
|
||||
btree->isBuild = FALSE;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* ginget.c
|
||||
* fetch tuples from a GIN scan.
|
||||
* fetch tuples from a GIN scan.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.2 2006/07/14 14:52:16 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.3 2006/10/04 00:29:47 momjian Exp $
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
@@ -18,15 +18,17 @@
|
||||
#include "utils/memutils.h"
|
||||
|
||||
static OffsetNumber
|
||||
findItemInPage( Page page, ItemPointer item, OffsetNumber off ) {
|
||||
findItemInPage(Page page, ItemPointer item, OffsetNumber off)
|
||||
{
|
||||
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
|
||||
int res;
|
||||
int res;
|
||||
|
||||
for(; off<=maxoff; off++) {
|
||||
res = compareItemPointers( item, (ItemPointer)GinDataPageGetItem(page, off) );
|
||||
Assert( res>= 0 );
|
||||
for (; off <= maxoff; off++)
|
||||
{
|
||||
res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, off));
|
||||
Assert(res >= 0);
|
||||
|
||||
if ( res == 0 )
|
||||
if (res == 0)
|
||||
return off;
|
||||
}
|
||||
|
||||
@@ -38,24 +40,29 @@ findItemInPage( Page page, ItemPointer item, OffsetNumber off ) {
|
||||
* Stop* functions unlock buffer (but don't release!)
|
||||
*/
|
||||
static void
|
||||
startScanEntry( Relation index, GinState *ginstate, GinScanEntry entry, bool firstCall ) {
|
||||
if ( entry->master != NULL ) {
|
||||
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firstCall)
|
||||
{
|
||||
if (entry->master != NULL)
|
||||
{
|
||||
entry->isFinished = entry->master->isFinished;
|
||||
return;
|
||||
}
|
||||
|
||||
if ( firstCall ) {
|
||||
/* at first call we should find entry, and
|
||||
begin scan of posting tree or just store posting list in memory */
|
||||
if (firstCall)
|
||||
{
|
||||
/*
|
||||
* at first call we should find entry, and begin scan of posting tree
|
||||
* or just store posting list in memory
|
||||
*/
|
||||
GinBtreeData btreeEntry;
|
||||
GinBtreeStack *stackEntry;
|
||||
Page page;
|
||||
bool needUnlock = TRUE;
|
||||
GinBtreeStack *stackEntry;
|
||||
Page page;
|
||||
bool needUnlock = TRUE;
|
||||
|
||||
prepareEntryScan( &btreeEntry, index, entry->entry, ginstate );
|
||||
prepareEntryScan(&btreeEntry, index, entry->entry, ginstate);
|
||||
btreeEntry.searchMode = TRUE;
|
||||
stackEntry = ginFindLeafPage(&btreeEntry, NULL);
|
||||
page = BufferGetPage( stackEntry->buffer );
|
||||
page = BufferGetPage(stackEntry->buffer);
|
||||
|
||||
entry->isFinished = TRUE;
|
||||
entry->buffer = InvalidBuffer;
|
||||
@@ -65,103 +72,115 @@ startScanEntry( Relation index, GinState *ginstate, GinScanEntry entry, bool fir
|
||||
entry->reduceResult = FALSE;
|
||||
entry->predictNumberResult = 0;
|
||||
|
||||
if ( btreeEntry.findItem( &btreeEntry, stackEntry ) ) {
|
||||
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
|
||||
if (btreeEntry.findItem(&btreeEntry, stackEntry))
|
||||
{
|
||||
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
|
||||
|
||||
if ( GinIsPostingTree(itup) ) {
|
||||
if (GinIsPostingTree(itup))
|
||||
{
|
||||
BlockNumber rootPostingTree = GinGetPostingTree(itup);
|
||||
GinPostingTreeScan *gdi;
|
||||
Page page;
|
||||
Page page;
|
||||
|
||||
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
|
||||
needUnlock = FALSE;
|
||||
gdi = prepareScanPostingTree( index, rootPostingTree, TRUE );
|
||||
needUnlock = FALSE;
|
||||
gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
|
||||
|
||||
entry->buffer = scanBeginPostingTree( gdi );
|
||||
IncrBufferRefCount( entry->buffer );
|
||||
entry->buffer = scanBeginPostingTree(gdi);
|
||||
IncrBufferRefCount(entry->buffer);
|
||||
|
||||
page = BufferGetPage( entry->buffer );
|
||||
entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
|
||||
page = BufferGetPage(entry->buffer);
|
||||
entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
|
||||
|
||||
freeGinBtreeStack( gdi->stack );
|
||||
pfree( gdi );
|
||||
freeGinBtreeStack(gdi->stack);
|
||||
pfree(gdi);
|
||||
entry->isFinished = FALSE;
|
||||
} else if ( GinGetNPosting(itup) > 0 ) {
|
||||
}
|
||||
else if (GinGetNPosting(itup) > 0)
|
||||
{
|
||||
entry->nlist = GinGetNPosting(itup);
|
||||
entry->list = (ItemPointerData*)palloc( sizeof(ItemPointerData) * entry->nlist );
|
||||
memcpy( entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist );
|
||||
entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist);
|
||||
memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist);
|
||||
entry->isFinished = FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if ( needUnlock )
|
||||
if (needUnlock)
|
||||
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
|
||||
freeGinBtreeStack( stackEntry );
|
||||
} else if ( entry->buffer != InvalidBuffer ) {
|
||||
freeGinBtreeStack(stackEntry);
|
||||
}
|
||||
else if (entry->buffer != InvalidBuffer)
|
||||
{
|
||||
/* we should find the place where we were stopped */
|
||||
BlockNumber blkno;
|
||||
Page page;
|
||||
Page page;
|
||||
|
||||
LockBuffer( entry->buffer, GIN_SHARE );
|
||||
LockBuffer(entry->buffer, GIN_SHARE);
|
||||
|
||||
if ( !ItemPointerIsValid( &entry->curItem ) )
|
||||
if (!ItemPointerIsValid(&entry->curItem))
|
||||
/* start position */
|
||||
return;
|
||||
Assert( entry->offset!=InvalidOffsetNumber );
|
||||
Assert(entry->offset != InvalidOffsetNumber);
|
||||
|
||||
page = BufferGetPage( entry->buffer );
|
||||
page = BufferGetPage(entry->buffer);
|
||||
|
||||
/* try to find curItem in current buffer */
|
||||
if ( (entry->offset=findItemInPage(page , &entry->curItem, entry->offset))!=InvalidOffsetNumber )
|
||||
if ((entry->offset = findItemInPage(page, &entry->curItem, entry->offset)) != InvalidOffsetNumber)
|
||||
return;
|
||||
|
||||
/* walk to right */
|
||||
while( (blkno = GinPageGetOpaque( page )->rightlink)!=InvalidBlockNumber ) {
|
||||
LockBuffer( entry->buffer, GIN_UNLOCK );
|
||||
entry->buffer = ReleaseAndReadBuffer( entry->buffer, index, blkno );
|
||||
LockBuffer( entry->buffer, GIN_SHARE );
|
||||
page = BufferGetPage( entry->buffer );
|
||||
while ((blkno = GinPageGetOpaque(page)->rightlink) != InvalidBlockNumber)
|
||||
{
|
||||
LockBuffer(entry->buffer, GIN_UNLOCK);
|
||||
entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno);
|
||||
LockBuffer(entry->buffer, GIN_SHARE);
|
||||
page = BufferGetPage(entry->buffer);
|
||||
|
||||
if ( (entry->offset=findItemInPage(page , &entry->curItem, FirstOffsetNumber))!=InvalidOffsetNumber )
|
||||
if ((entry->offset = findItemInPage(page, &entry->curItem, FirstOffsetNumber)) != InvalidOffsetNumber)
|
||||
return;
|
||||
}
|
||||
|
||||
elog(ERROR,"Logic error: lost previously founded ItemId");
|
||||
elog(ERROR, "Logic error: lost previously founded ItemId");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
stopScanEntry( GinScanEntry entry ) {
|
||||
if ( entry->buffer != InvalidBuffer )
|
||||
LockBuffer( entry->buffer, GIN_UNLOCK );
|
||||
stopScanEntry(GinScanEntry entry)
|
||||
{
|
||||
if (entry->buffer != InvalidBuffer)
|
||||
LockBuffer(entry->buffer, GIN_UNLOCK);
|
||||
}
|
||||
|
||||
static void
|
||||
startScanKey( Relation index, GinState *ginstate, GinScanKey key ) {
|
||||
uint32 i;
|
||||
startScanKey(Relation index, GinState *ginstate, GinScanKey key)
|
||||
{
|
||||
uint32 i;
|
||||
|
||||
for(i=0;i<key->nentries;i++)
|
||||
startScanEntry( index, ginstate, key->scanEntry+i, key->firstCall );
|
||||
|
||||
if ( key->firstCall ) {
|
||||
memset( key->entryRes, TRUE, sizeof(bool) * key->nentries );
|
||||
for (i = 0; i < key->nentries; i++)
|
||||
startScanEntry(index, ginstate, key->scanEntry + i, key->firstCall);
|
||||
|
||||
if (key->firstCall)
|
||||
{
|
||||
memset(key->entryRes, TRUE, sizeof(bool) * key->nentries);
|
||||
key->isFinished = FALSE;
|
||||
key->firstCall = FALSE;
|
||||
|
||||
if ( GinFuzzySearchLimit > 0 ) {
|
||||
if (GinFuzzySearchLimit > 0)
|
||||
{
|
||||
/*
|
||||
* If all of keys more than threshold we will try to reduce
|
||||
* result, we hope (and only hope, for intersection operation of array
|
||||
* our supposition isn't true), that total result will not be more
|
||||
* than minimal predictNumberResult.
|
||||
* If all of keys more than threshold we will try to reduce result,
|
||||
* we hope (and only hope, for intersection operation of array our
|
||||
* supposition isn't true), that total result will not be more than
|
||||
* minimal predictNumberResult.
|
||||
*/
|
||||
|
||||
for(i=0;i<key->nentries;i++)
|
||||
if ( key->scanEntry[i].predictNumberResult <= key->nentries * GinFuzzySearchLimit )
|
||||
return;
|
||||
|
||||
for(i=0;i<key->nentries;i++)
|
||||
if ( key->scanEntry[i].predictNumberResult > key->nentries * GinFuzzySearchLimit ) {
|
||||
for (i = 0; i < key->nentries; i++)
|
||||
if (key->scanEntry[i].predictNumberResult <= key->nentries * GinFuzzySearchLimit)
|
||||
return;
|
||||
|
||||
for (i = 0; i < key->nentries; i++)
|
||||
if (key->scanEntry[i].predictNumberResult > key->nentries * GinFuzzySearchLimit)
|
||||
{
|
||||
key->scanEntry[i].predictNumberResult /= key->nentries;
|
||||
key->scanEntry[i].reduceResult = TRUE;
|
||||
}
|
||||
@@ -170,50 +189,60 @@ startScanKey( Relation index, GinState *ginstate, GinScanKey key ) {
|
||||
}
|
||||
|
||||
static void
|
||||
stopScanKey( GinScanKey key ) {
|
||||
uint32 i;
|
||||
stopScanKey(GinScanKey key)
|
||||
{
|
||||
uint32 i;
|
||||
|
||||
for(i=0;i<key->nentries;i++)
|
||||
stopScanEntry( key->scanEntry+i );
|
||||
for (i = 0; i < key->nentries; i++)
|
||||
stopScanEntry(key->scanEntry + i);
|
||||
}
|
||||
|
||||
static void
|
||||
startScan( IndexScanDesc scan ) {
|
||||
uint32 i;
|
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
||||
startScan(IndexScanDesc scan)
|
||||
{
|
||||
uint32 i;
|
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
||||
|
||||
for(i=0; i<so->nkeys; i++)
|
||||
startScanKey( scan->indexRelation, &so->ginstate, so->keys + i );
|
||||
for (i = 0; i < so->nkeys; i++)
|
||||
startScanKey(scan->indexRelation, &so->ginstate, so->keys + i);
|
||||
}
|
||||
|
||||
static void
|
||||
stopScan( IndexScanDesc scan ) {
|
||||
uint32 i;
|
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
||||
stopScan(IndexScanDesc scan)
|
||||
{
|
||||
uint32 i;
|
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
||||
|
||||
for(i=0; i<so->nkeys; i++)
|
||||
stopScanKey( so->keys + i );
|
||||
for (i = 0; i < so->nkeys; i++)
|
||||
stopScanKey(so->keys + i);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
entryGetNextItem( Relation index, GinScanEntry entry ) {
|
||||
Page page = BufferGetPage( entry->buffer );
|
||||
entryGetNextItem(Relation index, GinScanEntry entry)
|
||||
{
|
||||
Page page = BufferGetPage(entry->buffer);
|
||||
|
||||
entry->offset++;
|
||||
if ( entry->offset <= GinPageGetOpaque( page )->maxoff && GinPageGetOpaque( page )->maxoff >= FirstOffsetNumber ) {
|
||||
entry->curItem = *(ItemPointerData*)GinDataPageGetItem(page, entry->offset);
|
||||
} else {
|
||||
BlockNumber blkno = GinPageGetOpaque( page )->rightlink;
|
||||
|
||||
LockBuffer( entry->buffer, GIN_UNLOCK );
|
||||
if ( blkno == InvalidBlockNumber ) {
|
||||
ReleaseBuffer( entry->buffer );
|
||||
if (entry->offset <= GinPageGetOpaque(page)->maxoff && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber)
|
||||
{
|
||||
entry->curItem = *(ItemPointerData *) GinDataPageGetItem(page, entry->offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
|
||||
|
||||
LockBuffer(entry->buffer, GIN_UNLOCK);
|
||||
if (blkno == InvalidBlockNumber)
|
||||
{
|
||||
ReleaseBuffer(entry->buffer);
|
||||
entry->buffer = InvalidBuffer;
|
||||
entry->isFinished = TRUE;
|
||||
} else {
|
||||
entry->buffer = ReleaseAndReadBuffer( entry->buffer, index, blkno );
|
||||
LockBuffer( entry->buffer, GIN_SHARE );
|
||||
}
|
||||
else
|
||||
{
|
||||
entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno);
|
||||
LockBuffer(entry->buffer, GIN_SHARE);
|
||||
entry->offset = InvalidOffsetNumber;
|
||||
entryGetNextItem(index, entry);
|
||||
}
|
||||
@@ -221,29 +250,37 @@ entryGetNextItem( Relation index, GinScanEntry entry ) {
|
||||
}
|
||||
|
||||
#define gin_rand() (((double) random()) / ((double) MAX_RANDOM_VALUE))
|
||||
#define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
|
||||
#define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
|
||||
|
||||
/*
|
||||
* Sets entry->curItem to new found heap item pointer for one
|
||||
* Sets entry->curItem to new found heap item pointer for one
|
||||
* entry of one scan key
|
||||
*/
|
||||
static bool
|
||||
entryGetItem( Relation index, GinScanEntry entry ) {
|
||||
if ( entry->master ) {
|
||||
entryGetItem(Relation index, GinScanEntry entry)
|
||||
{
|
||||
if (entry->master)
|
||||
{
|
||||
entry->isFinished = entry->master->isFinished;
|
||||
entry->curItem = entry->master->curItem;
|
||||
} else if ( entry->list ) {
|
||||
}
|
||||
else if (entry->list)
|
||||
{
|
||||
entry->offset++;
|
||||
if ( entry->offset <= entry->nlist )
|
||||
entry->curItem = entry->list[ entry->offset - 1 ];
|
||||
else {
|
||||
ItemPointerSet( &entry->curItem, InvalidBlockNumber, InvalidOffsetNumber );
|
||||
if (entry->offset <= entry->nlist)
|
||||
entry->curItem = entry->list[entry->offset - 1];
|
||||
else
|
||||
{
|
||||
ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber);
|
||||
entry->isFinished = TRUE;
|
||||
}
|
||||
} else {
|
||||
do {
|
||||
}
|
||||
else
|
||||
{
|
||||
do
|
||||
{
|
||||
entryGetNextItem(index, entry);
|
||||
} while ( entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry) );
|
||||
} while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry));
|
||||
}
|
||||
|
||||
return entry->isFinished;
|
||||
@@ -254,155 +291,180 @@ entryGetItem( Relation index, GinScanEntry entry ) {
|
||||
* returns isFinished!
|
||||
*/
|
||||
static bool
|
||||
keyGetItem( Relation index, GinState *ginstate, MemoryContext tempCtx, GinScanKey key ) {
|
||||
uint32 i;
|
||||
GinScanEntry entry;
|
||||
bool res;
|
||||
MemoryContext oldCtx;
|
||||
keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx, GinScanKey key)
|
||||
{
|
||||
uint32 i;
|
||||
GinScanEntry entry;
|
||||
bool res;
|
||||
MemoryContext oldCtx;
|
||||
|
||||
if ( key->isFinished )
|
||||
if (key->isFinished)
|
||||
return TRUE;
|
||||
|
||||
do {
|
||||
/* move forward from the previous value and set new curItem,
|
||||
which is minimal from entries->curItems */
|
||||
ItemPointerSetMax( &key->curItem );
|
||||
for(i=0;i<key->nentries;i++) {
|
||||
entry = key->scanEntry+i;
|
||||
|
||||
if ( key->entryRes[i] ) {
|
||||
if ( entry->isFinished == FALSE && entryGetItem(index, entry) == FALSE ) {
|
||||
if (compareItemPointers( &entry->curItem, &key->curItem ) < 0)
|
||||
do
|
||||
{
|
||||
/*
|
||||
* move forward from the previous value and set new curItem, which is
|
||||
* minimal from entries->curItems
|
||||
*/
|
||||
ItemPointerSetMax(&key->curItem);
|
||||
for (i = 0; i < key->nentries; i++)
|
||||
{
|
||||
entry = key->scanEntry + i;
|
||||
|
||||
if (key->entryRes[i])
|
||||
{
|
||||
if (entry->isFinished == FALSE && entryGetItem(index, entry) == FALSE)
|
||||
{
|
||||
if (compareItemPointers(&entry->curItem, &key->curItem) < 0)
|
||||
key->curItem = entry->curItem;
|
||||
} else
|
||||
}
|
||||
else
|
||||
key->entryRes[i] = FALSE;
|
||||
} else if ( entry->isFinished == FALSE ) {
|
||||
if (compareItemPointers( &entry->curItem, &key->curItem ) < 0)
|
||||
}
|
||||
else if (entry->isFinished == FALSE)
|
||||
{
|
||||
if (compareItemPointers(&entry->curItem, &key->curItem) < 0)
|
||||
key->curItem = entry->curItem;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( ItemPointerIsMax( &key->curItem ) ) {
|
||||
if (ItemPointerIsMax(&key->curItem))
|
||||
{
|
||||
/* all entries are finished */
|
||||
key->isFinished = TRUE;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
if ( key->nentries == 1 ) {
|
||||
|
||||
if (key->nentries == 1)
|
||||
{
|
||||
/* no need to call consistentFn !! */
|
||||
key->entryRes[0] = TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* setting up array for consistentFn */
|
||||
for(i=0;i<key->nentries;i++) {
|
||||
entry = key->scanEntry+i;
|
||||
|
||||
if ( entry->isFinished == FALSE && compareItemPointers( &entry->curItem, &key->curItem )==0 )
|
||||
for (i = 0; i < key->nentries; i++)
|
||||
{
|
||||
entry = key->scanEntry + i;
|
||||
|
||||
if (entry->isFinished == FALSE && compareItemPointers(&entry->curItem, &key->curItem) == 0)
|
||||
key->entryRes[i] = TRUE;
|
||||
else
|
||||
key->entryRes[i] = FALSE;
|
||||
}
|
||||
|
||||
oldCtx = MemoryContextSwitchTo(tempCtx);
|
||||
res = DatumGetBool( FunctionCall3(
|
||||
&ginstate->consistentFn,
|
||||
PointerGetDatum( key->entryRes ),
|
||||
UInt16GetDatum( key->strategy ),
|
||||
key->query
|
||||
));
|
||||
res = DatumGetBool(FunctionCall3(
|
||||
&ginstate->consistentFn,
|
||||
PointerGetDatum(key->entryRes),
|
||||
UInt16GetDatum(key->strategy),
|
||||
key->query
|
||||
));
|
||||
MemoryContextSwitchTo(oldCtx);
|
||||
MemoryContextReset(tempCtx);
|
||||
} while( !res );
|
||||
|
||||
} while (!res);
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get heap item pointer from scan
|
||||
* returns true if found
|
||||
* Get heap item pointer from scan
|
||||
* returns true if found
|
||||
*/
|
||||
static bool
|
||||
scanGetItem( IndexScanDesc scan, ItemPointerData *item ) {
|
||||
uint32 i;
|
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
||||
scanGetItem(IndexScanDesc scan, ItemPointerData *item)
|
||||
{
|
||||
uint32 i;
|
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
||||
|
||||
ItemPointerSetMin( item );
|
||||
for(i=0;i<so->nkeys;i++) {
|
||||
GinScanKey key = so->keys+i;
|
||||
ItemPointerSetMin(item);
|
||||
for (i = 0; i < so->nkeys; i++)
|
||||
{
|
||||
GinScanKey key = so->keys + i;
|
||||
|
||||
if ( keyGetItem( scan->indexRelation, &so->ginstate, so->tempCtx, key )==FALSE ) {
|
||||
if ( compareItemPointers( item, &key->curItem ) < 0 )
|
||||
if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx, key) == FALSE)
|
||||
{
|
||||
if (compareItemPointers(item, &key->curItem) < 0)
|
||||
*item = key->curItem;
|
||||
} else
|
||||
return FALSE; /* finished one of keys */
|
||||
}
|
||||
else
|
||||
return FALSE; /* finished one of keys */
|
||||
}
|
||||
|
||||
for(i=1;i<=so->nkeys;i++) {
|
||||
GinScanKey key = so->keys+i-1;
|
||||
|
||||
for(;;) {
|
||||
int cmp = compareItemPointers( item, &key->curItem );
|
||||
for (i = 1; i <= so->nkeys; i++)
|
||||
{
|
||||
GinScanKey key = so->keys + i - 1;
|
||||
|
||||
if ( cmp == 0 )
|
||||
for (;;)
|
||||
{
|
||||
int cmp = compareItemPointers(item, &key->curItem);
|
||||
|
||||
if (cmp == 0)
|
||||
break;
|
||||
else if ( cmp > 0 ) {
|
||||
if ( keyGetItem( scan->indexRelation, &so->ginstate, so->tempCtx, key )==TRUE )
|
||||
return FALSE; /* finished one of keys */
|
||||
} else { /* returns to begin */
|
||||
else if (cmp > 0)
|
||||
{
|
||||
if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx, key) == TRUE)
|
||||
return FALSE; /* finished one of keys */
|
||||
}
|
||||
else
|
||||
{ /* returns to begin */
|
||||
*item = key->curItem;
|
||||
i=0;
|
||||
i = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
#define GinIsNewKey(s) ( ((GinScanOpaque) scan->opaque)->keys == NULL )
|
||||
|
||||
Datum
|
||||
gingetmulti(PG_FUNCTION_ARGS) {
|
||||
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
||||
Datum
|
||||
gingetmulti(PG_FUNCTION_ARGS)
|
||||
{
|
||||
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
||||
ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
|
||||
int32 max_tids = PG_GETARG_INT32(2);
|
||||
int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3);
|
||||
int32 max_tids = PG_GETARG_INT32(2);
|
||||
int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3);
|
||||
|
||||
if ( GinIsNewKey(scan) )
|
||||
newScanKey( scan );
|
||||
if (GinIsNewKey(scan))
|
||||
newScanKey(scan);
|
||||
|
||||
startScan( scan );
|
||||
startScan(scan);
|
||||
|
||||
*returned_tids = 0;
|
||||
|
||||
do {
|
||||
if ( scanGetItem( scan, tids + *returned_tids ) )
|
||||
do
|
||||
{
|
||||
if (scanGetItem(scan, tids + *returned_tids))
|
||||
(*returned_tids)++;
|
||||
else
|
||||
break;
|
||||
} while ( *returned_tids < max_tids );
|
||||
} while (*returned_tids < max_tids);
|
||||
|
||||
stopScan( scan );
|
||||
stopScan(scan);
|
||||
|
||||
PG_RETURN_BOOL(*returned_tids == max_tids);
|
||||
}
|
||||
|
||||
Datum
|
||||
gingettuple(PG_FUNCTION_ARGS) {
|
||||
gingettuple(PG_FUNCTION_ARGS)
|
||||
{
|
||||
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
||||
ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
|
||||
bool res;
|
||||
bool res;
|
||||
|
||||
if ( dir != ForwardScanDirection )
|
||||
if (dir != ForwardScanDirection)
|
||||
elog(ERROR, "Gin doesn't support other scan directions than forward");
|
||||
|
||||
if ( GinIsNewKey(scan) )
|
||||
newScanKey( scan );
|
||||
|
||||
startScan( scan );
|
||||
if (GinIsNewKey(scan))
|
||||
newScanKey(scan);
|
||||
|
||||
startScan(scan);
|
||||
res = scanGetItem(scan, &scan->xs_ctup.t_self);
|
||||
stopScan( scan );
|
||||
stopScan(scan);
|
||||
|
||||
PG_RETURN_BOOL(res);
|
||||
}
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* gininsert.c
|
||||
* insert routines for the postgres inverted index access method.
|
||||
* insert routines for the postgres inverted index access method.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.4 2006/07/14 14:52:16 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.5 2006/10/04 00:29:47 momjian Exp $
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
@@ -19,12 +19,13 @@
|
||||
#include "miscadmin.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
typedef struct {
|
||||
GinState ginstate;
|
||||
double indtuples;
|
||||
MemoryContext tmpCtx;
|
||||
MemoryContext funcCtx;
|
||||
BuildAccumulator accum;
|
||||
typedef struct
|
||||
{
|
||||
GinState ginstate;
|
||||
double indtuples;
|
||||
MemoryContext tmpCtx;
|
||||
MemoryContext funcCtx;
|
||||
BuildAccumulator accum;
|
||||
} GinBuildState;
|
||||
|
||||
/*
|
||||
@@ -32,24 +33,26 @@ typedef struct {
|
||||
* suppose that items[] fits to page
|
||||
*/
|
||||
static BlockNumber
|
||||
createPostingTree( Relation index, ItemPointerData *items, uint32 nitems ) {
|
||||
createPostingTree(Relation index, ItemPointerData *items, uint32 nitems)
|
||||
{
|
||||
BlockNumber blkno;
|
||||
Buffer buffer = GinNewBuffer(index);
|
||||
Page page;
|
||||
Buffer buffer = GinNewBuffer(index);
|
||||
Page page;
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
GinInitBuffer( buffer, GIN_DATA|GIN_LEAF );
|
||||
GinInitBuffer(buffer, GIN_DATA | GIN_LEAF);
|
||||
page = BufferGetPage(buffer);
|
||||
blkno = BufferGetBlockNumber(buffer);
|
||||
|
||||
memcpy( GinDataPageGetData(page), items, sizeof(ItemPointerData) * nitems );
|
||||
memcpy(GinDataPageGetData(page), items, sizeof(ItemPointerData) * nitems);
|
||||
GinPageGetOpaque(page)->maxoff = nitems;
|
||||
|
||||
if (!index->rd_istemp) {
|
||||
XLogRecPtr recptr;
|
||||
if (!index->rd_istemp)
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[2];
|
||||
ginxlogCreatePostingTree data;
|
||||
ginxlogCreatePostingTree data;
|
||||
|
||||
data.node = index->rd_node;
|
||||
data.blkno = blkno;
|
||||
@@ -71,7 +74,7 @@ createPostingTree( Relation index, ItemPointerData *items, uint32 nitems ) {
|
||||
PageSetLSN(page, recptr);
|
||||
PageSetTLI(page, ThisTimeLineID);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
MarkBufferDirty(buffer);
|
||||
UnlockReleaseBuffer(buffer);
|
||||
@@ -89,21 +92,25 @@ createPostingTree( Relation index, ItemPointerData *items, uint32 nitems ) {
|
||||
* GinFormTuple().
|
||||
*/
|
||||
static IndexTuple
|
||||
addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
|
||||
IndexTuple old, ItemPointerData *items, uint32 nitem, bool isBuild) {
|
||||
bool isnull;
|
||||
Datum key = index_getattr(old, FirstOffsetNumber, ginstate->tupdesc, &isnull);
|
||||
IndexTuple res = GinFormTuple(ginstate, key, NULL, nitem + GinGetNPosting(old));
|
||||
addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
|
||||
IndexTuple old, ItemPointerData *items, uint32 nitem, bool isBuild)
|
||||
{
|
||||
bool isnull;
|
||||
Datum key = index_getattr(old, FirstOffsetNumber, ginstate->tupdesc, &isnull);
|
||||
IndexTuple res = GinFormTuple(ginstate, key, NULL, nitem + GinGetNPosting(old));
|
||||
|
||||
if ( res ) {
|
||||
if (res)
|
||||
{
|
||||
/* good, small enough */
|
||||
MergeItemPointers( GinGetPosting(res),
|
||||
GinGetPosting(old), GinGetNPosting(old),
|
||||
items, nitem
|
||||
);
|
||||
|
||||
MergeItemPointers(GinGetPosting(res),
|
||||
GinGetPosting(old), GinGetNPosting(old),
|
||||
items, nitem
|
||||
);
|
||||
|
||||
GinSetNPosting(res, nitem + GinGetNPosting(old));
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
BlockNumber postingRoot;
|
||||
GinPostingTreeScan *gdi;
|
||||
|
||||
@@ -112,7 +119,7 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
|
||||
postingRoot = createPostingTree(index, GinGetPosting(old), GinGetNPosting(old));
|
||||
GinSetPostingTree(res, postingRoot);
|
||||
|
||||
gdi = prepareScanPostingTree(index, postingRoot, FALSE);
|
||||
gdi = prepareScanPostingTree(index, postingRoot, FALSE);
|
||||
gdi->btree.isBuild = isBuild;
|
||||
|
||||
insertItemPointer(gdi, items, nitem);
|
||||
@@ -124,36 +131,39 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
|
||||
}
|
||||
|
||||
/*
|
||||
* Inserts only one entry to the index, but it can add more than 1
|
||||
* ItemPointer.
|
||||
* Inserts only one entry to the index, but it can add more than 1
|
||||
* ItemPointer.
|
||||
*/
|
||||
static void
|
||||
ginEntryInsert( Relation index, GinState *ginstate, Datum value, ItemPointerData *items, uint32 nitem, bool isBuild) {
|
||||
GinBtreeData btree;
|
||||
ginEntryInsert(Relation index, GinState *ginstate, Datum value, ItemPointerData *items, uint32 nitem, bool isBuild)
|
||||
{
|
||||
GinBtreeData btree;
|
||||
GinBtreeStack *stack;
|
||||
IndexTuple itup;
|
||||
Page page;
|
||||
IndexTuple itup;
|
||||
Page page;
|
||||
|
||||
prepareEntryScan( &btree, index, value, ginstate );
|
||||
prepareEntryScan(&btree, index, value, ginstate);
|
||||
|
||||
stack = ginFindLeafPage(&btree, NULL);
|
||||
page = BufferGetPage( stack->buffer );
|
||||
page = BufferGetPage(stack->buffer);
|
||||
|
||||
if ( btree.findItem( &btree, stack ) ) {
|
||||
if (btree.findItem(&btree, stack))
|
||||
{
|
||||
/* found entry */
|
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
|
||||
|
||||
if ( GinIsPostingTree(itup) ) {
|
||||
if (GinIsPostingTree(itup))
|
||||
{
|
||||
/* lock root of posting tree */
|
||||
GinPostingTreeScan *gdi;
|
||||
BlockNumber rootPostingTree = GinGetPostingTree(itup);
|
||||
BlockNumber rootPostingTree = GinGetPostingTree(itup);
|
||||
|
||||
/* release all stack */
|
||||
LockBuffer(stack->buffer, GIN_UNLOCK);
|
||||
freeGinBtreeStack( stack );
|
||||
freeGinBtreeStack(stack);
|
||||
|
||||
/* insert into posting tree */
|
||||
gdi = prepareScanPostingTree( index, rootPostingTree, FALSE );
|
||||
gdi = prepareScanPostingTree(index, rootPostingTree, FALSE);
|
||||
gdi->btree.isBuild = isBuild;
|
||||
insertItemPointer(gdi, items, nitem);
|
||||
|
||||
@@ -163,23 +173,26 @@ ginEntryInsert( Relation index, GinState *ginstate, Datum value, ItemPointerData
|
||||
itup = addItemPointersToTuple(index, ginstate, stack, itup, items, nitem, isBuild);
|
||||
|
||||
btree.isDelete = TRUE;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We suppose, that tuple can store at list one itempointer */
|
||||
itup = GinFormTuple( ginstate, value, items, 1);
|
||||
if ( itup==NULL || IndexTupleSize(itup) >= GinMaxItemSize )
|
||||
itup = GinFormTuple(ginstate, value, items, 1);
|
||||
if (itup == NULL || IndexTupleSize(itup) >= GinMaxItemSize)
|
||||
elog(ERROR, "huge tuple");
|
||||
|
||||
if ( nitem>1 ) {
|
||||
if (nitem > 1)
|
||||
{
|
||||
IndexTuple previtup = itup;
|
||||
|
||||
itup = addItemPointersToTuple(index, ginstate, stack, previtup, items+1, nitem-1, isBuild);
|
||||
itup = addItemPointersToTuple(index, ginstate, stack, previtup, items + 1, nitem - 1, isBuild);
|
||||
pfree(previtup);
|
||||
}
|
||||
}
|
||||
|
||||
btree.entry = itup;
|
||||
ginInsertValue(&btree, stack);
|
||||
pfree( itup );
|
||||
pfree(itup);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -187,48 +200,53 @@ ginEntryInsert( Relation index, GinState *ginstate, Datum value, ItemPointerData
|
||||
* Function isnt use during normal insert
|
||||
*/
|
||||
static uint32
|
||||
ginHeapTupleBulkInsert(GinBuildState *buildstate, Datum value, ItemPointer heapptr) {
|
||||
Datum *entries;
|
||||
uint32 nentries;
|
||||
ginHeapTupleBulkInsert(GinBuildState *buildstate, Datum value, ItemPointer heapptr)
|
||||
{
|
||||
Datum *entries;
|
||||
uint32 nentries;
|
||||
MemoryContext oldCtx;
|
||||
|
||||
oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
|
||||
entries = extractEntriesSU( buildstate->accum.ginstate, value, &nentries);
|
||||
entries = extractEntriesSU(buildstate->accum.ginstate, value, &nentries);
|
||||
MemoryContextSwitchTo(oldCtx);
|
||||
|
||||
if ( nentries==0 )
|
||||
if (nentries == 0)
|
||||
/* nothing to insert */
|
||||
return 0;
|
||||
|
||||
ginInsertRecordBA( &buildstate->accum, heapptr, entries, nentries);
|
||||
ginInsertRecordBA(&buildstate->accum, heapptr, entries, nentries);
|
||||
|
||||
MemoryContextReset(buildstate->funcCtx);
|
||||
|
||||
return nentries;
|
||||
}
|
||||
|
||||
static void
|
||||
static void
|
||||
ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
|
||||
bool *isnull, bool tupleIsAlive, void *state) {
|
||||
bool *isnull, bool tupleIsAlive, void *state)
|
||||
{
|
||||
|
||||
GinBuildState *buildstate = (GinBuildState*)state;
|
||||
GinBuildState *buildstate = (GinBuildState *) state;
|
||||
MemoryContext oldCtx;
|
||||
|
||||
if ( *isnull )
|
||||
if (*isnull)
|
||||
return;
|
||||
|
||||
oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
|
||||
|
||||
buildstate->indtuples += ginHeapTupleBulkInsert(buildstate, *values, &htup->t_self);
|
||||
|
||||
/* we use only half maintenance_work_mem, because there is some leaks
|
||||
during insertion and extract values */
|
||||
if ( buildstate->accum.allocatedMemory >= maintenance_work_mem*1024L/2L ) {
|
||||
ItemPointerData *list;
|
||||
Datum entry;
|
||||
uint32 nlist;
|
||||
/*
|
||||
* we use only half maintenance_work_mem, because there is some leaks
|
||||
* during insertion and extract values
|
||||
*/
|
||||
if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L / 2L)
|
||||
{
|
||||
ItemPointerData *list;
|
||||
Datum entry;
|
||||
uint32 nlist;
|
||||
|
||||
while( (list=ginGetEntry(&buildstate->accum, &entry, &nlist)) != NULL )
|
||||
while ((list = ginGetEntry(&buildstate->accum, &entry, &nlist)) != NULL)
|
||||
ginEntryInsert(index, &buildstate->ginstate, entry, list, nlist, TRUE);
|
||||
|
||||
MemoryContextReset(buildstate->tmpCtx);
|
||||
@@ -239,22 +257,23 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
|
||||
}
|
||||
|
||||
Datum
|
||||
ginbuild(PG_FUNCTION_ARGS) {
|
||||
Relation heap = (Relation) PG_GETARG_POINTER(0);
|
||||
Relation index = (Relation) PG_GETARG_POINTER(1);
|
||||
ginbuild(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Relation heap = (Relation) PG_GETARG_POINTER(0);
|
||||
Relation index = (Relation) PG_GETARG_POINTER(1);
|
||||
IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
|
||||
IndexBuildResult *result;
|
||||
double reltuples;
|
||||
GinBuildState buildstate;
|
||||
double reltuples;
|
||||
GinBuildState buildstate;
|
||||
Buffer buffer;
|
||||
ItemPointerData *list;
|
||||
Datum entry;
|
||||
uint32 nlist;
|
||||
ItemPointerData *list;
|
||||
Datum entry;
|
||||
uint32 nlist;
|
||||
MemoryContext oldCtx;
|
||||
|
||||
if (RelationGetNumberOfBlocks(index) != 0)
|
||||
elog(ERROR, "index \"%s\" already contains data",
|
||||
RelationGetRelationName(index));
|
||||
RelationGetRelationName(index));
|
||||
|
||||
initGinState(&buildstate.ginstate, index);
|
||||
|
||||
@@ -262,10 +281,11 @@ ginbuild(PG_FUNCTION_ARGS) {
|
||||
buffer = GinNewBuffer(index);
|
||||
START_CRIT_SECTION();
|
||||
GinInitBuffer(buffer, GIN_LEAF);
|
||||
if (!index->rd_istemp) {
|
||||
XLogRecPtr recptr;
|
||||
if (!index->rd_istemp)
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata;
|
||||
Page page;
|
||||
Page page;
|
||||
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.data = (char *) &(index->rd_node);
|
||||
@@ -279,7 +299,7 @@ ginbuild(PG_FUNCTION_ARGS) {
|
||||
PageSetLSN(page, recptr);
|
||||
PageSetTLI(page, ThisTimeLineID);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
MarkBufferDirty(buffer);
|
||||
UnlockReleaseBuffer(buffer);
|
||||
@@ -293,26 +313,26 @@ ginbuild(PG_FUNCTION_ARGS) {
|
||||
* inserted into the index
|
||||
*/
|
||||
buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
|
||||
"Gin build temporary context",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
"Gin build temporary context",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
|
||||
buildstate.funcCtx = AllocSetContextCreate(buildstate.tmpCtx,
|
||||
"Gin build temporary context for user-defined function",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
"Gin build temporary context for user-defined function",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
|
||||
buildstate.accum.ginstate = &buildstate.ginstate;
|
||||
ginInitBA( &buildstate.accum );
|
||||
ginInitBA(&buildstate.accum);
|
||||
|
||||
/* do the heap scan */
|
||||
reltuples = IndexBuildHeapScan(heap, index, indexInfo,
|
||||
ginBuildCallback, (void *) &buildstate);
|
||||
ginBuildCallback, (void *) &buildstate);
|
||||
|
||||
oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
|
||||
while( (list=ginGetEntry(&buildstate.accum, &entry, &nlist)) != NULL )
|
||||
while ((list = ginGetEntry(&buildstate.accum, &entry, &nlist)) != NULL)
|
||||
ginEntryInsert(index, &buildstate.ginstate, entry, list, nlist, TRUE);
|
||||
MemoryContextSwitchTo(oldCtx);
|
||||
|
||||
@@ -333,55 +353,58 @@ ginbuild(PG_FUNCTION_ARGS) {
|
||||
* Inserts value during normal insertion
|
||||
*/
|
||||
static uint32
|
||||
ginHeapTupleInsert( Relation index, GinState *ginstate, Datum value, ItemPointer item) {
|
||||
Datum *entries;
|
||||
uint32 i,nentries;
|
||||
ginHeapTupleInsert(Relation index, GinState *ginstate, Datum value, ItemPointer item)
|
||||
{
|
||||
Datum *entries;
|
||||
uint32 i,
|
||||
nentries;
|
||||
|
||||
entries = extractEntriesSU( ginstate, value, &nentries);
|
||||
entries = extractEntriesSU(ginstate, value, &nentries);
|
||||
|
||||
if ( nentries==0 )
|
||||
if (nentries == 0)
|
||||
/* nothing to insert */
|
||||
return 0;
|
||||
|
||||
for(i=0;i<nentries;i++)
|
||||
for (i = 0; i < nentries; i++)
|
||||
ginEntryInsert(index, ginstate, entries[i], item, 1, FALSE);
|
||||
|
||||
return nentries;
|
||||
}
|
||||
|
||||
Datum
|
||||
gininsert(PG_FUNCTION_ARGS) {
|
||||
Relation index = (Relation) PG_GETARG_POINTER(0);
|
||||
Datum *values = (Datum *) PG_GETARG_POINTER(1);
|
||||
bool *isnull = (bool *) PG_GETARG_POINTER(2);
|
||||
gininsert(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Relation index = (Relation) PG_GETARG_POINTER(0);
|
||||
Datum *values = (Datum *) PG_GETARG_POINTER(1);
|
||||
bool *isnull = (bool *) PG_GETARG_POINTER(2);
|
||||
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
|
||||
|
||||
#ifdef NOT_USED
|
||||
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
|
||||
bool checkUnique = PG_GETARG_BOOL(5);
|
||||
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
|
||||
bool checkUnique = PG_GETARG_BOOL(5);
|
||||
#endif
|
||||
GinState ginstate;
|
||||
GinState ginstate;
|
||||
MemoryContext oldCtx;
|
||||
MemoryContext insertCtx;
|
||||
uint32 res;
|
||||
uint32 res;
|
||||
|
||||
if ( *isnull )
|
||||
if (*isnull)
|
||||
PG_RETURN_BOOL(false);
|
||||
|
||||
insertCtx = AllocSetContextCreate(CurrentMemoryContext,
|
||||
"Gin insert temporary context",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
"Gin insert temporary context",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
|
||||
oldCtx = MemoryContextSwitchTo(insertCtx);
|
||||
|
||||
initGinState(&ginstate, index);
|
||||
|
||||
res = ginHeapTupleInsert(index, &ginstate, *values, ht_ctid);
|
||||
res = ginHeapTupleInsert(index, &ginstate, *values, ht_ctid);
|
||||
|
||||
MemoryContextSwitchTo(oldCtx);
|
||||
MemoryContextDelete(insertCtx);
|
||||
|
||||
PG_RETURN_BOOL(res>0);
|
||||
PG_RETURN_BOOL(res > 0);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* ginscan.c
|
||||
* routines to manage scans inverted index relations
|
||||
* routines to manage scans inverted index relations
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.5 2006/09/14 11:26:49 teodor Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.6 2006/10/04 00:29:48 momjian Exp $
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
@@ -19,11 +19,12 @@
|
||||
#include "utils/memutils.h"
|
||||
|
||||
|
||||
Datum
|
||||
ginbeginscan(PG_FUNCTION_ARGS) {
|
||||
Relation rel = (Relation) PG_GETARG_POINTER(0);
|
||||
int keysz = PG_GETARG_INT32(1);
|
||||
ScanKey scankey = (ScanKey) PG_GETARG_POINTER(2);
|
||||
Datum
|
||||
ginbeginscan(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Relation rel = (Relation) PG_GETARG_POINTER(0);
|
||||
int keysz = PG_GETARG_INT32(1);
|
||||
ScanKey scankey = (ScanKey) PG_GETARG_POINTER(2);
|
||||
IndexScanDesc scan;
|
||||
|
||||
scan = RelationGetIndexScan(rel, keysz, scankey);
|
||||
@@ -32,22 +33,25 @@ ginbeginscan(PG_FUNCTION_ARGS) {
|
||||
}
|
||||
|
||||
static void
|
||||
fillScanKey( GinState *ginstate, GinScanKey key, Datum query,
|
||||
Datum *entryValues, uint32 nEntryValues, StrategyNumber strategy ) {
|
||||
uint32 i,j;
|
||||
fillScanKey(GinState *ginstate, GinScanKey key, Datum query,
|
||||
Datum *entryValues, uint32 nEntryValues, StrategyNumber strategy)
|
||||
{
|
||||
uint32 i,
|
||||
j;
|
||||
|
||||
key->nentries = nEntryValues;
|
||||
key->entryRes = (bool*)palloc0( sizeof(bool) * nEntryValues );
|
||||
key->scanEntry = (GinScanEntry) palloc( sizeof(GinScanEntryData) * nEntryValues );
|
||||
key->entryRes = (bool *) palloc0(sizeof(bool) * nEntryValues);
|
||||
key->scanEntry = (GinScanEntry) palloc(sizeof(GinScanEntryData) * nEntryValues);
|
||||
key->strategy = strategy;
|
||||
key->query = query;
|
||||
key->firstCall= TRUE;
|
||||
ItemPointerSet( &(key->curItem), InvalidBlockNumber, InvalidOffsetNumber );
|
||||
key->firstCall = TRUE;
|
||||
ItemPointerSet(&(key->curItem), InvalidBlockNumber, InvalidOffsetNumber);
|
||||
|
||||
for(i=0; i<nEntryValues; i++) {
|
||||
for (i = 0; i < nEntryValues; i++)
|
||||
{
|
||||
key->scanEntry[i].pval = key->entryRes + i;
|
||||
key->scanEntry[i].entry = entryValues[i];
|
||||
ItemPointerSet( &(key->scanEntry[i].curItem), InvalidBlockNumber, InvalidOffsetNumber );
|
||||
ItemPointerSet(&(key->scanEntry[i].curItem), InvalidBlockNumber, InvalidOffsetNumber);
|
||||
key->scanEntry[i].offset = InvalidOffsetNumber;
|
||||
key->scanEntry[i].buffer = InvalidBuffer;
|
||||
key->scanEntry[i].list = NULL;
|
||||
@@ -55,8 +59,9 @@ fillScanKey( GinState *ginstate, GinScanKey key, Datum query,
|
||||
|
||||
/* link to the equals entry in current scan key */
|
||||
key->scanEntry[i].master = NULL;
|
||||
for( j=0; j<i; j++)
|
||||
if ( compareEntries( ginstate, entryValues[i], entryValues[j] ) == 0 ) {
|
||||
for (j = 0; j < i; j++)
|
||||
if (compareEntries(ginstate, entryValues[i], entryValues[j]) == 0)
|
||||
{
|
||||
key->scanEntry[i].master = key->scanEntry + j;
|
||||
break;
|
||||
}
|
||||
@@ -66,23 +71,27 @@ fillScanKey( GinState *ginstate, GinScanKey key, Datum query,
|
||||
#ifdef NOT_USED
|
||||
|
||||
static void
|
||||
resetScanKeys(GinScanKey keys, uint32 nkeys) {
|
||||
uint32 i, j;
|
||||
resetScanKeys(GinScanKey keys, uint32 nkeys)
|
||||
{
|
||||
uint32 i,
|
||||
j;
|
||||
|
||||
if ( keys == NULL )
|
||||
if (keys == NULL)
|
||||
return;
|
||||
|
||||
for(i=0;i<nkeys;i++) {
|
||||
GinScanKey key = keys + i;
|
||||
for (i = 0; i < nkeys; i++)
|
||||
{
|
||||
GinScanKey key = keys + i;
|
||||
|
||||
key->firstCall = TRUE;
|
||||
ItemPointerSet( &(key->curItem), InvalidBlockNumber, InvalidOffsetNumber );
|
||||
ItemPointerSet(&(key->curItem), InvalidBlockNumber, InvalidOffsetNumber);
|
||||
|
||||
for(j=0;j<key->nentries;j++) {
|
||||
if ( key->scanEntry[j].buffer != InvalidBuffer )
|
||||
ReleaseBuffer( key->scanEntry[i].buffer );
|
||||
for (j = 0; j < key->nentries; j++)
|
||||
{
|
||||
if (key->scanEntry[j].buffer != InvalidBuffer)
|
||||
ReleaseBuffer(key->scanEntry[i].buffer);
|
||||
|
||||
ItemPointerSet( &(key->scanEntry[j].curItem), InvalidBlockNumber, InvalidOffsetNumber );
|
||||
ItemPointerSet(&(key->scanEntry[j].curItem), InvalidBlockNumber, InvalidOffsetNumber);
|
||||
key->scanEntry[j].offset = InvalidOffsetNumber;
|
||||
key->scanEntry[j].buffer = InvalidBuffer;
|
||||
key->scanEntry[j].list = NULL;
|
||||
@@ -90,111 +99,121 @@ resetScanKeys(GinScanKey keys, uint32 nkeys) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static void
|
||||
freeScanKeys(GinScanKey keys, uint32 nkeys, bool removeRes) {
|
||||
uint32 i, j;
|
||||
freeScanKeys(GinScanKey keys, uint32 nkeys, bool removeRes)
|
||||
{
|
||||
uint32 i,
|
||||
j;
|
||||
|
||||
if ( keys == NULL )
|
||||
if (keys == NULL)
|
||||
return;
|
||||
|
||||
for(i=0;i<nkeys;i++) {
|
||||
GinScanKey key = keys + i;
|
||||
for (i = 0; i < nkeys; i++)
|
||||
{
|
||||
GinScanKey key = keys + i;
|
||||
|
||||
for(j=0;j<key->nentries;j++) {
|
||||
if ( key->scanEntry[j].buffer != InvalidBuffer )
|
||||
ReleaseBuffer( key->scanEntry[j].buffer );
|
||||
if ( removeRes && key->scanEntry[j].list )
|
||||
for (j = 0; j < key->nentries; j++)
|
||||
{
|
||||
if (key->scanEntry[j].buffer != InvalidBuffer)
|
||||
ReleaseBuffer(key->scanEntry[j].buffer);
|
||||
if (removeRes && key->scanEntry[j].list)
|
||||
pfree(key->scanEntry[j].list);
|
||||
}
|
||||
|
||||
if ( removeRes )
|
||||
if (removeRes)
|
||||
pfree(key->entryRes);
|
||||
pfree(key->scanEntry);
|
||||
}
|
||||
|
||||
|
||||
pfree(keys);
|
||||
}
|
||||
|
||||
void
|
||||
newScanKey( IndexScanDesc scan ) {
|
||||
ScanKey scankey = scan->keyData;
|
||||
newScanKey(IndexScanDesc scan)
|
||||
{
|
||||
ScanKey scankey = scan->keyData;
|
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
||||
int i;
|
||||
uint32 nkeys = 0;
|
||||
int i;
|
||||
uint32 nkeys = 0;
|
||||
|
||||
so->keys = (GinScanKey) palloc( scan->numberOfKeys * sizeof(GinScanKeyData) );
|
||||
so->keys = (GinScanKey) palloc(scan->numberOfKeys * sizeof(GinScanKeyData));
|
||||
|
||||
if (scan->numberOfKeys < 1)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("GIN indexes do not support whole-index scans")));
|
||||
errmsg("GIN indexes do not support whole-index scans")));
|
||||
|
||||
for(i=0; i<scan->numberOfKeys; i++) {
|
||||
Datum* entryValues;
|
||||
uint32 nEntryValues;
|
||||
for (i = 0; i < scan->numberOfKeys; i++)
|
||||
{
|
||||
Datum *entryValues;
|
||||
uint32 nEntryValues;
|
||||
|
||||
if ( scankey[i].sk_flags & SK_ISNULL )
|
||||
if (scankey[i].sk_flags & SK_ISNULL)
|
||||
elog(ERROR, "Gin doesn't support NULL as scan key");
|
||||
Assert( scankey[i].sk_attno == 1 );
|
||||
Assert(scankey[i].sk_attno == 1);
|
||||
|
||||
entryValues = (Datum*)DatumGetPointer(
|
||||
FunctionCall3(
|
||||
&so->ginstate.extractQueryFn,
|
||||
scankey[i].sk_argument,
|
||||
PointerGetDatum( &nEntryValues ),
|
||||
UInt16GetDatum(scankey[i].sk_strategy)
|
||||
)
|
||||
);
|
||||
if ( entryValues==NULL || nEntryValues == 0 )
|
||||
entryValues = (Datum *) DatumGetPointer(
|
||||
FunctionCall3(
|
||||
&so->ginstate.extractQueryFn,
|
||||
scankey[i].sk_argument,
|
||||
PointerGetDatum(&nEntryValues),
|
||||
UInt16GetDatum(scankey[i].sk_strategy)
|
||||
)
|
||||
);
|
||||
if (entryValues == NULL || nEntryValues == 0)
|
||||
/* full scan... */
|
||||
continue;
|
||||
|
||||
fillScanKey( &so->ginstate, &(so->keys[nkeys]), scankey[i].sk_argument,
|
||||
entryValues, nEntryValues, scankey[i].sk_strategy );
|
||||
fillScanKey(&so->ginstate, &(so->keys[nkeys]), scankey[i].sk_argument,
|
||||
entryValues, nEntryValues, scankey[i].sk_strategy);
|
||||
nkeys++;
|
||||
}
|
||||
|
||||
so->nkeys = nkeys;
|
||||
|
||||
if ( so->nkeys == 0 )
|
||||
if (so->nkeys == 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("GIN index doesn't support search with void query")));
|
||||
errmsg("GIN index doesn't support search with void query")));
|
||||
|
||||
pgstat_count_index_scan(&scan->xs_pgstat_info);
|
||||
}
|
||||
|
||||
Datum
|
||||
ginrescan(PG_FUNCTION_ARGS) {
|
||||
ginrescan(PG_FUNCTION_ARGS)
|
||||
{
|
||||
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
||||
ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1);
|
||||
GinScanOpaque so;
|
||||
ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1);
|
||||
GinScanOpaque so;
|
||||
|
||||
so = (GinScanOpaque) scan->opaque;
|
||||
|
||||
if ( so == NULL ) {
|
||||
if (so == NULL)
|
||||
{
|
||||
/* if called from ginbeginscan */
|
||||
so = (GinScanOpaque)palloc( sizeof(GinScanOpaqueData) );
|
||||
so = (GinScanOpaque) palloc(sizeof(GinScanOpaqueData));
|
||||
so->tempCtx = AllocSetContextCreate(CurrentMemoryContext,
|
||||
"Gin scan temporary context",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
"Gin scan temporary context",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
initGinState(&so->ginstate, scan->indexRelation);
|
||||
scan->opaque = so;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
freeScanKeys(so->keys, so->nkeys, TRUE);
|
||||
freeScanKeys(so->markPos, so->nkeys, FALSE);
|
||||
}
|
||||
|
||||
so->markPos=so->keys=NULL;
|
||||
so->markPos = so->keys = NULL;
|
||||
|
||||
if ( scankey && scan->numberOfKeys > 0 ) {
|
||||
if (scankey && scan->numberOfKeys > 0)
|
||||
{
|
||||
memmove(scan->keyData, scankey,
|
||||
scan->numberOfKeys * sizeof(ScanKeyData));
|
||||
scan->numberOfKeys * sizeof(ScanKeyData));
|
||||
}
|
||||
|
||||
PG_RETURN_VOID();
|
||||
@@ -202,13 +221,15 @@ ginrescan(PG_FUNCTION_ARGS) {
|
||||
|
||||
|
||||
Datum
|
||||
ginendscan(PG_FUNCTION_ARGS) {
|
||||
ginendscan(PG_FUNCTION_ARGS)
|
||||
{
|
||||
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
||||
|
||||
if ( so != NULL ) {
|
||||
freeScanKeys(so->keys, so->nkeys, TRUE);
|
||||
freeScanKeys(so->markPos, so->nkeys, FALSE);
|
||||
if (so != NULL)
|
||||
{
|
||||
freeScanKeys(so->keys, so->nkeys, TRUE);
|
||||
freeScanKeys(so->markPos, so->nkeys, FALSE);
|
||||
|
||||
MemoryContextDelete(so->tempCtx);
|
||||
|
||||
@@ -219,22 +240,28 @@ ginendscan(PG_FUNCTION_ARGS) {
|
||||
}
|
||||
|
||||
static GinScanKey
|
||||
copyScanKeys( GinScanKey keys, uint32 nkeys ) {
|
||||
copyScanKeys(GinScanKey keys, uint32 nkeys)
|
||||
{
|
||||
GinScanKey newkeys;
|
||||
uint32 i, j;
|
||||
uint32 i,
|
||||
j;
|
||||
|
||||
newkeys = (GinScanKey)palloc( sizeof(GinScanKeyData) * nkeys );
|
||||
memcpy( newkeys, keys, sizeof(GinScanKeyData) * nkeys );
|
||||
newkeys = (GinScanKey) palloc(sizeof(GinScanKeyData) * nkeys);
|
||||
memcpy(newkeys, keys, sizeof(GinScanKeyData) * nkeys);
|
||||
|
||||
for(i=0;i<nkeys;i++) {
|
||||
newkeys[i].scanEntry = (GinScanEntry)palloc(sizeof(GinScanEntryData) * keys[i].nentries );
|
||||
memcpy( newkeys[i].scanEntry, keys[i].scanEntry, sizeof(GinScanEntryData) * keys[i].nentries );
|
||||
for (i = 0; i < nkeys; i++)
|
||||
{
|
||||
newkeys[i].scanEntry = (GinScanEntry) palloc(sizeof(GinScanEntryData) * keys[i].nentries);
|
||||
memcpy(newkeys[i].scanEntry, keys[i].scanEntry, sizeof(GinScanEntryData) * keys[i].nentries);
|
||||
|
||||
for (j = 0; j < keys[i].nentries; j++)
|
||||
{
|
||||
if (keys[i].scanEntry[j].buffer != InvalidBuffer)
|
||||
IncrBufferRefCount(keys[i].scanEntry[j].buffer);
|
||||
if (keys[i].scanEntry[j].master)
|
||||
{
|
||||
int masterN = keys[i].scanEntry[j].master - keys[i].scanEntry;
|
||||
|
||||
for(j=0;j<keys[i].nentries; j++) {
|
||||
if ( keys[i].scanEntry[j].buffer != InvalidBuffer )
|
||||
IncrBufferRefCount( keys[i].scanEntry[j].buffer );
|
||||
if ( keys[i].scanEntry[j].master ) {
|
||||
int masterN = keys[i].scanEntry[j].master - keys[i].scanEntry;
|
||||
newkeys[i].scanEntry[j].master = newkeys[i].scanEntry + masterN;
|
||||
}
|
||||
}
|
||||
@@ -243,24 +270,26 @@ copyScanKeys( GinScanKey keys, uint32 nkeys ) {
|
||||
return newkeys;
|
||||
}
|
||||
|
||||
Datum
|
||||
ginmarkpos(PG_FUNCTION_ARGS) {
|
||||
Datum
|
||||
ginmarkpos(PG_FUNCTION_ARGS)
|
||||
{
|
||||
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
||||
|
||||
freeScanKeys(so->markPos, so->nkeys, FALSE);
|
||||
so->markPos = copyScanKeys( so->keys, so->nkeys );
|
||||
so->markPos = copyScanKeys(so->keys, so->nkeys);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
Datum
|
||||
ginrestrpos(PG_FUNCTION_ARGS) {
|
||||
Datum
|
||||
ginrestrpos(PG_FUNCTION_ARGS)
|
||||
{
|
||||
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
|
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
||||
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
||||
|
||||
freeScanKeys(so->keys, so->nkeys, FALSE);
|
||||
so->keys = copyScanKeys( so->markPos, so->nkeys );
|
||||
so->keys = copyScanKeys(so->markPos, so->nkeys);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* ginutil.c
|
||||
* utilities routines for the postgres inverted index access method.
|
||||
* utilities routines for the postgres inverted index access method.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.6 2006/09/05 18:25:10 teodor Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.7 2006/10/04 00:29:48 momjian Exp $
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
@@ -19,26 +19,27 @@
|
||||
#include "access/reloptions.h"
|
||||
#include "storage/freespace.h"
|
||||
|
||||
void
|
||||
initGinState( GinState *state, Relation index ) {
|
||||
if ( index->rd_att->natts != 1 )
|
||||
elog(ERROR, "numberOfAttributes %d != 1",
|
||||
index->rd_att->natts);
|
||||
|
||||
void
|
||||
initGinState(GinState *state, Relation index)
|
||||
{
|
||||
if (index->rd_att->natts != 1)
|
||||
elog(ERROR, "numberOfAttributes %d != 1",
|
||||
index->rd_att->natts);
|
||||
|
||||
state->tupdesc = index->rd_att;
|
||||
|
||||
fmgr_info_copy(&(state->compareFn),
|
||||
index_getprocinfo(index, 1, GIN_COMPARE_PROC),
|
||||
CurrentMemoryContext);
|
||||
index_getprocinfo(index, 1, GIN_COMPARE_PROC),
|
||||
CurrentMemoryContext);
|
||||
fmgr_info_copy(&(state->extractValueFn),
|
||||
index_getprocinfo(index, 1, GIN_EXTRACTVALUE_PROC),
|
||||
CurrentMemoryContext);
|
||||
index_getprocinfo(index, 1, GIN_EXTRACTVALUE_PROC),
|
||||
CurrentMemoryContext);
|
||||
fmgr_info_copy(&(state->extractQueryFn),
|
||||
index_getprocinfo(index, 1, GIN_EXTRACTQUERY_PROC),
|
||||
CurrentMemoryContext);
|
||||
index_getprocinfo(index, 1, GIN_EXTRACTQUERY_PROC),
|
||||
CurrentMemoryContext);
|
||||
fmgr_info_copy(&(state->consistentFn),
|
||||
index_getprocinfo(index, 1, GIN_CONSISTENT_PROC),
|
||||
CurrentMemoryContext);
|
||||
index_getprocinfo(index, 1, GIN_CONSISTENT_PROC),
|
||||
CurrentMemoryContext);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -48,13 +49,16 @@ initGinState( GinState *state, Relation index ) {
|
||||
*/
|
||||
|
||||
Buffer
|
||||
GinNewBuffer(Relation index) {
|
||||
Buffer buffer;
|
||||
bool needLock;
|
||||
GinNewBuffer(Relation index)
|
||||
{
|
||||
Buffer buffer;
|
||||
bool needLock;
|
||||
|
||||
/* First, try to get a page from FSM */
|
||||
for(;;) {
|
||||
for (;;)
|
||||
{
|
||||
BlockNumber blkno = GetFreeIndexPage(&index->rd_node);
|
||||
|
||||
if (blkno == InvalidBlockNumber)
|
||||
break;
|
||||
|
||||
@@ -64,14 +68,15 @@ GinNewBuffer(Relation index) {
|
||||
* We have to guard against the possibility that someone else already
|
||||
* recycled this page; the buffer may be locked if so.
|
||||
*/
|
||||
if (ConditionalLockBuffer(buffer)) {
|
||||
Page page = BufferGetPage(buffer);
|
||||
if (ConditionalLockBuffer(buffer))
|
||||
{
|
||||
Page page = BufferGetPage(buffer);
|
||||
|
||||
if (PageIsNew(page))
|
||||
return buffer; /* OK to use, if never initialized */
|
||||
return buffer; /* OK to use, if never initialized */
|
||||
|
||||
if (GinPageIsDeleted(page))
|
||||
return buffer; /* OK to use */
|
||||
return buffer; /* OK to use */
|
||||
|
||||
LockBuffer(buffer, GIN_UNLOCK);
|
||||
}
|
||||
@@ -95,36 +100,39 @@ GinNewBuffer(Relation index) {
|
||||
}
|
||||
|
||||
void
|
||||
GinInitPage(Page page, uint32 f, Size pageSize) {
|
||||
GinInitPage(Page page, uint32 f, Size pageSize)
|
||||
{
|
||||
GinPageOpaque opaque;
|
||||
|
||||
PageInit(page, pageSize, sizeof(GinPageOpaqueData));
|
||||
|
||||
opaque = GinPageGetOpaque(page);
|
||||
memset( opaque, 0, sizeof(GinPageOpaqueData) );
|
||||
opaque->flags = f;
|
||||
memset(opaque, 0, sizeof(GinPageOpaqueData));
|
||||
opaque->flags = f;
|
||||
opaque->rightlink = InvalidBlockNumber;
|
||||
}
|
||||
|
||||
void
|
||||
GinInitBuffer(Buffer b, uint32 f) {
|
||||
GinInitPage( BufferGetPage(b), f, BufferGetPageSize(b) );
|
||||
GinInitBuffer(Buffer b, uint32 f)
|
||||
{
|
||||
GinInitPage(BufferGetPage(b), f, BufferGetPageSize(b));
|
||||
}
|
||||
|
||||
int
|
||||
compareEntries(GinState *ginstate, Datum a, Datum b) {
|
||||
compareEntries(GinState *ginstate, Datum a, Datum b)
|
||||
{
|
||||
return DatumGetInt32(
|
||||
FunctionCall2(
|
||||
&ginstate->compareFn,
|
||||
a, b
|
||||
)
|
||||
FunctionCall2(
|
||||
&ginstate->compareFn,
|
||||
a, b
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
static FmgrInfo* cmpDatumPtr=NULL;
|
||||
static FmgrInfo *cmpDatumPtr = NULL;
|
||||
|
||||
#if defined(__INTEL_COMPILER) && (defined(__ia64__) || defined(__ia64))
|
||||
/*
|
||||
#if defined(__INTEL_COMPILER) && (defined(__ia64__) || defined(__ia64))
|
||||
/*
|
||||
* Intel Compiler on Intel Itanium with -O2 has a bug around
|
||||
* change static variable by user function called from
|
||||
* libc func: it doesn't change. So mark it as volatile.
|
||||
@@ -132,7 +140,7 @@ static FmgrInfo* cmpDatumPtr=NULL;
|
||||
* It's a pity, but it's impossible to define optimization
|
||||
* level here.
|
||||
*/
|
||||
#define VOLATILE volatile
|
||||
#define VOLATILE volatile
|
||||
#else
|
||||
#define VOLATILE
|
||||
#endif
|
||||
@@ -140,57 +148,64 @@ static FmgrInfo* cmpDatumPtr=NULL;
|
||||
static bool VOLATILE needUnique = FALSE;
|
||||
|
||||
static int
|
||||
cmpEntries(const void * a, const void * b) {
|
||||
int res = DatumGetInt32(
|
||||
FunctionCall2(
|
||||
cmpDatumPtr,
|
||||
*(Datum*)a,
|
||||
*(Datum*)b
|
||||
)
|
||||
cmpEntries(const void *a, const void *b)
|
||||
{
|
||||
int res = DatumGetInt32(
|
||||
FunctionCall2(
|
||||
cmpDatumPtr,
|
||||
*(Datum *) a,
|
||||
*(Datum *) b
|
||||
)
|
||||
);
|
||||
|
||||
if ( res == 0 )
|
||||
if (res == 0)
|
||||
needUnique = TRUE;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
Datum*
|
||||
extractEntriesS(GinState *ginstate, Datum value, uint32 *nentries) {
|
||||
Datum *entries;
|
||||
Datum *
|
||||
extractEntriesS(GinState *ginstate, Datum value, uint32 *nentries)
|
||||
{
|
||||
Datum *entries;
|
||||
|
||||
entries = (Datum*)DatumGetPointer(
|
||||
FunctionCall2(
|
||||
&ginstate->extractValueFn,
|
||||
value,
|
||||
PointerGetDatum( nentries )
|
||||
)
|
||||
);
|
||||
entries = (Datum *) DatumGetPointer(
|
||||
FunctionCall2(
|
||||
&ginstate->extractValueFn,
|
||||
value,
|
||||
PointerGetDatum(nentries)
|
||||
)
|
||||
);
|
||||
|
||||
if ( entries == NULL )
|
||||
if (entries == NULL)
|
||||
*nentries = 0;
|
||||
|
||||
if ( *nentries > 1 ) {
|
||||
if (*nentries > 1)
|
||||
{
|
||||
cmpDatumPtr = &ginstate->compareFn;
|
||||
needUnique = FALSE;
|
||||
qsort(entries, *nentries, sizeof(Datum), cmpEntries);
|
||||
qsort(entries, *nentries, sizeof(Datum), cmpEntries);
|
||||
}
|
||||
|
||||
return entries;
|
||||
}
|
||||
|
||||
|
||||
Datum*
|
||||
extractEntriesSU(GinState *ginstate, Datum value, uint32 *nentries) {
|
||||
Datum *entries = extractEntriesS(ginstate, value, nentries);
|
||||
Datum *
|
||||
extractEntriesSU(GinState *ginstate, Datum value, uint32 *nentries)
|
||||
{
|
||||
Datum *entries = extractEntriesS(ginstate, value, nentries);
|
||||
|
||||
if ( *nentries>1 && needUnique ) {
|
||||
Datum *ptr, *res;
|
||||
if (*nentries > 1 && needUnique)
|
||||
{
|
||||
Datum *ptr,
|
||||
*res;
|
||||
|
||||
ptr = res = entries;
|
||||
|
||||
while( ptr - entries < *nentries ) {
|
||||
if ( compareEntries(ginstate, *ptr, *res ) != 0 )
|
||||
while (ptr - entries < *nentries)
|
||||
{
|
||||
if (compareEntries(ginstate, *ptr, *res) != 0)
|
||||
*(++res) = *ptr++;
|
||||
else
|
||||
ptr++;
|
||||
@@ -206,13 +221,14 @@ extractEntriesSU(GinState *ginstate, Datum value, uint32 *nentries) {
|
||||
* It's analog of PageGetTempPage(), but copies whole page
|
||||
*/
|
||||
Page
|
||||
GinPageGetCopyPage( Page page ) {
|
||||
Size pageSize = PageGetPageSize( page );
|
||||
Page tmppage;
|
||||
GinPageGetCopyPage(Page page)
|
||||
{
|
||||
Size pageSize = PageGetPageSize(page);
|
||||
Page tmppage;
|
||||
|
||||
tmppage = (Page) palloc(pageSize);
|
||||
memcpy(tmppage, page, pageSize);
|
||||
|
||||
tmppage=(Page)palloc( pageSize );
|
||||
memcpy( tmppage, page, pageSize );
|
||||
|
||||
return tmppage;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* ginvacuum.c
|
||||
* delete & vacuum routines for the postgres GIN
|
||||
* delete & vacuum routines for the postgres GIN
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.6 2006/09/21 20:31:21 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.7 2006/10/04 00:29:48 momjian Exp $
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
@@ -21,42 +21,50 @@
|
||||
#include "storage/freespace.h"
|
||||
#include "commands/vacuum.h"
|
||||
|
||||
typedef struct {
|
||||
Relation index;
|
||||
IndexBulkDeleteResult *result;
|
||||
IndexBulkDeleteCallback callback;
|
||||
void *callback_state;
|
||||
GinState ginstate;
|
||||
typedef struct
|
||||
{
|
||||
Relation index;
|
||||
IndexBulkDeleteResult *result;
|
||||
IndexBulkDeleteCallback callback;
|
||||
void *callback_state;
|
||||
GinState ginstate;
|
||||
} GinVacuumState;
|
||||
|
||||
|
||||
/*
|
||||
* Cleans array of ItemPointer (removes dead pointers)
|
||||
* Results are always stored in *cleaned, which will be allocated
|
||||
* if its needed. In case of *cleaned!=NULL caller is resposible to
|
||||
* if its needed. In case of *cleaned!=NULL caller is resposible to
|
||||
* enough space. *cleaned and items may point to the same
|
||||
* memory addres.
|
||||
*/
|
||||
|
||||
static uint32
|
||||
ginVacuumPostingList( GinVacuumState *gvs, ItemPointerData *items, uint32 nitem, ItemPointerData **cleaned ) {
|
||||
uint32 i,j=0;
|
||||
ginVacuumPostingList(GinVacuumState *gvs, ItemPointerData *items, uint32 nitem, ItemPointerData **cleaned)
|
||||
{
|
||||
uint32 i,
|
||||
j = 0;
|
||||
|
||||
/*
|
||||
* just scan over ItemPointer array
|
||||
*/
|
||||
|
||||
for(i=0;i<nitem;i++) {
|
||||
if ( gvs->callback(items+i, gvs->callback_state) ) {
|
||||
for (i = 0; i < nitem; i++)
|
||||
{
|
||||
if (gvs->callback(items + i, gvs->callback_state))
|
||||
{
|
||||
gvs->result->tuples_removed += 1;
|
||||
if ( !*cleaned ) {
|
||||
*cleaned = (ItemPointerData*)palloc(sizeof(ItemPointerData)*nitem);
|
||||
if ( i!=0 )
|
||||
memcpy( *cleaned, items, sizeof(ItemPointerData)*i);
|
||||
if (!*cleaned)
|
||||
{
|
||||
*cleaned = (ItemPointerData *) palloc(sizeof(ItemPointerData) * nitem);
|
||||
if (i != 0)
|
||||
memcpy(*cleaned, items, sizeof(ItemPointerData) * i);
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
gvs->result->num_index_tuples += 1;
|
||||
if (i!=j)
|
||||
if (i != j)
|
||||
(*cleaned)[j] = items[i];
|
||||
j++;
|
||||
}
|
||||
@@ -69,56 +77,65 @@ ginVacuumPostingList( GinVacuumState *gvs, ItemPointerData *items, uint32 nitem,
|
||||
* fills WAL record for vacuum leaf page
|
||||
*/
|
||||
static void
|
||||
xlogVacuumPage(Relation index, Buffer buffer) {
|
||||
Page page = BufferGetPage( buffer );
|
||||
XLogRecPtr recptr;
|
||||
xlogVacuumPage(Relation index, Buffer buffer)
|
||||
{
|
||||
Page page = BufferGetPage(buffer);
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[3];
|
||||
ginxlogVacuumPage data;
|
||||
char *backup;
|
||||
char itups[BLCKSZ];
|
||||
uint32 len=0;
|
||||
ginxlogVacuumPage data;
|
||||
char *backup;
|
||||
char itups[BLCKSZ];
|
||||
uint32 len = 0;
|
||||
|
||||
Assert( GinPageIsLeaf( page ) );
|
||||
Assert(GinPageIsLeaf(page));
|
||||
|
||||
if (index->rd_istemp)
|
||||
return;
|
||||
return;
|
||||
|
||||
data.node = index->rd_node;
|
||||
data.blkno = BufferGetBlockNumber(buffer);
|
||||
|
||||
if ( GinPageIsData( page ) ) {
|
||||
backup = GinDataPageGetData( page );
|
||||
data.nitem = GinPageGetOpaque( page )->maxoff;
|
||||
if ( data.nitem )
|
||||
len = MAXALIGN( sizeof(ItemPointerData)*data.nitem );
|
||||
} else {
|
||||
char *ptr;
|
||||
if (GinPageIsData(page))
|
||||
{
|
||||
backup = GinDataPageGetData(page);
|
||||
data.nitem = GinPageGetOpaque(page)->maxoff;
|
||||
if (data.nitem)
|
||||
len = MAXALIGN(sizeof(ItemPointerData) * data.nitem);
|
||||
}
|
||||
else
|
||||
{
|
||||
char *ptr;
|
||||
OffsetNumber i;
|
||||
|
||||
ptr = backup = itups;
|
||||
for(i=FirstOffsetNumber;i<=PageGetMaxOffsetNumber(page);i++) {
|
||||
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
|
||||
memcpy( ptr, itup, IndexTupleSize( itup ) );
|
||||
ptr += MAXALIGN( IndexTupleSize( itup ) );
|
||||
for (i = FirstOffsetNumber; i <= PageGetMaxOffsetNumber(page); i++)
|
||||
{
|
||||
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
|
||||
|
||||
memcpy(ptr, itup, IndexTupleSize(itup));
|
||||
ptr += MAXALIGN(IndexTupleSize(itup));
|
||||
}
|
||||
|
||||
data.nitem = PageGetMaxOffsetNumber(page);
|
||||
len = ptr-backup;
|
||||
len = ptr - backup;
|
||||
}
|
||||
|
||||
rdata[0].buffer = buffer;
|
||||
rdata[0].buffer_std = ( GinPageIsData( page ) ) ? FALSE : TRUE;
|
||||
rdata[0].buffer_std = (GinPageIsData(page)) ? FALSE : TRUE;
|
||||
rdata[0].len = 0;
|
||||
rdata[0].data = NULL;
|
||||
rdata[0].next = rdata + 1;
|
||||
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
rdata[1].len = sizeof(ginxlogVacuumPage);
|
||||
rdata[1].data = (char*)&data;
|
||||
rdata[1].data = (char *) &data;
|
||||
|
||||
if ( len == 0 ) {
|
||||
if (len == 0)
|
||||
{
|
||||
rdata[1].next = NULL;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
rdata[1].next = rdata + 2;
|
||||
|
||||
rdata[2].buffer = InvalidBuffer;
|
||||
@@ -133,71 +150,84 @@ xlogVacuumPage(Relation index, Buffer buffer) {
|
||||
}
|
||||
|
||||
static bool
|
||||
ginVacuumPostingTreeLeaves( GinVacuumState *gvs, BlockNumber blkno, bool isRoot, Buffer *rootBuffer ) {
|
||||
Buffer buffer = ReadBuffer( gvs->index, blkno );
|
||||
Page page = BufferGetPage( buffer );
|
||||
bool hasVoidPage = FALSE;
|
||||
ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, Buffer *rootBuffer)
|
||||
{
|
||||
Buffer buffer = ReadBuffer(gvs->index, blkno);
|
||||
Page page = BufferGetPage(buffer);
|
||||
bool hasVoidPage = FALSE;
|
||||
|
||||
/*
|
||||
/*
|
||||
* We should be sure that we don't concurrent with inserts, insert process
|
||||
* never release root page until end (but it can unlock it and lock again).
|
||||
* If we lock root with with LockBufferForCleanup, new scan process can't begin,
|
||||
* but previous may run.
|
||||
* ginmarkpos/start* keeps buffer pinned, so we will wait for it.
|
||||
* We lock only one posting tree in whole index, so, it's concurrent enough..
|
||||
* Side effect: after this is full complete, tree is unused by any other process
|
||||
* never release root page until end (but it can unlock it and lock
|
||||
* again). If we lock root with with LockBufferForCleanup, new scan
|
||||
* process can't begin, but previous may run. ginmarkpos/start* keeps
|
||||
* buffer pinned, so we will wait for it. We lock only one posting tree in
|
||||
* whole index, so, it's concurrent enough.. Side effect: after this is
|
||||
* full complete, tree is unused by any other process
|
||||
*/
|
||||
|
||||
LockBufferForCleanup( buffer );
|
||||
LockBufferForCleanup(buffer);
|
||||
|
||||
Assert( GinPageIsData(page) );
|
||||
Assert(GinPageIsData(page));
|
||||
|
||||
if ( GinPageIsLeaf(page) ) {
|
||||
OffsetNumber newMaxOff, oldMaxOff = GinPageGetOpaque(page)->maxoff;
|
||||
if (GinPageIsLeaf(page))
|
||||
{
|
||||
OffsetNumber newMaxOff,
|
||||
oldMaxOff = GinPageGetOpaque(page)->maxoff;
|
||||
ItemPointerData *cleaned = NULL;
|
||||
|
||||
newMaxOff = ginVacuumPostingList( gvs,
|
||||
(ItemPointer)GinDataPageGetData(page), oldMaxOff, &cleaned );
|
||||
newMaxOff = ginVacuumPostingList(gvs,
|
||||
(ItemPointer) GinDataPageGetData(page), oldMaxOff, &cleaned);
|
||||
|
||||
/* saves changes about deleted tuple ... */
|
||||
if ( oldMaxOff != newMaxOff ) {
|
||||
if (oldMaxOff != newMaxOff)
|
||||
{
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
if ( newMaxOff > 0 )
|
||||
memcpy( GinDataPageGetData(page), cleaned, sizeof(ItemPointerData) * newMaxOff );
|
||||
pfree( cleaned );
|
||||
if (newMaxOff > 0)
|
||||
memcpy(GinDataPageGetData(page), cleaned, sizeof(ItemPointerData) * newMaxOff);
|
||||
pfree(cleaned);
|
||||
GinPageGetOpaque(page)->maxoff = newMaxOff;
|
||||
|
||||
xlogVacuumPage(gvs->index, buffer);
|
||||
xlogVacuumPage(gvs->index, buffer);
|
||||
|
||||
MarkBufferDirty( buffer );
|
||||
MarkBufferDirty(buffer);
|
||||
END_CRIT_SECTION();
|
||||
|
||||
/* if root is a leaf page, we don't desire futher processing */
|
||||
if ( !isRoot && GinPageGetOpaque(page)->maxoff < FirstOffsetNumber )
|
||||
|
||||
/* if root is a leaf page, we don't desire futher processing */
|
||||
if (!isRoot && GinPageGetOpaque(page)->maxoff < FirstOffsetNumber)
|
||||
hasVoidPage = TRUE;
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
OffsetNumber i;
|
||||
bool isChildHasVoid = FALSE;
|
||||
bool isChildHasVoid = FALSE;
|
||||
|
||||
for( i=FirstOffsetNumber ; i <= GinPageGetOpaque(page)->maxoff ; i++ ) {
|
||||
PostingItem *pitem = (PostingItem*)GinDataPageGetItem(page, i);
|
||||
if ( ginVacuumPostingTreeLeaves( gvs, PostingItemGetBlockNumber(pitem), FALSE, NULL ) )
|
||||
for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++)
|
||||
{
|
||||
PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, i);
|
||||
|
||||
if (ginVacuumPostingTreeLeaves(gvs, PostingItemGetBlockNumber(pitem), FALSE, NULL))
|
||||
isChildHasVoid = TRUE;
|
||||
}
|
||||
|
||||
if ( isChildHasVoid )
|
||||
if (isChildHasVoid)
|
||||
hasVoidPage = TRUE;
|
||||
}
|
||||
|
||||
/* if we have root and theres void pages in tree, then we don't release lock
|
||||
to go further processing and guarantee that tree is unused */
|
||||
if ( !(isRoot && hasVoidPage) ) {
|
||||
UnlockReleaseBuffer( buffer );
|
||||
} else {
|
||||
Assert( rootBuffer );
|
||||
/*
|
||||
* if we have root and theres void pages in tree, then we don't release
|
||||
* lock to go further processing and guarantee that tree is unused
|
||||
*/
|
||||
if (!(isRoot && hasVoidPage))
|
||||
{
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
Assert(rootBuffer);
|
||||
*rootBuffer = buffer;
|
||||
}
|
||||
|
||||
@@ -205,49 +235,54 @@ ginVacuumPostingTreeLeaves( GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
|
||||
}
|
||||
|
||||
static void
|
||||
ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkno,
|
||||
BlockNumber parentBlkno, OffsetNumber myoff, bool isParentRoot ) {
|
||||
Buffer dBuffer = ReadBuffer( gvs->index, deleteBlkno );
|
||||
Buffer lBuffer = (leftBlkno==InvalidBlockNumber) ? InvalidBuffer : ReadBuffer( gvs->index, leftBlkno );
|
||||
Buffer pBuffer = ReadBuffer( gvs->index, parentBlkno );
|
||||
Page page, parentPage;
|
||||
ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkno,
|
||||
BlockNumber parentBlkno, OffsetNumber myoff, bool isParentRoot)
|
||||
{
|
||||
Buffer dBuffer = ReadBuffer(gvs->index, deleteBlkno);
|
||||
Buffer lBuffer = (leftBlkno == InvalidBlockNumber) ? InvalidBuffer : ReadBuffer(gvs->index, leftBlkno);
|
||||
Buffer pBuffer = ReadBuffer(gvs->index, parentBlkno);
|
||||
Page page,
|
||||
parentPage;
|
||||
|
||||
LockBuffer( dBuffer, GIN_EXCLUSIVE );
|
||||
if ( !isParentRoot ) /* parent is already locked by LockBufferForCleanup() */
|
||||
LockBuffer( pBuffer, GIN_EXCLUSIVE );
|
||||
LockBuffer(dBuffer, GIN_EXCLUSIVE);
|
||||
if (!isParentRoot) /* parent is already locked by
|
||||
* LockBufferForCleanup() */
|
||||
LockBuffer(pBuffer, GIN_EXCLUSIVE);
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
if ( leftBlkno!= InvalidBlockNumber ) {
|
||||
if (leftBlkno != InvalidBlockNumber)
|
||||
{
|
||||
BlockNumber rightlink;
|
||||
|
||||
LockBuffer( lBuffer, GIN_EXCLUSIVE );
|
||||
LockBuffer(lBuffer, GIN_EXCLUSIVE);
|
||||
|
||||
page = BufferGetPage( dBuffer );
|
||||
page = BufferGetPage(dBuffer);
|
||||
rightlink = GinPageGetOpaque(page)->rightlink;
|
||||
|
||||
page = BufferGetPage( lBuffer );
|
||||
page = BufferGetPage(lBuffer);
|
||||
GinPageGetOpaque(page)->rightlink = rightlink;
|
||||
}
|
||||
|
||||
parentPage = BufferGetPage( pBuffer );
|
||||
parentPage = BufferGetPage(pBuffer);
|
||||
PageDeletePostingItem(parentPage, myoff);
|
||||
|
||||
page = BufferGetPage( dBuffer );
|
||||
page = BufferGetPage(dBuffer);
|
||||
GinPageGetOpaque(page)->flags = GIN_DELETED;
|
||||
|
||||
if (!gvs->index->rd_istemp) {
|
||||
XLogRecPtr recptr;
|
||||
if (!gvs->index->rd_istemp)
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[4];
|
||||
ginxlogDeletePage data;
|
||||
int n;
|
||||
ginxlogDeletePage data;
|
||||
int n;
|
||||
|
||||
data.node = gvs->index->rd_node;
|
||||
data.blkno = deleteBlkno;
|
||||
data.parentBlkno = parentBlkno;
|
||||
data.parentOffset = myoff;
|
||||
data.leftBlkno = leftBlkno;
|
||||
data.rightLink = GinPageGetOpaque(page)->rightlink;
|
||||
data.leftBlkno = leftBlkno;
|
||||
data.rightLink = GinPageGetOpaque(page)->rightlink;
|
||||
|
||||
rdata[0].buffer = dBuffer;
|
||||
rdata[0].buffer_std = FALSE;
|
||||
@@ -261,20 +296,22 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
|
||||
rdata[1].len = 0;
|
||||
rdata[1].next = rdata + 2;
|
||||
|
||||
if ( leftBlkno!= InvalidBlockNumber ) {
|
||||
if (leftBlkno != InvalidBlockNumber)
|
||||
{
|
||||
rdata[2].buffer = lBuffer;
|
||||
rdata[2].buffer_std = FALSE;
|
||||
rdata[2].data = NULL;
|
||||
rdata[2].len = 0;
|
||||
rdata[2].next = rdata + 3;
|
||||
n = 3;
|
||||
} else
|
||||
}
|
||||
else
|
||||
n = 2;
|
||||
|
||||
rdata[n].buffer = InvalidBuffer;
|
||||
rdata[n].buffer_std = FALSE;
|
||||
rdata[n].len = sizeof(ginxlogDeletePage);
|
||||
rdata[n].data = (char*)&data;
|
||||
rdata[n].data = (char *) &data;
|
||||
rdata[n].next = NULL;
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE, rdata);
|
||||
@@ -282,122 +319,141 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
|
||||
PageSetTLI(page, ThisTimeLineID);
|
||||
PageSetLSN(parentPage, recptr);
|
||||
PageSetTLI(parentPage, ThisTimeLineID);
|
||||
if ( leftBlkno!= InvalidBlockNumber ) {
|
||||
page = BufferGetPage( lBuffer );
|
||||
if (leftBlkno != InvalidBlockNumber)
|
||||
{
|
||||
page = BufferGetPage(lBuffer);
|
||||
PageSetLSN(page, recptr);
|
||||
PageSetTLI(page, ThisTimeLineID);
|
||||
}
|
||||
}
|
||||
|
||||
MarkBufferDirty( pBuffer );
|
||||
if ( !isParentRoot )
|
||||
LockBuffer( pBuffer, GIN_UNLOCK );
|
||||
ReleaseBuffer( pBuffer );
|
||||
MarkBufferDirty(pBuffer);
|
||||
if (!isParentRoot)
|
||||
LockBuffer(pBuffer, GIN_UNLOCK);
|
||||
ReleaseBuffer(pBuffer);
|
||||
|
||||
if ( leftBlkno!= InvalidBlockNumber ) {
|
||||
MarkBufferDirty( lBuffer );
|
||||
UnlockReleaseBuffer( lBuffer );
|
||||
if (leftBlkno != InvalidBlockNumber)
|
||||
{
|
||||
MarkBufferDirty(lBuffer);
|
||||
UnlockReleaseBuffer(lBuffer);
|
||||
}
|
||||
|
||||
MarkBufferDirty( dBuffer );
|
||||
UnlockReleaseBuffer( dBuffer );
|
||||
MarkBufferDirty(dBuffer);
|
||||
UnlockReleaseBuffer(dBuffer);
|
||||
|
||||
END_CRIT_SECTION();
|
||||
|
||||
gvs->result->pages_deleted++;
|
||||
}
|
||||
|
||||
typedef struct DataPageDeleteStack {
|
||||
struct DataPageDeleteStack *child;
|
||||
struct DataPageDeleteStack *parent;
|
||||
typedef struct DataPageDeleteStack
|
||||
{
|
||||
struct DataPageDeleteStack *child;
|
||||
struct DataPageDeleteStack *parent;
|
||||
|
||||
BlockNumber blkno;
|
||||
bool isRoot;
|
||||
BlockNumber blkno;
|
||||
bool isRoot;
|
||||
} DataPageDeleteStack;
|
||||
|
||||
/*
|
||||
* scans posting tree and deletes empty pages
|
||||
*/
|
||||
static bool
|
||||
ginScanToDelete( GinVacuumState *gvs, BlockNumber blkno, bool isRoot, DataPageDeleteStack *parent, OffsetNumber myoff ) {
|
||||
DataPageDeleteStack *me;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
bool meDelete = FALSE;
|
||||
ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, DataPageDeleteStack *parent, OffsetNumber myoff)
|
||||
{
|
||||
DataPageDeleteStack *me;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
bool meDelete = FALSE;
|
||||
|
||||
if ( isRoot ) {
|
||||
if (isRoot)
|
||||
{
|
||||
me = parent;
|
||||
} else {
|
||||
if ( ! parent->child ) {
|
||||
me = (DataPageDeleteStack*)palloc0(sizeof(DataPageDeleteStack));
|
||||
me->parent=parent;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!parent->child)
|
||||
{
|
||||
me = (DataPageDeleteStack *) palloc0(sizeof(DataPageDeleteStack));
|
||||
me->parent = parent;
|
||||
parent->child = me;
|
||||
me->blkno = InvalidBlockNumber;
|
||||
} else
|
||||
}
|
||||
else
|
||||
me = parent->child;
|
||||
}
|
||||
|
||||
buffer = ReadBuffer( gvs->index, blkno );
|
||||
page = BufferGetPage( buffer );
|
||||
buffer = ReadBuffer(gvs->index, blkno);
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
Assert( GinPageIsData(page) );
|
||||
Assert(GinPageIsData(page));
|
||||
|
||||
if ( !GinPageIsLeaf(page) ) {
|
||||
if (!GinPageIsLeaf(page))
|
||||
{
|
||||
OffsetNumber i;
|
||||
|
||||
for(i=FirstOffsetNumber;i<=GinPageGetOpaque(page)->maxoff;i++) {
|
||||
PostingItem *pitem = (PostingItem*)GinDataPageGetItem(page, i);
|
||||
for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++)
|
||||
{
|
||||
PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, i);
|
||||
|
||||
if ( ginScanToDelete( gvs, PostingItemGetBlockNumber(pitem), FALSE, me, i ) )
|
||||
if (ginScanToDelete(gvs, PostingItemGetBlockNumber(pitem), FALSE, me, i))
|
||||
i--;
|
||||
}
|
||||
}
|
||||
|
||||
if ( GinPageGetOpaque(page)->maxoff < FirstOffsetNumber ) {
|
||||
if ( !( me->blkno == InvalidBlockNumber && GinPageRightMost(page) ) ) {
|
||||
if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber)
|
||||
{
|
||||
if (!(me->blkno == InvalidBlockNumber && GinPageRightMost(page)))
|
||||
{
|
||||
/* we never delete right most branch */
|
||||
Assert( !isRoot );
|
||||
if ( GinPageGetOpaque(page)->maxoff < FirstOffsetNumber ) {
|
||||
ginDeletePage( gvs, blkno, me->blkno, me->parent->blkno, myoff, me->parent->isRoot );
|
||||
Assert(!isRoot);
|
||||
if (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber)
|
||||
{
|
||||
ginDeletePage(gvs, blkno, me->blkno, me->parent->blkno, myoff, me->parent->isRoot);
|
||||
meDelete = TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ReleaseBuffer( buffer );
|
||||
ReleaseBuffer(buffer);
|
||||
|
||||
if ( !meDelete )
|
||||
if (!meDelete)
|
||||
me->blkno = blkno;
|
||||
|
||||
return meDelete;
|
||||
}
|
||||
|
||||
static void
|
||||
ginVacuumPostingTree( GinVacuumState *gvs, BlockNumber rootBlkno ) {
|
||||
Buffer rootBuffer = InvalidBuffer;
|
||||
DataPageDeleteStack root, *ptr, *tmp;
|
||||
ginVacuumPostingTree(GinVacuumState *gvs, BlockNumber rootBlkno)
|
||||
{
|
||||
Buffer rootBuffer = InvalidBuffer;
|
||||
DataPageDeleteStack root,
|
||||
*ptr,
|
||||
*tmp;
|
||||
|
||||
if ( ginVacuumPostingTreeLeaves(gvs, rootBlkno, TRUE, &rootBuffer)==FALSE ) {
|
||||
Assert( rootBuffer == InvalidBuffer );
|
||||
if (ginVacuumPostingTreeLeaves(gvs, rootBlkno, TRUE, &rootBuffer) == FALSE)
|
||||
{
|
||||
Assert(rootBuffer == InvalidBuffer);
|
||||
return;
|
||||
}
|
||||
|
||||
memset(&root,0,sizeof(DataPageDeleteStack));
|
||||
memset(&root, 0, sizeof(DataPageDeleteStack));
|
||||
root.blkno = rootBlkno;
|
||||
root.isRoot = TRUE;
|
||||
|
||||
vacuum_delay_point();
|
||||
|
||||
ginScanToDelete( gvs, rootBlkno, TRUE, &root, InvalidOffsetNumber );
|
||||
ginScanToDelete(gvs, rootBlkno, TRUE, &root, InvalidOffsetNumber);
|
||||
|
||||
ptr = root.child;
|
||||
while( ptr ) {
|
||||
while (ptr)
|
||||
{
|
||||
tmp = ptr->child;
|
||||
pfree( ptr );
|
||||
pfree(ptr);
|
||||
ptr = tmp;
|
||||
}
|
||||
|
||||
UnlockReleaseBuffer( rootBuffer );
|
||||
UnlockReleaseBuffer(rootBuffer);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -406,48 +462,65 @@ ginVacuumPostingTree( GinVacuumState *gvs, BlockNumber rootBlkno ) {
|
||||
* then page is copied into temprorary one.
|
||||
*/
|
||||
static Page
|
||||
ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint32 *nroot) {
|
||||
Page origpage = BufferGetPage( buffer ), tmppage;
|
||||
OffsetNumber i, maxoff = PageGetMaxOffsetNumber( origpage );
|
||||
ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint32 *nroot)
|
||||
{
|
||||
Page origpage = BufferGetPage(buffer),
|
||||
tmppage;
|
||||
OffsetNumber i,
|
||||
maxoff = PageGetMaxOffsetNumber(origpage);
|
||||
|
||||
tmppage = origpage;
|
||||
|
||||
*nroot=0;
|
||||
*nroot = 0;
|
||||
|
||||
for(i=FirstOffsetNumber; i<= maxoff; i++) {
|
||||
IndexTuple itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i));
|
||||
for (i = FirstOffsetNumber; i <= maxoff; i++)
|
||||
{
|
||||
IndexTuple itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i));
|
||||
|
||||
if ( GinIsPostingTree(itup) ) {
|
||||
/* store posting tree's roots for further processing,
|
||||
we can't vacuum it just now due to risk of deadlocks with scans/inserts */
|
||||
roots[ *nroot ] = GinItemPointerGetBlockNumber(&itup->t_tid);
|
||||
if (GinIsPostingTree(itup))
|
||||
{
|
||||
/*
|
||||
* store posting tree's roots for further processing, we can't
|
||||
* vacuum it just now due to risk of deadlocks with scans/inserts
|
||||
*/
|
||||
roots[*nroot] = GinItemPointerGetBlockNumber(&itup->t_tid);
|
||||
(*nroot)++;
|
||||
} else if ( GinGetNPosting(itup) > 0 ) {
|
||||
/* if we already create temrorary page, we will make changes in place */
|
||||
ItemPointerData *cleaned = (tmppage==origpage) ? NULL : GinGetPosting(itup );
|
||||
uint32 newN = ginVacuumPostingList( gvs, GinGetPosting(itup), GinGetNPosting(itup), &cleaned );
|
||||
|
||||
if ( GinGetNPosting(itup) != newN ) {
|
||||
bool isnull;
|
||||
Datum value;
|
||||
}
|
||||
else if (GinGetNPosting(itup) > 0)
|
||||
{
|
||||
/*
|
||||
* if we already create temrorary page, we will make changes in
|
||||
* place
|
||||
*/
|
||||
ItemPointerData *cleaned = (tmppage == origpage) ? NULL : GinGetPosting(itup);
|
||||
uint32 newN = ginVacuumPostingList(gvs, GinGetPosting(itup), GinGetNPosting(itup), &cleaned);
|
||||
|
||||
if (GinGetNPosting(itup) != newN)
|
||||
{
|
||||
bool isnull;
|
||||
Datum value;
|
||||
|
||||
/*
|
||||
* Some ItemPointers was deleted, so we should remake our tuple
|
||||
* Some ItemPointers was deleted, so we should remake our
|
||||
* tuple
|
||||
*/
|
||||
|
||||
if ( tmppage==origpage ) {
|
||||
if (tmppage == origpage)
|
||||
{
|
||||
/*
|
||||
* On first difference we create temprorary page in memory
|
||||
* and copies content in to it.
|
||||
*/
|
||||
tmppage=GinPageGetCopyPage ( origpage );
|
||||
tmppage = GinPageGetCopyPage(origpage);
|
||||
|
||||
if ( newN > 0 ) {
|
||||
Size pos = ((char*)GinGetPosting(itup)) - ((char*)origpage);
|
||||
memcpy( tmppage+pos, cleaned, sizeof(ItemPointerData)*newN );
|
||||
if (newN > 0)
|
||||
{
|
||||
Size pos = ((char *) GinGetPosting(itup)) - ((char *) origpage);
|
||||
|
||||
memcpy(tmppage + pos, cleaned, sizeof(ItemPointerData) * newN);
|
||||
}
|
||||
|
||||
pfree( cleaned );
|
||||
pfree(cleaned);
|
||||
|
||||
/* set itup pointer to new page */
|
||||
itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i));
|
||||
@@ -457,30 +530,31 @@ ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint3
|
||||
itup = GinFormTuple(&gvs->ginstate, value, GinGetPosting(itup), newN);
|
||||
PageIndexTupleDelete(tmppage, i);
|
||||
|
||||
if ( PageAddItem( tmppage, (Item)itup, IndexTupleSize(itup), i, LP_USED ) != i )
|
||||
elog(ERROR, "failed to add item to index page in \"%s\"",
|
||||
RelationGetRelationName(gvs->index));
|
||||
if (PageAddItem(tmppage, (Item) itup, IndexTupleSize(itup), i, LP_USED) != i)
|
||||
elog(ERROR, "failed to add item to index page in \"%s\"",
|
||||
RelationGetRelationName(gvs->index));
|
||||
|
||||
pfree( itup );
|
||||
pfree(itup);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ( tmppage==origpage ) ? NULL : tmppage;
|
||||
return (tmppage == origpage) ? NULL : tmppage;
|
||||
}
|
||||
|
||||
Datum
|
||||
ginbulkdelete(PG_FUNCTION_ARGS) {
|
||||
ginbulkdelete(PG_FUNCTION_ARGS)
|
||||
{
|
||||
IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
|
||||
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
|
||||
IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2);
|
||||
void *callback_state = (void *) PG_GETARG_POINTER(3);
|
||||
Relation index = info->index;
|
||||
BlockNumber blkno = GIN_ROOT_BLKNO;
|
||||
GinVacuumState gvs;
|
||||
Buffer buffer;
|
||||
BlockNumber rootOfPostingTree[ BLCKSZ/ (sizeof(IndexTupleData)+sizeof(ItemId)) ];
|
||||
uint32 nRoot;
|
||||
BlockNumber blkno = GIN_ROOT_BLKNO;
|
||||
GinVacuumState gvs;
|
||||
Buffer buffer;
|
||||
BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))];
|
||||
uint32 nRoot;
|
||||
|
||||
/* first time through? */
|
||||
if (stats == NULL)
|
||||
@@ -494,107 +568,117 @@ ginbulkdelete(PG_FUNCTION_ARGS) {
|
||||
gvs.callback_state = callback_state;
|
||||
initGinState(&gvs.ginstate, index);
|
||||
|
||||
buffer = ReadBuffer( index, blkno );
|
||||
buffer = ReadBuffer(index, blkno);
|
||||
|
||||
/* find leaf page */
|
||||
for(;;) {
|
||||
Page page = BufferGetPage( buffer );
|
||||
IndexTuple itup;
|
||||
for (;;)
|
||||
{
|
||||
Page page = BufferGetPage(buffer);
|
||||
IndexTuple itup;
|
||||
|
||||
LockBuffer(buffer,GIN_SHARE);
|
||||
LockBuffer(buffer, GIN_SHARE);
|
||||
|
||||
Assert( !GinPageIsData(page) );
|
||||
Assert(!GinPageIsData(page));
|
||||
|
||||
if ( GinPageIsLeaf(page) ) {
|
||||
LockBuffer(buffer,GIN_UNLOCK);
|
||||
LockBuffer(buffer,GIN_EXCLUSIVE);
|
||||
if (GinPageIsLeaf(page))
|
||||
{
|
||||
LockBuffer(buffer, GIN_UNLOCK);
|
||||
LockBuffer(buffer, GIN_EXCLUSIVE);
|
||||
|
||||
if ( blkno==GIN_ROOT_BLKNO && !GinPageIsLeaf(page) ) {
|
||||
LockBuffer(buffer,GIN_UNLOCK);
|
||||
continue; /* check it one more */
|
||||
if (blkno == GIN_ROOT_BLKNO && !GinPageIsLeaf(page))
|
||||
{
|
||||
LockBuffer(buffer, GIN_UNLOCK);
|
||||
continue; /* check it one more */
|
||||
}
|
||||
break;
|
||||
break;
|
||||
}
|
||||
|
||||
Assert( PageGetMaxOffsetNumber(page) >= FirstOffsetNumber );
|
||||
Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);
|
||||
|
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
|
||||
blkno = GinItemPointerGetBlockNumber(&(itup)->t_tid);
|
||||
Assert( blkno!= InvalidBlockNumber );
|
||||
Assert(blkno != InvalidBlockNumber);
|
||||
|
||||
LockBuffer(buffer,GIN_UNLOCK);
|
||||
buffer = ReleaseAndReadBuffer( buffer, index, blkno );
|
||||
LockBuffer(buffer, GIN_UNLOCK);
|
||||
buffer = ReleaseAndReadBuffer(buffer, index, blkno);
|
||||
}
|
||||
|
||||
/* right now we found leftmost page in entry's BTree */
|
||||
|
||||
for(;;) {
|
||||
Page page = BufferGetPage( buffer );
|
||||
Page resPage;
|
||||
uint32 i;
|
||||
for (;;)
|
||||
{
|
||||
Page page = BufferGetPage(buffer);
|
||||
Page resPage;
|
||||
uint32 i;
|
||||
|
||||
Assert( !GinPageIsData(page) );
|
||||
Assert(!GinPageIsData(page));
|
||||
|
||||
resPage = ginVacuumEntryPage(&gvs, buffer, rootOfPostingTree, &nRoot);
|
||||
|
||||
blkno = GinPageGetOpaque( page )->rightlink;
|
||||
blkno = GinPageGetOpaque(page)->rightlink;
|
||||
|
||||
if ( resPage ) {
|
||||
if (resPage)
|
||||
{
|
||||
START_CRIT_SECTION();
|
||||
PageRestoreTempPage( resPage, page );
|
||||
xlogVacuumPage(gvs.index, buffer);
|
||||
MarkBufferDirty( buffer );
|
||||
PageRestoreTempPage(resPage, page);
|
||||
xlogVacuumPage(gvs.index, buffer);
|
||||
MarkBufferDirty(buffer);
|
||||
UnlockReleaseBuffer(buffer);
|
||||
END_CRIT_SECTION();
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
|
||||
vacuum_delay_point();
|
||||
|
||||
for(i=0; i<nRoot; i++) {
|
||||
ginVacuumPostingTree( &gvs, rootOfPostingTree[i] );
|
||||
for (i = 0; i < nRoot; i++)
|
||||
{
|
||||
ginVacuumPostingTree(&gvs, rootOfPostingTree[i]);
|
||||
vacuum_delay_point();
|
||||
}
|
||||
|
||||
if ( blkno==InvalidBlockNumber ) /*rightmost page*/
|
||||
if (blkno == InvalidBlockNumber) /* rightmost page */
|
||||
break;
|
||||
|
||||
buffer = ReadBuffer( index, blkno );
|
||||
LockBuffer(buffer,GIN_EXCLUSIVE);
|
||||
buffer = ReadBuffer(index, blkno);
|
||||
LockBuffer(buffer, GIN_EXCLUSIVE);
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(gvs.result);
|
||||
}
|
||||
|
||||
Datum
|
||||
ginvacuumcleanup(PG_FUNCTION_ARGS) {
|
||||
Datum
|
||||
ginvacuumcleanup(PG_FUNCTION_ARGS)
|
||||
{
|
||||
IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
|
||||
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
|
||||
Relation index = info->index;
|
||||
bool needLock;
|
||||
BlockNumber npages,
|
||||
Relation index = info->index;
|
||||
bool needLock;
|
||||
BlockNumber npages,
|
||||
blkno;
|
||||
BlockNumber totFreePages,
|
||||
nFreePages,
|
||||
*freePages,
|
||||
maxFreePages;
|
||||
maxFreePages;
|
||||
BlockNumber lastBlock = GIN_ROOT_BLKNO,
|
||||
lastFilledBlock = GIN_ROOT_BLKNO;
|
||||
lastFilledBlock = GIN_ROOT_BLKNO;
|
||||
|
||||
/* Set up all-zero stats if ginbulkdelete wasn't called */
|
||||
if (stats == NULL)
|
||||
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
|
||||
|
||||
/*
|
||||
* XXX we always report the heap tuple count as the number of index
|
||||
* entries. This is bogus if the index is partial, but it's real hard
|
||||
* to tell how many distinct heap entries are referenced by a GIN index.
|
||||
* entries. This is bogus if the index is partial, but it's real hard to
|
||||
* tell how many distinct heap entries are referenced by a GIN index.
|
||||
*/
|
||||
stats->num_index_tuples = info->num_heap_tuples;
|
||||
|
||||
/*
|
||||
* If vacuum full, we already have exclusive lock on the index.
|
||||
* Otherwise, need lock unless it's local to this backend.
|
||||
* If vacuum full, we already have exclusive lock on the index. Otherwise,
|
||||
* need lock unless it's local to this backend.
|
||||
*/
|
||||
if (info->vacuum_full)
|
||||
needLock = false;
|
||||
@@ -614,32 +698,38 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) {
|
||||
totFreePages = nFreePages = 0;
|
||||
freePages = (BlockNumber *) palloc(sizeof(BlockNumber) * maxFreePages);
|
||||
|
||||
for (blkno = GIN_ROOT_BLKNO + 1; blkno < npages; blkno++) {
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
for (blkno = GIN_ROOT_BLKNO + 1; blkno < npages; blkno++)
|
||||
{
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
|
||||
vacuum_delay_point();
|
||||
|
||||
|
||||
buffer = ReadBuffer(index, blkno);
|
||||
LockBuffer(buffer, GIN_SHARE);
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
if ( GinPageIsDeleted(page) ) {
|
||||
if (GinPageIsDeleted(page))
|
||||
{
|
||||
if (nFreePages < maxFreePages)
|
||||
freePages[nFreePages++] = blkno;
|
||||
totFreePages++;
|
||||
} else
|
||||
}
|
||||
else
|
||||
lastFilledBlock = blkno;
|
||||
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
lastBlock = npages - 1;
|
||||
|
||||
if (info->vacuum_full && nFreePages > 0) {
|
||||
if (info->vacuum_full && nFreePages > 0)
|
||||
{
|
||||
/* try to truncate index */
|
||||
int i;
|
||||
for (i = 0; i < nFreePages; i++)
|
||||
if (freePages[i] >= lastFilledBlock) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nFreePages; i++)
|
||||
if (freePages[i] >= lastFilledBlock)
|
||||
{
|
||||
totFreePages = nFreePages = i;
|
||||
break;
|
||||
}
|
||||
@@ -661,4 +751,3 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) {
|
||||
|
||||
PG_RETURN_POINTER(stats);
|
||||
}
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginxlog.c,v 1.4 2006/08/07 16:57:56 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginxlog.c,v 1.5 2006/10/04 00:29:48 momjian Exp $
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
@@ -17,12 +17,13 @@
|
||||
#include "access/heapam.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
static MemoryContext opCtx; /* working memory for operations */
|
||||
static MemoryContext opCtx; /* working memory for operations */
|
||||
static MemoryContext topCtx;
|
||||
|
||||
typedef struct ginIncompleteSplit {
|
||||
RelFileNode node;
|
||||
BlockNumber leftBlkno;
|
||||
typedef struct ginIncompleteSplit
|
||||
{
|
||||
RelFileNode node;
|
||||
BlockNumber leftBlkno;
|
||||
BlockNumber rightBlkno;
|
||||
BlockNumber rootBlkno;
|
||||
} ginIncompleteSplit;
|
||||
@@ -30,10 +31,11 @@ typedef struct ginIncompleteSplit {
|
||||
static List *incomplete_splits;
|
||||
|
||||
static void
|
||||
pushIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber rightBlkno, BlockNumber rootBlkno) {
|
||||
ginIncompleteSplit *split;
|
||||
pushIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber rightBlkno, BlockNumber rootBlkno)
|
||||
{
|
||||
ginIncompleteSplit *split;
|
||||
|
||||
MemoryContextSwitchTo( topCtx );
|
||||
MemoryContextSwitchTo(topCtx);
|
||||
|
||||
split = palloc(sizeof(ginIncompleteSplit));
|
||||
|
||||
@@ -44,17 +46,20 @@ pushIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber rightBl
|
||||
|
||||
incomplete_splits = lappend(incomplete_splits, split);
|
||||
|
||||
MemoryContextSwitchTo( opCtx );
|
||||
MemoryContextSwitchTo(opCtx);
|
||||
}
|
||||
|
||||
static void
|
||||
forgetIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber updateBlkno) {
|
||||
forgetIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber updateBlkno)
|
||||
{
|
||||
ListCell *l;
|
||||
|
||||
foreach(l, incomplete_splits) {
|
||||
ginIncompleteSplit *split = (ginIncompleteSplit *) lfirst(l);
|
||||
foreach(l, incomplete_splits)
|
||||
{
|
||||
ginIncompleteSplit *split = (ginIncompleteSplit *) lfirst(l);
|
||||
|
||||
if ( RelFileNodeEquals(node, split->node) && leftBlkno == split->leftBlkno && updateBlkno == split->rightBlkno ) {
|
||||
if (RelFileNodeEquals(node, split->node) && leftBlkno == split->leftBlkno && updateBlkno == split->rightBlkno)
|
||||
{
|
||||
incomplete_splits = list_delete_ptr(incomplete_splits, split);
|
||||
break;
|
||||
}
|
||||
@@ -62,7 +67,8 @@ forgetIncompleteSplit(RelFileNode node, BlockNumber leftBlkno, BlockNumber updat
|
||||
}
|
||||
|
||||
static void
|
||||
ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) {
|
||||
ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
|
||||
{
|
||||
RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
|
||||
Relation reln;
|
||||
Buffer buffer;
|
||||
@@ -83,9 +89,10 @@ ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) {
|
||||
}
|
||||
|
||||
static void
|
||||
ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record) {
|
||||
ginxlogCreatePostingTree *data = (ginxlogCreatePostingTree*)XLogRecGetData(record);
|
||||
ItemPointerData *items = (ItemPointerData*)(XLogRecGetData(record) + sizeof(ginxlogCreatePostingTree));
|
||||
ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record)
|
||||
{
|
||||
ginxlogCreatePostingTree *data = (ginxlogCreatePostingTree *) XLogRecGetData(record);
|
||||
ItemPointerData *items = (ItemPointerData *) (XLogRecGetData(record) + sizeof(ginxlogCreatePostingTree));
|
||||
Relation reln;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
@@ -95,8 +102,8 @@ ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record) {
|
||||
Assert(BufferIsValid(buffer));
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
GinInitBuffer(buffer, GIN_DATA|GIN_LEAF);
|
||||
memcpy( GinDataPageGetData(page), items, sizeof(ItemPointerData) * data->nitem );
|
||||
GinInitBuffer(buffer, GIN_DATA | GIN_LEAF);
|
||||
memcpy(GinDataPageGetData(page), items, sizeof(ItemPointerData) * data->nitem);
|
||||
GinPageGetOpaque(page)->maxoff = data->nitem;
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
@@ -107,8 +114,9 @@ ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record) {
|
||||
}
|
||||
|
||||
static void
|
||||
ginRedoInsert(XLogRecPtr lsn, XLogRecord *record) {
|
||||
ginxlogInsert *data = (ginxlogInsert*)XLogRecGetData(record);
|
||||
ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
|
||||
{
|
||||
ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record);
|
||||
Relation reln;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
@@ -122,64 +130,73 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record) {
|
||||
Assert(BufferIsValid(buffer));
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
if ( data->isData ) {
|
||||
Assert( data->isDelete == FALSE );
|
||||
Assert( GinPageIsData( page ) );
|
||||
if (data->isData)
|
||||
{
|
||||
Assert(data->isDelete == FALSE);
|
||||
Assert(GinPageIsData(page));
|
||||
|
||||
if ( data->isLeaf ) {
|
||||
if (data->isLeaf)
|
||||
{
|
||||
OffsetNumber i;
|
||||
ItemPointerData *items = (ItemPointerData*)( XLogRecGetData(record) + sizeof(ginxlogInsert) );
|
||||
ItemPointerData *items = (ItemPointerData *) (XLogRecGetData(record) + sizeof(ginxlogInsert));
|
||||
|
||||
Assert( GinPageIsLeaf( page ) );
|
||||
Assert( data->updateBlkno == InvalidBlockNumber );
|
||||
Assert(GinPageIsLeaf(page));
|
||||
Assert(data->updateBlkno == InvalidBlockNumber);
|
||||
|
||||
for(i=0;i<data->nitem;i++)
|
||||
GinDataPageAddItem( page, items+i, data->offset + i );
|
||||
} else {
|
||||
for (i = 0; i < data->nitem; i++)
|
||||
GinDataPageAddItem(page, items + i, data->offset + i);
|
||||
}
|
||||
else
|
||||
{
|
||||
PostingItem *pitem;
|
||||
|
||||
Assert( !GinPageIsLeaf( page ) );
|
||||
Assert(!GinPageIsLeaf(page));
|
||||
|
||||
if ( data->updateBlkno != InvalidBlockNumber ) {
|
||||
/* update link to right page after split */
|
||||
pitem = (PostingItem*)GinDataPageGetItem(page, data->offset);
|
||||
PostingItemSetBlockNumber( pitem, data->updateBlkno );
|
||||
if (data->updateBlkno != InvalidBlockNumber)
|
||||
{
|
||||
/* update link to right page after split */
|
||||
pitem = (PostingItem *) GinDataPageGetItem(page, data->offset);
|
||||
PostingItemSetBlockNumber(pitem, data->updateBlkno);
|
||||
}
|
||||
|
||||
pitem = (PostingItem*)( XLogRecGetData(record) + sizeof(ginxlogInsert) );
|
||||
pitem = (PostingItem *) (XLogRecGetData(record) + sizeof(ginxlogInsert));
|
||||
|
||||
GinDataPageAddItem( page, pitem, data->offset );
|
||||
GinDataPageAddItem(page, pitem, data->offset);
|
||||
|
||||
if ( data->updateBlkno != InvalidBlockNumber )
|
||||
forgetIncompleteSplit(data->node, PostingItemGetBlockNumber( pitem ), data->updateBlkno);
|
||||
if (data->updateBlkno != InvalidBlockNumber)
|
||||
forgetIncompleteSplit(data->node, PostingItemGetBlockNumber(pitem), data->updateBlkno);
|
||||
}
|
||||
} else {
|
||||
IndexTuple itup;
|
||||
}
|
||||
else
|
||||
{
|
||||
IndexTuple itup;
|
||||
|
||||
Assert( !GinPageIsData( page ) );
|
||||
Assert(!GinPageIsData(page));
|
||||
|
||||
if ( data->updateBlkno != InvalidBlockNumber ) {
|
||||
/* update link to right page after split */
|
||||
Assert( !GinPageIsLeaf( page ) );
|
||||
Assert( data->offset>=FirstOffsetNumber && data->offset<=PageGetMaxOffsetNumber(page) );
|
||||
if (data->updateBlkno != InvalidBlockNumber)
|
||||
{
|
||||
/* update link to right page after split */
|
||||
Assert(!GinPageIsLeaf(page));
|
||||
Assert(data->offset >= FirstOffsetNumber && data->offset <= PageGetMaxOffsetNumber(page));
|
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, data->offset));
|
||||
ItemPointerSet(&itup->t_tid, data->updateBlkno, InvalidOffsetNumber);
|
||||
}
|
||||
|
||||
if ( data->isDelete ) {
|
||||
Assert( GinPageIsLeaf( page ) );
|
||||
Assert( data->offset>=FirstOffsetNumber && data->offset<=PageGetMaxOffsetNumber(page) );
|
||||
if (data->isDelete)
|
||||
{
|
||||
Assert(GinPageIsLeaf(page));
|
||||
Assert(data->offset >= FirstOffsetNumber && data->offset <= PageGetMaxOffsetNumber(page));
|
||||
PageIndexTupleDelete(page, data->offset);
|
||||
}
|
||||
|
||||
itup = (IndexTuple)( XLogRecGetData(record) + sizeof(ginxlogInsert) );
|
||||
itup = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsert));
|
||||
|
||||
if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), data->offset, LP_USED) == InvalidOffsetNumber )
|
||||
elog(ERROR, "failed to add item to index page in %u/%u/%u",
|
||||
data->node.spcNode, data->node.dbNode, data->node.relNode );
|
||||
if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), data->offset, LP_USED) == InvalidOffsetNumber)
|
||||
elog(ERROR, "failed to add item to index page in %u/%u/%u",
|
||||
data->node.spcNode, data->node.dbNode, data->node.relNode);
|
||||
|
||||
if ( !data->isLeaf && data->updateBlkno != InvalidBlockNumber )
|
||||
forgetIncompleteSplit(data->node, GinItemPointerGetBlockNumber( &itup->t_tid ), data->updateBlkno);
|
||||
if (!data->isLeaf && data->updateBlkno != InvalidBlockNumber)
|
||||
forgetIncompleteSplit(data->node, GinItemPointerGetBlockNumber(&itup->t_tid), data->updateBlkno);
|
||||
}
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
@@ -190,18 +207,21 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record) {
|
||||
}
|
||||
|
||||
static void
|
||||
ginRedoSplit(XLogRecPtr lsn, XLogRecord *record) {
|
||||
ginxlogSplit *data = (ginxlogSplit*)XLogRecGetData(record);
|
||||
ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
{
|
||||
ginxlogSplit *data = (ginxlogSplit *) XLogRecGetData(record);
|
||||
Relation reln;
|
||||
Buffer lbuffer, rbuffer;
|
||||
Page lpage, rpage;
|
||||
Buffer lbuffer,
|
||||
rbuffer;
|
||||
Page lpage,
|
||||
rpage;
|
||||
uint32 flags = 0;
|
||||
|
||||
reln = XLogOpenRelation(data->node);
|
||||
|
||||
if ( data->isLeaf )
|
||||
if (data->isLeaf)
|
||||
flags |= GIN_LEAF;
|
||||
if ( data->isData )
|
||||
if (data->isData)
|
||||
flags |= GIN_DATA;
|
||||
|
||||
lbuffer = XLogReadBuffer(reln, data->lblkno, data->isRootSplit);
|
||||
@@ -214,50 +234,57 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record) {
|
||||
rpage = (Page) BufferGetPage(rbuffer);
|
||||
GinInitBuffer(rbuffer, flags);
|
||||
|
||||
GinPageGetOpaque(lpage)->rightlink = BufferGetBlockNumber( rbuffer );
|
||||
GinPageGetOpaque(lpage)->rightlink = BufferGetBlockNumber(rbuffer);
|
||||
GinPageGetOpaque(rpage)->rightlink = data->rrlink;
|
||||
|
||||
if ( data->isData ) {
|
||||
char *ptr = XLogRecGetData(record) + sizeof(ginxlogSplit);
|
||||
Size sizeofitem = GinSizeOfItem(lpage);
|
||||
if (data->isData)
|
||||
{
|
||||
char *ptr = XLogRecGetData(record) + sizeof(ginxlogSplit);
|
||||
Size sizeofitem = GinSizeOfItem(lpage);
|
||||
OffsetNumber i;
|
||||
ItemPointer bound;
|
||||
ItemPointer bound;
|
||||
|
||||
for(i=0;i<data->separator;i++) {
|
||||
GinDataPageAddItem( lpage, ptr, InvalidOffsetNumber );
|
||||
for (i = 0; i < data->separator; i++)
|
||||
{
|
||||
GinDataPageAddItem(lpage, ptr, InvalidOffsetNumber);
|
||||
ptr += sizeofitem;
|
||||
}
|
||||
|
||||
for(i=data->separator;i<data->nitem;i++) {
|
||||
GinDataPageAddItem( rpage, ptr, InvalidOffsetNumber );
|
||||
for (i = data->separator; i < data->nitem; i++)
|
||||
{
|
||||
GinDataPageAddItem(rpage, ptr, InvalidOffsetNumber);
|
||||
ptr += sizeofitem;
|
||||
}
|
||||
|
||||
/* set up right key */
|
||||
bound = GinDataPageGetRightBound(lpage);
|
||||
if ( data->isLeaf )
|
||||
*bound = *(ItemPointerData*)GinDataPageGetItem(lpage, GinPageGetOpaque(lpage)->maxoff);
|
||||
if (data->isLeaf)
|
||||
*bound = *(ItemPointerData *) GinDataPageGetItem(lpage, GinPageGetOpaque(lpage)->maxoff);
|
||||
else
|
||||
*bound = ((PostingItem*)GinDataPageGetItem(lpage, GinPageGetOpaque(lpage)->maxoff))->key;
|
||||
*bound = ((PostingItem *) GinDataPageGetItem(lpage, GinPageGetOpaque(lpage)->maxoff))->key;
|
||||
|
||||
bound = GinDataPageGetRightBound(rpage);
|
||||
*bound = data->rightbound;
|
||||
} else {
|
||||
IndexTuple itup = (IndexTuple)( XLogRecGetData(record) + sizeof(ginxlogSplit) );
|
||||
}
|
||||
else
|
||||
{
|
||||
IndexTuple itup = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogSplit));
|
||||
OffsetNumber i;
|
||||
|
||||
for(i=0;i<data->separator;i++) {
|
||||
if ( PageAddItem( lpage, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber )
|
||||
elog(ERROR, "failed to add item to index page in %u/%u/%u",
|
||||
data->node.spcNode, data->node.dbNode, data->node.relNode );
|
||||
itup = (IndexTuple)( ((char*)itup) + MAXALIGN( IndexTupleSize(itup) ) );
|
||||
for (i = 0; i < data->separator; i++)
|
||||
{
|
||||
if (PageAddItem(lpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
|
||||
elog(ERROR, "failed to add item to index page in %u/%u/%u",
|
||||
data->node.spcNode, data->node.dbNode, data->node.relNode);
|
||||
itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
|
||||
}
|
||||
|
||||
for(i=data->separator;i<data->nitem;i++) {
|
||||
if ( PageAddItem( rpage, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber )
|
||||
elog(ERROR, "failed to add item to index page in %u/%u/%u",
|
||||
data->node.spcNode, data->node.dbNode, data->node.relNode );
|
||||
itup = (IndexTuple)( ((char*)itup) + MAXALIGN( IndexTupleSize(itup) ) );
|
||||
for (i = data->separator; i < data->nitem; i++)
|
||||
{
|
||||
if (PageAddItem(rpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
|
||||
elog(ERROR, "failed to add item to index page in %u/%u/%u",
|
||||
data->node.spcNode, data->node.dbNode, data->node.relNode);
|
||||
itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -269,20 +296,24 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record) {
|
||||
PageSetTLI(lpage, ThisTimeLineID);
|
||||
MarkBufferDirty(lbuffer);
|
||||
|
||||
if ( !data->isLeaf && data->updateBlkno != InvalidBlockNumber )
|
||||
if (!data->isLeaf && data->updateBlkno != InvalidBlockNumber)
|
||||
forgetIncompleteSplit(data->node, data->leftChildBlkno, data->updateBlkno);
|
||||
|
||||
if ( data->isRootSplit ) {
|
||||
Buffer rootBuf = XLogReadBuffer(reln, data->rootBlkno, false);
|
||||
Page rootPage = BufferGetPage( rootBuf );
|
||||
if (data->isRootSplit)
|
||||
{
|
||||
Buffer rootBuf = XLogReadBuffer(reln, data->rootBlkno, false);
|
||||
Page rootPage = BufferGetPage(rootBuf);
|
||||
|
||||
GinInitBuffer( rootBuf, flags & ~GIN_LEAF );
|
||||
GinInitBuffer(rootBuf, flags & ~GIN_LEAF);
|
||||
|
||||
if ( data->isData ) {
|
||||
Assert( data->rootBlkno != GIN_ROOT_BLKNO );
|
||||
if (data->isData)
|
||||
{
|
||||
Assert(data->rootBlkno != GIN_ROOT_BLKNO);
|
||||
dataFillRoot(NULL, rootBuf, lbuffer, rbuffer);
|
||||
} else {
|
||||
Assert( data->rootBlkno == GIN_ROOT_BLKNO );
|
||||
}
|
||||
else
|
||||
{
|
||||
Assert(data->rootBlkno == GIN_ROOT_BLKNO);
|
||||
entryFillRoot(NULL, rootBuf, lbuffer, rbuffer);
|
||||
}
|
||||
|
||||
@@ -291,7 +322,8 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record) {
|
||||
|
||||
MarkBufferDirty(rootBuf);
|
||||
UnlockReleaseBuffer(rootBuf);
|
||||
} else
|
||||
}
|
||||
else
|
||||
pushIncompleteSplit(data->node, data->lblkno, data->rblkno, data->rootBlkno);
|
||||
|
||||
UnlockReleaseBuffer(rbuffer);
|
||||
@@ -299,8 +331,9 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record) {
|
||||
}
|
||||
|
||||
static void
|
||||
ginRedoVacuumPage(XLogRecPtr lsn, XLogRecord *record) {
|
||||
ginxlogVacuumPage *data = (ginxlogVacuumPage*)XLogRecGetData(record);
|
||||
ginRedoVacuumPage(XLogRecPtr lsn, XLogRecord *record)
|
||||
{
|
||||
ginxlogVacuumPage *data = (ginxlogVacuumPage *) XLogRecGetData(record);
|
||||
Relation reln;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
@@ -314,25 +347,30 @@ ginRedoVacuumPage(XLogRecPtr lsn, XLogRecord *record) {
|
||||
Assert(BufferIsValid(buffer));
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
if ( GinPageIsData( page ) ) {
|
||||
memcpy( GinDataPageGetData(page), XLogRecGetData(record) + sizeof(ginxlogVacuumPage),
|
||||
GinSizeOfItem(page) * data->nitem );
|
||||
if (GinPageIsData(page))
|
||||
{
|
||||
memcpy(GinDataPageGetData(page), XLogRecGetData(record) + sizeof(ginxlogVacuumPage),
|
||||
GinSizeOfItem(page) *data->nitem);
|
||||
GinPageGetOpaque(page)->maxoff = data->nitem;
|
||||
} else {
|
||||
OffsetNumber i, *tod;
|
||||
IndexTuple itup = (IndexTuple)( XLogRecGetData(record) + sizeof(ginxlogVacuumPage) );
|
||||
}
|
||||
else
|
||||
{
|
||||
OffsetNumber i,
|
||||
*tod;
|
||||
IndexTuple itup = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogVacuumPage));
|
||||
|
||||
tod = (OffsetNumber*)palloc( sizeof(OffsetNumber) * PageGetMaxOffsetNumber(page) );
|
||||
for(i=FirstOffsetNumber;i<=PageGetMaxOffsetNumber(page);i++)
|
||||
tod[i-1] = i;
|
||||
tod = (OffsetNumber *) palloc(sizeof(OffsetNumber) * PageGetMaxOffsetNumber(page));
|
||||
for (i = FirstOffsetNumber; i <= PageGetMaxOffsetNumber(page); i++)
|
||||
tod[i - 1] = i;
|
||||
|
||||
PageIndexMultiDelete(page, tod, PageGetMaxOffsetNumber(page));
|
||||
|
||||
for(i=0;i<data->nitem;i++) {
|
||||
if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber )
|
||||
elog(ERROR, "failed to add item to index page in %u/%u/%u",
|
||||
data->node.spcNode, data->node.dbNode, data->node.relNode );
|
||||
itup = (IndexTuple)( ((char*)itup) + MAXALIGN( IndexTupleSize(itup) ) );
|
||||
PageIndexMultiDelete(page, tod, PageGetMaxOffsetNumber(page));
|
||||
|
||||
for (i = 0; i < data->nitem; i++)
|
||||
{
|
||||
if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
|
||||
elog(ERROR, "failed to add item to index page in %u/%u/%u",
|
||||
data->node.spcNode, data->node.dbNode, data->node.relNode);
|
||||
itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -344,17 +382,19 @@ ginRedoVacuumPage(XLogRecPtr lsn, XLogRecord *record) {
|
||||
}
|
||||
|
||||
static void
|
||||
ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record) {
|
||||
ginxlogDeletePage *data = (ginxlogDeletePage*)XLogRecGetData(record);
|
||||
ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
|
||||
{
|
||||
ginxlogDeletePage *data = (ginxlogDeletePage *) XLogRecGetData(record);
|
||||
Relation reln;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
|
||||
reln = XLogOpenRelation(data->node);
|
||||
|
||||
if ( !( record->xl_info & XLR_BKP_BLOCK_1) ) {
|
||||
if (!(record->xl_info & XLR_BKP_BLOCK_1))
|
||||
{
|
||||
buffer = XLogReadBuffer(reln, data->blkno, false);
|
||||
page = BufferGetPage( buffer );
|
||||
page = BufferGetPage(buffer);
|
||||
Assert(GinPageIsData(page));
|
||||
GinPageGetOpaque(page)->flags = GIN_DELETED;
|
||||
PageSetLSN(page, lsn);
|
||||
@@ -363,9 +403,10 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record) {
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
|
||||
if ( !( record->xl_info & XLR_BKP_BLOCK_2) ) {
|
||||
if (!(record->xl_info & XLR_BKP_BLOCK_2))
|
||||
{
|
||||
buffer = XLogReadBuffer(reln, data->parentBlkno, false);
|
||||
page = BufferGetPage( buffer );
|
||||
page = BufferGetPage(buffer);
|
||||
Assert(GinPageIsData(page));
|
||||
Assert(!GinPageIsLeaf(page));
|
||||
PageDeletePostingItem(page, data->parentOffset);
|
||||
@@ -375,9 +416,10 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record) {
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
|
||||
if ( !( record->xl_info & XLR_BKP_BLOCK_2) && data->leftBlkno != InvalidBlockNumber ) {
|
||||
if (!(record->xl_info & XLR_BKP_BLOCK_2) && data->leftBlkno != InvalidBlockNumber)
|
||||
{
|
||||
buffer = XLogReadBuffer(reln, data->leftBlkno, false);
|
||||
page = BufferGetPage( buffer );
|
||||
page = BufferGetPage(buffer);
|
||||
Assert(GinPageIsData(page));
|
||||
GinPageGetOpaque(page)->rightlink = data->rightLink;
|
||||
PageSetLSN(page, lsn);
|
||||
@@ -387,28 +429,30 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record) {
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
gin_redo(XLogRecPtr lsn, XLogRecord *record) {
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
void
|
||||
gin_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
{
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
|
||||
topCtx = MemoryContextSwitchTo(opCtx);
|
||||
switch (info) {
|
||||
case XLOG_GIN_CREATE_INDEX:
|
||||
switch (info)
|
||||
{
|
||||
case XLOG_GIN_CREATE_INDEX:
|
||||
ginRedoCreateIndex(lsn, record);
|
||||
break;
|
||||
case XLOG_GIN_CREATE_PTREE:
|
||||
case XLOG_GIN_CREATE_PTREE:
|
||||
ginRedoCreatePTree(lsn, record);
|
||||
break;
|
||||
case XLOG_GIN_INSERT:
|
||||
case XLOG_GIN_INSERT:
|
||||
ginRedoInsert(lsn, record);
|
||||
break;
|
||||
case XLOG_GIN_SPLIT:
|
||||
case XLOG_GIN_SPLIT:
|
||||
ginRedoSplit(lsn, record);
|
||||
break;
|
||||
case XLOG_GIN_VACUUM_PAGE:
|
||||
case XLOG_GIN_VACUUM_PAGE:
|
||||
ginRedoVacuumPage(lsn, record);
|
||||
break;
|
||||
case XLOG_GIN_DELETE_PAGE:
|
||||
case XLOG_GIN_DELETE_PAGE:
|
||||
ginRedoDeletePage(lsn, record);
|
||||
break;
|
||||
default:
|
||||
@@ -419,110 +463,122 @@ gin_redo(XLogRecPtr lsn, XLogRecord *record) {
|
||||
}
|
||||
|
||||
static void
|
||||
desc_node( StringInfo buf, RelFileNode node, BlockNumber blkno ) {
|
||||
appendStringInfo(buf,"node: %u/%u/%u blkno: %u",
|
||||
node.spcNode, node.dbNode, node.relNode, blkno);
|
||||
desc_node(StringInfo buf, RelFileNode node, BlockNumber blkno)
|
||||
{
|
||||
appendStringInfo(buf, "node: %u/%u/%u blkno: %u",
|
||||
node.spcNode, node.dbNode, node.relNode, blkno);
|
||||
}
|
||||
|
||||
void
|
||||
gin_desc(StringInfo buf, uint8 xl_info, char *rec) {
|
||||
uint8 info = xl_info & ~XLR_INFO_MASK;
|
||||
void
|
||||
gin_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
{
|
||||
uint8 info = xl_info & ~XLR_INFO_MASK;
|
||||
|
||||
switch (info) {
|
||||
case XLOG_GIN_CREATE_INDEX:
|
||||
appendStringInfo(buf,"Create index, ");
|
||||
desc_node(buf, *(RelFileNode*)rec, GIN_ROOT_BLKNO );
|
||||
switch (info)
|
||||
{
|
||||
case XLOG_GIN_CREATE_INDEX:
|
||||
appendStringInfo(buf, "Create index, ");
|
||||
desc_node(buf, *(RelFileNode *) rec, GIN_ROOT_BLKNO);
|
||||
break;
|
||||
case XLOG_GIN_CREATE_PTREE:
|
||||
appendStringInfo(buf,"Create posting tree, ");
|
||||
desc_node(buf, ((ginxlogCreatePostingTree*)rec)->node, ((ginxlogCreatePostingTree*)rec)->blkno );
|
||||
case XLOG_GIN_CREATE_PTREE:
|
||||
appendStringInfo(buf, "Create posting tree, ");
|
||||
desc_node(buf, ((ginxlogCreatePostingTree *) rec)->node, ((ginxlogCreatePostingTree *) rec)->blkno);
|
||||
break;
|
||||
case XLOG_GIN_INSERT:
|
||||
appendStringInfo(buf,"Insert item, ");
|
||||
desc_node(buf, ((ginxlogInsert*)rec)->node, ((ginxlogInsert*)rec)->blkno );
|
||||
appendStringInfo(buf," offset: %u nitem: %u isdata: %c isleaf %c isdelete %c updateBlkno:%u",
|
||||
((ginxlogInsert*)rec)->offset,
|
||||
((ginxlogInsert*)rec)->nitem,
|
||||
( ((ginxlogInsert*)rec)->isData ) ? 'T' : 'F',
|
||||
( ((ginxlogInsert*)rec)->isLeaf ) ? 'T' : 'F',
|
||||
( ((ginxlogInsert*)rec)->isDelete ) ? 'T' : 'F',
|
||||
((ginxlogInsert*)rec)->updateBlkno
|
||||
);
|
||||
case XLOG_GIN_INSERT:
|
||||
appendStringInfo(buf, "Insert item, ");
|
||||
desc_node(buf, ((ginxlogInsert *) rec)->node, ((ginxlogInsert *) rec)->blkno);
|
||||
appendStringInfo(buf, " offset: %u nitem: %u isdata: %c isleaf %c isdelete %c updateBlkno:%u",
|
||||
((ginxlogInsert *) rec)->offset,
|
||||
((ginxlogInsert *) rec)->nitem,
|
||||
(((ginxlogInsert *) rec)->isData) ? 'T' : 'F',
|
||||
(((ginxlogInsert *) rec)->isLeaf) ? 'T' : 'F',
|
||||
(((ginxlogInsert *) rec)->isDelete) ? 'T' : 'F',
|
||||
((ginxlogInsert *) rec)->updateBlkno
|
||||
);
|
||||
|
||||
break;
|
||||
case XLOG_GIN_SPLIT:
|
||||
appendStringInfo(buf,"Page split, ");
|
||||
desc_node(buf, ((ginxlogSplit*)rec)->node, ((ginxlogSplit*)rec)->lblkno );
|
||||
appendStringInfo(buf," isrootsplit: %c", ( ((ginxlogSplit*)rec)->isRootSplit ) ? 'T' : 'F');
|
||||
case XLOG_GIN_SPLIT:
|
||||
appendStringInfo(buf, "Page split, ");
|
||||
desc_node(buf, ((ginxlogSplit *) rec)->node, ((ginxlogSplit *) rec)->lblkno);
|
||||
appendStringInfo(buf, " isrootsplit: %c", (((ginxlogSplit *) rec)->isRootSplit) ? 'T' : 'F');
|
||||
break;
|
||||
case XLOG_GIN_VACUUM_PAGE:
|
||||
appendStringInfo(buf,"Vacuum page, ");
|
||||
desc_node(buf, ((ginxlogVacuumPage*)rec)->node, ((ginxlogVacuumPage*)rec)->blkno );
|
||||
case XLOG_GIN_VACUUM_PAGE:
|
||||
appendStringInfo(buf, "Vacuum page, ");
|
||||
desc_node(buf, ((ginxlogVacuumPage *) rec)->node, ((ginxlogVacuumPage *) rec)->blkno);
|
||||
break;
|
||||
case XLOG_GIN_DELETE_PAGE:
|
||||
appendStringInfo(buf,"Delete page, ");
|
||||
desc_node(buf, ((ginxlogDeletePage*)rec)->node, ((ginxlogDeletePage*)rec)->blkno );
|
||||
case XLOG_GIN_DELETE_PAGE:
|
||||
appendStringInfo(buf, "Delete page, ");
|
||||
desc_node(buf, ((ginxlogDeletePage *) rec)->node, ((ginxlogDeletePage *) rec)->blkno);
|
||||
break;
|
||||
default:
|
||||
elog(PANIC, "gin_desc: unknown op code %u", info);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
gin_xlog_startup(void) {
|
||||
void
|
||||
gin_xlog_startup(void)
|
||||
{
|
||||
incomplete_splits = NIL;
|
||||
|
||||
opCtx = AllocSetContextCreate(CurrentMemoryContext,
|
||||
"GIN recovery temporary context",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
"GIN recovery temporary context",
|
||||
ALLOCSET_DEFAULT_MINSIZE,
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
}
|
||||
|
||||
static void
|
||||
ginContinueSplit( ginIncompleteSplit *split ) {
|
||||
ginContinueSplit(ginIncompleteSplit *split)
|
||||
{
|
||||
GinBtreeData btree;
|
||||
Relation reln;
|
||||
Buffer buffer;
|
||||
GinBtreeStack stack;
|
||||
GinBtreeStack stack;
|
||||
|
||||
/* elog(NOTICE,"ginContinueSplit root:%u l:%u r:%u", split->rootBlkno, split->leftBlkno, split->rightBlkno); */
|
||||
/*
|
||||
* elog(NOTICE,"ginContinueSplit root:%u l:%u r:%u", split->rootBlkno,
|
||||
* split->leftBlkno, split->rightBlkno);
|
||||
*/
|
||||
reln = XLogOpenRelation(split->node);
|
||||
|
||||
buffer = XLogReadBuffer(reln, split->leftBlkno, false);
|
||||
buffer = XLogReadBuffer(reln, split->leftBlkno, false);
|
||||
|
||||
if ( split->rootBlkno == GIN_ROOT_BLKNO ) {
|
||||
prepareEntryScan( &btree, reln, (Datum)0, NULL );
|
||||
btree.entry = ginPageGetLinkItup( buffer );
|
||||
} else {
|
||||
Page page = BufferGetPage( buffer );
|
||||
if (split->rootBlkno == GIN_ROOT_BLKNO)
|
||||
{
|
||||
prepareEntryScan(&btree, reln, (Datum) 0, NULL);
|
||||
btree.entry = ginPageGetLinkItup(buffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
Page page = BufferGetPage(buffer);
|
||||
|
||||
prepareDataScan( &btree, reln );
|
||||
prepareDataScan(&btree, reln);
|
||||
|
||||
PostingItemSetBlockNumber( &(btree.pitem), split->leftBlkno );
|
||||
if ( GinPageIsLeaf(page) )
|
||||
btree.pitem.key = *(ItemPointerData*)GinDataPageGetItem(page,
|
||||
GinPageGetOpaque(page)->maxoff);
|
||||
PostingItemSetBlockNumber(&(btree.pitem), split->leftBlkno);
|
||||
if (GinPageIsLeaf(page))
|
||||
btree.pitem.key = *(ItemPointerData *) GinDataPageGetItem(page,
|
||||
GinPageGetOpaque(page)->maxoff);
|
||||
else
|
||||
btree.pitem.key = ((PostingItem*)GinDataPageGetItem(page,
|
||||
GinPageGetOpaque(page)->maxoff))->key;
|
||||
btree.pitem.key = ((PostingItem *) GinDataPageGetItem(page,
|
||||
GinPageGetOpaque(page)->maxoff))->key;
|
||||
}
|
||||
|
||||
btree.rightblkno = split->rightBlkno;
|
||||
btree.rightblkno = split->rightBlkno;
|
||||
|
||||
stack.blkno = split->leftBlkno;
|
||||
stack.buffer = buffer;
|
||||
stack.off = InvalidOffsetNumber;
|
||||
stack.parent = NULL;
|
||||
|
||||
findParents( &btree, &stack, split->rootBlkno);
|
||||
ginInsertValue( &btree, stack.parent );
|
||||
findParents(&btree, &stack, split->rootBlkno);
|
||||
ginInsertValue(&btree, stack.parent);
|
||||
|
||||
UnlockReleaseBuffer( buffer );
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
|
||||
void
|
||||
gin_xlog_cleanup(void) {
|
||||
void
|
||||
gin_xlog_cleanup(void)
|
||||
{
|
||||
ListCell *l;
|
||||
MemoryContext topCtx;
|
||||
|
||||
@@ -531,8 +587,9 @@ gin_xlog_cleanup(void) {
|
||||
foreach(l, incomplete_splits)
|
||||
{
|
||||
ginIncompleteSplit *split = (ginIncompleteSplit *) lfirst(l);
|
||||
ginContinueSplit( split );
|
||||
MemoryContextReset( opCtx );
|
||||
|
||||
ginContinueSplit(split);
|
||||
MemoryContextReset(opCtx);
|
||||
}
|
||||
|
||||
MemoryContextSwitchTo(topCtx);
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.142 2006/07/14 14:52:16 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.143 2006/10/04 00:29:48 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -185,7 +185,7 @@ gistbuildCallback(Relation index,
|
||||
|
||||
/* form an index tuple and point it at the heap tuple */
|
||||
itup = gistFormTuple(&buildstate->giststate, index,
|
||||
values, isnull, true /* size is currently bogus */);
|
||||
values, isnull, true /* size is currently bogus */ );
|
||||
itup->t_tid = htup->t_self;
|
||||
|
||||
/*
|
||||
@@ -199,7 +199,7 @@ gistbuildCallback(Relation index,
|
||||
* after initial build do not.
|
||||
*/
|
||||
gistdoinsert(index, itup,
|
||||
RelationGetTargetPageFreeSpace(index, GIST_DEFAULT_FILLFACTOR),
|
||||
RelationGetTargetPageFreeSpace(index, GIST_DEFAULT_FILLFACTOR),
|
||||
&buildstate->giststate);
|
||||
|
||||
buildstate->indtuples += 1;
|
||||
@@ -236,7 +236,7 @@ gistinsert(PG_FUNCTION_ARGS)
|
||||
initGISTstate(&giststate, r);
|
||||
|
||||
itup = gistFormTuple(&giststate, r,
|
||||
values, isnull, true /* size is currently bogus */);
|
||||
values, isnull, true /* size is currently bogus */ );
|
||||
itup->t_tid = *ht_ctid;
|
||||
|
||||
gistdoinsert(r, itup, 0, &giststate);
|
||||
@@ -285,18 +285,17 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
|
||||
bool is_leaf = (GistPageIsLeaf(state->stack->page)) ? true : false;
|
||||
|
||||
/*
|
||||
* if (!is_leaf) remove old key:
|
||||
* This node's key has been modified, either because a child split
|
||||
* occurred or because we needed to adjust our key for an insert in a
|
||||
* child node. Therefore, remove the old version of this node's key.
|
||||
* if (!is_leaf) remove old key: This node's key has been modified, either
|
||||
* because a child split occurred or because we needed to adjust our key
|
||||
* for an insert in a child node. Therefore, remove the old version of
|
||||
* this node's key.
|
||||
*
|
||||
* for WAL replay, in the non-split case we handle this by
|
||||
* setting up a one-element todelete array; in the split case, it's
|
||||
* handled implicitly because the tuple vector passed to gistSplit
|
||||
* won't include this tuple.
|
||||
* for WAL replay, in the non-split case we handle this by setting up a
|
||||
* one-element todelete array; in the split case, it's handled implicitly
|
||||
* because the tuple vector passed to gistSplit won't include this tuple.
|
||||
*
|
||||
* XXX: If we want to change fillfactors between node and leaf,
|
||||
* fillfactor = (is_leaf ? state->leaf_fillfactor : state->node_fillfactor)
|
||||
* XXX: If we want to change fillfactors between node and leaf, fillfactor
|
||||
* = (is_leaf ? state->leaf_fillfactor : state->node_fillfactor)
|
||||
*/
|
||||
if (gistnospace(state->stack->page, state->itup, state->ituplen,
|
||||
is_leaf ? InvalidOffsetNumber : state->stack->childoffnum,
|
||||
@@ -307,80 +306,88 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
|
||||
int tlen;
|
||||
SplitedPageLayout *dist = NULL,
|
||||
*ptr;
|
||||
BlockNumber rrlink = InvalidBlockNumber;
|
||||
BlockNumber rrlink = InvalidBlockNumber;
|
||||
GistNSN oldnsn;
|
||||
|
||||
is_splitted = true;
|
||||
|
||||
/*
|
||||
* Form index tuples vector to split:
|
||||
* remove old tuple if t's needed and add new tuples to vector
|
||||
* Form index tuples vector to split: remove old tuple if t's needed
|
||||
* and add new tuples to vector
|
||||
*/
|
||||
itvec = gistextractpage(state->stack->page, &tlen);
|
||||
if ( !is_leaf ) {
|
||||
if (!is_leaf)
|
||||
{
|
||||
/* on inner page we should remove old tuple */
|
||||
int pos = state->stack->childoffnum - FirstOffsetNumber;
|
||||
int pos = state->stack->childoffnum - FirstOffsetNumber;
|
||||
|
||||
tlen--;
|
||||
if ( pos != tlen )
|
||||
memmove( itvec+pos, itvec + pos + 1, sizeof( IndexTuple ) * (tlen-pos) );
|
||||
tlen--;
|
||||
if (pos != tlen)
|
||||
memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
|
||||
}
|
||||
itvec = gistjoinvector(itvec, &tlen, state->itup, state->ituplen);
|
||||
dist = gistSplit(state->r, state->stack->page, itvec, tlen, giststate);
|
||||
|
||||
state->itup = (IndexTuple*)palloc( sizeof(IndexTuple) * tlen);
|
||||
state->itup = (IndexTuple *) palloc(sizeof(IndexTuple) * tlen);
|
||||
state->ituplen = 0;
|
||||
|
||||
if (state->stack->blkno != GIST_ROOT_BLKNO) {
|
||||
/* if non-root split then we should not allocate new buffer,
|
||||
but we must create temporary page to operate */
|
||||
if (state->stack->blkno != GIST_ROOT_BLKNO)
|
||||
{
|
||||
/*
|
||||
* if non-root split then we should not allocate new buffer, but
|
||||
* we must create temporary page to operate
|
||||
*/
|
||||
dist->buffer = state->stack->buffer;
|
||||
dist->page = PageGetTempPage( BufferGetPage(dist->buffer), sizeof(GISTPageOpaqueData) );
|
||||
dist->page = PageGetTempPage(BufferGetPage(dist->buffer), sizeof(GISTPageOpaqueData));
|
||||
|
||||
/*clean all flags except F_LEAF */
|
||||
/* clean all flags except F_LEAF */
|
||||
GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;
|
||||
}
|
||||
|
||||
/* make new pages and fills them */
|
||||
for (ptr = dist; ptr; ptr = ptr->next) {
|
||||
int i;
|
||||
char *data;
|
||||
for (ptr = dist; ptr; ptr = ptr->next)
|
||||
{
|
||||
int i;
|
||||
char *data;
|
||||
|
||||
/* get new page */
|
||||
if ( ptr->buffer == InvalidBuffer ) {
|
||||
ptr->buffer = gistNewBuffer( state->r );
|
||||
GISTInitBuffer( ptr->buffer, (is_leaf) ? F_LEAF : 0 );
|
||||
if (ptr->buffer == InvalidBuffer)
|
||||
{
|
||||
ptr->buffer = gistNewBuffer(state->r);
|
||||
GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
|
||||
ptr->page = BufferGetPage(ptr->buffer);
|
||||
}
|
||||
ptr->block.blkno = BufferGetBlockNumber( ptr->buffer );
|
||||
ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
|
||||
|
||||
/* fill page, we can do it becouse all this pages are new (ie not linked in tree
|
||||
or masked by temp page */
|
||||
data = (char*)(ptr->list);
|
||||
for(i=0;i<ptr->block.num;i++) {
|
||||
if ( PageAddItem(ptr->page, (Item)data, IndexTupleSize((IndexTuple)data), i+FirstOffsetNumber, LP_USED) == InvalidOffsetNumber )
|
||||
/*
|
||||
* fill page, we can do it because all these pages are new (i.e. not
|
||||
* linked in tree or masked by temp page
|
||||
*/
|
||||
data = (char *) (ptr->list);
|
||||
for (i = 0; i < ptr->block.num; i++)
|
||||
{
|
||||
if (PageAddItem(ptr->page, (Item) data, IndexTupleSize((IndexTuple) data), i + FirstOffsetNumber, LP_USED) == InvalidOffsetNumber)
|
||||
elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(state->r));
|
||||
data += IndexTupleSize((IndexTuple)data);
|
||||
data += IndexTupleSize((IndexTuple) data);
|
||||
}
|
||||
|
||||
/* set up ItemPointer and remember it for parent */
|
||||
ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
|
||||
state->itup[ state->ituplen ] = ptr->itup;
|
||||
state->itup[state->ituplen] = ptr->itup;
|
||||
state->ituplen++;
|
||||
}
|
||||
|
||||
/* saves old rightlink */
|
||||
if ( state->stack->blkno != GIST_ROOT_BLKNO )
|
||||
rrlink = GistPageGetOpaque(dist->page)->rightlink;
|
||||
if (state->stack->blkno != GIST_ROOT_BLKNO)
|
||||
rrlink = GistPageGetOpaque(dist->page)->rightlink;
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
/*
|
||||
* must mark buffers dirty before XLogInsert, even though we'll
|
||||
* still be changing their opaque fields below.
|
||||
* set up right links.
|
||||
* must mark buffers dirty before XLogInsert, even though we'll still
|
||||
* be changing their opaque fields below. set up right links.
|
||||
*/
|
||||
for (ptr = dist; ptr; ptr = ptr->next)
|
||||
for (ptr = dist; ptr; ptr = ptr->next)
|
||||
{
|
||||
MarkBufferDirty(ptr->buffer);
|
||||
GistPageGetOpaque(ptr->page)->rightlink = (ptr->next) ?
|
||||
@@ -388,9 +395,10 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
|
||||
}
|
||||
|
||||
/* restore splitted non-root page */
|
||||
if ( state->stack->blkno != GIST_ROOT_BLKNO ) {
|
||||
PageRestoreTempPage( dist->page, BufferGetPage( dist->buffer ) );
|
||||
dist->page = BufferGetPage( dist->buffer );
|
||||
if (state->stack->blkno != GIST_ROOT_BLKNO)
|
||||
{
|
||||
PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
|
||||
dist->page = BufferGetPage(dist->buffer);
|
||||
}
|
||||
|
||||
if (!state->r->rd_istemp)
|
||||
@@ -419,25 +427,27 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
|
||||
|
||||
/* set up NSN */
|
||||
oldnsn = GistPageGetOpaque(dist->page)->nsn;
|
||||
if ( state->stack->blkno == GIST_ROOT_BLKNO )
|
||||
if (state->stack->blkno == GIST_ROOT_BLKNO)
|
||||
/* if root split we should put initial value */
|
||||
oldnsn = PageGetLSN(dist->page);
|
||||
|
||||
for (ptr = dist; ptr; ptr = ptr->next) {
|
||||
for (ptr = dist; ptr; ptr = ptr->next)
|
||||
{
|
||||
/* only for last set oldnsn */
|
||||
GistPageGetOpaque(ptr->page)->nsn = (ptr->next) ?
|
||||
PageGetLSN(ptr->page) : oldnsn;
|
||||
}
|
||||
|
||||
/*
|
||||
* release buffers, if it was a root split then
|
||||
* release all buffers because we create all buffers
|
||||
/*
|
||||
* release buffers, if it was a root split then release all buffers
|
||||
* because we create all buffers
|
||||
*/
|
||||
ptr = ( state->stack->blkno == GIST_ROOT_BLKNO ) ? dist : dist->next;
|
||||
for(; ptr; ptr = ptr->next)
|
||||
ptr = (state->stack->blkno == GIST_ROOT_BLKNO) ? dist : dist->next;
|
||||
for (; ptr; ptr = ptr->next)
|
||||
UnlockReleaseBuffer(ptr->buffer);
|
||||
|
||||
if (state->stack->blkno == GIST_ROOT_BLKNO) {
|
||||
if (state->stack->blkno == GIST_ROOT_BLKNO)
|
||||
{
|
||||
gistnewroot(state->r, state->stack->buffer, state->itup, state->ituplen, &(state->key));
|
||||
state->needInsertComplete = false;
|
||||
}
|
||||
@@ -470,7 +480,7 @@ gistplacetopage(GISTInsertState *state, GISTSTATE *giststate)
|
||||
}
|
||||
|
||||
rdata = formUpdateRdata(state->r->rd_node, state->stack->buffer,
|
||||
offs, noffs,
|
||||
offs, noffs,
|
||||
state->itup, state->ituplen,
|
||||
&(state->key));
|
||||
|
||||
@@ -922,16 +932,16 @@ gistSplit(Relation r,
|
||||
GistSplitVector v;
|
||||
GistEntryVector *entryvec;
|
||||
int i;
|
||||
SplitedPageLayout *res = NULL;
|
||||
SplitedPageLayout *res = NULL;
|
||||
|
||||
/* generate the item array */
|
||||
entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
|
||||
entryvec->n = len + 1;
|
||||
|
||||
memset( v.spl_lisnull, TRUE, sizeof(bool) * giststate->tupdesc->natts );
|
||||
memset( v.spl_risnull, TRUE, sizeof(bool) * giststate->tupdesc->natts );
|
||||
gistSplitByKey(r, page, itup, len, giststate,
|
||||
&v, entryvec, 0);
|
||||
memset(v.spl_lisnull, TRUE, sizeof(bool) * giststate->tupdesc->natts);
|
||||
memset(v.spl_risnull, TRUE, sizeof(bool) * giststate->tupdesc->natts);
|
||||
gistSplitByKey(r, page, itup, len, giststate,
|
||||
&v, entryvec, 0);
|
||||
|
||||
/* form left and right vector */
|
||||
lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
|
||||
@@ -952,19 +962,20 @@ gistSplit(Relation r,
|
||||
{
|
||||
ROTATEDIST(res);
|
||||
res->block.num = v.splitVector.spl_nright;
|
||||
res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &( res->lenlist ) );
|
||||
res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &(res->lenlist));
|
||||
res->itup = (v.spl_rightvalid) ? gistFormTuple(giststate, r, v.spl_rattr, v.spl_risnull, false)
|
||||
: gist_form_invalid_tuple(GIST_ROOT_BLKNO);
|
||||
}
|
||||
|
||||
if (!gistfitpage(lvectup, v.splitVector.spl_nleft))
|
||||
{
|
||||
SplitedPageLayout *resptr, *subres;
|
||||
SplitedPageLayout *resptr,
|
||||
*subres;
|
||||
|
||||
resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, giststate);
|
||||
|
||||
/* install on list's tail */
|
||||
while( resptr->next )
|
||||
/* install on list's tail */
|
||||
while (resptr->next)
|
||||
resptr = resptr->next;
|
||||
|
||||
resptr->next = res;
|
||||
@@ -974,7 +985,7 @@ gistSplit(Relation r,
|
||||
{
|
||||
ROTATEDIST(res);
|
||||
res->block.num = v.splitVector.spl_nleft;
|
||||
res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &( res->lenlist ) );
|
||||
res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &(res->lenlist));
|
||||
res->itup = (v.spl_leftvalid) ? gistFormTuple(giststate, r, v.spl_lattr, v.spl_lisnull, false)
|
||||
: gist_form_invalid_tuple(GIST_ROOT_BLKNO);
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.60 2006/07/14 14:52:16 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.61 2006/10/04 00:29:48 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -360,8 +360,7 @@ gistindex_keytest(IndexTuple tuple,
|
||||
IncrIndexProcessed();
|
||||
|
||||
/*
|
||||
* Tuple doesn't restore after crash recovery because of incomplete
|
||||
* insert
|
||||
* Tuple doesn't restore after crash recovery because of incomplete insert
|
||||
*/
|
||||
if (!GistPageIsLeaf(p) && GistTupleIsInvalid(tuple))
|
||||
return true;
|
||||
@@ -378,14 +377,18 @@ gistindex_keytest(IndexTuple tuple,
|
||||
giststate->tupdesc,
|
||||
&isNull);
|
||||
|
||||
if ( key->sk_flags & SK_ISNULL ) {
|
||||
/* is the compared-to datum NULL? on non-leaf page it's possible
|
||||
to have nulls in childs :( */
|
||||
if (key->sk_flags & SK_ISNULL)
|
||||
{
|
||||
/*
|
||||
* is the compared-to datum NULL? on non-leaf page it's possible
|
||||
* to have nulls in children :(
|
||||
*/
|
||||
|
||||
if ( isNull || !GistPageIsLeaf(p) )
|
||||
if (isNull || !GistPageIsLeaf(p))
|
||||
return true;
|
||||
return false;
|
||||
} else if ( isNull )
|
||||
}
|
||||
else if (isNull)
|
||||
return false;
|
||||
|
||||
gistdentryinit(giststate, key->sk_attno - 1, &de,
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gist/gistproc.c,v 1.8 2006/09/10 00:29:34 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gist/gistproc.c,v 1.9 2006/10/04 00:29:48 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -112,7 +112,8 @@ gist_box_consistent(PG_FUNCTION_ARGS)
|
||||
}
|
||||
|
||||
static void
|
||||
adjustBox( BOX *b, BOX *addon ) {
|
||||
adjustBox(BOX *b, BOX *addon)
|
||||
{
|
||||
if (b->high.x < addon->high.x)
|
||||
b->high.x = addon->high.x;
|
||||
if (b->low.x > addon->low.x)
|
||||
@@ -146,7 +147,7 @@ gist_box_union(PG_FUNCTION_ARGS)
|
||||
for (i = 1; i < numranges; i++)
|
||||
{
|
||||
cur = DatumGetBoxP(entryvec->vector[i].key);
|
||||
adjustBox( pageunion, cur );
|
||||
adjustBox(pageunion, cur);
|
||||
}
|
||||
*sizep = sizeof(BOX);
|
||||
|
||||
@@ -210,67 +211,79 @@ compare_KB(const void *a, const void *b)
|
||||
}
|
||||
|
||||
static void
|
||||
chooseLR( GIST_SPLITVEC *v,
|
||||
OffsetNumber *list1, int nlist1, BOX *union1,
|
||||
OffsetNumber *list2, int nlist2, BOX *union2 )
|
||||
chooseLR(GIST_SPLITVEC *v,
|
||||
OffsetNumber *list1, int nlist1, BOX *union1,
|
||||
OffsetNumber *list2, int nlist2, BOX *union2)
|
||||
{
|
||||
bool firstToLeft = true;
|
||||
bool firstToLeft = true;
|
||||
|
||||
if ( v->spl_ldatum_exists || v->spl_rdatum_exists ) {
|
||||
if ( v->spl_ldatum_exists && v->spl_rdatum_exists ) {
|
||||
BOX LRl = *union1, LRr = *union2;
|
||||
BOX RLl = *union2, RLr = *union1;
|
||||
double sizeLR, sizeRL;
|
||||
if (v->spl_ldatum_exists || v->spl_rdatum_exists)
|
||||
{
|
||||
if (v->spl_ldatum_exists && v->spl_rdatum_exists)
|
||||
{
|
||||
BOX LRl = *union1,
|
||||
LRr = *union2;
|
||||
BOX RLl = *union2,
|
||||
RLr = *union1;
|
||||
double sizeLR,
|
||||
sizeRL;
|
||||
|
||||
adjustBox( &LRl, DatumGetBoxP( v->spl_ldatum ) );
|
||||
adjustBox( &LRr, DatumGetBoxP( v->spl_rdatum ) );
|
||||
adjustBox( &RLl, DatumGetBoxP( v->spl_ldatum ) );
|
||||
adjustBox( &RLr, DatumGetBoxP( v->spl_rdatum ) );
|
||||
adjustBox(&LRl, DatumGetBoxP(v->spl_ldatum));
|
||||
adjustBox(&LRr, DatumGetBoxP(v->spl_rdatum));
|
||||
adjustBox(&RLl, DatumGetBoxP(v->spl_ldatum));
|
||||
adjustBox(&RLr, DatumGetBoxP(v->spl_rdatum));
|
||||
|
||||
sizeLR = size_box( DirectFunctionCall2(rt_box_inter, BoxPGetDatum(&LRl), BoxPGetDatum(&LRr)) );
|
||||
sizeRL = size_box( DirectFunctionCall2(rt_box_inter, BoxPGetDatum(&RLl), BoxPGetDatum(&RLr)) );
|
||||
sizeLR = size_box(DirectFunctionCall2(rt_box_inter, BoxPGetDatum(&LRl), BoxPGetDatum(&LRr)));
|
||||
sizeRL = size_box(DirectFunctionCall2(rt_box_inter, BoxPGetDatum(&RLl), BoxPGetDatum(&RLr)));
|
||||
|
||||
if ( sizeLR > sizeRL )
|
||||
if (sizeLR > sizeRL)
|
||||
firstToLeft = false;
|
||||
|
||||
} else {
|
||||
float p1, p2;
|
||||
GISTENTRY oldUnion, addon;
|
||||
}
|
||||
else
|
||||
{
|
||||
float p1,
|
||||
p2;
|
||||
GISTENTRY oldUnion,
|
||||
addon;
|
||||
|
||||
gistentryinit(oldUnion, ( v->spl_ldatum_exists ) ? v->spl_ldatum : v->spl_rdatum,
|
||||
gistentryinit(oldUnion, (v->spl_ldatum_exists) ? v->spl_ldatum : v->spl_rdatum,
|
||||
NULL, NULL, InvalidOffsetNumber, FALSE);
|
||||
|
||||
|
||||
gistentryinit(addon, BoxPGetDatum(union1), NULL, NULL, InvalidOffsetNumber, FALSE);
|
||||
DirectFunctionCall3(gist_box_penalty, PointerGetDatum(&oldUnion), PointerGetDatum(&union1), PointerGetDatum(&p1));
|
||||
DirectFunctionCall3(gist_box_penalty, PointerGetDatum(&oldUnion), PointerGetDatum(&union1), PointerGetDatum(&p1));
|
||||
gistentryinit(addon, BoxPGetDatum(union2), NULL, NULL, InvalidOffsetNumber, FALSE);
|
||||
DirectFunctionCall3(gist_box_penalty, PointerGetDatum(&oldUnion), PointerGetDatum(&union2), PointerGetDatum(&p2));
|
||||
|
||||
if ( (v->spl_ldatum_exists && p1 > p2) || (v->spl_rdatum_exists && p1 < p2) )
|
||||
firstToLeft = false;
|
||||
if ((v->spl_ldatum_exists && p1 > p2) || (v->spl_rdatum_exists && p1 < p2))
|
||||
firstToLeft = false;
|
||||
}
|
||||
}
|
||||
|
||||
if ( firstToLeft ) {
|
||||
if (firstToLeft)
|
||||
{
|
||||
v->spl_left = list1;
|
||||
v->spl_right = list2;
|
||||
v->spl_nleft = nlist1;
|
||||
v->spl_nright = nlist2;
|
||||
if ( v->spl_ldatum_exists )
|
||||
adjustBox(union1, DatumGetBoxP( v->spl_ldatum ) );
|
||||
if (v->spl_ldatum_exists)
|
||||
adjustBox(union1, DatumGetBoxP(v->spl_ldatum));
|
||||
v->spl_ldatum = BoxPGetDatum(union1);
|
||||
if ( v->spl_rdatum_exists )
|
||||
adjustBox(union2, DatumGetBoxP( v->spl_rdatum ) );
|
||||
if (v->spl_rdatum_exists)
|
||||
adjustBox(union2, DatumGetBoxP(v->spl_rdatum));
|
||||
v->spl_rdatum = BoxPGetDatum(union2);
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
v->spl_left = list2;
|
||||
v->spl_right = list1;
|
||||
v->spl_nleft = nlist2;
|
||||
v->spl_nright = nlist1;
|
||||
if ( v->spl_ldatum_exists )
|
||||
adjustBox(union2, DatumGetBoxP( v->spl_ldatum ) );
|
||||
if (v->spl_ldatum_exists)
|
||||
adjustBox(union2, DatumGetBoxP(v->spl_ldatum));
|
||||
v->spl_ldatum = BoxPGetDatum(union2);
|
||||
if ( v->spl_rdatum_exists )
|
||||
adjustBox(union1, DatumGetBoxP( v->spl_rdatum ) );
|
||||
if (v->spl_rdatum_exists)
|
||||
adjustBox(union1, DatumGetBoxP(v->spl_rdatum));
|
||||
v->spl_rdatum = BoxPGetDatum(union1);
|
||||
}
|
||||
|
||||
@@ -326,7 +339,7 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
|
||||
))
|
||||
allisequal = false;
|
||||
|
||||
adjustBox( &pageunion, cur );
|
||||
adjustBox(&pageunion, cur);
|
||||
}
|
||||
|
||||
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
|
||||
@@ -359,12 +372,12 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
|
||||
}
|
||||
}
|
||||
|
||||
if ( v->spl_ldatum_exists )
|
||||
adjustBox( unionL, DatumGetBoxP( v->spl_ldatum ) );
|
||||
if (v->spl_ldatum_exists)
|
||||
adjustBox(unionL, DatumGetBoxP(v->spl_ldatum));
|
||||
v->spl_ldatum = BoxPGetDatum(unionL);
|
||||
|
||||
if ( v->spl_rdatum_exists )
|
||||
adjustBox( unionR, DatumGetBoxP( v->spl_rdatum ) );
|
||||
if (v->spl_rdatum_exists)
|
||||
adjustBox(unionR, DatumGetBoxP(v->spl_rdatum));
|
||||
v->spl_rdatum = BoxPGetDatum(unionR);
|
||||
|
||||
v->spl_ldatum_exists = v->spl_rdatum_exists = false;
|
||||
@@ -471,13 +484,13 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
|
||||
}
|
||||
|
||||
if (direction == 'x')
|
||||
chooseLR( v,
|
||||
listL, posL, unionL,
|
||||
listR, posR, unionR );
|
||||
else
|
||||
chooseLR( v,
|
||||
listB, posB, unionB,
|
||||
listT, posT, unionT );
|
||||
chooseLR(v,
|
||||
listL, posL, unionL,
|
||||
listR, posR, unionR);
|
||||
else
|
||||
chooseLR(v,
|
||||
listB, posB, unionB,
|
||||
listT, posT, unionT);
|
||||
|
||||
PG_RETURN_POINTER(v);
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.64 2006/07/14 14:52:16 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gist/gistscan.c,v 1.65 2006/10/04 00:29:48 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -228,12 +228,12 @@ gistendscan(PG_FUNCTION_ARGS)
|
||||
|
||||
static void
|
||||
gistfreestack(GISTSearchStack *s)
|
||||
{
|
||||
{
|
||||
while (s != NULL)
|
||||
{
|
||||
GISTSearchStack *p = s->next;
|
||||
|
||||
pfree(s);
|
||||
s = p;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gist/gistsplit.c,v 1.2 2006/07/14 14:52:16 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gist/gistsplit.c,v 1.3 2006/10/04 00:29:48 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -16,12 +16,13 @@
|
||||
|
||||
#include "access/gist_private.h"
|
||||
|
||||
typedef struct {
|
||||
Datum *attr;
|
||||
int len;
|
||||
typedef struct
|
||||
{
|
||||
Datum *attr;
|
||||
int len;
|
||||
OffsetNumber *entries;
|
||||
bool *isnull;
|
||||
bool *equiv;
|
||||
bool *isnull;
|
||||
bool *equiv;
|
||||
} GistSplitUnion;
|
||||
|
||||
|
||||
@@ -29,25 +30,28 @@ typedef struct {
|
||||
* Forms unions of subkeys after page split, but
|
||||
* uses only tuples that aren't in groups of equivalent tuples
|
||||
*/
|
||||
static void
|
||||
gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
|
||||
GistSplitUnion *gsvp, int startkey) {
|
||||
IndexTuple *cleanedItVec;
|
||||
int i, cleanedLen=0;
|
||||
static void
|
||||
gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
|
||||
GistSplitUnion *gsvp, int startkey)
|
||||
{
|
||||
IndexTuple *cleanedItVec;
|
||||
int i,
|
||||
cleanedLen = 0;
|
||||
|
||||
cleanedItVec = (IndexTuple*)palloc(sizeof(IndexTuple) * gsvp->len);
|
||||
cleanedItVec = (IndexTuple *) palloc(sizeof(IndexTuple) * gsvp->len);
|
||||
|
||||
for(i=0;i<gsvp->len;i++) {
|
||||
if ( gsvp->equiv && gsvp->equiv[gsvp->entries[i]])
|
||||
for (i = 0; i < gsvp->len; i++)
|
||||
{
|
||||
if (gsvp->equiv && gsvp->equiv[gsvp->entries[i]])
|
||||
continue;
|
||||
|
||||
cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1];
|
||||
}
|
||||
|
||||
gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen, startkey,
|
||||
gsvp->attr, gsvp->isnull);
|
||||
gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen, startkey,
|
||||
gsvp->attr, gsvp->isnull);
|
||||
|
||||
pfree( cleanedItVec );
|
||||
pfree(cleanedItVec);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -56,7 +60,7 @@ gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
|
||||
static void
|
||||
gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl, int attno)
|
||||
{
|
||||
GistSplitUnion gsvp;
|
||||
GistSplitUnion gsvp;
|
||||
|
||||
gsvp.equiv = spl->spl_equiv;
|
||||
|
||||
@@ -76,34 +80,40 @@ gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl, i
|
||||
}
|
||||
|
||||
/*
|
||||
* find group in vector with equivalent value
|
||||
* find group in vector with equivalent value
|
||||
*/
|
||||
static int
|
||||
gistfindgroup(Relation r, GISTSTATE *giststate, GISTENTRY *valvec, GistSplitVector *spl, int attno)
|
||||
{
|
||||
int i;
|
||||
GISTENTRY entry;
|
||||
int len=0;
|
||||
int len = 0;
|
||||
|
||||
/*
|
||||
* attno key is always not null (see gistSplitByKey), so we may not check for
|
||||
* nulls
|
||||
* attno key is always not null (see gistSplitByKey), so we may not check
|
||||
* for nulls
|
||||
*/
|
||||
gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL, (OffsetNumber) 0, FALSE);
|
||||
for (i = 0; i < spl->splitVector.spl_nleft; i++) {
|
||||
float penalty = gistpenalty(giststate, attno, &entry, false,
|
||||
&valvec[spl->splitVector.spl_left[i]], false);
|
||||
if ( penalty == 0.0 ) {
|
||||
for (i = 0; i < spl->splitVector.spl_nleft; i++)
|
||||
{
|
||||
float penalty = gistpenalty(giststate, attno, &entry, false,
|
||||
&valvec[spl->splitVector.spl_left[i]], false);
|
||||
|
||||
if (penalty == 0.0)
|
||||
{
|
||||
spl->spl_equiv[spl->splitVector.spl_left[i]] = true;
|
||||
len++;
|
||||
}
|
||||
}
|
||||
|
||||
gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL, (OffsetNumber) 0, FALSE);
|
||||
for (i = 0; i < spl->splitVector.spl_nright; i++) {
|
||||
float penalty = gistpenalty(giststate, attno, &entry, false,
|
||||
&valvec[spl->splitVector.spl_right[i]], false);
|
||||
if ( penalty == 0.0 ) {
|
||||
for (i = 0; i < spl->splitVector.spl_nright; i++)
|
||||
{
|
||||
float penalty = gistpenalty(giststate, attno, &entry, false,
|
||||
&valvec[spl->splitVector.spl_right[i]], false);
|
||||
|
||||
if (penalty == 0.0)
|
||||
{
|
||||
spl->spl_equiv[spl->splitVector.spl_right[i]] = true;
|
||||
len++;
|
||||
}
|
||||
@@ -113,24 +123,32 @@ gistfindgroup(Relation r, GISTSTATE *giststate, GISTENTRY *valvec, GistSplitVect
|
||||
}
|
||||
|
||||
static void
|
||||
cleanupOffsets( OffsetNumber *a, int *len, bool *equiv, int *LenEquiv ) {
|
||||
int curlen,i;
|
||||
OffsetNumber *curwpos;
|
||||
cleanupOffsets(OffsetNumber *a, int *len, bool *equiv, int *LenEquiv)
|
||||
{
|
||||
int curlen,
|
||||
i;
|
||||
OffsetNumber *curwpos;
|
||||
|
||||
curlen = *len;
|
||||
curwpos = a;
|
||||
for (i = 0; i < *len; i++) {
|
||||
if ( equiv[ a[i] ] == FALSE ) {
|
||||
for (i = 0; i < *len; i++)
|
||||
{
|
||||
if (equiv[a[i]] == FALSE)
|
||||
{
|
||||
*curwpos = a[i];
|
||||
curwpos++;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
/* corner case: we shouldn't make void array */
|
||||
if ( curlen==1 ) {
|
||||
equiv[ a[i] ] = FALSE; /* mark item as non-equivalent */
|
||||
i--; /* redo the same */
|
||||
if (curlen == 1)
|
||||
{
|
||||
equiv[a[i]] = FALSE; /* mark item as non-equivalent */
|
||||
i--; /* redo the same */
|
||||
*LenEquiv -= 1;
|
||||
continue;
|
||||
} else
|
||||
}
|
||||
else
|
||||
curlen--;
|
||||
}
|
||||
}
|
||||
@@ -139,33 +157,37 @@ cleanupOffsets( OffsetNumber *a, int *len, bool *equiv, int *LenEquiv ) {
|
||||
}
|
||||
|
||||
static void
|
||||
placeOne( Relation r, GISTSTATE *giststate, GistSplitVector *v, IndexTuple itup, OffsetNumber off, int attno ) {
|
||||
placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v, IndexTuple itup, OffsetNumber off, int attno)
|
||||
{
|
||||
GISTENTRY identry[INDEX_MAX_KEYS];
|
||||
bool isnull[INDEX_MAX_KEYS];
|
||||
bool toLeft = true;
|
||||
bool toLeft = true;
|
||||
|
||||
gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0, identry, isnull);
|
||||
|
||||
for(;attno<giststate->tupdesc->natts;attno++) {
|
||||
float lpenalty, rpenalty;
|
||||
for (; attno < giststate->tupdesc->natts; attno++)
|
||||
{
|
||||
float lpenalty,
|
||||
rpenalty;
|
||||
GISTENTRY entry;
|
||||
|
||||
gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, FALSE);
|
||||
lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno], identry+attno, isnull[ attno ]);
|
||||
gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, FALSE);
|
||||
rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno], identry+attno, isnull[ attno ]);
|
||||
gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, FALSE);
|
||||
lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno], identry + attno, isnull[attno]);
|
||||
gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, FALSE);
|
||||
rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno], identry + attno, isnull[attno]);
|
||||
|
||||
if ( lpenalty != rpenalty ) {
|
||||
if ( lpenalty > rpenalty )
|
||||
if (lpenalty != rpenalty)
|
||||
{
|
||||
if (lpenalty > rpenalty)
|
||||
toLeft = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( toLeft )
|
||||
v->splitVector.spl_left[ v->splitVector.spl_nleft++ ] = off;
|
||||
if (toLeft)
|
||||
v->splitVector.spl_left[v->splitVector.spl_nleft++] = off;
|
||||
else
|
||||
v->splitVector.spl_right[ v->splitVector.spl_nright++ ] = off;
|
||||
v->splitVector.spl_right[v->splitVector.spl_nright++] = off;
|
||||
}
|
||||
|
||||
#define SWAPVAR( s, d, t ) \
|
||||
@@ -176,71 +198,83 @@ do { \
|
||||
} while(0)
|
||||
|
||||
/*
|
||||
* adjust left and right unions according to splits by previous
|
||||
* split by firsts columns. This function is called only in case
|
||||
* adjust left and right unions according to splits by previous
|
||||
* split by first columns. This function is called only in case
|
||||
* when pickSplit doesn't support secondary split.
|
||||
*/
|
||||
|
||||
static void
|
||||
supportSecondarySplit( Relation r, GISTSTATE *giststate, int attno, GIST_SPLITVEC *sv, Datum oldL, Datum oldR ) {
|
||||
bool leaveOnLeft = true, tmpBool;
|
||||
GISTENTRY entryL, entryR, entrySL, entrySR;
|
||||
|
||||
gistentryinit(entryL, oldL, r, NULL, 0, FALSE);
|
||||
gistentryinit(entryR, oldR, r, NULL, 0, FALSE);
|
||||
gistentryinit(entrySL, sv->spl_ldatum , r, NULL, 0, FALSE);
|
||||
gistentryinit(entrySR, sv->spl_rdatum , r, NULL, 0, FALSE);
|
||||
supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno, GIST_SPLITVEC *sv, Datum oldL, Datum oldR)
|
||||
{
|
||||
bool leaveOnLeft = true,
|
||||
tmpBool;
|
||||
GISTENTRY entryL,
|
||||
entryR,
|
||||
entrySL,
|
||||
entrySR;
|
||||
|
||||
if ( sv->spl_ldatum_exists && sv->spl_rdatum_exists ) {
|
||||
float penalty1, penalty2;
|
||||
gistentryinit(entryL, oldL, r, NULL, 0, FALSE);
|
||||
gistentryinit(entryR, oldR, r, NULL, 0, FALSE);
|
||||
gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, FALSE);
|
||||
gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, FALSE);
|
||||
|
||||
if (sv->spl_ldatum_exists && sv->spl_rdatum_exists)
|
||||
{
|
||||
float penalty1,
|
||||
penalty2;
|
||||
|
||||
penalty1 = gistpenalty(giststate, attno, &entryL, false, &entrySL, false) +
|
||||
gistpenalty(giststate, attno, &entryR, false, &entrySR, false);
|
||||
gistpenalty(giststate, attno, &entryR, false, &entrySR, false);
|
||||
penalty2 = gistpenalty(giststate, attno, &entryL, false, &entrySR, false) +
|
||||
gistpenalty(giststate, attno, &entryR, false, &entrySL, false);
|
||||
gistpenalty(giststate, attno, &entryR, false, &entrySL, false);
|
||||
|
||||
if ( penalty1 > penalty2 )
|
||||
if (penalty1 > penalty2)
|
||||
leaveOnLeft = false;
|
||||
|
||||
} else {
|
||||
GISTENTRY *entry1 = (sv->spl_ldatum_exists) ? &entryL : &entryR;
|
||||
float penalty1, penalty2;
|
||||
}
|
||||
else
|
||||
{
|
||||
GISTENTRY *entry1 = (sv->spl_ldatum_exists) ? &entryL : &entryR;
|
||||
float penalty1,
|
||||
penalty2;
|
||||
|
||||
/*
|
||||
* there is only one previously defined union,
|
||||
* so we just choose swap or not by lowest penalty
|
||||
* there is only one previously defined union, so we just choose swap
|
||||
* or not by lowest penalty
|
||||
*/
|
||||
|
||||
penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false);
|
||||
penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false);
|
||||
|
||||
if ( penalty1 < penalty2 )
|
||||
leaveOnLeft = ( sv->spl_ldatum_exists ) ? true : false;
|
||||
if (penalty1 < penalty2)
|
||||
leaveOnLeft = (sv->spl_ldatum_exists) ? true : false;
|
||||
else
|
||||
leaveOnLeft = ( sv->spl_rdatum_exists ) ? true : false;
|
||||
leaveOnLeft = (sv->spl_rdatum_exists) ? true : false;
|
||||
}
|
||||
|
||||
if ( leaveOnLeft == false ) {
|
||||
if (leaveOnLeft == false)
|
||||
{
|
||||
/*
|
||||
* swap left and right
|
||||
* swap left and right
|
||||
*/
|
||||
OffsetNumber *off, noff;
|
||||
Datum datum;
|
||||
OffsetNumber *off,
|
||||
noff;
|
||||
Datum datum;
|
||||
|
||||
SWAPVAR( sv->spl_left, sv->spl_right, off );
|
||||
SWAPVAR( sv->spl_nleft, sv->spl_nright, noff );
|
||||
SWAPVAR( sv->spl_ldatum, sv->spl_rdatum, datum );
|
||||
gistentryinit(entrySL, sv->spl_ldatum , r, NULL, 0, FALSE);
|
||||
gistentryinit(entrySR, sv->spl_rdatum , r, NULL, 0, FALSE);
|
||||
SWAPVAR(sv->spl_left, sv->spl_right, off);
|
||||
SWAPVAR(sv->spl_nleft, sv->spl_nright, noff);
|
||||
SWAPVAR(sv->spl_ldatum, sv->spl_rdatum, datum);
|
||||
gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, FALSE);
|
||||
gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, FALSE);
|
||||
}
|
||||
|
||||
if ( sv->spl_ldatum_exists )
|
||||
if (sv->spl_ldatum_exists)
|
||||
gistMakeUnionKey(giststate, attno, &entryL, false, &entrySL, false,
|
||||
&sv->spl_ldatum, &tmpBool);
|
||||
&sv->spl_ldatum, &tmpBool);
|
||||
|
||||
if ( sv->spl_rdatum_exists )
|
||||
if (sv->spl_rdatum_exists)
|
||||
gistMakeUnionKey(giststate, attno, &entryR, false, &entrySR, false,
|
||||
&sv->spl_rdatum, &tmpBool);
|
||||
&sv->spl_rdatum, &tmpBool);
|
||||
|
||||
sv->spl_ldatum_exists = sv->spl_rdatum_exists = false;
|
||||
}
|
||||
@@ -251,20 +285,21 @@ supportSecondarySplit( Relation r, GISTSTATE *giststate, int attno, GIST_SPLITVE
|
||||
* get better split.
|
||||
* Returns TRUE and v->spl_equiv = NULL if left and right unions of attno columns are the same,
|
||||
* so caller may find better split
|
||||
* Returns TRUE and v->spl_equiv != NULL if there is tuples which may be freely moved
|
||||
* Returns TRUE and v->spl_equiv != NULL if there are tuples which may be freely moved
|
||||
*/
|
||||
static bool
|
||||
gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v,
|
||||
IndexTuple *itup, int len, GISTSTATE *giststate)
|
||||
{
|
||||
GIST_SPLITVEC *sv = &v->splitVector;
|
||||
|
||||
/*
|
||||
* now let the user-defined picksplit function set up the split vector; in
|
||||
* entryvec have no null value!!
|
||||
*/
|
||||
|
||||
sv->spl_ldatum_exists = ( v->spl_lisnull[ attno ] ) ? false : true;
|
||||
sv->spl_rdatum_exists = ( v->spl_risnull[ attno ] ) ? false : true;
|
||||
sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true;
|
||||
sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true;
|
||||
sv->spl_ldatum = v->spl_lattr[attno];
|
||||
sv->spl_rdatum = v->spl_rattr[attno];
|
||||
|
||||
@@ -278,11 +313,12 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
|
||||
if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber)
|
||||
sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1);
|
||||
|
||||
if( sv->spl_ldatum_exists || sv->spl_rdatum_exists ) {
|
||||
elog(LOG,"PickSplit method of %d columns of index '%s' doesn't support secondary split",
|
||||
attno + 1, RelationGetRelationName(r) );
|
||||
if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
|
||||
{
|
||||
elog(LOG, "PickSplit method of %d columns of index '%s' doesn't support secondary split",
|
||||
attno + 1, RelationGetRelationName(r));
|
||||
|
||||
supportSecondarySplit( r, giststate, attno, sv, v->spl_lattr[attno], v->spl_rattr[attno] );
|
||||
supportSecondarySplit(r, giststate, attno, sv, v->spl_lattr[attno], v->spl_rattr[attno]);
|
||||
}
|
||||
|
||||
v->spl_lattr[attno] = sv->spl_ldatum;
|
||||
@@ -296,53 +332,64 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
|
||||
*/
|
||||
v->spl_equiv = NULL;
|
||||
|
||||
if (giststate->tupdesc->natts > 1 && attno+1 != giststate->tupdesc->natts)
|
||||
if (giststate->tupdesc->natts > 1 && attno + 1 != giststate->tupdesc->natts)
|
||||
{
|
||||
if ( gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum) ) {
|
||||
if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum))
|
||||
{
|
||||
/*
|
||||
* Left and right key's unions are equial, so
|
||||
* we can get better split by following columns. Note,
|
||||
* unions for attno columns are already done.
|
||||
* Left and right key's unions are equial, so we can get better
|
||||
* split by following columns. Note, unions for attno columns are
|
||||
* already done.
|
||||
*/
|
||||
|
||||
return true;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
int LenEquiv;
|
||||
|
||||
v->spl_equiv = (bool *) palloc0(sizeof(bool) * (entryvec->n+1));
|
||||
v->spl_equiv = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1));
|
||||
|
||||
LenEquiv = gistfindgroup(r, giststate, entryvec->vector, v, attno);
|
||||
|
||||
/*
|
||||
* if possible, we should distribute equivalent tuples
|
||||
*/
|
||||
if (LenEquiv == 0 ) {
|
||||
* if possible, we should distribute equivalent tuples
|
||||
*/
|
||||
if (LenEquiv == 0)
|
||||
{
|
||||
gistunionsubkey(giststate, itup, v, attno + 1);
|
||||
} else {
|
||||
cleanupOffsets( sv->spl_left, &sv->spl_nleft, v->spl_equiv, &LenEquiv );
|
||||
cleanupOffsets( sv->spl_right, &sv->spl_nright, v->spl_equiv, &LenEquiv );
|
||||
}
|
||||
else
|
||||
{
|
||||
cleanupOffsets(sv->spl_left, &sv->spl_nleft, v->spl_equiv, &LenEquiv);
|
||||
cleanupOffsets(sv->spl_right, &sv->spl_nright, v->spl_equiv, &LenEquiv);
|
||||
|
||||
gistunionsubkey(giststate, itup, v, attno + 1);
|
||||
if (LenEquiv == 1 ) {
|
||||
if (LenEquiv == 1)
|
||||
{
|
||||
/*
|
||||
* In case with one tuple we just choose left-right
|
||||
* by penalty. It's simplify user-defined pickSplit
|
||||
* In case with one tuple we just choose left-right by
|
||||
* penalty. It's simplify user-defined pickSplit
|
||||
*/
|
||||
OffsetNumber toMove = InvalidOffsetNumber;
|
||||
|
||||
for(toMove=FirstOffsetNumber;toMove<entryvec->n;toMove++)
|
||||
if ( v->spl_equiv[ toMove ] )
|
||||
for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++)
|
||||
if (v->spl_equiv[toMove])
|
||||
break;
|
||||
Assert( toMove < entryvec->n );
|
||||
|
||||
placeOne( r, giststate, v, itup[ toMove-1 ], toMove, attno+1 );
|
||||
/* redo gistunionsubkey(): it will not degradate performance,
|
||||
* because it's very rarely */
|
||||
Assert(toMove < entryvec->n);
|
||||
|
||||
placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1);
|
||||
|
||||
/*
|
||||
* redo gistunionsubkey(): it will not degradate
|
||||
* performance, because it's very rarely
|
||||
*/
|
||||
v->spl_equiv = NULL;
|
||||
gistunionsubkey(giststate, itup, v, attno + 1);
|
||||
|
||||
return false;
|
||||
} else if ( LenEquiv > 1 )
|
||||
}
|
||||
else if (LenEquiv > 1)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -352,60 +399,65 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
|
||||
}
|
||||
|
||||
/*
|
||||
* simple split page
|
||||
* simple split page
|
||||
*/
|
||||
static void
|
||||
gistSplitHalf(GIST_SPLITVEC *v, int len) {
|
||||
int i;
|
||||
gistSplitHalf(GIST_SPLITVEC *v, int len)
|
||||
{
|
||||
int i;
|
||||
|
||||
v->spl_nright = v->spl_nleft = 0;
|
||||
v->spl_nright = v->spl_nleft = 0;
|
||||
v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
|
||||
v->spl_right= (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
|
||||
for(i = 1; i <= len; i++)
|
||||
if ( i<len/2 )
|
||||
v->spl_right[ v->spl_nright++ ] = i;
|
||||
v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
|
||||
for (i = 1; i <= len; i++)
|
||||
if (i < len / 2)
|
||||
v->spl_right[v->spl_nright++] = i;
|
||||
else
|
||||
v->spl_left[ v->spl_nleft++ ] = i;
|
||||
v->spl_left[v->spl_nleft++] = i;
|
||||
}
|
||||
|
||||
/*
 * If an invalid tuple was found, the split needs special processing:
 * we move all invalid tuples to the right page.
 *
 * If there is not enough room on the left page, gistSplit will be called
 * one more time for the left page.
 *
 * Normally this code is never executed, but after crash replay it is
 * possible to get 'invalid' tuples (the probability is low).
 */
|
||||
static void
|
||||
gistSplitByInvalid(GISTSTATE *giststate, GistSplitVector *v, IndexTuple *itup, int len) {
|
||||
int i;
|
||||
static OffsetNumber offInvTuples[ MaxOffsetNumber ];
|
||||
int nOffInvTuples = 0;
|
||||
gistSplitByInvalid(GISTSTATE *giststate, GistSplitVector *v, IndexTuple *itup, int len)
|
||||
{
|
||||
int i;
|
||||
static OffsetNumber offInvTuples[MaxOffsetNumber];
|
||||
int nOffInvTuples = 0;
|
||||
|
||||
for (i = 1; i <= len; i++)
|
||||
if ( GistTupleIsInvalid(itup[i - 1]) )
|
||||
offInvTuples[ nOffInvTuples++ ] = i;
|
||||
if (GistTupleIsInvalid(itup[i - 1]))
|
||||
offInvTuples[nOffInvTuples++] = i;
|
||||
|
||||
if ( nOffInvTuples == len ) {
|
||||
if (nOffInvTuples == len)
|
||||
{
|
||||
/* corner case, all tuples are invalid */
|
||||
v->spl_rightvalid= v->spl_leftvalid = false;
|
||||
gistSplitHalf( &v->splitVector, len );
|
||||
} else {
|
||||
GistSplitUnion gsvp;
|
||||
|
||||
v->spl_rightvalid = v->spl_leftvalid = false;
|
||||
gistSplitHalf(&v->splitVector, len);
|
||||
}
|
||||
else
|
||||
{
|
||||
GistSplitUnion gsvp;
|
||||
|
||||
v->splitVector.spl_right = offInvTuples;
|
||||
v->splitVector.spl_nright = nOffInvTuples;
|
||||
v->spl_rightvalid = false;
|
||||
|
||||
v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
|
||||
v->splitVector.spl_nleft = 0;
|
||||
for(i = 1; i <= len; i++)
|
||||
if ( !GistTupleIsInvalid(itup[i - 1]) )
|
||||
v->splitVector.spl_left[ v->splitVector.spl_nleft++ ] = i;
|
||||
for (i = 1; i <= len; i++)
|
||||
if (!GistTupleIsInvalid(itup[i - 1]))
|
||||
v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
|
||||
v->spl_leftvalid = true;
|
||||
|
||||
|
||||
gsvp.equiv = NULL;
|
||||
gsvp.attr = v->spl_lattr;
|
||||
gsvp.len = v->splitVector.spl_nleft;
|
||||
@@ -418,52 +470,58 @@ gistSplitByInvalid(GISTSTATE *giststate, GistSplitVector *v, IndexTuple *itup, i
|
||||
|
||||
/*
 * Tries to split the page by the attno key; tuples whose value in that
 * column is null are moved to a separate page.
 */
|
||||
void
|
||||
gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate,
|
||||
GistSplitVector *v, GistEntryVector *entryvec, int attno) {
|
||||
int i;
|
||||
static OffsetNumber offNullTuples[ MaxOffsetNumber ];
|
||||
int nOffNullTuples = 0;
|
||||
gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate,
|
||||
GistSplitVector *v, GistEntryVector *entryvec, int attno)
|
||||
{
|
||||
int i;
|
||||
static OffsetNumber offNullTuples[MaxOffsetNumber];
|
||||
int nOffNullTuples = 0;
|
||||
|
||||
for (i = 1; i <= len; i++) {
|
||||
Datum datum;
|
||||
bool IsNull;
|
||||
for (i = 1; i <= len; i++)
|
||||
{
|
||||
Datum datum;
|
||||
bool IsNull;
|
||||
|
||||
if (!GistPageIsLeaf(page) && GistTupleIsInvalid(itup[i - 1])) {
|
||||
if (!GistPageIsLeaf(page) && GistTupleIsInvalid(itup[i - 1]))
|
||||
{
|
||||
gistSplitByInvalid(giststate, v, itup, len);
|
||||
return;
|
||||
}
|
||||
|
||||
datum = index_getattr(itup[i - 1], attno+1, giststate->tupdesc, &IsNull);
|
||||
datum = index_getattr(itup[i - 1], attno + 1, giststate->tupdesc, &IsNull);
|
||||
gistdentryinit(giststate, attno, &(entryvec->vector[i]),
|
||||
datum, r, page, i,
|
||||
FALSE, IsNull);
|
||||
if ( IsNull )
|
||||
offNullTuples[ nOffNullTuples++ ] = i;
|
||||
if (IsNull)
|
||||
offNullTuples[nOffNullTuples++] = i;
|
||||
}
|
||||
|
||||
v->spl_leftvalid = v->spl_rightvalid = true;
|
||||
|
||||
if ( nOffNullTuples == len ) {
|
||||
/*
|
||||
if (nOffNullTuples == len)
|
||||
{
|
||||
/*
|
||||
* Corner case: All keys in attno column are null, we should try to
|
||||
* split by keys in next column. It all keys in all columns
|
||||
* are NULL just split page half by half
|
||||
* split by keys in next column. It all keys in all columns are NULL
|
||||
* just split page half by half
|
||||
*/
|
||||
v->spl_risnull[attno] = v->spl_lisnull[attno] = TRUE;
|
||||
|
||||
if ( attno+1 == r->rd_att->natts )
|
||||
gistSplitHalf( &v->splitVector, len );
|
||||
else
|
||||
gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno+1);
|
||||
} else if ( nOffNullTuples > 0 ) {
|
||||
int j=0;
|
||||
|
||||
/*
|
||||
* We don't want to mix NULLs and not-NULLs keys
|
||||
* on one page, so move nulls to right page
|
||||
if (attno + 1 == r->rd_att->natts)
|
||||
gistSplitHalf(&v->splitVector, len);
|
||||
else
|
||||
gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno + 1);
|
||||
}
|
||||
else if (nOffNullTuples > 0)
|
||||
{
|
||||
int j = 0;
|
||||
|
||||
/*
|
||||
* We don't want to mix NULLs and not-NULLs keys on one page, so move
|
||||
* nulls to right page
|
||||
*/
|
||||
v->splitVector.spl_right = offNullTuples;
|
||||
v->splitVector.spl_nright = nOffNullTuples;
|
||||
@@ -471,61 +529,71 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *gist
|
||||
|
||||
v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
|
||||
v->splitVector.spl_nleft = 0;
|
||||
for(i = 1; i <= len; i++)
|
||||
if ( j<v->splitVector.spl_nright && offNullTuples[j] == i )
|
||||
for (i = 1; i <= len; i++)
|
||||
if (j < v->splitVector.spl_nright && offNullTuples[j] == i)
|
||||
j++;
|
||||
else
|
||||
v->splitVector.spl_left[ v->splitVector.spl_nleft++ ] = i;
|
||||
v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
|
||||
|
||||
v->spl_equiv = NULL;
|
||||
gistunionsubkey(giststate, itup, v, attno);
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* all keys are not-null
|
||||
*/
|
||||
entryvec->n = len+1;
|
||||
entryvec->n = len + 1;
|
||||
|
||||
if ( gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate) && attno+1 != r->rd_att->natts ) {
|
||||
if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate) && attno + 1 != r->rd_att->natts)
|
||||
{
|
||||
/*
|
||||
* Splitting on attno column is not optimized: there is a tuples which can be freely
|
||||
* left or right page, we will try to split page by
|
||||
* following columns
|
||||
* Splitting on attno column is not optimized: there is a tuples
|
||||
* which can be freely left or right page, we will try to split
|
||||
* page by following columns
|
||||
*/
|
||||
if ( v->spl_equiv == NULL ) {
|
||||
/* simple case: left and right keys for attno column are equial */
|
||||
gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno+1);
|
||||
} else {
|
||||
if (v->spl_equiv == NULL)
|
||||
{
|
||||
/*
|
||||
* simple case: left and right keys for attno column are
|
||||
* equial
|
||||
*/
|
||||
gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* we should clean up vector from already distributed tuples */
|
||||
IndexTuple *newitup = (IndexTuple*)palloc((len + 1) * sizeof(IndexTuple));
|
||||
OffsetNumber *map = (OffsetNumber*)palloc((len + 1) * sizeof(IndexTuple));
|
||||
int newlen = 0;
|
||||
IndexTuple *newitup = (IndexTuple *) palloc((len + 1) * sizeof(IndexTuple));
|
||||
OffsetNumber *map = (OffsetNumber *) palloc((len + 1) * sizeof(IndexTuple));
|
||||
int newlen = 0;
|
||||
GIST_SPLITVEC backupSplit = v->splitVector;
|
||||
|
||||
for(i=0; i<len; i++)
|
||||
if ( v->spl_equiv[i+1] ) {
|
||||
map[ newlen ] = i+1;
|
||||
newitup[ newlen++ ] = itup[i];
|
||||
for (i = 0; i < len; i++)
|
||||
if (v->spl_equiv[i + 1])
|
||||
{
|
||||
map[newlen] = i + 1;
|
||||
newitup[newlen++] = itup[i];
|
||||
}
|
||||
|
||||
Assert( newlen>0 );
|
||||
Assert(newlen > 0);
|
||||
|
||||
backupSplit.spl_left = (OffsetNumber*)palloc(sizeof(OffsetNumber)*len);
|
||||
memcpy( backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber)*v->splitVector.spl_nleft);
|
||||
backupSplit.spl_right = (OffsetNumber*)palloc(sizeof(OffsetNumber)*len);
|
||||
memcpy( backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber)*v->splitVector.spl_nright);
|
||||
backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
|
||||
memcpy(backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber) * v->splitVector.spl_nleft);
|
||||
backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
|
||||
memcpy(backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber) * v->splitVector.spl_nright);
|
||||
|
||||
gistSplitByKey(r, page, newitup, newlen, giststate, v, entryvec, attno+1);
|
||||
gistSplitByKey(r, page, newitup, newlen, giststate, v, entryvec, attno + 1);
|
||||
|
||||
/* merge result of subsplit */
|
||||
for(i=0;i<v->splitVector.spl_nleft;i++)
|
||||
backupSplit.spl_left[ backupSplit.spl_nleft++ ] = map[ v->splitVector.spl_left[i]-1 ];
|
||||
for(i=0;i<v->splitVector.spl_nright;i++)
|
||||
backupSplit.spl_right[ backupSplit.spl_nright++ ] = map[ v->splitVector.spl_right[i]-1 ];
|
||||
for (i = 0; i < v->splitVector.spl_nleft; i++)
|
||||
backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1];
|
||||
for (i = 0; i < v->splitVector.spl_nright; i++)
|
||||
backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1];
|
||||
|
||||
v->splitVector = backupSplit;
|
||||
/* reunion left and right datums */
|
||||
gistunionsubkey(giststate, itup, v, attno);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.19 2006/07/14 14:52:16 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gist/gistutil.c,v 1.20 2006/10/04 00:29:48 momjian Exp $
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
@@ -22,8 +22,8 @@
|
||||
 * static *S used for temporary storage (saves stack and palloc() call)
|
||||
*/
|
||||
|
||||
static Datum attrS[INDEX_MAX_KEYS];
|
||||
static bool isnullS[INDEX_MAX_KEYS];
|
||||
static Datum attrS[INDEX_MAX_KEYS];
|
||||
static bool isnullS[INDEX_MAX_KEYS];
|
||||
|
||||
/*
|
||||
* Write itup vector to page, has no control of free space
|
||||
@@ -57,14 +57,17 @@ gistfillbuffer(Relation r, Page page, IndexTuple *itup,
|
||||
bool
|
||||
gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)
|
||||
{
|
||||
unsigned int size = freespace, deleted = 0;
|
||||
unsigned int size = freespace,
|
||||
deleted = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
|
||||
|
||||
if ( todelete != InvalidOffsetNumber ) {
|
||||
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, todelete));
|
||||
if (todelete != InvalidOffsetNumber)
|
||||
{
|
||||
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, todelete));
|
||||
|
||||
deleted = IndexTupleSize(itup) + sizeof(ItemIdData);
|
||||
}
|
||||
|
||||
@@ -72,11 +75,12 @@ gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size f
|
||||
}
|
||||
|
||||
bool
|
||||
gistfitpage(IndexTuple *itvec, int len) {
|
||||
int i;
|
||||
Size size=0;
|
||||
gistfitpage(IndexTuple *itvec, int len)
|
||||
{
|
||||
int i;
|
||||
Size size = 0;
|
||||
|
||||
for(i=0;i<len;i++)
|
||||
for (i = 0; i < len; i++)
|
||||
size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
|
||||
|
||||
/* TODO: Consider fillfactor */
|
||||
@@ -119,56 +123,64 @@ gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
|
||||
*/
|
||||
|
||||
IndexTupleData *
|
||||
gistfillitupvec(IndexTuple *vec, int veclen, int *memlen) {
|
||||
char *ptr, *ret;
|
||||
int i;
|
||||
gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
|
||||
{
|
||||
char *ptr,
|
||||
*ret;
|
||||
int i;
|
||||
|
||||
*memlen = 0;
|
||||
|
||||
*memlen=0;
|
||||
|
||||
for (i = 0; i < veclen; i++)
|
||||
*memlen += IndexTupleSize(vec[i]);
|
||||
|
||||
ptr = ret = palloc(*memlen);
|
||||
|
||||
for (i = 0; i < veclen; i++) {
|
||||
for (i = 0; i < veclen; i++)
|
||||
{
|
||||
memcpy(ptr, vec[i], IndexTupleSize(vec[i]));
|
||||
ptr += IndexTupleSize(vec[i]);
|
||||
}
|
||||
|
||||
return (IndexTupleData*)ret;
|
||||
return (IndexTupleData *) ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make unions of keys in IndexTuple vector, return FALSE if itvec contains
|
||||
* Make unions of keys in IndexTuple vector, return FALSE if itvec contains
|
||||
* invalid tuple. Resulting Datums aren't compressed.
|
||||
*/
|
||||
|
||||
bool
|
||||
gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startkey,
|
||||
Datum *attr, bool *isnull ) {
|
||||
bool
|
||||
gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startkey,
|
||||
Datum *attr, bool *isnull)
|
||||
{
|
||||
int i;
|
||||
GistEntryVector *evec;
|
||||
int attrsize;
|
||||
int attrsize;
|
||||
|
||||
evec = (GistEntryVector *) palloc( ( len + 2 ) * sizeof(GISTENTRY) + GEVHDRSZ);
|
||||
evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ);
|
||||
|
||||
for (i = startkey; i < giststate->tupdesc->natts; i++) {
|
||||
int j;
|
||||
for (i = startkey; i < giststate->tupdesc->natts; i++)
|
||||
{
|
||||
int j;
|
||||
|
||||
evec->n = 0;
|
||||
if ( !isnull[i] ) {
|
||||
gistentryinit( evec->vector[evec->n], attr[i],
|
||||
NULL, NULL, (OffsetNumber) 0,
|
||||
FALSE);
|
||||
if (!isnull[i])
|
||||
{
|
||||
gistentryinit(evec->vector[evec->n], attr[i],
|
||||
NULL, NULL, (OffsetNumber) 0,
|
||||
FALSE);
|
||||
evec->n++;
|
||||
}
|
||||
|
||||
for (j = 0; j < len; j++) {
|
||||
Datum datum;
|
||||
bool IsNull;
|
||||
for (j = 0; j < len; j++)
|
||||
{
|
||||
Datum datum;
|
||||
bool IsNull;
|
||||
|
||||
if (GistTupleIsInvalid(itvec[j]))
|
||||
return FALSE; /* signals that union with invalid tuple => result is invalid */
|
||||
if (GistTupleIsInvalid(itvec[j]))
|
||||
return FALSE; /* signals that union with invalid tuple =>
|
||||
* result is invalid */
|
||||
|
||||
datum = index_getattr(itvec[j], i + 1, giststate->tupdesc, &IsNull);
|
||||
if (IsNull)
|
||||
@@ -183,19 +195,23 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startke
|
||||
}
|
||||
|
||||
/* If this tuple vector was all NULLs, the union is NULL */
|
||||
if ( evec->n == 0 ) {
|
||||
if (evec->n == 0)
|
||||
{
|
||||
attr[i] = (Datum) 0;
|
||||
isnull[i] = TRUE;
|
||||
} else {
|
||||
if (evec->n == 1) {
|
||||
}
|
||||
else
|
||||
{
|
||||
if (evec->n == 1)
|
||||
{
|
||||
evec->n = 2;
|
||||
evec->vector[1] = evec->vector[0];
|
||||
}
|
||||
}
|
||||
|
||||
/* Make union and store in attr array */
|
||||
attr[i] = FunctionCall2(&giststate->unionFn[i],
|
||||
PointerGetDatum(evec),
|
||||
PointerGetDatum(&attrsize));
|
||||
PointerGetDatum(evec),
|
||||
PointerGetDatum(&attrsize));
|
||||
|
||||
isnull[i] = FALSE;
|
||||
}
|
||||
@@ -213,57 +229,67 @@ gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
|
||||
{
|
||||
memset(isnullS, TRUE, sizeof(bool) * giststate->tupdesc->natts);
|
||||
|
||||
if ( !gistMakeUnionItVec(giststate, itvec, len, 0, attrS, isnullS ) )
|
||||
return gist_form_invalid_tuple(InvalidBlockNumber);
|
||||
if (!gistMakeUnionItVec(giststate, itvec, len, 0, attrS, isnullS))
|
||||
return gist_form_invalid_tuple(InvalidBlockNumber);
|
||||
|
||||
return gistFormTuple(giststate, r, attrS, isnullS, false);
|
||||
return gistFormTuple(giststate, r, attrS, isnullS, false);
|
||||
}
|
||||
|
||||
/*
|
||||
/*
|
||||
* makes union of two key
|
||||
*/
|
||||
void
|
||||
gistMakeUnionKey( GISTSTATE *giststate, int attno,
|
||||
GISTENTRY *entry1, bool isnull1,
|
||||
GISTENTRY *entry2, bool isnull2,
|
||||
Datum *dst, bool *dstisnull ) {
|
||||
gistMakeUnionKey(GISTSTATE *giststate, int attno,
|
||||
GISTENTRY *entry1, bool isnull1,
|
||||
GISTENTRY *entry2, bool isnull2,
|
||||
Datum *dst, bool *dstisnull)
|
||||
{
|
||||
|
||||
int dstsize;
|
||||
int dstsize;
|
||||
|
||||
static char storage[ 2 * sizeof(GISTENTRY) + GEVHDRSZ ];
|
||||
GistEntryVector *evec = (GistEntryVector*)storage;
|
||||
static char storage[2 * sizeof(GISTENTRY) + GEVHDRSZ];
|
||||
GistEntryVector *evec = (GistEntryVector *) storage;
|
||||
|
||||
evec->n = 2;
|
||||
|
||||
if ( isnull1 && isnull2 ) {
|
||||
if (isnull1 && isnull2)
|
||||
{
|
||||
*dstisnull = TRUE;
|
||||
*dst = (Datum)0;
|
||||
} else {
|
||||
if ( isnull1 == FALSE && isnull2 == FALSE ) {
|
||||
*dst = (Datum) 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (isnull1 == FALSE && isnull2 == FALSE)
|
||||
{
|
||||
evec->vector[0] = *entry1;
|
||||
evec->vector[1] = *entry2;
|
||||
} else if ( isnull1 == FALSE ) {
|
||||
}
|
||||
else if (isnull1 == FALSE)
|
||||
{
|
||||
evec->vector[0] = *entry1;
|
||||
evec->vector[1] = *entry1;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
evec->vector[0] = *entry2;
|
||||
evec->vector[1] = *entry2;
|
||||
}
|
||||
|
||||
*dstisnull = FALSE;
|
||||
*dst = FunctionCall2(&giststate->unionFn[attno],
|
||||
PointerGetDatum(evec),
|
||||
PointerGetDatum(&dstsize));
|
||||
PointerGetDatum(evec),
|
||||
PointerGetDatum(&dstsize));
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b) {
|
||||
bool result;
|
||||
gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b)
|
||||
{
|
||||
bool result;
|
||||
|
||||
FunctionCall3(&giststate->equalFn[attno],
|
||||
a, b,
|
||||
PointerGetDatum(&result));
|
||||
a, b,
|
||||
PointerGetDatum(&result));
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -309,22 +335,24 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
|
||||
gistDeCompressAtt(giststate, r, addtup, NULL,
|
||||
(OffsetNumber) 0, addentries, addisnull);
|
||||
|
||||
for(i = 0; i < r->rd_att->natts; i++) {
|
||||
gistMakeUnionKey( giststate, i,
|
||||
oldentries + i, oldisnull[i],
|
||||
addentries + i, addisnull[i],
|
||||
attrS + i, isnullS + i );
|
||||
for (i = 0; i < r->rd_att->natts; i++)
|
||||
{
|
||||
gistMakeUnionKey(giststate, i,
|
||||
oldentries + i, oldisnull[i],
|
||||
addentries + i, addisnull[i],
|
||||
attrS + i, isnullS + i);
|
||||
|
||||
if ( neednew )
|
||||
if (neednew)
|
||||
/* we already need new key, so we can skip check */
|
||||
continue;
|
||||
|
||||
if ( isnullS[i] )
|
||||
if (isnullS[i])
|
||||
/* union of key may be NULL if and only if both keys are NULL */
|
||||
continue;
|
||||
|
||||
if ( !addisnull[i] ) {
|
||||
if ( oldisnull[i] || gistKeyIsEQ(giststate, i, oldentries[i].key, attrS[i])==false )
|
||||
if (!addisnull[i])
|
||||
{
|
||||
if (oldisnull[i] || gistKeyIsEQ(giststate, i, oldentries[i].key, attrS[i]) == false)
|
||||
neednew = true;
|
||||
}
|
||||
}
|
||||
@@ -363,8 +391,8 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
|
||||
it, NULL, (OffsetNumber) 0,
|
||||
identry, isnull);
|
||||
|
||||
Assert( maxoff >= FirstOffsetNumber );
|
||||
Assert( !GistPageIsLeaf(p) );
|
||||
Assert(maxoff >= FirstOffsetNumber);
|
||||
Assert(!GistPageIsLeaf(p));
|
||||
|
||||
for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i))
|
||||
{
|
||||
@@ -484,7 +512,7 @@ gistFormTuple(GISTSTATE *giststate, Relation r,
|
||||
{
|
||||
gistcentryinit(giststate, i, ¢ry[i], attdata[i],
|
||||
r, NULL, (OffsetNumber) 0,
|
||||
newValues,
|
||||
newValues,
|
||||
FALSE);
|
||||
compatt[i] = centry[i].key;
|
||||
}
|
||||
@@ -500,18 +528,19 @@ gistpenalty(GISTSTATE *giststate, int attno,
|
||||
GISTENTRY *orig, bool isNullOrig,
|
||||
GISTENTRY *add, bool isNullAdd)
|
||||
{
|
||||
float penalty = 0.0;
|
||||
float penalty = 0.0;
|
||||
|
||||
if ( giststate->penaltyFn[attno].fn_strict==FALSE || ( isNullOrig == FALSE && isNullAdd == FALSE ) )
|
||||
if (giststate->penaltyFn[attno].fn_strict == FALSE || (isNullOrig == FALSE && isNullAdd == FALSE))
|
||||
FunctionCall3(&giststate->penaltyFn[attno],
|
||||
PointerGetDatum(orig),
|
||||
PointerGetDatum(add),
|
||||
PointerGetDatum(&penalty));
|
||||
else if ( isNullOrig && isNullAdd )
|
||||
else if (isNullOrig && isNullAdd)
|
||||
penalty = 0.0;
|
||||
else
|
||||
penalty = 1e10; /* try to prevent to mix null and non-null value */
|
||||
|
||||
penalty = 1e10; /* try to prevent to mix null and non-null
|
||||
* value */
|
||||
|
||||
return penalty;
|
||||
}
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.27 2006/09/21 20:31:21 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.28 2006/10/04 00:29:48 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -45,19 +45,24 @@ typedef struct
|
||||
} ArrayTuple;
|
||||
|
||||
/*
|
||||
* Make union of keys on page
|
||||
* Make union of keys on page
|
||||
*/
|
||||
static IndexTuple
|
||||
PageMakeUnionKey(GistVacuum *gv, Buffer buffer) {
|
||||
Page page = BufferGetPage( buffer );
|
||||
PageMakeUnionKey(GistVacuum *gv, Buffer buffer)
|
||||
{
|
||||
Page page = BufferGetPage(buffer);
|
||||
IndexTuple *vec,
|
||||
tmp, res;
|
||||
tmp,
|
||||
res;
|
||||
int veclen = 0;
|
||||
MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx);
|
||||
|
||||
vec = gistextractpage(page, &veclen);
|
||||
/* we call gistunion() in temprorary context because user-defined functions called in gistunion()
|
||||
may do not free all memory */
|
||||
|
||||
/*
|
||||
* we call gistunion() in temprorary context because user-defined
|
||||
* functions called in gistunion() may do not free all memory
|
||||
*/
|
||||
tmp = gistunion(gv->index, vec, veclen, &(gv->giststate));
|
||||
MemoryContextSwitchTo(oldCtx);
|
||||
|
||||
@@ -73,21 +78,25 @@ PageMakeUnionKey(GistVacuum *gv, Buffer buffer) {
|
||||
}
|
||||
|
||||
static void
|
||||
gistDeleteSubtree( GistVacuum *gv, BlockNumber blkno ) {
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
gistDeleteSubtree(GistVacuum *gv, BlockNumber blkno)
|
||||
{
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
|
||||
buffer = ReadBuffer(gv->index, blkno);
|
||||
LockBuffer(buffer, GIST_EXCLUSIVE);
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
if ( !GistPageIsLeaf(page) ) {
|
||||
int i;
|
||||
if (!GistPageIsLeaf(page))
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = FirstOffsetNumber; i <= PageGetMaxOffsetNumber(page); i = OffsetNumberNext(i)) {
|
||||
ItemId iid = PageGetItemId(page, i);
|
||||
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
|
||||
gistDeleteSubtree(gv, ItemPointerGetBlockNumber(&(idxtuple->t_tid)));
|
||||
for (i = FirstOffsetNumber; i <= PageGetMaxOffsetNumber(page); i = OffsetNumberNext(i))
|
||||
{
|
||||
ItemId iid = PageGetItemId(page, i);
|
||||
IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
|
||||
|
||||
gistDeleteSubtree(gv, ItemPointerGetBlockNumber(&(idxtuple->t_tid)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -103,7 +112,7 @@ gistDeleteSubtree( GistVacuum *gv, BlockNumber blkno ) {
|
||||
{
|
||||
XLogRecData rdata[2];
|
||||
XLogRecPtr recptr;
|
||||
gistxlogPageDelete xlrec;
|
||||
gistxlogPageDelete xlrec;
|
||||
|
||||
xlrec.node = gv->index->rd_node;
|
||||
xlrec.blkno = blkno;
|
||||
@@ -125,31 +134,34 @@ gistDeleteSubtree( GistVacuum *gv, BlockNumber blkno ) {
|
||||
}
|
||||
else
|
||||
PageSetLSN(page, XLogRecPtrForTemp);
|
||||
|
||||
|
||||
END_CRIT_SECTION();
|
||||
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
|
||||
static Page
|
||||
GistPageGetCopyPage( Page page ) {
|
||||
Size pageSize = PageGetPageSize( page );
|
||||
Page tmppage;
|
||||
static Page
|
||||
GistPageGetCopyPage(Page page)
|
||||
{
|
||||
Size pageSize = PageGetPageSize(page);
|
||||
Page tmppage;
|
||||
|
||||
tmppage=(Page)palloc( pageSize );
|
||||
memcpy( tmppage, page, pageSize );
|
||||
tmppage = (Page) palloc(pageSize);
|
||||
memcpy(tmppage, page, pageSize);
|
||||
|
||||
return tmppage;
|
||||
}
|
||||
|
||||
static ArrayTuple
|
||||
vacuumSplitPage(GistVacuum *gv, Page tempPage, Buffer buffer, IndexTuple *addon, int curlenaddon) {
|
||||
vacuumSplitPage(GistVacuum *gv, Page tempPage, Buffer buffer, IndexTuple *addon, int curlenaddon)
|
||||
{
|
||||
ArrayTuple res = {NULL, 0, false};
|
||||
IndexTuple *vec;
|
||||
SplitedPageLayout *dist = NULL,
|
||||
*ptr;
|
||||
int i, veclen=0;
|
||||
BlockNumber blkno = BufferGetBlockNumber(buffer);
|
||||
*ptr;
|
||||
int i,
|
||||
veclen = 0;
|
||||
BlockNumber blkno = BufferGetBlockNumber(buffer);
|
||||
MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx);
|
||||
|
||||
vec = gistextractpage(tempPage, &veclen);
|
||||
@@ -158,67 +170,73 @@ vacuumSplitPage(GistVacuum *gv, Page tempPage, Buffer buffer, IndexTuple *addon,
|
||||
|
||||
MemoryContextSwitchTo(oldCtx);
|
||||
|
||||
if (blkno != GIST_ROOT_BLKNO) {
|
||||
if (blkno != GIST_ROOT_BLKNO)
|
||||
{
|
||||
/* if non-root split then we should not allocate new buffer */
|
||||
dist->buffer = buffer;
|
||||
dist->page = tempPage;
|
||||
/* during vacuum we never split leaf page */
|
||||
GistPageGetOpaque(dist->page)->flags = 0;
|
||||
} else
|
||||
}
|
||||
else
|
||||
pfree(tempPage);
|
||||
|
||||
res.itup = (IndexTuple *) palloc(sizeof(IndexTuple) * veclen);
|
||||
res.ituplen = 0;
|
||||
|
||||
/* make new pages and fills them */
|
||||
for (ptr = dist; ptr; ptr = ptr->next) {
|
||||
char *data;
|
||||
for (ptr = dist; ptr; ptr = ptr->next)
|
||||
{
|
||||
char *data;
|
||||
|
||||
if ( ptr->buffer == InvalidBuffer ) {
|
||||
ptr->buffer = gistNewBuffer( gv->index );
|
||||
GISTInitBuffer( ptr->buffer, 0 );
|
||||
if (ptr->buffer == InvalidBuffer)
|
||||
{
|
||||
ptr->buffer = gistNewBuffer(gv->index);
|
||||
GISTInitBuffer(ptr->buffer, 0);
|
||||
ptr->page = BufferGetPage(ptr->buffer);
|
||||
}
|
||||
ptr->block.blkno = BufferGetBlockNumber( ptr->buffer );
|
||||
ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
|
||||
|
||||
data = (char*)(ptr->list);
|
||||
for(i=0;i<ptr->block.num;i++) {
|
||||
if ( PageAddItem(ptr->page, (Item)data, IndexTupleSize((IndexTuple)data), i+FirstOffsetNumber, LP_USED) == InvalidOffsetNumber )
|
||||
data = (char *) (ptr->list);
|
||||
for (i = 0; i < ptr->block.num; i++)
|
||||
{
|
||||
if (PageAddItem(ptr->page, (Item) data, IndexTupleSize((IndexTuple) data), i + FirstOffsetNumber, LP_USED) == InvalidOffsetNumber)
|
||||
elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(gv->index));
|
||||
data += IndexTupleSize((IndexTuple)data);
|
||||
data += IndexTupleSize((IndexTuple) data);
|
||||
}
|
||||
|
||||
ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
|
||||
res.itup[ res.ituplen ] = (IndexTuple)palloc(IndexTupleSize(ptr->itup));
|
||||
memcpy( res.itup[ res.ituplen ], ptr->itup, IndexTupleSize(ptr->itup) );
|
||||
res.itup[res.ituplen] = (IndexTuple) palloc(IndexTupleSize(ptr->itup));
|
||||
memcpy(res.itup[res.ituplen], ptr->itup, IndexTupleSize(ptr->itup));
|
||||
res.ituplen++;
|
||||
}
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
for (ptr = dist; ptr; ptr = ptr->next) {
|
||||
for (ptr = dist; ptr; ptr = ptr->next)
|
||||
{
|
||||
MarkBufferDirty(ptr->buffer);
|
||||
GistPageGetOpaque(ptr->page)->rightlink = InvalidBlockNumber;
|
||||
}
|
||||
|
||||
/* restore splitted non-root page */
|
||||
if (blkno != GIST_ROOT_BLKNO) {
|
||||
PageRestoreTempPage( dist->page, BufferGetPage( dist->buffer ) );
|
||||
dist->page = BufferGetPage( dist->buffer );
|
||||
if (blkno != GIST_ROOT_BLKNO)
|
||||
{
|
||||
PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
|
||||
dist->page = BufferGetPage(dist->buffer);
|
||||
}
|
||||
|
||||
if (!gv->index->rd_istemp)
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData *rdata;
|
||||
ItemPointerData key; /* set key for incomplete
|
||||
* insert */
|
||||
ItemPointerData key; /* set key for incomplete insert */
|
||||
char *xlinfo;
|
||||
|
||||
ItemPointerSet(&key, blkno, TUPLE_IS_VALID);
|
||||
|
||||
rdata = formSplitRdata(gv->index->rd_node, blkno,
|
||||
false, &key, dist);
|
||||
false, &key, dist);
|
||||
xlinfo = rdata->data;
|
||||
|
||||
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);
|
||||
@@ -241,13 +259,12 @@ vacuumSplitPage(GistVacuum *gv, Page tempPage, Buffer buffer, IndexTuple *addon,
|
||||
{
|
||||
/* we must keep the buffer pin on the head page */
|
||||
if (BufferGetBlockNumber(ptr->buffer) != blkno)
|
||||
UnlockReleaseBuffer( ptr->buffer );
|
||||
UnlockReleaseBuffer(ptr->buffer);
|
||||
}
|
||||
|
||||
if (blkno == GIST_ROOT_BLKNO)
|
||||
{
|
||||
ItemPointerData key; /* set key for incomplete
|
||||
* insert */
|
||||
ItemPointerData key; /* set key for incomplete insert */
|
||||
|
||||
ItemPointerSet(&key, blkno, TUPLE_IS_VALID);
|
||||
|
||||
@@ -266,7 +283,8 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
|
||||
{
|
||||
ArrayTuple res = {NULL, 0, false};
|
||||
Buffer buffer;
|
||||
Page page, tempPage = NULL;
|
||||
Page page,
|
||||
tempPage = NULL;
|
||||
OffsetNumber i,
|
||||
maxoff;
|
||||
ItemId iid;
|
||||
@@ -278,7 +296,7 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
|
||||
*addon = NULL;
|
||||
bool needwrite = false;
|
||||
OffsetNumber offToDelete[MaxOffsetNumber];
|
||||
BlockNumber blkToDelete[MaxOffsetNumber];
|
||||
BlockNumber blkToDelete[MaxOffsetNumber];
|
||||
ItemPointerData *completed = NULL;
|
||||
int ncompleted = 0,
|
||||
lencompleted = 16;
|
||||
@@ -322,7 +340,7 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
|
||||
if (chldtuple.ituplen || chldtuple.emptypage)
|
||||
{
|
||||
/* update tuple or/and inserts new */
|
||||
if ( chldtuple.emptypage )
|
||||
if (chldtuple.emptypage)
|
||||
blkToDelete[nBlkToDelete++] = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
|
||||
offToDelete[nOffToDelete++] = i;
|
||||
PageIndexTupleDelete(tempPage, i);
|
||||
@@ -333,7 +351,7 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
|
||||
if (chldtuple.ituplen)
|
||||
{
|
||||
|
||||
Assert( chldtuple.emptypage == false );
|
||||
Assert(chldtuple.emptypage == false);
|
||||
while (curlenaddon + chldtuple.ituplen >= lenaddon)
|
||||
{
|
||||
lenaddon *= 2;
|
||||
@@ -367,56 +385,63 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Assert( maxoff == PageGetMaxOffsetNumber(tempPage) );
|
||||
|
||||
Assert(maxoff == PageGetMaxOffsetNumber(tempPage));
|
||||
|
||||
if (curlenaddon)
|
||||
{
|
||||
/* insert updated tuples */
|
||||
if (gistnospace(tempPage, addon, curlenaddon, InvalidOffsetNumber, 0)) {
|
||||
if (gistnospace(tempPage, addon, curlenaddon, InvalidOffsetNumber, 0))
|
||||
{
|
||||
/* there is no space on page to insert tuples */
|
||||
res = vacuumSplitPage(gv, tempPage, buffer, addon, curlenaddon);
|
||||
tempPage=NULL; /* vacuumSplitPage() free tempPage */
|
||||
needwrite = needunion = false; /* gistSplit already forms unions and writes pages */
|
||||
} else
|
||||
tempPage = NULL; /* vacuumSplitPage() free tempPage */
|
||||
needwrite = needunion = false; /* gistSplit already forms
|
||||
* unions and writes pages */
|
||||
}
|
||||
else
|
||||
/* enough free space */
|
||||
gistfillbuffer(gv->index, tempPage, addon, curlenaddon, InvalidOffsetNumber);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If page is empty, we should remove pointer to it before
|
||||
* deleting page (except root)
|
||||
/*
|
||||
* If page is empty, we should remove pointer to it before deleting page
|
||||
* (except root)
|
||||
*/
|
||||
|
||||
if ( blkno != GIST_ROOT_BLKNO && ( PageIsEmpty(page) || (tempPage && PageIsEmpty(tempPage)) ) ) {
|
||||
if (blkno != GIST_ROOT_BLKNO && (PageIsEmpty(page) || (tempPage && PageIsEmpty(tempPage))))
|
||||
{
|
||||
/*
|
||||
* New version of page is empty, so leave it unchanged,
|
||||
* upper call will mark our page as deleted.
|
||||
* In case of page split we never will be here...
|
||||
* New version of page is empty, so leave it unchanged, upper call
|
||||
* will mark our page as deleted. In case of page split we never will
|
||||
* be here...
|
||||
*
|
||||
* If page was empty it can't become non-empty during processing
|
||||
* If page was empty it can't become non-empty during processing
|
||||
*/
|
||||
res.emptypage = true;
|
||||
UnlockReleaseBuffer(buffer);
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
/* write page and remove its childs if it need */
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
if ( tempPage && needwrite ) {
|
||||
if (tempPage && needwrite)
|
||||
{
|
||||
PageRestoreTempPage(tempPage, page);
|
||||
tempPage = NULL;
|
||||
}
|
||||
|
||||
/* Empty index */
|
||||
if (PageIsEmpty(page) && blkno == GIST_ROOT_BLKNO )
|
||||
/* Empty index */
|
||||
if (PageIsEmpty(page) && blkno == GIST_ROOT_BLKNO)
|
||||
{
|
||||
needwrite = true;
|
||||
GistPageSetLeaf(page);
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (needwrite)
|
||||
{
|
||||
MarkBufferDirty(buffer);
|
||||
@@ -446,7 +471,7 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
|
||||
|
||||
END_CRIT_SECTION();
|
||||
|
||||
if ( needunion && !PageIsEmpty(page) )
|
||||
if (needunion && !PageIsEmpty(page))
|
||||
{
|
||||
res.itup = (IndexTuple *) palloc(sizeof(IndexTuple));
|
||||
res.ituplen = 1;
|
||||
@@ -456,7 +481,7 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion)
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
/* delete empty children, now we havn't any links to pointed subtrees */
|
||||
for(i=0;i<nBlkToDelete;i++)
|
||||
for (i = 0; i < nBlkToDelete; i++)
|
||||
gistDeleteSubtree(gv, blkToDelete[i]);
|
||||
|
||||
if (ncompleted && !gv->index->rd_istemp)
|
||||
@@ -506,9 +531,10 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
|
||||
/* use heap's tuple count */
|
||||
Assert(info->num_heap_tuples >= 0);
|
||||
stats->std.num_index_tuples = info->num_heap_tuples;
|
||||
|
||||
/*
|
||||
* XXX the above is wrong if index is partial. Would it be OK to
|
||||
* just return NULL, or is there work we must do below?
|
||||
* XXX the above is wrong if index is partial. Would it be OK to just
|
||||
* return NULL, or is there work we must do below?
|
||||
*/
|
||||
}
|
||||
|
||||
@@ -545,8 +571,8 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
|
||||
RelationGetRelationName(rel))));
|
||||
|
||||
/*
|
||||
* If vacuum full, we already have exclusive lock on the index.
|
||||
* Otherwise, need lock unless it's local to this backend.
|
||||
* If vacuum full, we already have exclusive lock on the index. Otherwise,
|
||||
* need lock unless it's local to this backend.
|
||||
*/
|
||||
if (info->vacuum_full)
|
||||
needLock = false;
|
||||
@@ -725,7 +751,7 @@ gistbulkdelete(PG_FUNCTION_ARGS)
|
||||
|
||||
if (callback(&(idxtuple->t_tid), callback_state))
|
||||
{
|
||||
todelete[ntodelete] = i-ntodelete;
|
||||
todelete[ntodelete] = i - ntodelete;
|
||||
ntodelete++;
|
||||
stats->std.tuples_removed += 1;
|
||||
}
|
||||
@@ -739,7 +765,7 @@ gistbulkdelete(PG_FUNCTION_ARGS)
|
||||
|
||||
MarkBufferDirty(buffer);
|
||||
|
||||
for(i=0;i<ntodelete;i++)
|
||||
for (i = 0; i < ntodelete; i++)
|
||||
PageIndexTupleDelete(page, todelete[i]);
|
||||
GistMarkTuplesDeleted(page);
|
||||
|
||||
@@ -750,7 +776,7 @@ gistbulkdelete(PG_FUNCTION_ARGS)
|
||||
gistxlogPageUpdate *xlinfo;
|
||||
|
||||
rdata = formUpdateRdata(rel->rd_node, buffer,
|
||||
todelete, ntodelete,
|
||||
todelete, ntodelete,
|
||||
NULL, 0,
|
||||
NULL);
|
||||
xlinfo = (gistxlogPageUpdate *) rdata->next->data;
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/gist/gistxlog.c,v 1.23 2006/08/07 16:57:56 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/gist/gistxlog.c,v 1.24 2006/10/04 00:29:48 momjian Exp $
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
@@ -55,11 +55,11 @@ typedef struct gistIncompleteInsert
|
||||
|
||||
|
||||
static MemoryContext opCtx; /* working memory for operations */
|
||||
static MemoryContext insertCtx; /* holds incomplete_inserts list */
|
||||
static MemoryContext insertCtx; /* holds incomplete_inserts list */
|
||||
static List *incomplete_inserts;
|
||||
|
||||
|
||||
#define ItemPointerEQ(a, b) \
|
||||
#define ItemPointerEQ(a, b) \
|
||||
( ItemPointerGetOffsetNumber(a) == ItemPointerGetOffsetNumber(b) && \
|
||||
ItemPointerGetBlockNumber (a) == ItemPointerGetBlockNumber(b) )
|
||||
|
||||
@@ -72,8 +72,9 @@ pushIncompleteInsert(RelFileNode node, XLogRecPtr lsn, ItemPointerData key,
|
||||
MemoryContext oldCxt;
|
||||
gistIncompleteInsert *ninsert;
|
||||
|
||||
if ( !ItemPointerIsValid(&key) )
|
||||
/*
|
||||
if (!ItemPointerIsValid(&key))
|
||||
|
||||
/*
|
||||
* if key is null then we should not store insertion as incomplete,
|
||||
* because it's a vacuum operation..
|
||||
*/
|
||||
@@ -108,8 +109,8 @@ pushIncompleteInsert(RelFileNode node, XLogRecPtr lsn, ItemPointerData key,
|
||||
|
||||
/*
|
||||
* Stick the new incomplete insert onto the front of the list, not the
|
||||
* back. This is so that gist_xlog_cleanup will process incompletions
|
||||
* in last-in-first-out order.
|
||||
* back. This is so that gist_xlog_cleanup will process incompletions in
|
||||
* last-in-first-out order.
|
||||
*/
|
||||
incomplete_inserts = lcons(ninsert, incomplete_inserts);
|
||||
|
||||
@@ -121,10 +122,10 @@ forgetIncompleteInsert(RelFileNode node, ItemPointerData key)
|
||||
{
|
||||
ListCell *l;
|
||||
|
||||
if ( !ItemPointerIsValid(&key) )
|
||||
if (!ItemPointerIsValid(&key))
|
||||
return;
|
||||
|
||||
if (incomplete_inserts==NIL)
|
||||
if (incomplete_inserts == NIL)
|
||||
return;
|
||||
|
||||
foreach(l, incomplete_inserts)
|
||||
@@ -241,9 +242,12 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record, bool isnewroot)
|
||||
if (GistPageIsLeaf(page) && xlrec.len == 0 && xlrec.data->ntodelete == 0)
|
||||
GistClearTuplesDeleted(page);
|
||||
|
||||
if ( !GistPageIsLeaf(page) && PageGetMaxOffsetNumber(page) == InvalidOffsetNumber && xldata->blkno == GIST_ROOT_BLKNO )
|
||||
/* all links on non-leaf root page was deleted by vacuum full,
|
||||
so root page becomes a leaf */
|
||||
if (!GistPageIsLeaf(page) && PageGetMaxOffsetNumber(page) == InvalidOffsetNumber && xldata->blkno == GIST_ROOT_BLKNO)
|
||||
|
||||
/*
|
||||
* all links on non-leaf root page was deleted by vacuum full, so root
|
||||
* page becomes a leaf
|
||||
*/
|
||||
GistPageSetLeaf(page);
|
||||
|
||||
GistPageGetOpaque(page)->rightlink = InvalidBlockNumber;
|
||||
@@ -432,11 +436,11 @@ static void
|
||||
out_target(StringInfo buf, RelFileNode node, ItemPointerData key)
|
||||
{
|
||||
appendStringInfo(buf, "rel %u/%u/%u",
|
||||
node.spcNode, node.dbNode, node.relNode);
|
||||
if ( ItemPointerIsValid( &key ) )
|
||||
node.spcNode, node.dbNode, node.relNode);
|
||||
if (ItemPointerIsValid(&key))
|
||||
appendStringInfo(buf, "; tid %u/%u",
|
||||
ItemPointerGetBlockNumber(&key),
|
||||
ItemPointerGetOffsetNumber(&key));
|
||||
ItemPointerGetBlockNumber(&key),
|
||||
ItemPointerGetOffsetNumber(&key));
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -450,8 +454,8 @@ static void
|
||||
out_gistxlogPageDelete(StringInfo buf, gistxlogPageDelete *xlrec)
|
||||
{
|
||||
appendStringInfo(buf, "page_delete: rel %u/%u/%u; blkno %u",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
|
||||
xlrec->blkno);
|
||||
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
|
||||
xlrec->blkno);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -460,7 +464,7 @@ out_gistxlogPageSplit(StringInfo buf, gistxlogPageSplit *xlrec)
|
||||
appendStringInfo(buf, "page_split: ");
|
||||
out_target(buf, xlrec->node, xlrec->key);
|
||||
appendStringInfo(buf, "; block number %u splits to %d pages",
|
||||
xlrec->origblkno, xlrec->npage);
|
||||
xlrec->origblkno, xlrec->npage);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -486,15 +490,15 @@ gist_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
break;
|
||||
case XLOG_GIST_CREATE_INDEX:
|
||||
appendStringInfo(buf, "create_index: rel %u/%u/%u",
|
||||
((RelFileNode *) rec)->spcNode,
|
||||
((RelFileNode *) rec)->dbNode,
|
||||
((RelFileNode *) rec)->relNode);
|
||||
((RelFileNode *) rec)->spcNode,
|
||||
((RelFileNode *) rec)->dbNode,
|
||||
((RelFileNode *) rec)->relNode);
|
||||
break;
|
||||
case XLOG_GIST_INSERT_COMPLETE:
|
||||
appendStringInfo(buf, "complete_insert: rel %u/%u/%u",
|
||||
((gistxlogInsertComplete *) rec)->node.spcNode,
|
||||
((gistxlogInsertComplete *) rec)->node.dbNode,
|
||||
((gistxlogInsertComplete *) rec)->node.relNode);
|
||||
((gistxlogInsertComplete *) rec)->node.spcNode,
|
||||
((gistxlogInsertComplete *) rec)->node.dbNode,
|
||||
((gistxlogInsertComplete *) rec)->node.relNode);
|
||||
break;
|
||||
default:
|
||||
appendStringInfo(buf, "unknown gist op code %u", info);
|
||||
@@ -547,22 +551,25 @@ gistxlogFindPath(Relation index, gistIncompleteInsert *insert)
|
||||
elog(ERROR, "lost parent for block %u", insert->origblkno);
|
||||
}
|
||||
|
||||
static SplitedPageLayout*
|
||||
gistMakePageLayout(Buffer *buffers, int nbuffers) {
|
||||
SplitedPageLayout *res=NULL, *resptr;
|
||||
static SplitedPageLayout *
|
||||
gistMakePageLayout(Buffer *buffers, int nbuffers)
|
||||
{
|
||||
SplitedPageLayout *res = NULL,
|
||||
*resptr;
|
||||
|
||||
while( nbuffers-- > 0 ) {
|
||||
Page page = BufferGetPage( buffers[ nbuffers ] );
|
||||
IndexTuple* vec;
|
||||
int veclen;
|
||||
while (nbuffers-- > 0)
|
||||
{
|
||||
Page page = BufferGetPage(buffers[nbuffers]);
|
||||
IndexTuple *vec;
|
||||
int veclen;
|
||||
|
||||
resptr = (SplitedPageLayout*)palloc0( sizeof(SplitedPageLayout) );
|
||||
resptr = (SplitedPageLayout *) palloc0(sizeof(SplitedPageLayout));
|
||||
|
||||
resptr->block.blkno = BufferGetBlockNumber( buffers[ nbuffers ] );
|
||||
resptr->block.num = PageGetMaxOffsetNumber( page );
|
||||
resptr->block.blkno = BufferGetBlockNumber(buffers[nbuffers]);
|
||||
resptr->block.num = PageGetMaxOffsetNumber(page);
|
||||
|
||||
vec = gistextractpage( page, &veclen );
|
||||
resptr->list = gistfillitupvec( vec, veclen, &(resptr->lenlist) );
|
||||
vec = gistextractpage(page, &veclen);
|
||||
resptr->list = gistfillitupvec(vec, veclen, &(resptr->lenlist));
|
||||
|
||||
resptr->next = res;
|
||||
res = resptr;
|
||||
@@ -580,7 +587,7 @@ gistMakePageLayout(Buffer *buffers, int nbuffers) {
|
||||
* Note that we assume the index is now in a valid state, except for the
|
||||
* unfinished insertion. In particular it's safe to invoke gistFindPath();
|
||||
* there shouldn't be any garbage pages for it to run into.
|
||||
*
|
||||
*
|
||||
* To complete insert we can't use basic insertion algorithm because
|
||||
* during insertion we can't call user-defined support functions of opclass.
|
||||
* So, we insert 'invalid' tuples without real key and do it by separate algorithm.
|
||||
@@ -607,7 +614,7 @@ gistContinueInsert(gistIncompleteInsert *insert)
|
||||
itup[i] = gist_form_invalid_tuple(insert->blkno[i]);
|
||||
|
||||
/*
|
||||
* any insertion of itup[] should make LOG message about
|
||||
* any insertion of itup[] should make LOG message about
|
||||
*/
|
||||
|
||||
if (insert->origblkno == GIST_ROOT_BLKNO)
|
||||
@@ -626,7 +633,7 @@ gistContinueInsert(gistIncompleteInsert *insert)
|
||||
Buffer *buffers;
|
||||
Page *pages;
|
||||
int numbuffer;
|
||||
OffsetNumber *todelete;
|
||||
OffsetNumber *todelete;
|
||||
|
||||
/* construct path */
|
||||
gistxlogFindPath(index, insert);
|
||||
@@ -642,21 +649,22 @@ gistContinueInsert(gistIncompleteInsert *insert)
|
||||
int j,
|
||||
k,
|
||||
pituplen = 0;
|
||||
XLogRecData *rdata;
|
||||
XLogRecPtr recptr;
|
||||
Buffer tempbuffer = InvalidBuffer;
|
||||
int ntodelete = 0;
|
||||
XLogRecData *rdata;
|
||||
XLogRecPtr recptr;
|
||||
Buffer tempbuffer = InvalidBuffer;
|
||||
int ntodelete = 0;
|
||||
|
||||
numbuffer = 1;
|
||||
buffers[0] = ReadBuffer(index, insert->path[i]);
|
||||
LockBuffer(buffers[0], GIST_EXCLUSIVE);
|
||||
|
||||
/*
|
||||
* we check buffer, because we restored page earlier
|
||||
*/
|
||||
gistcheckpage(index, buffers[0]);
|
||||
|
||||
pages[0] = BufferGetPage(buffers[0]);
|
||||
Assert( !GistPageIsLeaf(pages[0]) );
|
||||
Assert(!GistPageIsLeaf(pages[0]));
|
||||
|
||||
pituplen = PageGetMaxOffsetNumber(pages[0]);
|
||||
|
||||
@@ -678,12 +686,12 @@ gistContinueInsert(gistIncompleteInsert *insert)
|
||||
}
|
||||
}
|
||||
|
||||
if ( ntodelete == 0 )
|
||||
elog(PANIC,"gistContinueInsert: can't find pointer to page(s)");
|
||||
if (ntodelete == 0)
|
||||
elog(PANIC, "gistContinueInsert: can't find pointer to page(s)");
|
||||
|
||||
/*
|
||||
* we check space with subtraction only first tuple to delete, hope,
|
||||
* that wiil be enough space....
|
||||
* we check space with subtraction only first tuple to delete,
|
||||
* hope, that wiil be enough space....
|
||||
*/
|
||||
|
||||
if (gistnospace(pages[0], itup, lenitup, *todelete, 0))
|
||||
@@ -699,7 +707,7 @@ gistContinueInsert(gistIncompleteInsert *insert)
|
||||
|
||||
if (BufferGetBlockNumber(buffers[0]) == GIST_ROOT_BLKNO)
|
||||
{
|
||||
Buffer tmp;
|
||||
Buffer tmp;
|
||||
|
||||
/*
|
||||
* we split root, just copy content from root to new page
|
||||
@@ -713,44 +721,48 @@ gistContinueInsert(gistIncompleteInsert *insert)
|
||||
/* fill new page, root will be changed later */
|
||||
tempbuffer = ReadBuffer(index, P_NEW);
|
||||
LockBuffer(tempbuffer, GIST_EXCLUSIVE);
|
||||
memcpy( BufferGetPage(tempbuffer), pages[0], BufferGetPageSize(tempbuffer) );
|
||||
memcpy(BufferGetPage(tempbuffer), pages[0], BufferGetPageSize(tempbuffer));
|
||||
|
||||
/* swap buffers[0] (was root) and temp buffer */
|
||||
tmp = buffers[0];
|
||||
buffers[0] = tempbuffer;
|
||||
tempbuffer = tmp; /* now in tempbuffer GIST_ROOT_BLKNO, it is still unchanged */
|
||||
tempbuffer = tmp; /* now in tempbuffer GIST_ROOT_BLKNO,
|
||||
* it is still unchanged */
|
||||
|
||||
pages[0] = BufferGetPage(buffers[0]);
|
||||
}
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
for(j=0;j<ntodelete;j++)
|
||||
for (j = 0; j < ntodelete; j++)
|
||||
PageIndexTupleDelete(pages[0], todelete[j]);
|
||||
|
||||
rdata = formSplitRdata(index->rd_node, insert->path[i],
|
||||
false, &(insert->key),
|
||||
gistMakePageLayout( buffers, numbuffer ) );
|
||||
false, &(insert->key),
|
||||
gistMakePageLayout(buffers, numbuffer));
|
||||
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
START_CRIT_SECTION();
|
||||
|
||||
for(j=0;j<ntodelete;j++)
|
||||
for (j = 0; j < ntodelete; j++)
|
||||
PageIndexTupleDelete(pages[0], todelete[j]);
|
||||
gistfillbuffer(index, pages[0], itup, lenitup, InvalidOffsetNumber);
|
||||
|
||||
rdata = formUpdateRdata(index->rd_node, buffers[0],
|
||||
todelete, ntodelete,
|
||||
itup, lenitup, &(insert->key));
|
||||
rdata = formUpdateRdata(index->rd_node, buffers[0],
|
||||
todelete, ntodelete,
|
||||
itup, lenitup, &(insert->key));
|
||||
}
|
||||
|
||||
/*
|
||||
* use insert->key as mark for completion of insert (form*Rdata() above)
|
||||
* for following possible replays
|
||||
/*
|
||||
* use insert->key as mark for completion of insert (form*Rdata()
|
||||
* above) for following possible replays
|
||||
*/
|
||||
|
||||
/* write pages, we should mark it dirty befor XLogInsert() */
|
||||
for (j = 0; j < numbuffer; j++) {
|
||||
for (j = 0; j < numbuffer; j++)
|
||||
{
|
||||
GistPageGetOpaque(pages[j])->rightlink = InvalidBlockNumber;
|
||||
MarkBufferDirty(buffers[j]);
|
||||
}
|
||||
@@ -764,12 +776,14 @@ gistContinueInsert(gistIncompleteInsert *insert)
|
||||
END_CRIT_SECTION();
|
||||
|
||||
lenitup = numbuffer;
|
||||
for (j = 0; j < numbuffer; j++) {
|
||||
for (j = 0; j < numbuffer; j++)
|
||||
{
|
||||
itup[j] = gist_form_invalid_tuple(BufferGetBlockNumber(buffers[j]));
|
||||
UnlockReleaseBuffer(buffers[j]);
|
||||
}
|
||||
|
||||
if ( tempbuffer != InvalidBuffer ) {
|
||||
if (tempbuffer != InvalidBuffer)
|
||||
{
|
||||
/*
|
||||
* it was a root split, so fill it by new values
|
||||
*/
|
||||
@@ -780,9 +794,9 @@ gistContinueInsert(gistIncompleteInsert *insert)
|
||||
}
|
||||
|
||||
ereport(LOG,
|
||||
(errmsg("index %u/%u/%u needs VACUUM FULL or REINDEX to finish crash recovery",
|
||||
(errmsg("index %u/%u/%u needs VACUUM FULL or REINDEX to finish crash recovery",
|
||||
insert->node.spcNode, insert->node.dbNode, insert->node.relNode),
|
||||
errdetail("Incomplete insertion detected during crash replay.")));
|
||||
errdetail("Incomplete insertion detected during crash replay.")));
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.47 2006/03/05 15:58:20 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.48 2006/10/04 00:29:48 momjian Exp $
|
||||
*
|
||||
* NOTES
|
||||
* These functions are stored in pg_amproc. For each operator class
|
||||
@@ -138,9 +138,9 @@ hashtext(PG_FUNCTION_ARGS)
|
||||
Datum result;
|
||||
|
||||
/*
|
||||
* Note: this is currently identical in behavior to hashvarlena, but
|
||||
* keep it as a separate function in case we someday want to do something
|
||||
* different in non-C locales. (See also hashbpchar, if so.)
|
||||
* Note: this is currently identical in behavior to hashvarlena, but keep
|
||||
* it as a separate function in case we someday want to do something
|
||||
* different in non-C locales. (See also hashbpchar, if so.)
|
||||
*/
|
||||
result = hash_any((unsigned char *) VARDATA(key),
|
||||
VARSIZE(key) - VARHDRSZ);
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.59 2006/07/03 22:45:36 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.60 2006/10/04 00:29:48 momjian Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Postgres hash pages look like ordinary relation pages. The opaque
|
||||
@@ -224,7 +224,7 @@ _hash_metapinit(Relation rel)
|
||||
/*
|
||||
* Determine the target fill factor (in tuples per bucket) for this index.
|
||||
* The idea is to make the fill factor correspond to pages about as full
|
||||
* as the user-settable fillfactor parameter says. We can compute it
|
||||
* as the user-settable fillfactor parameter says. We can compute it
|
||||
* exactly if the index datatype is fixed-width, but for var-width there's
|
||||
* some guessing involved.
|
||||
*/
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.219 2006/08/18 16:09:08 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.220 2006/10/04 00:29:48 momjian Exp $
|
||||
*
|
||||
*
|
||||
* INTERFACE ROUTINES
|
||||
@@ -133,9 +133,9 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
|
||||
snapshot = scan->rs_snapshot;
|
||||
|
||||
/*
|
||||
* We must hold share lock on the buffer content while examining
|
||||
* tuple visibility. Afterwards, however, the tuples we have found
|
||||
* to be visible are guaranteed good as long as we hold the buffer pin.
|
||||
* We must hold share lock on the buffer content while examining tuple
|
||||
* visibility. Afterwards, however, the tuples we have found to be
|
||||
* visible are guaranteed good as long as we hold the buffer pin.
|
||||
*/
|
||||
LockBuffer(buffer, BUFFER_LOCK_SHARE);
|
||||
|
||||
@@ -223,7 +223,7 @@ heapgettup(HeapScanDesc scan,
|
||||
tuple->t_data = NULL;
|
||||
return;
|
||||
}
|
||||
page = 0; /* first page */
|
||||
page = 0; /* first page */
|
||||
heapgetpage(scan, page);
|
||||
lineoff = FirstOffsetNumber; /* first offnum */
|
||||
scan->rs_inited = true;
|
||||
@@ -231,8 +231,8 @@ heapgettup(HeapScanDesc scan,
|
||||
else
|
||||
{
|
||||
/* continue from previously returned page/tuple */
|
||||
page = scan->rs_cblock; /* current page */
|
||||
lineoff = /* next offnum */
|
||||
page = scan->rs_cblock; /* current page */
|
||||
lineoff = /* next offnum */
|
||||
OffsetNumberNext(ItemPointerGetOffsetNumber(&(tuple->t_self)));
|
||||
}
|
||||
|
||||
@@ -263,7 +263,7 @@ heapgettup(HeapScanDesc scan,
|
||||
else
|
||||
{
|
||||
/* continue from previously returned page/tuple */
|
||||
page = scan->rs_cblock; /* current page */
|
||||
page = scan->rs_cblock; /* current page */
|
||||
}
|
||||
|
||||
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
|
||||
@@ -273,12 +273,12 @@ heapgettup(HeapScanDesc scan,
|
||||
|
||||
if (!scan->rs_inited)
|
||||
{
|
||||
lineoff = lines; /* final offnum */
|
||||
lineoff = lines; /* final offnum */
|
||||
scan->rs_inited = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
lineoff = /* previous offnum */
|
||||
lineoff = /* previous offnum */
|
||||
OffsetNumberPrev(ItemPointerGetOffsetNumber(&(tuple->t_self)));
|
||||
}
|
||||
/* page and lineoff now reference the physically previous tid */
|
||||
@@ -450,7 +450,7 @@ heapgettup_pagemode(HeapScanDesc scan,
|
||||
tuple->t_data = NULL;
|
||||
return;
|
||||
}
|
||||
page = 0; /* first page */
|
||||
page = 0; /* first page */
|
||||
heapgetpage(scan, page);
|
||||
lineindex = 0;
|
||||
scan->rs_inited = true;
|
||||
@@ -458,7 +458,7 @@ heapgettup_pagemode(HeapScanDesc scan,
|
||||
else
|
||||
{
|
||||
/* continue from previously returned page/tuple */
|
||||
page = scan->rs_cblock; /* current page */
|
||||
page = scan->rs_cblock; /* current page */
|
||||
lineindex = scan->rs_cindex + 1;
|
||||
}
|
||||
|
||||
@@ -487,7 +487,7 @@ heapgettup_pagemode(HeapScanDesc scan,
|
||||
else
|
||||
{
|
||||
/* continue from previously returned page/tuple */
|
||||
page = scan->rs_cblock; /* current page */
|
||||
page = scan->rs_cblock; /* current page */
|
||||
}
|
||||
|
||||
dp = (Page) BufferGetPage(scan->rs_cbuf);
|
||||
@@ -721,8 +721,8 @@ try_relation_open(Oid relationId, LOCKMODE lockmode)
|
||||
LockRelationOid(relationId, lockmode);
|
||||
|
||||
/*
|
||||
* Now that we have the lock, probe to see if the relation really
|
||||
* exists or not.
|
||||
* Now that we have the lock, probe to see if the relation really exists
|
||||
* or not.
|
||||
*/
|
||||
if (!SearchSysCacheExists(RELOID,
|
||||
ObjectIdGetDatum(relationId),
|
||||
@@ -764,7 +764,7 @@ relation_open_nowait(Oid relationId, LOCKMODE lockmode)
|
||||
if (!ConditionalLockRelationOid(relationId, lockmode))
|
||||
{
|
||||
/* try to throw error by name; relation could be deleted... */
|
||||
char *relname = get_rel_name(relationId);
|
||||
char *relname = get_rel_name(relationId);
|
||||
|
||||
if (relname)
|
||||
ereport(ERROR,
|
||||
@@ -774,8 +774,8 @@ relation_open_nowait(Oid relationId, LOCKMODE lockmode)
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
|
||||
errmsg("could not obtain lock on relation with OID %u",
|
||||
relationId)));
|
||||
errmsg("could not obtain lock on relation with OID %u",
|
||||
relationId)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -801,8 +801,8 @@ relation_openrv(const RangeVar *relation, LOCKMODE lockmode)
|
||||
|
||||
/*
|
||||
* Check for shared-cache-inval messages before trying to open the
|
||||
* relation. This is needed to cover the case where the name identifies
|
||||
* a rel that has been dropped and recreated since the start of our
|
||||
* relation. This is needed to cover the case where the name identifies a
|
||||
* rel that has been dropped and recreated since the start of our
|
||||
* transaction: if we don't flush the old syscache entry then we'll latch
|
||||
* onto that entry and suffer an error when we do RelationIdGetRelation.
|
||||
* Note that relation_open does not need to do this, since a relation's
|
||||
@@ -2723,7 +2723,7 @@ l3:
|
||||
* heap_inplace_update - update a tuple "in place" (ie, overwrite it)
|
||||
*
|
||||
* Overwriting violates both MVCC and transactional safety, so the uses
|
||||
* of this function in Postgres are extremely limited. Nonetheless we
|
||||
* of this function in Postgres are extremely limited. Nonetheless we
|
||||
* find some places to use it.
|
||||
*
|
||||
* The tuple cannot change size, and therefore it's reasonable to assume
|
||||
@@ -2840,6 +2840,7 @@ heap_restrpos(HeapScanDesc scan)
|
||||
if (!ItemPointerIsValid(&scan->rs_mctid))
|
||||
{
|
||||
scan->rs_ctup.t_data = NULL;
|
||||
|
||||
/*
|
||||
* unpin scan buffers
|
||||
*/
|
||||
@@ -2852,7 +2853,7 @@ heap_restrpos(HeapScanDesc scan)
|
||||
else
|
||||
{
|
||||
/*
|
||||
* If we reached end of scan, rs_inited will now be false. We must
|
||||
* If we reached end of scan, rs_inited will now be false. We must
|
||||
* reset it to true to keep heapgettup from doing the wrong thing.
|
||||
*/
|
||||
scan->rs_inited = true;
|
||||
@@ -2862,13 +2863,13 @@ heap_restrpos(HeapScanDesc scan)
|
||||
scan->rs_cindex = scan->rs_mindex;
|
||||
heapgettup_pagemode(scan,
|
||||
NoMovementScanDirection,
|
||||
0, /* needn't recheck scan keys */
|
||||
0, /* needn't recheck scan keys */
|
||||
NULL);
|
||||
}
|
||||
else
|
||||
heapgettup(scan,
|
||||
NoMovementScanDirection,
|
||||
0, /* needn't recheck scan keys */
|
||||
0, /* needn't recheck scan keys */
|
||||
NULL);
|
||||
}
|
||||
}
|
||||
@@ -2920,7 +2921,7 @@ log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *unused, int uncnt)
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform XLogInsert for a heap-update operation. Caller must already
|
||||
* Perform XLogInsert for a heap-update operation. Caller must already
|
||||
* have modified the buffer(s) and marked them dirty.
|
||||
*/
|
||||
static XLogRecPtr
|
||||
@@ -3173,8 +3174,8 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
|
||||
if (record->xl_info & XLOG_HEAP_INIT_PAGE)
|
||||
{
|
||||
buffer = XLogReadBuffer(reln,
|
||||
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
|
||||
true);
|
||||
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
|
||||
true);
|
||||
Assert(BufferIsValid(buffer));
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
@@ -3183,13 +3184,13 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
|
||||
else
|
||||
{
|
||||
buffer = XLogReadBuffer(reln,
|
||||
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
|
||||
false);
|
||||
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
|
||||
false);
|
||||
if (!BufferIsValid(buffer))
|
||||
return;
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */
|
||||
if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */
|
||||
{
|
||||
UnlockReleaseBuffer(buffer);
|
||||
return;
|
||||
@@ -3308,6 +3309,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool move)
|
||||
/* Set forward chain link in t_ctid */
|
||||
htup->t_ctid = xlrec->newtid;
|
||||
}
|
||||
|
||||
/*
|
||||
* this test is ugly, but necessary to avoid thinking that insert change
|
||||
* is already applied
|
||||
@@ -3345,7 +3347,7 @@ newt:;
|
||||
return;
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */
|
||||
if (XLByteLE(lsn, PageGetLSN(page))) /* changes are applied */
|
||||
{
|
||||
UnlockReleaseBuffer(buffer);
|
||||
return;
|
||||
@@ -3548,9 +3550,9 @@ static void
|
||||
out_target(StringInfo buf, xl_heaptid *target)
|
||||
{
|
||||
appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
|
||||
target->node.spcNode, target->node.dbNode, target->node.relNode,
|
||||
ItemPointerGetBlockNumber(&(target->tid)),
|
||||
ItemPointerGetOffsetNumber(&(target->tid)));
|
||||
target->node.spcNode, target->node.dbNode, target->node.relNode,
|
||||
ItemPointerGetBlockNumber(&(target->tid)),
|
||||
ItemPointerGetOffsetNumber(&(target->tid)));
|
||||
}
|
||||
|
||||
void
|
||||
@@ -3586,8 +3588,8 @@ heap_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
appendStringInfo(buf, "update: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
appendStringInfo(buf, "; new %u/%u",
|
||||
ItemPointerGetBlockNumber(&(xlrec->newtid)),
|
||||
ItemPointerGetOffsetNumber(&(xlrec->newtid)));
|
||||
ItemPointerGetBlockNumber(&(xlrec->newtid)),
|
||||
ItemPointerGetOffsetNumber(&(xlrec->newtid)));
|
||||
}
|
||||
else if (info == XLOG_HEAP_MOVE)
|
||||
{
|
||||
@@ -3599,24 +3601,24 @@ heap_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
appendStringInfo(buf, "move: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
appendStringInfo(buf, "; new %u/%u",
|
||||
ItemPointerGetBlockNumber(&(xlrec->newtid)),
|
||||
ItemPointerGetOffsetNumber(&(xlrec->newtid)));
|
||||
ItemPointerGetBlockNumber(&(xlrec->newtid)),
|
||||
ItemPointerGetOffsetNumber(&(xlrec->newtid)));
|
||||
}
|
||||
else if (info == XLOG_HEAP_CLEAN)
|
||||
{
|
||||
xl_heap_clean *xlrec = (xl_heap_clean *) rec;
|
||||
|
||||
appendStringInfo(buf, "clean: rel %u/%u/%u; blk %u",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode, xlrec->block);
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode, xlrec->block);
|
||||
}
|
||||
else if (info == XLOG_HEAP_NEWPAGE)
|
||||
{
|
||||
xl_heap_newpage *xlrec = (xl_heap_newpage *) rec;
|
||||
|
||||
appendStringInfo(buf, "newpage: rel %u/%u/%u; blk %u",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode, xlrec->blkno);
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode, xlrec->blkno);
|
||||
}
|
||||
else if (info == XLOG_HEAP_LOCK)
|
||||
{
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.64 2006/09/10 23:33:22 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.65 2006/10/04 00:29:48 momjian Exp $
|
||||
*
|
||||
*
|
||||
* INTERFACE ROUTINES
|
||||
@@ -1331,7 +1331,7 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length)
|
||||
VARATT_SIZEP(result) |= VARATT_FLAG_COMPRESSED;
|
||||
|
||||
if (length == 0)
|
||||
return result; /* Can save a lot of work at this point! */
|
||||
return result; /* Can save a lot of work at this point! */
|
||||
|
||||
startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
|
||||
endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE;
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/index/genam.c,v 1.58 2006/07/31 20:08:59 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/index/genam.c,v 1.59 2006/10/04 00:29:48 momjian Exp $
|
||||
*
|
||||
* NOTES
|
||||
* many of the old access method routines have been turned into
|
||||
@@ -86,7 +86,7 @@ RelationGetIndexScan(Relation indexRelation,
|
||||
else
|
||||
scan->keyData = NULL;
|
||||
|
||||
scan->is_multiscan = false; /* caller may change this */
|
||||
scan->is_multiscan = false; /* caller may change this */
|
||||
scan->kill_prior_tuple = false;
|
||||
scan->ignore_killed_tuples = true; /* default setting */
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.94 2006/07/31 20:08:59 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.95 2006/10/04 00:29:48 momjian Exp $
|
||||
*
|
||||
* INTERFACE ROUTINES
|
||||
* index_open - open an index relation by relation OID
|
||||
@@ -122,7 +122,7 @@ static IndexScanDesc index_beginscan_internal(Relation indexRelation,
|
||||
* index_open - open an index relation by relation OID
|
||||
*
|
||||
* If lockmode is not "NoLock", the specified kind of lock is
|
||||
* obtained on the index. (Generally, NoLock should only be
|
||||
* obtained on the index. (Generally, NoLock should only be
|
||||
* used if the caller knows it has some appropriate lock on the
|
||||
* index already.)
|
||||
*
|
||||
@@ -209,7 +209,7 @@ index_insert(Relation indexRelation,
|
||||
* index_getnext on this scan; index_getnext_indexitem will not use the
|
||||
* heapRelation link (nor the snapshot). However, the caller had better
|
||||
* be holding some kind of lock on the heap relation in any case, to ensure
|
||||
* no one deletes it (or the index) out from under us. Caller must also
|
||||
* no one deletes it (or the index) out from under us. Caller must also
|
||||
* be holding a lock on the index.
|
||||
*/
|
||||
IndexScanDesc
|
||||
@@ -553,7 +553,7 @@ index_getmulti(IndexScanDesc scan,
|
||||
*
|
||||
* callback routine tells whether a given main-heap tuple is
|
||||
* to be deleted
|
||||
*
|
||||
*
|
||||
* return value is an optional palloc'd struct of statistics
|
||||
* ----------------
|
||||
*/
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.143 2006/08/25 04:06:46 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.144 2006/10/04 00:29:48 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -252,7 +252,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
|
||||
*/
|
||||
htup.t_self = itup->t_tid;
|
||||
if (heap_fetch(heapRel, SnapshotSelf, &htup, &hbuffer,
|
||||
false, NULL))
|
||||
false, NULL))
|
||||
{
|
||||
/* Normal case --- it's still live */
|
||||
ReleaseBuffer(hbuffer);
|
||||
@@ -355,7 +355,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
|
||||
* + updates the metapage if a true root or fast root is split.
|
||||
*
|
||||
* On entry, we must have the right buffer in which to do the
|
||||
* insertion, and the buffer must be pinned and write-locked. On return,
|
||||
* insertion, and the buffer must be pinned and write-locked. On return,
|
||||
* we will have dropped both the pin and the lock on the buffer.
|
||||
*
|
||||
* If 'afteritem' is >0 then the new tuple must be inserted after the
|
||||
@@ -608,7 +608,7 @@ _bt_insertonpg(Relation rel,
|
||||
if (!rel->rd_istemp)
|
||||
{
|
||||
xl_btree_insert xlrec;
|
||||
BlockNumber xldownlink;
|
||||
BlockNumber xldownlink;
|
||||
xl_btree_metadata xlmeta;
|
||||
uint8 xlinfo;
|
||||
XLogRecPtr recptr;
|
||||
@@ -888,16 +888,17 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
|
||||
sopaque = (BTPageOpaque) PageGetSpecialPointer(spage);
|
||||
if (sopaque->btpo_prev != ropaque->btpo_prev)
|
||||
elog(PANIC, "right sibling's left-link doesn't match");
|
||||
|
||||
/*
|
||||
* Check to see if we can set the SPLIT_END flag in the right-hand
|
||||
* split page; this can save some I/O for vacuum since it need not
|
||||
* proceed to the right sibling. We can set the flag if the right
|
||||
* sibling has a different cycleid: that means it could not be part
|
||||
* of a group of pages that were all split off from the same ancestor
|
||||
* sibling has a different cycleid: that means it could not be part of
|
||||
* a group of pages that were all split off from the same ancestor
|
||||
* page. If you're confused, imagine that page A splits to A B and
|
||||
* then again, yielding A C B, while vacuum is in progress. Tuples
|
||||
* originally in A could now be in either B or C, hence vacuum must
|
||||
* examine both pages. But if D, our right sibling, has a different
|
||||
* examine both pages. But if D, our right sibling, has a different
|
||||
* cycleid then it could not contain any tuples that were in A when
|
||||
* the vacuum started.
|
||||
*/
|
||||
@@ -911,8 +912,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
|
||||
*
|
||||
* NO EREPORT(ERROR) till right sibling is updated. We can get away with
|
||||
* not starting the critical section till here because we haven't been
|
||||
* scribbling on the original page yet, and we don't care about the
|
||||
* new sibling until it's linked into the btree.
|
||||
* scribbling on the original page yet, and we don't care about the new
|
||||
* sibling until it's linked into the btree.
|
||||
*/
|
||||
START_CRIT_SECTION();
|
||||
|
||||
@@ -947,8 +948,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
|
||||
* Direct access to page is not good but faster - we should implement
|
||||
* some new func in page API. Note we only store the tuples
|
||||
* themselves, knowing that the item pointers are in the same order
|
||||
* and can be reconstructed by scanning the tuples. See comments
|
||||
* for _bt_restore_page().
|
||||
* and can be reconstructed by scanning the tuples. See comments for
|
||||
* _bt_restore_page().
|
||||
*/
|
||||
xlrec.leftlen = ((PageHeader) leftpage)->pd_special -
|
||||
((PageHeader) leftpage)->pd_upper;
|
||||
@@ -1708,17 +1709,17 @@ _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum,
|
||||
static void
|
||||
_bt_vacuum_one_page(Relation rel, Buffer buffer)
|
||||
{
|
||||
OffsetNumber deletable[MaxOffsetNumber];
|
||||
int ndeletable = 0;
|
||||
OffsetNumber offnum,
|
||||
minoff,
|
||||
maxoff;
|
||||
Page page = BufferGetPage(buffer);
|
||||
BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
OffsetNumber deletable[MaxOffsetNumber];
|
||||
int ndeletable = 0;
|
||||
OffsetNumber offnum,
|
||||
minoff,
|
||||
maxoff;
|
||||
Page page = BufferGetPage(buffer);
|
||||
BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
/*
|
||||
* Scan over all items to see which ones need deleted
|
||||
* according to LP_DELETE flags.
|
||||
* Scan over all items to see which ones need deleted according to
|
||||
* LP_DELETE flags.
|
||||
*/
|
||||
minoff = P_FIRSTDATAKEY(opaque);
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
@@ -1726,7 +1727,7 @@ _bt_vacuum_one_page(Relation rel, Buffer buffer)
|
||||
offnum <= maxoff;
|
||||
offnum = OffsetNumberNext(offnum))
|
||||
{
|
||||
ItemId itemId = PageGetItemId(page, offnum);
|
||||
ItemId itemId = PageGetItemId(page, offnum);
|
||||
|
||||
if (ItemIdDeleted(itemId))
|
||||
deletable[ndeletable++] = offnum;
|
||||
@@ -1734,10 +1735,11 @@ _bt_vacuum_one_page(Relation rel, Buffer buffer)
|
||||
|
||||
if (ndeletable > 0)
|
||||
_bt_delitems(rel, buffer, deletable, ndeletable);
|
||||
|
||||
/*
|
||||
* Note: if we didn't find any LP_DELETE items, then the page's
|
||||
* BTP_HAS_GARBAGE hint bit is falsely set. We do not bother
|
||||
* expending a separate write to clear it, however. We will clear
|
||||
* it when we split the page.
|
||||
* BTP_HAS_GARBAGE hint bit is falsely set. We do not bother expending a
|
||||
* separate write to clear it, however. We will clear it when we split
|
||||
* the page.
|
||||
*/
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.99 2006/07/25 19:13:00 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.100 2006/10/04 00:29:49 momjian Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Postgres btree pages look like ordinary relation pages. The opaque
|
||||
@@ -124,10 +124,10 @@ _bt_getroot(Relation rel, int access)
|
||||
|
||||
/*
|
||||
* Since the cache might be stale, we check the page more carefully
|
||||
* here than normal. We *must* check that it's not deleted.
|
||||
* If it's not alone on its level, then we reject too --- this
|
||||
* may be overly paranoid but better safe than sorry. Note we
|
||||
* don't check P_ISROOT, because that's not set in a "fast root".
|
||||
* here than normal. We *must* check that it's not deleted. If it's
|
||||
* not alone on its level, then we reject too --- this may be overly
|
||||
* paranoid but better safe than sorry. Note we don't check P_ISROOT,
|
||||
* because that's not set in a "fast root".
|
||||
*/
|
||||
if (!P_IGNORE(rootopaque) &&
|
||||
rootopaque->btpo.level == rootlevel &&
|
||||
@@ -662,18 +662,18 @@ _bt_delitems(Relation rel, Buffer buf,
|
||||
PageIndexMultiDelete(page, itemnos, nitems);
|
||||
|
||||
/*
|
||||
* We can clear the vacuum cycle ID since this page has certainly
|
||||
* been processed by the current vacuum scan.
|
||||
* We can clear the vacuum cycle ID since this page has certainly been
|
||||
* processed by the current vacuum scan.
|
||||
*/
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
opaque->btpo_cycleid = 0;
|
||||
|
||||
/*
|
||||
* Mark the page as not containing any LP_DELETE items. This is not
|
||||
* certainly true (there might be some that have recently been marked,
|
||||
* but weren't included in our target-item list), but it will almost
|
||||
* always be true and it doesn't seem worth an additional page scan
|
||||
* to check it. Remember that BTP_HAS_GARBAGE is only a hint anyway.
|
||||
* certainly true (there might be some that have recently been marked, but
|
||||
* weren't included in our target-item list), but it will almost always be
|
||||
* true and it doesn't seem worth an additional page scan to check it.
|
||||
* Remember that BTP_HAS_GARBAGE is only a hint anyway.
|
||||
*/
|
||||
opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.151 2006/09/21 20:31:22 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.152 2006/10/04 00:29:49 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -55,7 +55,7 @@ typedef struct
|
||||
BlockNumber *freePages;
|
||||
int nFreePages; /* number of entries in freePages[] */
|
||||
int maxFreePages; /* allocated size of freePages[] */
|
||||
BlockNumber totFreePages; /* true total # of free pages */
|
||||
BlockNumber totFreePages; /* true total # of free pages */
|
||||
MemoryContext pagedelcontext;
|
||||
} BTVacState;
|
||||
|
||||
@@ -70,7 +70,7 @@ static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
|
||||
IndexBulkDeleteCallback callback, void *callback_state,
|
||||
BTCycleId cycleid);
|
||||
static void btvacuumpage(BTVacState *vstate, BlockNumber blkno,
|
||||
BlockNumber orig_blkno);
|
||||
BlockNumber orig_blkno);
|
||||
|
||||
|
||||
/*
|
||||
@@ -109,8 +109,8 @@ btbuild(PG_FUNCTION_ARGS)
|
||||
buildstate.spool = _bt_spoolinit(index, indexInfo->ii_Unique, false);
|
||||
|
||||
/*
|
||||
* If building a unique index, put dead tuples in a second spool to
|
||||
* keep them out of the uniqueness check.
|
||||
* If building a unique index, put dead tuples in a second spool to keep
|
||||
* them out of the uniqueness check.
|
||||
*/
|
||||
if (indexInfo->ii_Unique)
|
||||
buildstate.spool2 = _bt_spoolinit(index, false, true);
|
||||
@@ -146,11 +146,11 @@ btbuild(PG_FUNCTION_ARGS)
|
||||
#endif /* BTREE_BUILD_STATS */
|
||||
|
||||
/*
|
||||
* If we are reindexing a pre-existing index, it is critical to send out
|
||||
* a relcache invalidation SI message to ensure all backends re-read the
|
||||
* index metapage. We expect that the caller will ensure that happens
|
||||
* (typically as a side effect of updating index stats, but it must
|
||||
* happen even if the stats don't change!)
|
||||
* If we are reindexing a pre-existing index, it is critical to send out a
|
||||
* relcache invalidation SI message to ensure all backends re-read the
|
||||
* index metapage. We expect that the caller will ensure that happens
|
||||
* (typically as a side effect of updating index stats, but it must happen
|
||||
* even if the stats don't change!)
|
||||
*/
|
||||
|
||||
/*
|
||||
@@ -252,11 +252,11 @@ btgettuple(PG_FUNCTION_ARGS)
|
||||
if (scan->kill_prior_tuple)
|
||||
{
|
||||
/*
|
||||
* Yes, remember it for later. (We'll deal with all such tuples
|
||||
* Yes, remember it for later. (We'll deal with all such tuples
|
||||
* at once right before leaving the index page.) The test for
|
||||
* numKilled overrun is not just paranoia: if the caller reverses
|
||||
* direction in the indexscan then the same item might get entered
|
||||
* multiple times. It's not worth trying to optimize that, so we
|
||||
* multiple times. It's not worth trying to optimize that, so we
|
||||
* don't detect it, but instead just forget any excess entries.
|
||||
*/
|
||||
if (so->killedItems == NULL)
|
||||
@@ -316,8 +316,8 @@ btgetmulti(PG_FUNCTION_ARGS)
|
||||
while (ntids < max_tids)
|
||||
{
|
||||
/*
|
||||
* Advance to next tuple within page. This is the same as the
|
||||
* easy case in _bt_next().
|
||||
* Advance to next tuple within page. This is the same as the easy
|
||||
* case in _bt_next().
|
||||
*/
|
||||
if (++so->currPos.itemIndex > so->currPos.lastItem)
|
||||
{
|
||||
@@ -373,7 +373,7 @@ btrescan(PG_FUNCTION_ARGS)
|
||||
so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
|
||||
else
|
||||
so->keyData = NULL;
|
||||
so->killedItems = NULL; /* until needed */
|
||||
so->killedItems = NULL; /* until needed */
|
||||
so->numKilled = 0;
|
||||
scan->opaque = so;
|
||||
}
|
||||
@@ -461,9 +461,9 @@ btmarkpos(PG_FUNCTION_ARGS)
|
||||
|
||||
/*
|
||||
* Just record the current itemIndex. If we later step to next page
|
||||
* before releasing the marked position, _bt_steppage makes a full copy
|
||||
* of the currPos struct in markPos. If (as often happens) the mark is
|
||||
* moved before we leave the page, we don't have to do that work.
|
||||
* before releasing the marked position, _bt_steppage makes a full copy of
|
||||
* the currPos struct in markPos. If (as often happens) the mark is moved
|
||||
* before we leave the page, we don't have to do that work.
|
||||
*/
|
||||
if (BTScanPosIsValid(so->currPos))
|
||||
so->markItemIndex = so->currPos.itemIndex;
|
||||
@@ -485,11 +485,11 @@ btrestrpos(PG_FUNCTION_ARGS)
|
||||
if (so->markItemIndex >= 0)
|
||||
{
|
||||
/*
|
||||
* The mark position is on the same page we are currently on.
|
||||
* Just restore the itemIndex.
|
||||
* The mark position is on the same page we are currently on. Just
|
||||
* restore the itemIndex.
|
||||
*/
|
||||
so->currPos.itemIndex = so->markItemIndex;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* we aren't holding any read locks, but gotta drop the pin */
|
||||
@@ -527,7 +527,7 @@ Datum
|
||||
btbulkdelete(PG_FUNCTION_ARGS)
|
||||
{
|
||||
IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
|
||||
IndexBulkDeleteResult * volatile stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
|
||||
IndexBulkDeleteResult *volatile stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
|
||||
IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2);
|
||||
void *callback_state = (void *) PG_GETARG_POINTER(3);
|
||||
Relation rel = info->index;
|
||||
@@ -569,10 +569,10 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
|
||||
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
|
||||
|
||||
/*
|
||||
* If btbulkdelete was called, we need not do anything, just return
|
||||
* the stats from the latest btbulkdelete call. If it wasn't called,
|
||||
* we must still do a pass over the index, to recycle any newly-recyclable
|
||||
* pages and to obtain index statistics.
|
||||
* If btbulkdelete was called, we need not do anything, just return the
|
||||
* stats from the latest btbulkdelete call. If it wasn't called, we must
|
||||
* still do a pass over the index, to recycle any newly-recyclable pages
|
||||
* and to obtain index statistics.
|
||||
*
|
||||
* Since we aren't going to actually delete any leaf items, there's no
|
||||
* need to go through all the vacuum-cycle-ID pushups.
|
||||
@@ -586,8 +586,8 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
|
||||
/*
|
||||
* During a non-FULL vacuum it's quite possible for us to be fooled by
|
||||
* concurrent page splits into double-counting some index tuples, so
|
||||
* disbelieve any total that exceeds the underlying heap's count.
|
||||
* (We can't check this during btbulkdelete.)
|
||||
* disbelieve any total that exceeds the underlying heap's count. (We
|
||||
* can't check this during btbulkdelete.)
|
||||
*/
|
||||
if (!info->vacuum_full)
|
||||
{
|
||||
@@ -622,8 +622,8 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
|
||||
bool needLock;
|
||||
|
||||
/*
|
||||
* Reset counts that will be incremented during the scan; needed in
|
||||
* case of multiple scans during a single VACUUM command
|
||||
* Reset counts that will be incremented during the scan; needed in case
|
||||
* of multiple scans during a single VACUUM command
|
||||
*/
|
||||
stats->num_index_tuples = 0;
|
||||
stats->pages_deleted = 0;
|
||||
@@ -647,24 +647,24 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
|
||||
/*
|
||||
* The outer loop iterates over all index pages except the metapage,
|
||||
* in physical order (we hope the kernel will cooperate in providing
|
||||
* The outer loop iterates over all index pages except the metapage, in
|
||||
* physical order (we hope the kernel will cooperate in providing
|
||||
* read-ahead for speed). It is critical that we visit all leaf pages,
|
||||
* including ones added after we start the scan, else we might fail to
|
||||
* delete some deletable tuples. Hence, we must repeatedly check the
|
||||
* relation length. We must acquire the relation-extension lock while
|
||||
* doing so to avoid a race condition: if someone else is extending the
|
||||
* relation, there is a window where bufmgr/smgr have created a new
|
||||
* all-zero page but it hasn't yet been write-locked by _bt_getbuf().
|
||||
* If we manage to scan such a page here, we'll improperly assume it can
|
||||
* be recycled. Taking the lock synchronizes things enough to prevent a
|
||||
* all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
|
||||
* we manage to scan such a page here, we'll improperly assume it can be
|
||||
* recycled. Taking the lock synchronizes things enough to prevent a
|
||||
* problem: either num_pages won't include the new page, or _bt_getbuf
|
||||
* already has write lock on the buffer and it will be fully initialized
|
||||
* before we can examine it. (See also vacuumlazy.c, which has the same
|
||||
* issue.) Also, we need not worry if a page is added immediately after
|
||||
* issue.) Also, we need not worry if a page is added immediately after
|
||||
* we look; the page splitting code already has write-lock on the left
|
||||
* page before it adds a right page, so we must already have processed
|
||||
* any tuples due to be moved into such a page.
|
||||
* page before it adds a right page, so we must already have processed any
|
||||
* tuples due to be moved into such a page.
|
||||
*
|
||||
* We can skip locking for new or temp relations, however, since no one
|
||||
* else could be accessing them.
|
||||
@@ -771,7 +771,7 @@ btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
|
||||
void *callback_state = vstate->callback_state;
|
||||
Relation rel = info->index;
|
||||
bool delete_now;
|
||||
BlockNumber recurse_to;
|
||||
BlockNumber recurse_to;
|
||||
Buffer buf;
|
||||
Page page;
|
||||
BTPageOpaque opaque;
|
||||
@@ -796,10 +796,10 @@ restart:
|
||||
_bt_checkpage(rel, buf);
|
||||
|
||||
/*
|
||||
* If we are recursing, the only case we want to do anything with is
|
||||
* a live leaf page having the current vacuum cycle ID. Any other state
|
||||
* implies we already saw the page (eg, deleted it as being empty).
|
||||
* In particular, we don't want to risk adding it to freePages twice.
|
||||
* If we are recursing, the only case we want to do anything with is a
|
||||
* live leaf page having the current vacuum cycle ID. Any other state
|
||||
* implies we already saw the page (eg, deleted it as being empty). In
|
||||
* particular, we don't want to risk adding it to freePages twice.
|
||||
*/
|
||||
if (blkno != orig_blkno)
|
||||
{
|
||||
@@ -838,25 +838,24 @@ restart:
|
||||
OffsetNumber deletable[MaxOffsetNumber];
|
||||
int ndeletable;
|
||||
OffsetNumber offnum,
|
||||
minoff,
|
||||
maxoff;
|
||||
minoff,
|
||||
maxoff;
|
||||
|
||||
/*
|
||||
* Trade in the initial read lock for a super-exclusive write
|
||||
* lock on this page. We must get such a lock on every leaf page
|
||||
* over the course of the vacuum scan, whether or not it actually
|
||||
* contains any deletable tuples --- see nbtree/README.
|
||||
* Trade in the initial read lock for a super-exclusive write lock on
|
||||
* this page. We must get such a lock on every leaf page over the
|
||||
* course of the vacuum scan, whether or not it actually contains any
|
||||
* deletable tuples --- see nbtree/README.
|
||||
*/
|
||||
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
|
||||
LockBufferForCleanup(buf);
|
||||
|
||||
/*
|
||||
* Check whether we need to recurse back to earlier pages. What
|
||||
* we are concerned about is a page split that happened since we
|
||||
* started the vacuum scan. If the split moved some tuples to a
|
||||
* lower page then we might have missed 'em. If so, set up for
|
||||
* tail recursion. (Must do this before possibly clearing
|
||||
* btpo_cycleid below!)
|
||||
* Check whether we need to recurse back to earlier pages. What we
|
||||
* are concerned about is a page split that happened since we started
|
||||
* the vacuum scan. If the split moved some tuples to a lower page
|
||||
* then we might have missed 'em. If so, set up for tail recursion.
|
||||
* (Must do this before possibly clearing btpo_cycleid below!)
|
||||
*/
|
||||
if (vstate->cycleid != 0 &&
|
||||
opaque->btpo_cycleid == vstate->cycleid &&
|
||||
@@ -866,8 +865,8 @@ restart:
|
||||
recurse_to = opaque->btpo_next;
|
||||
|
||||
/*
|
||||
* Scan over all items to see which ones need deleted
|
||||
* according to the callback function.
|
||||
* Scan over all items to see which ones need deleted according to the
|
||||
* callback function.
|
||||
*/
|
||||
ndeletable = 0;
|
||||
minoff = P_FIRSTDATAKEY(opaque);
|
||||
@@ -890,8 +889,8 @@ restart:
|
||||
}
|
||||
|
||||
/*
|
||||
* Apply any needed deletes. We issue just one _bt_delitems()
|
||||
* call per page, so as to minimize WAL traffic.
|
||||
* Apply any needed deletes. We issue just one _bt_delitems() call
|
||||
* per page, so as to minimize WAL traffic.
|
||||
*/
|
||||
if (ndeletable > 0)
|
||||
{
|
||||
@@ -908,8 +907,8 @@ restart:
|
||||
* have any deletions to do. (If we do, _bt_delitems takes care
|
||||
* of this.) This ensures we won't process the page again.
|
||||
*
|
||||
* We treat this like a hint-bit update because there's no need
|
||||
* to WAL-log it.
|
||||
* We treat this like a hint-bit update because there's no need to
|
||||
* WAL-log it.
|
||||
*/
|
||||
if (vstate->cycleid != 0 &&
|
||||
opaque->btpo_cycleid == vstate->cycleid)
|
||||
@@ -920,10 +919,10 @@ restart:
|
||||
}
|
||||
|
||||
/*
|
||||
* If it's now empty, try to delete; else count the live tuples.
|
||||
* We don't delete when recursing, though, to avoid putting entries
|
||||
* into freePages out-of-order (doesn't seem worth any extra code to
|
||||
* handle the case).
|
||||
* If it's now empty, try to delete; else count the live tuples. We
|
||||
* don't delete when recursing, though, to avoid putting entries into
|
||||
* freePages out-of-order (doesn't seem worth any extra code to handle
|
||||
* the case).
|
||||
*/
|
||||
if (minoff > maxoff)
|
||||
delete_now = (blkno == orig_blkno);
|
||||
@@ -947,13 +946,12 @@ restart:
|
||||
stats->pages_deleted++;
|
||||
|
||||
/*
|
||||
* During VACUUM FULL it's okay to recycle deleted pages
|
||||
* immediately, since there can be no other transactions scanning
|
||||
* the index. Note that we will only recycle the current page and
|
||||
* not any parent pages that _bt_pagedel might have recursed to;
|
||||
* this seems reasonable in the name of simplicity. (Trying to do
|
||||
* otherwise would mean we'd have to sort the list of recyclable
|
||||
* pages we're building.)
|
||||
* During VACUUM FULL it's okay to recycle deleted pages immediately,
|
||||
* since there can be no other transactions scanning the index. Note
|
||||
* that we will only recycle the current page and not any parent pages
|
||||
* that _bt_pagedel might have recursed to; this seems reasonable in
|
||||
* the name of simplicity. (Trying to do otherwise would mean we'd
|
||||
* have to sort the list of recyclable pages we're building.)
|
||||
*/
|
||||
if (ndel && info->vacuum_full)
|
||||
{
|
||||
@@ -969,11 +967,11 @@ restart:
|
||||
_bt_relbuf(rel, buf);
|
||||
|
||||
/*
|
||||
* This is really tail recursion, but if the compiler is too stupid
|
||||
* to optimize it as such, we'd eat an uncomfortably large amount of
|
||||
* stack space per recursion level (due to the deletable[] array).
|
||||
* A failure is improbable since the number of levels isn't likely to be
|
||||
* large ... but just in case, let's hand-optimize into a loop.
|
||||
* This is really tail recursion, but if the compiler is too stupid to
|
||||
* optimize it as such, we'd eat an uncomfortably large amount of stack
|
||||
* space per recursion level (due to the deletable[] array). A failure is
|
||||
* improbable since the number of levels isn't likely to be large ... but
|
||||
* just in case, let's hand-optimize into a loop.
|
||||
*/
|
||||
if (recurse_to != P_NONE)
|
||||
{
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.106 2006/08/24 01:18:34 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.107 2006/10/04 00:29:49 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -22,7 +22,7 @@
|
||||
|
||||
|
||||
static bool _bt_readpage(IndexScanDesc scan, ScanDirection dir,
|
||||
OffsetNumber offnum);
|
||||
OffsetNumber offnum);
|
||||
static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir);
|
||||
static Buffer _bt_walk_left(Relation rel, Buffer buf);
|
||||
static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
|
||||
@@ -417,7 +417,7 @@ _bt_compare(Relation rel,
|
||||
* _bt_first() -- Find the first item in a scan.
|
||||
*
|
||||
* We need to be clever about the direction of scan, the search
|
||||
* conditions, and the tree ordering. We find the first item (or,
|
||||
* conditions, and the tree ordering. We find the first item (or,
|
||||
* if backwards scan, the last item) in the tree that satisfies the
|
||||
* qualifications in the scan key. On success exit, the page containing
|
||||
* the current index tuple is pinned but not locked, and data about
|
||||
@@ -604,7 +604,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
{
|
||||
ScanKey cur = startKeys[i];
|
||||
|
||||
Assert(cur->sk_attno == i+1);
|
||||
Assert(cur->sk_attno == i + 1);
|
||||
|
||||
if (cur->sk_flags & SK_ROW_HEADER)
|
||||
{
|
||||
@@ -612,16 +612,17 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
* Row comparison header: look to the first row member instead.
|
||||
*
|
||||
* The member scankeys are already in insertion format (ie, they
|
||||
* have sk_func = 3-way-comparison function), but we have to
|
||||
* watch out for nulls, which _bt_preprocess_keys didn't check.
|
||||
* A null in the first row member makes the condition unmatchable,
|
||||
* just like qual_ok = false.
|
||||
* have sk_func = 3-way-comparison function), but we have to watch
|
||||
* out for nulls, which _bt_preprocess_keys didn't check. A null
|
||||
* in the first row member makes the condition unmatchable, just
|
||||
* like qual_ok = false.
|
||||
*/
|
||||
cur = (ScanKey) DatumGetPointer(cur->sk_argument);
|
||||
Assert(cur->sk_flags & SK_ROW_MEMBER);
|
||||
if (cur->sk_flags & SK_ISNULL)
|
||||
return false;
|
||||
memcpy(scankeys + i, cur, sizeof(ScanKeyData));
|
||||
|
||||
/*
|
||||
* If the row comparison is the last positioning key we accepted,
|
||||
* try to add additional keys from the lower-order row members.
|
||||
@@ -833,10 +834,10 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
|
||||
*
|
||||
* The actually desired starting point is either this item or the prior
|
||||
* one, or in the end-of-page case it's the first item on the next page or
|
||||
* the last item on this page. Adjust the starting offset if needed.
|
||||
* (If this results in an offset before the first item or after the last
|
||||
* one, _bt_readpage will report no items found, and then we'll step to
|
||||
* the next page as needed.)
|
||||
* the last item on this page. Adjust the starting offset if needed. (If
|
||||
* this results in an offset before the first item or after the last one,
|
||||
* _bt_readpage will report no items found, and then we'll step to the
|
||||
* next page as needed.)
|
||||
*/
|
||||
if (goback)
|
||||
offnum = OffsetNumberPrev(offnum);
|
||||
@@ -882,8 +883,8 @@ _bt_next(IndexScanDesc scan, ScanDirection dir)
|
||||
BTScanOpaque so = (BTScanOpaque) scan->opaque;
|
||||
|
||||
/*
|
||||
* Advance to next tuple on current page; or if there's no more,
|
||||
* try to step to the next page with data.
|
||||
* Advance to next tuple on current page; or if there's no more, try to
|
||||
* step to the next page with data.
|
||||
*/
|
||||
if (ScanDirectionIsForward(dir))
|
||||
{
|
||||
@@ -954,8 +955,8 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
|
||||
|
||||
/*
|
||||
* we must save the page's right-link while scanning it; this tells us
|
||||
* where to step right to after we're done with these items. There is
|
||||
* no corresponding need for the left-link, since splits always go right.
|
||||
* where to step right to after we're done with these items. There is no
|
||||
* corresponding need for the left-link, since splits always go right.
|
||||
*/
|
||||
so->currPos.nextPage = opaque->btpo_next;
|
||||
|
||||
@@ -1055,8 +1056,8 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir)
|
||||
_bt_killitems(scan, true);
|
||||
|
||||
/*
|
||||
* Before we modify currPos, make a copy of the page data if there
|
||||
* was a mark position that needs it.
|
||||
* Before we modify currPos, make a copy of the page data if there was a
|
||||
* mark position that needs it.
|
||||
*/
|
||||
if (so->markItemIndex >= 0)
|
||||
{
|
||||
@@ -1112,11 +1113,11 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir)
|
||||
so->currPos.moreRight = true;
|
||||
|
||||
/*
|
||||
* Walk left to the next page with data. This is much more
|
||||
* complex than the walk-right case because of the possibility
|
||||
* that the page to our left splits while we are in flight to it,
|
||||
* plus the possibility that the page we were on gets deleted
|
||||
* after we leave it. See nbtree/README for details.
|
||||
* Walk left to the next page with data. This is much more complex
|
||||
* than the walk-right case because of the possibility that the page
|
||||
* to our left splits while we are in flight to it, plus the
|
||||
* possibility that the page we were on gets deleted after we leave
|
||||
* it. See nbtree/README for details.
|
||||
*/
|
||||
for (;;)
|
||||
{
|
||||
@@ -1136,9 +1137,9 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Okay, we managed to move left to a non-deleted page.
|
||||
* Done if it's not half-dead and contains matching tuples.
|
||||
* Else loop back and do it all again.
|
||||
* Okay, we managed to move left to a non-deleted page. Done if
|
||||
* it's not half-dead and contains matching tuples. Else loop back
|
||||
* and do it all again.
|
||||
*/
|
||||
page = BufferGetPage(so->currPos.buf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.106 2006/07/14 14:52:17 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.107 2006/10/04 00:29:49 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -125,7 +125,7 @@ static void _bt_slideleft(Page page);
|
||||
static void _bt_sortaddtup(Page page, Size itemsize,
|
||||
IndexTuple itup, OffsetNumber itup_off);
|
||||
static void _bt_buildadd(BTWriteState *wstate, BTPageState *state,
|
||||
IndexTuple itup);
|
||||
IndexTuple itup);
|
||||
static void _bt_uppershutdown(BTWriteState *wstate, BTPageState *state);
|
||||
static void _bt_load(BTWriteState *wstate,
|
||||
BTSpool *btspool, BTSpool *btspool2);
|
||||
@@ -351,7 +351,7 @@ _bt_pagestate(BTWriteState *wstate, uint32 level)
|
||||
state->btps_full = (BLCKSZ * (100 - BTREE_NONLEAF_FILLFACTOR) / 100);
|
||||
else
|
||||
state->btps_full = RelationGetTargetPageFreeSpace(wstate->index,
|
||||
BTREE_DEFAULT_FILLFACTOR);
|
||||
BTREE_DEFAULT_FILLFACTOR);
|
||||
/* no parent level, yet */
|
||||
state->btps_next = NULL;
|
||||
|
||||
@@ -464,8 +464,8 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
|
||||
Size itupsz;
|
||||
|
||||
/*
|
||||
* This is a handy place to check for cancel interrupts during the
|
||||
* btree load phase of index creation.
|
||||
* This is a handy place to check for cancel interrupts during the btree
|
||||
* load phase of index creation.
|
||||
*/
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
@@ -499,10 +499,10 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
|
||||
"or use full text indexing.")));
|
||||
|
||||
/*
|
||||
* Check to see if page is "full". It's definitely full if the item
|
||||
* won't fit. Otherwise, compare to the target freespace derived from
|
||||
* the fillfactor. However, we must put at least two items on each
|
||||
* page, so disregard fillfactor if we don't have that many.
|
||||
* Check to see if page is "full". It's definitely full if the item won't
|
||||
* fit. Otherwise, compare to the target freespace derived from the
|
||||
* fillfactor. However, we must put at least two items on each page, so
|
||||
* disregard fillfactor if we don't have that many.
|
||||
*/
|
||||
if (pgspc < itupsz || (pgspc < state->btps_full && last_off > P_FIRSTKEY))
|
||||
{
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtutils.c,v 1.78 2006/07/25 19:13:00 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtutils.c,v 1.79 2006/10/04 00:29:49 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -28,8 +28,8 @@
|
||||
|
||||
static void _bt_mark_scankey_required(ScanKey skey);
|
||||
static bool _bt_check_rowcompare(ScanKey skey,
|
||||
IndexTuple tuple, TupleDesc tupdesc,
|
||||
ScanDirection dir, bool *continuescan);
|
||||
IndexTuple tuple, TupleDesc tupdesc,
|
||||
ScanDirection dir, bool *continuescan);
|
||||
|
||||
|
||||
/*
|
||||
@@ -83,7 +83,7 @@ _bt_mkscankey(Relation rel, IndexTuple itup)
|
||||
* comparison data ultimately used must match the key datatypes.
|
||||
*
|
||||
* The result cannot be used with _bt_compare(), unless comparison
|
||||
* data is first stored into the key entries. Currently this
|
||||
* data is first stored into the key entries. Currently this
|
||||
* routine is only called by nbtsort.c and tuplesort.c, which have
|
||||
* their own comparison routines.
|
||||
*/
|
||||
@@ -388,7 +388,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
|
||||
|
||||
/*
|
||||
* Emit the cleaned-up keys into the outkeys[] array, and then
|
||||
* mark them if they are required. They are required (possibly
|
||||
* mark them if they are required. They are required (possibly
|
||||
* only in one direction) if all attrs before this one had "=".
|
||||
*/
|
||||
for (j = BTMaxStrategyNumber; --j >= 0;)
|
||||
@@ -461,7 +461,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
|
||||
* Mark a scankey as "required to continue the scan".
|
||||
*
|
||||
* Depending on the operator type, the key may be required for both scan
|
||||
* directions or just one. Also, if the key is a row comparison header,
|
||||
* directions or just one. Also, if the key is a row comparison header,
|
||||
* we have to mark the appropriate subsidiary ScanKeys as required. In
|
||||
* such cases, the first subsidiary key is required, but subsequent ones
|
||||
* are required only as long as they correspond to successive index columns.
|
||||
@@ -472,12 +472,12 @@ _bt_preprocess_keys(IndexScanDesc scan)
|
||||
* scribbling on a data structure belonging to the index AM's caller, not on
|
||||
* our private copy. This should be OK because the marking will not change
|
||||
* from scan to scan within a query, and so we'd just re-mark the same way
|
||||
* anyway on a rescan. Something to keep an eye on though.
|
||||
* anyway on a rescan. Something to keep an eye on though.
|
||||
*/
|
||||
static void
|
||||
_bt_mark_scankey_required(ScanKey skey)
|
||||
{
|
||||
int addflags;
|
||||
int addflags;
|
||||
|
||||
switch (skey->sk_strategy)
|
||||
{
|
||||
@@ -503,8 +503,8 @@ _bt_mark_scankey_required(ScanKey skey)
|
||||
|
||||
if (skey->sk_flags & SK_ROW_HEADER)
|
||||
{
|
||||
ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
|
||||
AttrNumber attno = skey->sk_attno;
|
||||
ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
|
||||
AttrNumber attno = skey->sk_attno;
|
||||
|
||||
/* First subkey should be same as the header says */
|
||||
Assert(subkey->sk_attno == attno);
|
||||
@@ -558,12 +558,12 @@ _bt_checkkeys(IndexScanDesc scan,
|
||||
*continuescan = true; /* default assumption */
|
||||
|
||||
/*
|
||||
* If the scan specifies not to return killed tuples, then we treat
|
||||
* a killed tuple as not passing the qual. Most of the time, it's a
|
||||
* win to not bother examining the tuple's index keys, but just return
|
||||
* If the scan specifies not to return killed tuples, then we treat a
|
||||
* killed tuple as not passing the qual. Most of the time, it's a win to
|
||||
* not bother examining the tuple's index keys, but just return
|
||||
* immediately with continuescan = true to proceed to the next tuple.
|
||||
* However, if this is the last tuple on the page, we should check
|
||||
* the index keys to prevent uselessly advancing to the next page.
|
||||
* However, if this is the last tuple on the page, we should check the
|
||||
* index keys to prevent uselessly advancing to the next page.
|
||||
*/
|
||||
if (scan->ignore_killed_tuples && ItemIdDeleted(iid))
|
||||
{
|
||||
@@ -580,9 +580,10 @@ _bt_checkkeys(IndexScanDesc scan,
|
||||
if (offnum > P_FIRSTDATAKEY(opaque))
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* OK, we want to check the keys, but we'll return FALSE even
|
||||
* if the tuple passes the key tests.
|
||||
* OK, we want to check the keys, but we'll return FALSE even if the
|
||||
* tuple passes the key tests.
|
||||
*/
|
||||
tuple_valid = false;
|
||||
}
|
||||
@@ -734,10 +735,9 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
|
||||
{
|
||||
/*
|
||||
* Unlike the simple-scankey case, this isn't a disallowed case.
|
||||
* But it can never match. If all the earlier row comparison
|
||||
* columns are required for the scan direction, we can stop
|
||||
* the scan, because there can't be another tuple that will
|
||||
* succeed.
|
||||
* But it can never match. If all the earlier row comparison
|
||||
* columns are required for the scan direction, we can stop the
|
||||
* scan, because there can't be another tuple that will succeed.
|
||||
*/
|
||||
if (subkey != (ScanKey) DatumGetPointer(skey->sk_argument))
|
||||
subkey--;
|
||||
@@ -771,7 +771,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
|
||||
*/
|
||||
switch (subkey->sk_strategy)
|
||||
{
|
||||
/* EQ and NE cases aren't allowed here */
|
||||
/* EQ and NE cases aren't allowed here */
|
||||
case BTLessStrategyNumber:
|
||||
result = (cmpresult < 0);
|
||||
break;
|
||||
@@ -795,8 +795,8 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
|
||||
{
|
||||
/*
|
||||
* Tuple fails this qual. If it's a required qual for the current
|
||||
* scan direction, then we can conclude no further tuples will
|
||||
* pass, either. Note we have to look at the deciding column, not
|
||||
* scan direction, then we can conclude no further tuples will pass,
|
||||
* either. Note we have to look at the deciding column, not
|
||||
* necessarily the first or last column of the row condition.
|
||||
*/
|
||||
if ((subkey->sk_flags & SK_BT_REQFWD) &&
|
||||
@@ -822,7 +822,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
|
||||
* is sufficient for setting LP_DELETE hint bits.
|
||||
*
|
||||
* We match items by heap TID before assuming they are the right ones to
|
||||
* delete. We cope with cases where items have moved right due to insertions.
|
||||
* delete. We cope with cases where items have moved right due to insertions.
|
||||
* If an item has moved off the current page due to a split, we'll fail to
|
||||
* find it and do nothing (this is not an error case --- we assume the item
|
||||
* will eventually get marked in a future indexscan). Note that because we
|
||||
@@ -856,9 +856,9 @@ _bt_killitems(IndexScanDesc scan, bool haveLock)
|
||||
|
||||
for (i = 0; i < so->numKilled; i++)
|
||||
{
|
||||
int itemIndex = so->killedItems[i];
|
||||
BTScanPosItem *kitem = &so->currPos.items[itemIndex];
|
||||
OffsetNumber offnum = kitem->indexOffset;
|
||||
int itemIndex = so->killedItems[i];
|
||||
BTScanPosItem *kitem = &so->currPos.items[itemIndex];
|
||||
OffsetNumber offnum = kitem->indexOffset;
|
||||
|
||||
Assert(itemIndex >= so->currPos.firstItem &&
|
||||
itemIndex <= so->currPos.lastItem);
|
||||
@@ -881,9 +881,9 @@ _bt_killitems(IndexScanDesc scan, bool haveLock)
|
||||
}
|
||||
|
||||
/*
|
||||
* Since this can be redone later if needed, it's treated the same
|
||||
* as a commit-hint-bit status update for heap tuples: we mark the
|
||||
* buffer dirty but don't make a WAL log entry.
|
||||
* Since this can be redone later if needed, it's treated the same as a
|
||||
* commit-hint-bit status update for heap tuples: we mark the buffer dirty
|
||||
* but don't make a WAL log entry.
|
||||
*
|
||||
* Whenever we mark anything LP_DELETEd, we also set the page's
|
||||
* BTP_HAS_GARBAGE flag, which is likewise just a hint.
|
||||
@@ -898,8 +898,8 @@ _bt_killitems(IndexScanDesc scan, bool haveLock)
|
||||
LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
|
||||
|
||||
/*
|
||||
* Always reset the scan state, so we don't look for same items
|
||||
* on other pages.
|
||||
* Always reset the scan state, so we don't look for same items on other
|
||||
* pages.
|
||||
*/
|
||||
so->numKilled = 0;
|
||||
}
|
||||
@@ -908,8 +908,8 @@ _bt_killitems(IndexScanDesc scan, bool haveLock)
|
||||
/*
|
||||
* The following routines manage a shared-memory area in which we track
|
||||
* assignment of "vacuum cycle IDs" to currently-active btree vacuuming
|
||||
* operations. There is a single counter which increments each time we
|
||||
* start a vacuum to assign it a cycle ID. Since multiple vacuums could
|
||||
* operations. There is a single counter which increments each time we
|
||||
* start a vacuum to assign it a cycle ID. Since multiple vacuums could
|
||||
* be active concurrently, we have to track the cycle ID for each active
|
||||
* vacuum; this requires at most MaxBackends entries (usually far fewer).
|
||||
* We assume at most one vacuum can be active for a given index.
|
||||
@@ -987,7 +987,8 @@ _bt_start_vacuum(Relation rel)
|
||||
LWLockAcquire(BtreeVacuumLock, LW_EXCLUSIVE);
|
||||
|
||||
/* Assign the next cycle ID, being careful to avoid zero */
|
||||
do {
|
||||
do
|
||||
{
|
||||
result = ++(btvacinfo->cycle_ctr);
|
||||
} while (result == 0);
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.37 2006/08/07 16:57:56 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.38 2006/10/04 00:29:49 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -82,7 +82,7 @@ forget_matching_split(RelFileNode node, BlockNumber downlink, bool is_root)
|
||||
* in correct itemno sequence, but physically the opposite order from the
|
||||
* original, because we insert them in the opposite of itemno order. This
|
||||
* does not matter in any current btree code, but it's something to keep an
|
||||
* eye on. Is it worth changing just on general principles?
|
||||
* eye on. Is it worth changing just on general principles?
|
||||
*/
|
||||
static void
|
||||
_bt_restore_page(Page page, char *from, int len)
|
||||
@@ -155,7 +155,7 @@ btree_xlog_insert(bool isleaf, bool ismeta,
|
||||
char *datapos;
|
||||
int datalen;
|
||||
xl_btree_metadata md;
|
||||
BlockNumber downlink = 0;
|
||||
BlockNumber downlink = 0;
|
||||
|
||||
datapos = (char *) xlrec + SizeOfBtreeInsert;
|
||||
datalen = record->xl_len - SizeOfBtreeInsert;
|
||||
@@ -180,7 +180,7 @@ btree_xlog_insert(bool isleaf, bool ismeta,
|
||||
if (!(record->xl_info & XLR_BKP_BLOCK_1))
|
||||
{
|
||||
buffer = XLogReadBuffer(reln,
|
||||
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
|
||||
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
|
||||
false);
|
||||
if (BufferIsValid(buffer))
|
||||
{
|
||||
@@ -193,7 +193,7 @@ btree_xlog_insert(bool isleaf, bool ismeta,
|
||||
else
|
||||
{
|
||||
if (PageAddItem(page, (Item) datapos, datalen,
|
||||
ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
|
||||
ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
|
||||
LP_USED) == InvalidOffsetNumber)
|
||||
elog(PANIC, "btree_insert_redo: failed to add item");
|
||||
|
||||
@@ -225,7 +225,7 @@ btree_xlog_split(bool onleft, bool isroot,
|
||||
OffsetNumber targetoff;
|
||||
BlockNumber leftsib;
|
||||
BlockNumber rightsib;
|
||||
BlockNumber downlink = 0;
|
||||
BlockNumber downlink = 0;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
BTPageOpaque pageop;
|
||||
@@ -376,8 +376,8 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark the page as not containing any LP_DELETE items --- see comments
|
||||
* in _bt_delitems().
|
||||
* Mark the page as not containing any LP_DELETE items --- see comments in
|
||||
* _bt_delitems().
|
||||
*/
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
|
||||
@@ -543,7 +543,7 @@ btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record)
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
BTPageOpaque pageop;
|
||||
BlockNumber downlink = 0;
|
||||
BlockNumber downlink = 0;
|
||||
|
||||
reln = XLogOpenRelation(xlrec->node);
|
||||
buffer = XLogReadBuffer(reln, xlrec->rootblk, true);
|
||||
@@ -637,9 +637,9 @@ static void
|
||||
out_target(StringInfo buf, xl_btreetid *target)
|
||||
{
|
||||
appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
|
||||
target->node.spcNode, target->node.dbNode, target->node.relNode,
|
||||
ItemPointerGetBlockNumber(&(target->tid)),
|
||||
ItemPointerGetOffsetNumber(&(target->tid)));
|
||||
target->node.spcNode, target->node.dbNode, target->node.relNode,
|
||||
ItemPointerGetBlockNumber(&(target->tid)),
|
||||
ItemPointerGetOffsetNumber(&(target->tid)));
|
||||
}
|
||||
|
||||
void
|
||||
@@ -680,7 +680,7 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
appendStringInfo(buf, "split_l: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
appendStringInfo(buf, "; oth %u; rgh %u",
|
||||
xlrec->otherblk, xlrec->rightblk);
|
||||
xlrec->otherblk, xlrec->rightblk);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_SPLIT_R:
|
||||
@@ -690,7 +690,7 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
appendStringInfo(buf, "split_r: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
appendStringInfo(buf, "; oth %u; rgh %u",
|
||||
xlrec->otherblk, xlrec->rightblk);
|
||||
xlrec->otherblk, xlrec->rightblk);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_SPLIT_L_ROOT:
|
||||
@@ -700,7 +700,7 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
appendStringInfo(buf, "split_l_root: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
appendStringInfo(buf, "; oth %u; rgh %u",
|
||||
xlrec->otherblk, xlrec->rightblk);
|
||||
xlrec->otherblk, xlrec->rightblk);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_SPLIT_R_ROOT:
|
||||
@@ -710,7 +710,7 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
appendStringInfo(buf, "split_r_root: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
appendStringInfo(buf, "; oth %u; rgh %u",
|
||||
xlrec->otherblk, xlrec->rightblk);
|
||||
xlrec->otherblk, xlrec->rightblk);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_DELETE:
|
||||
@@ -718,8 +718,8 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
xl_btree_delete *xlrec = (xl_btree_delete *) rec;
|
||||
|
||||
appendStringInfo(buf, "delete: rel %u/%u/%u; blk %u",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode, xlrec->block);
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode, xlrec->block);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_DELETE_PAGE:
|
||||
@@ -730,7 +730,7 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
appendStringInfo(buf, "delete_page: ");
|
||||
out_target(buf, &(xlrec->target));
|
||||
appendStringInfo(buf, "; dead %u; left %u; right %u",
|
||||
xlrec->deadblk, xlrec->leftblk, xlrec->rightblk);
|
||||
xlrec->deadblk, xlrec->leftblk, xlrec->rightblk);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_NEWROOT:
|
||||
@@ -738,9 +738,9 @@ btree_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
xl_btree_newroot *xlrec = (xl_btree_newroot *) rec;
|
||||
|
||||
appendStringInfo(buf, "newroot: rel %u/%u/%u; root %u lev %u",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode,
|
||||
xlrec->rootblk, xlrec->level);
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode,
|
||||
xlrec->rootblk, xlrec->level);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.39 2006/07/13 16:49:12 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.40 2006/10/04 00:29:49 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -414,7 +414,7 @@ clog_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
void
|
||||
clog_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
{
|
||||
uint8 info = xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = xl_info & ~XLR_INFO_MASK;
|
||||
|
||||
if (info == CLOG_ZEROPAGE)
|
||||
{
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.20 2006/07/20 00:46:42 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.21 2006/10/04 00:29:49 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -1493,10 +1493,10 @@ CheckPointMultiXact(void)
|
||||
|
||||
/*
|
||||
* Truncate the SLRU files. This could be done at any time, but
|
||||
* checkpoint seems a reasonable place for it. There is one exception:
|
||||
* if we are called during xlog recovery, then shared->latest_page_number
|
||||
* isn't valid (because StartupMultiXact hasn't been called yet) and
|
||||
* so SimpleLruTruncate would get confused. It seems best not to risk
|
||||
* checkpoint seems a reasonable place for it. There is one exception: if
|
||||
* we are called during xlog recovery, then shared->latest_page_number
|
||||
* isn't valid (because StartupMultiXact hasn't been called yet) and so
|
||||
* SimpleLruTruncate would get confused. It seems best not to risk
|
||||
* removing any data during recovery anyway, so don't truncate.
|
||||
*/
|
||||
if (!InRecovery)
|
||||
@@ -1917,7 +1917,7 @@ multixact_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
int i;
|
||||
|
||||
appendStringInfo(buf, "create multixact %u offset %u:",
|
||||
xlrec->mid, xlrec->moff);
|
||||
xlrec->mid, xlrec->moff);
|
||||
for (i = 0; i < xlrec->nxids; i++)
|
||||
appendStringInfo(buf, " %u", xlrec->xids[i]);
|
||||
}
|
||||
|
||||
@@ -41,7 +41,7 @@
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.38 2006/07/14 14:52:17 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.39 2006/10/04 00:29:49 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -92,8 +92,8 @@
|
||||
|
||||
typedef struct SlruFlushData
|
||||
{
|
||||
int num_files; /* # files actually open */
|
||||
int fd[MAX_FLUSH_BUFFERS]; /* their FD's */
|
||||
int num_files; /* # files actually open */
|
||||
int fd[MAX_FLUSH_BUFFERS]; /* their FD's */
|
||||
int segno[MAX_FLUSH_BUFFERS]; /* their log seg#s */
|
||||
} SlruFlushData;
|
||||
|
||||
@@ -113,7 +113,7 @@ typedef struct SlruFlushData
|
||||
* page_lru_count entries to be "reset" to lower values than they should have,
|
||||
* in case a process is delayed while it executes this macro. With care in
|
||||
* SlruSelectLRUPage(), this does little harm, and in any case the absolute
|
||||
* worst possible consequence is a nonoptimal choice of page to evict. The
|
||||
* worst possible consequence is a nonoptimal choice of page to evict. The
|
||||
* gain from allowing concurrent reads of SLRU pages seems worth it.
|
||||
*/
|
||||
#define SlruRecentlyUsed(shared, slotno) \
|
||||
@@ -158,13 +158,13 @@ SimpleLruShmemSize(int nslots)
|
||||
|
||||
/* we assume nslots isn't so large as to risk overflow */
|
||||
sz = MAXALIGN(sizeof(SlruSharedData));
|
||||
sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
|
||||
sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */
|
||||
sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */
|
||||
sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
|
||||
sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */
|
||||
sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
|
||||
sz += MAXALIGN(nslots * sizeof(LWLockId)); /* buffer_locks[] */
|
||||
|
||||
sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
|
||||
sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */
|
||||
sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
|
||||
sz += MAXALIGN(nslots * sizeof(LWLockId)); /* buffer_locks[] */
|
||||
|
||||
return BUFFERALIGN(sz) + BLCKSZ * nslots;
|
||||
}
|
||||
|
||||
@@ -653,9 +653,9 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
|
||||
* Easiest way to deal with that is to accept references to
|
||||
* nonexistent files here and in SlruPhysicalReadPage.)
|
||||
*
|
||||
* Note: it is possible for more than one backend to be executing
|
||||
* this code simultaneously for different pages of the same file.
|
||||
* Hence, don't use O_EXCL or O_TRUNC or anything like that.
|
||||
* Note: it is possible for more than one backend to be executing this
|
||||
* code simultaneously for different pages of the same file. Hence,
|
||||
* don't use O_EXCL or O_TRUNC or anything like that.
|
||||
*/
|
||||
SlruFileName(ctl, path, segno);
|
||||
fd = BasicOpenFile(path, O_RDWR | O_CREAT | PG_BINARY,
|
||||
@@ -759,22 +759,22 @@ SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not access status of transaction %u", xid),
|
||||
errdetail("Could not seek in file \"%s\" to offset %u: %m.",
|
||||
path, offset)));
|
||||
errdetail("Could not seek in file \"%s\" to offset %u: %m.",
|
||||
path, offset)));
|
||||
break;
|
||||
case SLRU_READ_FAILED:
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not access status of transaction %u", xid),
|
||||
errdetail("Could not read from file \"%s\" at offset %u: %m.",
|
||||
path, offset)));
|
||||
errdetail("Could not read from file \"%s\" at offset %u: %m.",
|
||||
path, offset)));
|
||||
break;
|
||||
case SLRU_WRITE_FAILED:
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not access status of transaction %u", xid),
|
||||
errdetail("Could not write to file \"%s\" at offset %u: %m.",
|
||||
path, offset)));
|
||||
errdetail("Could not write to file \"%s\" at offset %u: %m.",
|
||||
path, offset)));
|
||||
break;
|
||||
case SLRU_FSYNC_FAILED:
|
||||
ereport(ERROR,
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.23 2006/10/03 21:21:35 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.24 2006/10/04 00:29:49 momjian Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Each global transaction is associated with a global transaction
|
||||
@@ -1250,8 +1250,8 @@ RemoveTwoPhaseFile(TransactionId xid, bool giveWarning)
|
||||
if (errno != ENOENT || giveWarning)
|
||||
ereport(WARNING,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not remove twophase state file \"%s\": %m",
|
||||
path)));
|
||||
errmsg("could not remove twophase state file \"%s\": %m",
|
||||
path)));
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
* Copyright (c) 2000-2006, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/varsup.c,v 1.74 2006/09/26 17:21:39 alvherre Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/varsup.c,v 1.75 2006/10/04 00:29:49 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -133,10 +133,10 @@ GetNewTransactionId(bool isSubXact)
|
||||
{
|
||||
/*
|
||||
* Use volatile pointer to prevent code rearrangement; other backends
|
||||
* could be examining my subxids info concurrently, and we don't
|
||||
* want them to see an invalid intermediate state, such as
|
||||
* incrementing nxids before filling the array entry. Note we are
|
||||
* assuming that TransactionId and int fetch/store are atomic.
|
||||
* could be examining my subxids info concurrently, and we don't want
|
||||
* them to see an invalid intermediate state, such as incrementing
|
||||
* nxids before filling the array entry. Note we are assuming that
|
||||
* TransactionId and int fetch/store are atomic.
|
||||
*/
|
||||
volatile PGPROC *myproc = MyProc;
|
||||
|
||||
@@ -144,7 +144,7 @@ GetNewTransactionId(bool isSubXact)
|
||||
myproc->xid = xid;
|
||||
else
|
||||
{
|
||||
int nxids = myproc->subxids.nxids;
|
||||
int nxids = myproc->subxids.nxids;
|
||||
|
||||
if (nxids < PGPROC_MAX_CACHED_SUBXIDS)
|
||||
{
|
||||
@@ -196,7 +196,7 @@ SetTransactionIdLimit(TransactionId oldest_datminxid,
|
||||
* The place where we actually get into deep trouble is halfway around
|
||||
* from the oldest existing XID. (This calculation is probably off by one
|
||||
* or two counts, because the special XIDs reduce the size of the loop a
|
||||
* little bit. But we throw in plenty of slop below, so it doesn't
|
||||
* little bit. But we throw in plenty of slop below, so it doesn't
|
||||
* matter.)
|
||||
*/
|
||||
xidWrapLimit = oldest_datminxid + (MaxTransactionId >> 1);
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.226 2006/08/27 19:11:46 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.227 2006/10/04 00:29:49 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -1376,12 +1376,12 @@ StartTransaction(void)
|
||||
|
||||
XactLockTableInsert(s->transactionId);
|
||||
|
||||
PG_TRACE1 (transaction__start, s->transactionId);
|
||||
PG_TRACE1(transaction__start, s->transactionId);
|
||||
|
||||
/*
|
||||
* set transaction_timestamp() (a/k/a now()). We want this to be the
|
||||
* same as the first command's statement_timestamp(), so don't do a
|
||||
* fresh GetCurrentTimestamp() call (which'd be expensive anyway).
|
||||
* set transaction_timestamp() (a/k/a now()). We want this to be the same
|
||||
* as the first command's statement_timestamp(), so don't do a fresh
|
||||
* GetCurrentTimestamp() call (which'd be expensive anyway).
|
||||
*/
|
||||
xactStartTimestamp = stmtStartTimestamp;
|
||||
|
||||
@@ -1521,7 +1521,7 @@ CommitTransaction(void)
|
||||
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
|
||||
MyProc->xid = InvalidTransactionId;
|
||||
MyProc->xmin = InvalidTransactionId;
|
||||
MyProc->inVacuum = false; /* must be cleared with xid/xmin */
|
||||
MyProc->inVacuum = false; /* must be cleared with xid/xmin */
|
||||
|
||||
/* Clear the subtransaction-XID cache too while holding the lock */
|
||||
MyProc->subxids.nxids = 0;
|
||||
@@ -1530,7 +1530,7 @@ CommitTransaction(void)
|
||||
LWLockRelease(ProcArrayLock);
|
||||
}
|
||||
|
||||
PG_TRACE1 (transaction__commit, s->transactionId);
|
||||
PG_TRACE1(transaction__commit, s->transactionId);
|
||||
|
||||
/*
|
||||
* This is all post-commit cleanup. Note that if an error is raised here,
|
||||
@@ -1921,7 +1921,7 @@ AbortTransaction(void)
|
||||
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
|
||||
MyProc->xid = InvalidTransactionId;
|
||||
MyProc->xmin = InvalidTransactionId;
|
||||
MyProc->inVacuum = false; /* must be cleared with xid/xmin */
|
||||
MyProc->inVacuum = false; /* must be cleared with xid/xmin */
|
||||
|
||||
/* Clear the subtransaction-XID cache too while holding the lock */
|
||||
MyProc->subxids.nxids = 0;
|
||||
@@ -1930,7 +1930,7 @@ AbortTransaction(void)
|
||||
LWLockRelease(ProcArrayLock);
|
||||
}
|
||||
|
||||
PG_TRACE1 (transaction__abort, s->transactionId);
|
||||
PG_TRACE1(transaction__abort, s->transactionId);
|
||||
|
||||
/*
|
||||
* Post-abort cleanup. See notes in CommitTransaction() concerning
|
||||
@@ -4206,8 +4206,8 @@ xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec)
|
||||
int i;
|
||||
|
||||
appendStringInfo(buf, "%04u-%02u-%02u %02u:%02u:%02u",
|
||||
tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
|
||||
tm->tm_hour, tm->tm_min, tm->tm_sec);
|
||||
tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
|
||||
tm->tm_hour, tm->tm_min, tm->tm_sec);
|
||||
if (xlrec->nrels > 0)
|
||||
{
|
||||
appendStringInfo(buf, "; rels:");
|
||||
@@ -4216,7 +4216,7 @@ xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec)
|
||||
RelFileNode rnode = xlrec->xnodes[i];
|
||||
|
||||
appendStringInfo(buf, " %u/%u/%u",
|
||||
rnode.spcNode, rnode.dbNode, rnode.relNode);
|
||||
rnode.spcNode, rnode.dbNode, rnode.relNode);
|
||||
}
|
||||
}
|
||||
if (xlrec->nsubxacts > 0)
|
||||
@@ -4237,8 +4237,8 @@ xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec)
|
||||
int i;
|
||||
|
||||
appendStringInfo(buf, "%04u-%02u-%02u %02u:%02u:%02u",
|
||||
tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
|
||||
tm->tm_hour, tm->tm_min, tm->tm_sec);
|
||||
tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
|
||||
tm->tm_hour, tm->tm_min, tm->tm_sec);
|
||||
if (xlrec->nrels > 0)
|
||||
{
|
||||
appendStringInfo(buf, "; rels:");
|
||||
@@ -4247,7 +4247,7 @@ xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec)
|
||||
RelFileNode rnode = xlrec->xnodes[i];
|
||||
|
||||
appendStringInfo(buf, " %u/%u/%u",
|
||||
rnode.spcNode, rnode.dbNode, rnode.relNode);
|
||||
rnode.spcNode, rnode.dbNode, rnode.relNode);
|
||||
}
|
||||
}
|
||||
if (xlrec->nsubxacts > 0)
|
||||
@@ -4264,7 +4264,7 @@ xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec)
|
||||
void
|
||||
xact_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
{
|
||||
uint8 info = xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = xl_info & ~XLR_INFO_MASK;
|
||||
|
||||
if (info == XLOG_XACT_COMMIT)
|
||||
{
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.249 2006/08/21 16:16:31 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.250 2006/10/04 00:29:49 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -349,9 +349,9 @@ typedef struct XLogCtlInsert
|
||||
*/
|
||||
typedef struct XLogCtlWrite
|
||||
{
|
||||
XLogwrtResult LogwrtResult; /* current value of LogwrtResult */
|
||||
int curridx; /* cache index of next block to write */
|
||||
time_t lastSegSwitchTime; /* time of last xlog segment switch */
|
||||
XLogwrtResult LogwrtResult; /* current value of LogwrtResult */
|
||||
int curridx; /* cache index of next block to write */
|
||||
time_t lastSegSwitchTime; /* time of last xlog segment switch */
|
||||
} XLogCtlWrite;
|
||||
|
||||
/*
|
||||
@@ -481,7 +481,7 @@ static bool InstallXLogFileSegment(uint32 *log, uint32 *seg, char *tmppath,
|
||||
bool use_lock);
|
||||
static int XLogFileOpen(uint32 log, uint32 seg);
|
||||
static int XLogFileRead(uint32 log, uint32 seg, int emode);
|
||||
static void XLogFileClose(void);
|
||||
static void XLogFileClose(void);
|
||||
static bool RestoreArchivedFile(char *path, const char *xlogfname,
|
||||
const char *recovername, off_t expectedSize);
|
||||
static int PreallocXlogFiles(XLogRecPtr endptr);
|
||||
@@ -506,7 +506,7 @@ static void issue_xlog_fsync(void);
|
||||
static void xlog_outrec(StringInfo buf, XLogRecord *record);
|
||||
#endif
|
||||
static bool read_backup_label(XLogRecPtr *checkPointLoc,
|
||||
XLogRecPtr *minRecoveryLoc);
|
||||
XLogRecPtr *minRecoveryLoc);
|
||||
static void rm_redo_error_callback(void *arg);
|
||||
|
||||
|
||||
@@ -697,9 +697,9 @@ begin:;
|
||||
/*
|
||||
* NOTE: We disallow len == 0 because it provides a useful bit of extra
|
||||
* error checking in ReadRecord. This means that all callers of
|
||||
* XLogInsert must supply at least some not-in-a-buffer data. However,
|
||||
* we make an exception for XLOG SWITCH records because we don't want
|
||||
* them to ever cross a segment boundary.
|
||||
* XLogInsert must supply at least some not-in-a-buffer data. However, we
|
||||
* make an exception for XLOG SWITCH records because we don't want them to
|
||||
* ever cross a segment boundary.
|
||||
*/
|
||||
if (len == 0 && !isLogSwitch)
|
||||
elog(PANIC, "invalid xlog record length %u", len);
|
||||
@@ -752,8 +752,8 @@ begin:;
|
||||
* checkpoint, so it's better to be slow in this case and fast otherwise.
|
||||
*
|
||||
* If we aren't doing full-page writes then RedoRecPtr doesn't actually
|
||||
* affect the contents of the XLOG record, so we'll update our local
|
||||
* copy but not force a recomputation.
|
||||
* affect the contents of the XLOG record, so we'll update our local copy
|
||||
* but not force a recomputation.
|
||||
*/
|
||||
if (!XLByteEQ(RedoRecPtr, Insert->RedoRecPtr))
|
||||
{
|
||||
@@ -782,10 +782,10 @@ begin:;
|
||||
}
|
||||
|
||||
/*
|
||||
* Also check to see if forcePageWrites was just turned on; if we
|
||||
* weren't already doing full-page writes then go back and recompute.
|
||||
* (If it was just turned off, we could recompute the record without
|
||||
* full pages, but we choose not to bother.)
|
||||
* Also check to see if forcePageWrites was just turned on; if we weren't
|
||||
* already doing full-page writes then go back and recompute. (If it was
|
||||
* just turned off, we could recompute the record without full pages, but
|
||||
* we choose not to bother.)
|
||||
*/
|
||||
if (Insert->forcePageWrites && !doPageWrites)
|
||||
{
|
||||
@@ -870,11 +870,11 @@ begin:;
|
||||
INSERT_RECPTR(RecPtr, Insert, curridx);
|
||||
|
||||
/*
|
||||
* If the record is an XLOG_SWITCH, and we are exactly at the start
|
||||
* of a segment, we need not insert it (and don't want to because
|
||||
* we'd like consecutive switch requests to be no-ops). Instead,
|
||||
* make sure everything is written and flushed through the end of
|
||||
* the prior segment, and return the prior segment's end address.
|
||||
* If the record is an XLOG_SWITCH, and we are exactly at the start of a
|
||||
* segment, we need not insert it (and don't want to because we'd like
|
||||
* consecutive switch requests to be no-ops). Instead, make sure
|
||||
* everything is written and flushed through the end of the prior segment,
|
||||
* and return the prior segment's end address.
|
||||
*/
|
||||
if (isLogSwitch &&
|
||||
(RecPtr.xrecoff % XLogSegSize) == SizeOfXLogLongPHD)
|
||||
@@ -926,7 +926,7 @@ begin:;
|
||||
#ifdef WAL_DEBUG
|
||||
if (XLOG_DEBUG)
|
||||
{
|
||||
StringInfoData buf;
|
||||
StringInfoData buf;
|
||||
|
||||
initStringInfo(&buf);
|
||||
appendStringInfo(&buf, "INSERT @ %X/%X: ",
|
||||
@@ -1019,8 +1019,8 @@ begin:;
|
||||
LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
|
||||
|
||||
/*
|
||||
* Flush through the end of the page containing XLOG_SWITCH,
|
||||
* and perform end-of-segment actions (eg, notifying archiver).
|
||||
* Flush through the end of the page containing XLOG_SWITCH, and
|
||||
* perform end-of-segment actions (eg, notifying archiver).
|
||||
*/
|
||||
WriteRqst = XLogCtl->xlblocks[curridx];
|
||||
FlushRqst.Write = WriteRqst;
|
||||
@@ -1667,8 +1667,8 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible, bool xlog_switch)
|
||||
* switch.
|
||||
*
|
||||
* This is also the right place to notify the Archiver that the
|
||||
* segment is ready to copy to archival storage, and to update
|
||||
* the timer for archive_timeout.
|
||||
* segment is ready to copy to archival storage, and to update the
|
||||
* timer for archive_timeout.
|
||||
*/
|
||||
if (finishing_seg || (xlog_switch && last_iteration))
|
||||
{
|
||||
@@ -2300,36 +2300,35 @@ XLogFileClose(void)
|
||||
Assert(openLogFile >= 0);
|
||||
|
||||
/*
|
||||
* posix_fadvise is problematic on many platforms: on older x86 Linux
|
||||
* it just dumps core, and there are reports of problems on PPC platforms
|
||||
* as well. The following is therefore disabled for the time being.
|
||||
* We could consider some kind of configure test to see if it's safe to
|
||||
* use, but since we lack hard evidence that there's any useful performance
|
||||
* gain to be had, spending time on that seems unprofitable for now.
|
||||
* posix_fadvise is problematic on many platforms: on older x86 Linux it
|
||||
* just dumps core, and there are reports of problems on PPC platforms as
|
||||
* well. The following is therefore disabled for the time being. We could
|
||||
* consider some kind of configure test to see if it's safe to use, but
|
||||
* since we lack hard evidence that there's any useful performance gain to
|
||||
* be had, spending time on that seems unprofitable for now.
|
||||
*/
|
||||
#ifdef NOT_USED
|
||||
|
||||
/*
|
||||
* WAL segment files will not be re-read in normal operation, so we advise
|
||||
* OS to release any cached pages. But do not do so if WAL archiving is
|
||||
* OS to release any cached pages. But do not do so if WAL archiving is
|
||||
* active, because archiver process could use the cache to read the WAL
|
||||
* segment.
|
||||
*
|
||||
* While O_DIRECT works for O_SYNC, posix_fadvise() works for fsync()
|
||||
* and O_SYNC, and some platforms only have posix_fadvise().
|
||||
* While O_DIRECT works for O_SYNC, posix_fadvise() works for fsync() and
|
||||
* O_SYNC, and some platforms only have posix_fadvise().
|
||||
*/
|
||||
#if defined(HAVE_DECL_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
|
||||
if (!XLogArchivingActive())
|
||||
posix_fadvise(openLogFile, 0, 0, POSIX_FADV_DONTNEED);
|
||||
#endif
|
||||
|
||||
#endif /* NOT_USED */
|
||||
#endif /* NOT_USED */
|
||||
|
||||
if (close(openLogFile))
|
||||
ereport(PANIC,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not close log file %u, segment %u: %m",
|
||||
openLogId, openLogSeg)));
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not close log file %u, segment %u: %m",
|
||||
openLogId, openLogSeg)));
|
||||
openLogFile = -1;
|
||||
}
|
||||
|
||||
@@ -2978,8 +2977,8 @@ ReadRecord(XLogRecPtr *RecPtr, int emode)
|
||||
got_record:;
|
||||
|
||||
/*
|
||||
* xl_len == 0 is bad data for everything except XLOG SWITCH, where
|
||||
* it is required.
|
||||
* xl_len == 0 is bad data for everything except XLOG SWITCH, where it is
|
||||
* required.
|
||||
*/
|
||||
if (record->xl_rmid == RM_XLOG_ID && record->xl_info == XLOG_SWITCH)
|
||||
{
|
||||
@@ -3168,6 +3167,7 @@ got_record:;
|
||||
EndRecPtr.xrecoff = RecPtr->xrecoff + MAXALIGN(total_len);
|
||||
ReadRecPtr = *RecPtr;
|
||||
memcpy(buffer, record, total_len);
|
||||
|
||||
/*
|
||||
* Special processing if it's an XLOG SWITCH record
|
||||
*/
|
||||
@@ -3177,10 +3177,11 @@ got_record:;
|
||||
EndRecPtr.xrecoff += XLogSegSize - 1;
|
||||
EndRecPtr.xrecoff -= EndRecPtr.xrecoff % XLogSegSize;
|
||||
nextRecord = NULL; /* definitely not on same page */
|
||||
|
||||
/*
|
||||
* Pretend that readBuf contains the last page of the segment.
|
||||
* This is just to avoid Assert failure in StartupXLOG if XLOG
|
||||
* ends with this segment.
|
||||
* Pretend that readBuf contains the last page of the segment. This is
|
||||
* just to avoid Assert failure in StartupXLOG if XLOG ends with this
|
||||
* segment.
|
||||
*/
|
||||
readOff = XLogSegSize - XLOG_BLCKSZ;
|
||||
}
|
||||
@@ -3661,7 +3662,7 @@ static void
|
||||
WriteControlFile(void)
|
||||
{
|
||||
int fd;
|
||||
char buffer[PG_CONTROL_SIZE]; /* need not be aligned */
|
||||
char buffer[PG_CONTROL_SIZE]; /* need not be aligned */
|
||||
char *localeptr;
|
||||
|
||||
/*
|
||||
@@ -3846,9 +3847,9 @@ ReadControlFile(void)
|
||||
if (ControlFile->xlog_blcksz != XLOG_BLCKSZ)
|
||||
ereport(FATAL,
|
||||
(errmsg("database files are incompatible with server"),
|
||||
errdetail("The database cluster was initialized with XLOG_BLCKSZ %d,"
|
||||
" but the server was compiled with XLOG_BLCKSZ %d.",
|
||||
ControlFile->xlog_blcksz, XLOG_BLCKSZ),
|
||||
errdetail("The database cluster was initialized with XLOG_BLCKSZ %d,"
|
||||
" but the server was compiled with XLOG_BLCKSZ %d.",
|
||||
ControlFile->xlog_blcksz, XLOG_BLCKSZ),
|
||||
errhint("It looks like you need to recompile or initdb.")));
|
||||
if (ControlFile->xlog_seg_size != XLOG_SEG_SIZE)
|
||||
ereport(FATAL,
|
||||
@@ -4027,7 +4028,7 @@ XLOGShmemInit(void)
|
||||
* Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
|
||||
* in additional info.)
|
||||
*/
|
||||
XLogCtl->XLogCacheByte = (Size) XLOG_BLCKSZ * XLOGbuffers;
|
||||
XLogCtl->XLogCacheByte = (Size) XLOG_BLCKSZ *XLOGbuffers;
|
||||
|
||||
XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
|
||||
XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
|
||||
@@ -4649,10 +4650,10 @@ StartupXLOG(void)
|
||||
" you will have to use the last backup for recovery.")));
|
||||
else if (ControlFile->state == DB_IN_ARCHIVE_RECOVERY)
|
||||
ereport(LOG,
|
||||
(errmsg("database system was interrupted while in recovery at log time %s",
|
||||
str_time(ControlFile->checkPointCopy.time)),
|
||||
errhint("If this has occurred more than once some data may be corrupted"
|
||||
" and you may need to choose an earlier recovery target.")));
|
||||
(errmsg("database system was interrupted while in recovery at log time %s",
|
||||
str_time(ControlFile->checkPointCopy.time)),
|
||||
errhint("If this has occurred more than once some data may be corrupted"
|
||||
" and you may need to choose an earlier recovery target.")));
|
||||
else if (ControlFile->state == DB_IN_PRODUCTION)
|
||||
ereport(LOG,
|
||||
(errmsg("database system was interrupted at %s",
|
||||
@@ -4812,10 +4813,10 @@ StartupXLOG(void)
|
||||
int rmid;
|
||||
|
||||
/*
|
||||
* Update pg_control to show that we are recovering and to show
|
||||
* the selected checkpoint as the place we are starting from.
|
||||
* We also mark pg_control with any minimum recovery stop point
|
||||
* obtained from a backup history file.
|
||||
* Update pg_control to show that we are recovering and to show the
|
||||
* selected checkpoint as the place we are starting from. We also mark
|
||||
* pg_control with any minimum recovery stop point obtained from a
|
||||
* backup history file.
|
||||
*/
|
||||
if (InArchiveRecovery)
|
||||
{
|
||||
@@ -4839,12 +4840,12 @@ StartupXLOG(void)
|
||||
UpdateControlFile();
|
||||
|
||||
/*
|
||||
* If there was a backup label file, it's done its job and the
|
||||
* info has now been propagated into pg_control. We must get rid of
|
||||
* the label file so that if we crash during recovery, we'll pick up
|
||||
* at the latest recovery restartpoint instead of going all the way
|
||||
* back to the backup start point. It seems prudent though to just
|
||||
* rename the file out of the way rather than delete it completely.
|
||||
* If there was a backup label file, it's done its job and the info
|
||||
* has now been propagated into pg_control. We must get rid of the
|
||||
* label file so that if we crash during recovery, we'll pick up at
|
||||
* the latest recovery restartpoint instead of going all the way back
|
||||
* to the backup start point. It seems prudent though to just rename
|
||||
* the file out of the way rather than delete it completely.
|
||||
*/
|
||||
if (haveBackupLabel)
|
||||
{
|
||||
@@ -4884,7 +4885,7 @@ StartupXLOG(void)
|
||||
{
|
||||
bool recoveryContinue = true;
|
||||
bool recoveryApply = true;
|
||||
ErrorContextCallback errcontext;
|
||||
ErrorContextCallback errcontext;
|
||||
|
||||
InRedo = true;
|
||||
ereport(LOG,
|
||||
@@ -4899,17 +4900,17 @@ StartupXLOG(void)
|
||||
#ifdef WAL_DEBUG
|
||||
if (XLOG_DEBUG)
|
||||
{
|
||||
StringInfoData buf;
|
||||
StringInfoData buf;
|
||||
|
||||
initStringInfo(&buf);
|
||||
appendStringInfo(&buf, "REDO @ %X/%X; LSN %X/%X: ",
|
||||
ReadRecPtr.xlogid, ReadRecPtr.xrecoff,
|
||||
EndRecPtr.xlogid, EndRecPtr.xrecoff);
|
||||
ReadRecPtr.xlogid, ReadRecPtr.xrecoff,
|
||||
EndRecPtr.xlogid, EndRecPtr.xrecoff);
|
||||
xlog_outrec(&buf, record);
|
||||
appendStringInfo(&buf, " - ");
|
||||
RmgrTable[record->xl_rmid].rm_desc(&buf,
|
||||
record->xl_info,
|
||||
XLogRecGetData(record));
|
||||
XLogRecGetData(record));
|
||||
elog(LOG, "%s", buf.data);
|
||||
pfree(buf.data);
|
||||
}
|
||||
@@ -5383,9 +5384,9 @@ GetRecentNextXid(void)
|
||||
void
|
||||
GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch)
|
||||
{
|
||||
uint32 ckptXidEpoch;
|
||||
TransactionId ckptXid;
|
||||
TransactionId nextXid;
|
||||
uint32 ckptXidEpoch;
|
||||
TransactionId ckptXid;
|
||||
TransactionId nextXid;
|
||||
|
||||
/* Must read checkpoint info first, else have race condition */
|
||||
{
|
||||
@@ -5718,7 +5719,7 @@ CheckPointGuts(XLogRecPtr checkPointRedo)
|
||||
CheckPointCLOG();
|
||||
CheckPointSUBTRANS();
|
||||
CheckPointMultiXact();
|
||||
FlushBufferPool(); /* performs all required fsyncs */
|
||||
FlushBufferPool(); /* performs all required fsyncs */
|
||||
/* We deliberately delay 2PC checkpointing as long as possible */
|
||||
CheckPointTwoPhase(checkPointRedo);
|
||||
}
|
||||
@@ -5735,12 +5736,12 @@ CheckPointGuts(XLogRecPtr checkPointRedo)
|
||||
static void
|
||||
RecoveryRestartPoint(const CheckPoint *checkPoint)
|
||||
{
|
||||
int elapsed_secs;
|
||||
int rmid;
|
||||
int elapsed_secs;
|
||||
int rmid;
|
||||
|
||||
/*
|
||||
* Do nothing if the elapsed time since the last restartpoint is less
|
||||
* than half of checkpoint_timeout. (We use a value less than
|
||||
* Do nothing if the elapsed time since the last restartpoint is less than
|
||||
* half of checkpoint_timeout. (We use a value less than
|
||||
* checkpoint_timeout so that variations in the timing of checkpoints on
|
||||
* the master, or speed of transmission of WAL segments to a slave, won't
|
||||
* make the slave skip a restartpoint once it's synced with the master.)
|
||||
@@ -5770,9 +5771,9 @@ RecoveryRestartPoint(const CheckPoint *checkPoint)
|
||||
CheckPointGuts(checkPoint->redo);
|
||||
|
||||
/*
|
||||
* Update pg_control so that any subsequent crash will restart from
|
||||
* this checkpoint. Note: ReadRecPtr gives the XLOG address of the
|
||||
* checkpoint record itself.
|
||||
* Update pg_control so that any subsequent crash will restart from this
|
||||
* checkpoint. Note: ReadRecPtr gives the XLOG address of the checkpoint
|
||||
* record itself.
|
||||
*/
|
||||
ControlFile->prevCheckPoint = ControlFile->checkPoint;
|
||||
ControlFile->checkPoint = ReadRecPtr;
|
||||
@@ -5926,7 +5927,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
void
|
||||
xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
{
|
||||
uint8 info = xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = xl_info & ~XLR_INFO_MASK;
|
||||
|
||||
if (info == XLOG_CHECKPOINT_SHUTDOWN ||
|
||||
info == XLOG_CHECKPOINT_ONLINE)
|
||||
@@ -5934,15 +5935,15 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
|
||||
CheckPoint *checkpoint = (CheckPoint *) rec;
|
||||
|
||||
appendStringInfo(buf, "checkpoint: redo %X/%X; undo %X/%X; "
|
||||
"tli %u; xid %u/%u; oid %u; multi %u; offset %u; %s",
|
||||
checkpoint->redo.xlogid, checkpoint->redo.xrecoff,
|
||||
checkpoint->undo.xlogid, checkpoint->undo.xrecoff,
|
||||
checkpoint->ThisTimeLineID,
|
||||
checkpoint->nextXidEpoch, checkpoint->nextXid,
|
||||
checkpoint->nextOid,
|
||||
checkpoint->nextMulti,
|
||||
checkpoint->nextMultiOffset,
|
||||
(info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online");
|
||||
"tli %u; xid %u/%u; oid %u; multi %u; offset %u; %s",
|
||||
checkpoint->redo.xlogid, checkpoint->redo.xrecoff,
|
||||
checkpoint->undo.xlogid, checkpoint->undo.xrecoff,
|
||||
checkpoint->ThisTimeLineID,
|
||||
checkpoint->nextXidEpoch, checkpoint->nextXid,
|
||||
checkpoint->nextOid,
|
||||
checkpoint->nextMulti,
|
||||
checkpoint->nextMultiOffset,
|
||||
(info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online");
|
||||
}
|
||||
else if (info == XLOG_NEXTOID)
|
||||
{
|
||||
@@ -5973,7 +5974,7 @@ xlog_outrec(StringInfo buf, XLogRecord *record)
|
||||
for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
|
||||
{
|
||||
if (record->xl_info & XLR_SET_BKP_BLOCK(i))
|
||||
appendStringInfo(buf, "; bkpb%d", i+1);
|
||||
appendStringInfo(buf, "; bkpb%d", i + 1);
|
||||
}
|
||||
|
||||
appendStringInfo(buf, ": %s", RmgrTable[record->xl_rmid].rm_name);
|
||||
@@ -6142,18 +6143,18 @@ pg_start_backup(PG_FUNCTION_ARGS)
|
||||
* Mark backup active in shared memory. We must do full-page WAL writes
|
||||
* during an on-line backup even if not doing so at other times, because
|
||||
* it's quite possible for the backup dump to obtain a "torn" (partially
|
||||
* written) copy of a database page if it reads the page concurrently
|
||||
* with our write to the same page. This can be fixed as long as the
|
||||
* first write to the page in the WAL sequence is a full-page write.
|
||||
* Hence, we turn on forcePageWrites and then force a CHECKPOINT, to
|
||||
* ensure there are no dirty pages in shared memory that might get
|
||||
* dumped while the backup is in progress without having a corresponding
|
||||
* WAL record. (Once the backup is complete, we need not force full-page
|
||||
* writes anymore, since we expect that any pages not modified during
|
||||
* the backup interval must have been correctly captured by the backup.)
|
||||
* written) copy of a database page if it reads the page concurrently with
|
||||
* our write to the same page. This can be fixed as long as the first
|
||||
* write to the page in the WAL sequence is a full-page write. Hence, we
|
||||
* turn on forcePageWrites and then force a CHECKPOINT, to ensure there
|
||||
* are no dirty pages in shared memory that might get dumped while the
|
||||
* backup is in progress without having a corresponding WAL record. (Once
|
||||
* the backup is complete, we need not force full-page writes anymore,
|
||||
* since we expect that any pages not modified during the backup interval
|
||||
* must have been correctly captured by the backup.)
|
||||
*
|
||||
* We must hold WALInsertLock to change the value of forcePageWrites,
|
||||
* to ensure adequate interlocking against XLogInsert().
|
||||
* We must hold WALInsertLock to change the value of forcePageWrites, to
|
||||
* ensure adequate interlocking against XLogInsert().
|
||||
*/
|
||||
LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
|
||||
if (XLogCtl->Insert.forcePageWrites)
|
||||
@@ -6171,7 +6172,7 @@ pg_start_backup(PG_FUNCTION_ARGS)
|
||||
PG_TRY();
|
||||
{
|
||||
/*
|
||||
* Force a CHECKPOINT. Aside from being necessary to prevent torn
|
||||
* Force a CHECKPOINT. Aside from being necessary to prevent torn
|
||||
* page problems, this guarantees that two successive backup runs will
|
||||
* have different checkpoint positions and hence different history
|
||||
* file names, even if nothing happened in between.
|
||||
@@ -6303,10 +6304,9 @@ pg_stop_backup(PG_FUNCTION_ARGS)
|
||||
LWLockRelease(WALInsertLock);
|
||||
|
||||
/*
|
||||
* Force a switch to a new xlog segment file, so that the backup
|
||||
* is valid as soon as archiver moves out the current segment file.
|
||||
* We'll report the end address of the XLOG SWITCH record as the backup
|
||||
* stopping point.
|
||||
* Force a switch to a new xlog segment file, so that the backup is valid
|
||||
* as soon as archiver moves out the current segment file. We'll report
|
||||
* the end address of the XLOG SWITCH record as the backup stopping point.
|
||||
*/
|
||||
stoppoint = RequestXLogSwitch();
|
||||
|
||||
@@ -6392,9 +6392,9 @@ pg_stop_backup(PG_FUNCTION_ARGS)
|
||||
BACKUP_LABEL_FILE)));
|
||||
|
||||
/*
|
||||
* Clean out any no-longer-needed history files. As a side effect,
|
||||
* this will post a .ready file for the newly created history file,
|
||||
* notifying the archiver that history file may be archived immediately.
|
||||
* Clean out any no-longer-needed history files. As a side effect, this
|
||||
* will post a .ready file for the newly created history file, notifying
|
||||
* the archiver that history file may be archived immediately.
|
||||
*/
|
||||
CleanupBackupHistory();
|
||||
|
||||
@@ -6415,7 +6415,7 @@ Datum
|
||||
pg_switch_xlog(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *result;
|
||||
XLogRecPtr switchpoint;
|
||||
XLogRecPtr switchpoint;
|
||||
char location[MAXFNAMELEN];
|
||||
|
||||
if (!superuser())
|
||||
@@ -6514,17 +6514,17 @@ pg_xlogfile_name_offset(PG_FUNCTION_ARGS)
|
||||
uint32 xrecoff;
|
||||
XLogRecPtr locationpoint;
|
||||
char xlogfilename[MAXFNAMELEN];
|
||||
Datum values[2];
|
||||
bool isnull[2];
|
||||
TupleDesc resultTupleDesc;
|
||||
HeapTuple resultHeapTuple;
|
||||
Datum result;
|
||||
Datum values[2];
|
||||
bool isnull[2];
|
||||
TupleDesc resultTupleDesc;
|
||||
HeapTuple resultHeapTuple;
|
||||
Datum result;
|
||||
|
||||
/*
|
||||
* Read input and parse
|
||||
*/
|
||||
locationstr = DatumGetCString(DirectFunctionCall1(textout,
|
||||
PointerGetDatum(location)));
|
||||
PointerGetDatum(location)));
|
||||
|
||||
if (sscanf(locationstr, "%X/%X", &uxlogid, &uxrecoff) != 2)
|
||||
ereport(ERROR,
|
||||
@@ -6536,8 +6536,8 @@ pg_xlogfile_name_offset(PG_FUNCTION_ARGS)
|
||||
locationpoint.xrecoff = uxrecoff;
|
||||
|
||||
/*
|
||||
* Construct a tuple descriptor for the result row. This must match
|
||||
* this function's pg_proc entry!
|
||||
* Construct a tuple descriptor for the result row. This must match this
|
||||
* function's pg_proc entry!
|
||||
*/
|
||||
resultTupleDesc = CreateTemplateTupleDesc(2, false);
|
||||
TupleDescInitEntry(resultTupleDesc, (AttrNumber) 1, "file_name",
|
||||
@@ -6593,7 +6593,7 @@ pg_xlogfile_name(PG_FUNCTION_ARGS)
|
||||
char xlogfilename[MAXFNAMELEN];
|
||||
|
||||
locationstr = DatumGetCString(DirectFunctionCall1(textout,
|
||||
PointerGetDatum(location)));
|
||||
PointerGetDatum(location)));
|
||||
|
||||
if (sscanf(locationstr, "%X/%X", &uxlogid, &uxrecoff) != 2)
|
||||
ereport(ERROR,
|
||||
@@ -6608,7 +6608,7 @@ pg_xlogfile_name(PG_FUNCTION_ARGS)
|
||||
XLogFileName(xlogfilename, ThisTimeLineID, xlogid, xlogseg);
|
||||
|
||||
result = DatumGetTextP(DirectFunctionCall1(textin,
|
||||
CStringGetDatum(xlogfilename)));
|
||||
CStringGetDatum(xlogfilename)));
|
||||
PG_RETURN_TEXT_P(result);
|
||||
}
|
||||
|
||||
@@ -6734,8 +6734,8 @@ read_backup_label(XLogRecPtr *checkPointLoc, XLogRecPtr *minRecoveryLoc)
|
||||
static void
|
||||
rm_redo_error_callback(void *arg)
|
||||
{
|
||||
XLogRecord *record = (XLogRecord *) arg;
|
||||
StringInfoData buf;
|
||||
XLogRecord *record = (XLogRecord *) arg;
|
||||
StringInfoData buf;
|
||||
|
||||
initStringInfo(&buf);
|
||||
RmgrTable[record->xl_rmid].rm_desc(&buf,
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.47 2006/07/14 14:52:17 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.48 2006/10/04 00:29:49 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -172,8 +172,8 @@ XLogCheckInvalidPages(void)
|
||||
hash_seq_init(&status, invalid_page_tab);
|
||||
|
||||
/*
|
||||
* Our strategy is to emit WARNING messages for all remaining entries
|
||||
* and only PANIC after we've dumped all the available info.
|
||||
* Our strategy is to emit WARNING messages for all remaining entries and
|
||||
* only PANIC after we've dumped all the available info.
|
||||
*/
|
||||
while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
|
||||
{
|
||||
@@ -255,7 +255,7 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
|
||||
if (!init)
|
||||
{
|
||||
/* check that page has been initialized */
|
||||
Page page = (Page) BufferGetPage(buffer);
|
||||
Page page = (Page) BufferGetPage(buffer);
|
||||
|
||||
if (PageIsNew((PageHeader) page))
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user